Index: projects/binutils-2.17/bin/sh/arith_lex.l
===================================================================
--- projects/binutils-2.17/bin/sh/arith_lex.l	(revision 215829)
+++ projects/binutils-2.17/bin/sh/arith_lex.l	(revision 215830)
@@ -1,135 +1,143 @@
 %{
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)arith_lex.l	8.3 (Berkeley) 5/4/95";
 #endif
 #endif /* not lint */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <string.h>
 
 #include "arith.h"
 #include "shell.h"
 #include "y.tab.h"
 #include "error.h"
 #include "memalloc.h"
 #include "var.h"
 
 int yylex(void);
 
 /*
  * Feed the scanner from the in-memory expression string: copy up to
  * "max" bytes from arith_buf into buf, advance arith_buf past what was
  * copied, and report YY_NULL once the terminating NUL is reached.
  * The expansion ends without a semicolon so that an invocation written
  * as "YY_INPUT(...);" remains exactly one statement.
  */
 #undef YY_INPUT
 #define YY_INPUT(buf,result,max) \
-	result = (*buf = *arith_buf++) ? 1 : YY_NULL;
+	do { \
+		result = strnlen(arith_buf, max); \
+		if (result == 0) \
+			result = YY_NULL; \
+		else { \
+			memcpy(buf, arith_buf, result); \
+			arith_buf += result; \
+		} \
+	} while (0)
 #define YY_NO_UNPUT
 #define YY_NO_INPUT
 %}
 
 %%
 [ \t\n]	{ ; }
 
 0x[a-fA-F0-9]+	{
 			/* Hexadecimal constant: converted with base 16. */
 			yylval.l_value = strtoarith_t(yytext, NULL, 16);
 			return ARITH_NUM;
 		}
 
 0[0-7]+		{
 			/* Octal constant (leading 0): converted with base 8. */
 			yylval.l_value = strtoarith_t(yytext, NULL, 8);
 			return ARITH_NUM;
 		}
 
 [0-9]+		{
 			/* Any other digit string: converted with base 10. */
 			yylval.l_value = strtoarith_t(yytext, NULL, 10);
 			return ARITH_NUM;
 		}
 
 [A-Za-z][A-Za-z0-9_]*	{
 			/*
 			 * Variable name.  If the variable doesn't exist yet
 			 * it is first set to "0"; the parser then receives a
 			 * copy of the name allocated with stalloc().
 			 */
 			char *temp;
 			if (lookupvar(yytext) == NULL)
 				setvarsafe(yytext, "0", 0);
 			temp = stalloc(strlen(yytext) + 1);
 			yylval.s_value = strcpy(temp, yytext);
 
 			return ARITH_VAR;
 		}
 
 "("		{	return ARITH_LPAREN;	}
 ")"		{	return ARITH_RPAREN;	}
 "||"		{	return ARITH_OR;	}
 "&&"		{	return ARITH_AND;	}
 "|"		{	return ARITH_BOR;	}
 "^"		{	return ARITH_BXOR;	}
 "&"		{	return ARITH_BAND;	}
 "=="		{	return ARITH_EQ;	}
 "!="		{	return ARITH_NE;	}
 ">"		{	return ARITH_GT;	}
 ">="		{	return ARITH_GE;	}
 "<"		{	return ARITH_LT;	}
 "<="		{	return ARITH_LE;	}
 "<<"		{	return ARITH_LSHIFT;	}
 ">>"		{	return ARITH_RSHIFT;	}
 "*"		{	return ARITH_MUL;	}
 "/"		{	return ARITH_DIV;	}
 "%"		{	return ARITH_REM;	}
 "+"		{	return ARITH_ADD;	}
 "-"		{	return ARITH_SUB;	}
 "~"		{	return ARITH_BNOT;	}
 "!"		{	return ARITH_NOT;	}
 "="		{	return ARITH_ASSIGN;	}
 "+="		{	return ARITH_ADDASSIGN;	}
 "-="		{	return ARITH_SUBASSIGN;	}
 "*="		{	return ARITH_MULASSIGN;	}
 "/="		{	return ARITH_DIVASSIGN;	}
 "%="		{	return ARITH_REMASSIGN;	}
 ">>="		{	return ARITH_RSHASSIGN;	}
 "<<="		{	return ARITH_LSHASSIGN;	}
 "&="		{	return ARITH_BANDASSIGN; }
 "^="		{	return ARITH_BXORASSIGN; }
 "|="		{	return ARITH_BORASSIGN;	}
 .		{
 			/*
 			 * Any character not matched above is a syntax error;
 			 * the message quotes arith_startbuf rather than the
 			 * offending character.
 			 */
 			error("arith: syntax error: \"%s\"\n", arith_startbuf);
 		}
 %%
 
 /*
  * Reset the scanner's input state by invoking flex's YY_NEW_FILE.
  * NOTE(review): presumably called before each new expression is
  * scanned -- confirm against the callers.
  */
 void
 arith_lex_reset(void)
 {
 	YY_NEW_FILE;
 }
Index: projects/binutils-2.17/bin/sh/cd.c
===================================================================
--- projects/binutils-2.17/bin/sh/cd.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/cd.c	(revision 215830)
@@ -1,415 +1,414 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)cd.c	8.2 (Berkeley) 5/4/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
 #include <limits.h>
 
 /*
  * The cd and pwd commands.
  */
 
 #include "shell.h"
 #include "var.h"
 #include "nodes.h"	/* for jobs.h */
 #include "jobs.h"
 #include "options.h"
 #include "output.h"
 #include "memalloc.h"
 #include "error.h"
 #include "exec.h"
 #include "redir.h"
 #include "mystring.h"
 #include "show.h"
 #include "cd.h"
 
 static int cdlogical(char *);
 static int cdphysical(char *);
 static int docd(char *, int, int);
 static char *getcomponent(void);
 static char *findcwd(char *);
 static void updatepwd(char *);
 static char *getpwd(void);
 static char *getpwd2(void);
 
 static char *curdir = NULL;	/* current working directory */
 static char *prevdir;		/* previous working directory */
 static char *cdcomppath;	/* rest of the path still to be split by getcomponent() */
 
 /*
  * The cd builtin.
  *
  * -L and -P select logical or physical mode; the default comes from
  * Pflag.  Without an operand the value of HOME is used, an empty operand
  * means ".", and "-" means prevdir (or curdir when there is no prevdir)
  * and asks for the new directory to be printed.  Unless the operand
  * starts with '/', each candidate produced by padvance() from CDPATH is
  * tried in turn.  Returns 0 once docd() succeeds; otherwise the failure
  * is reported through error().
  */
 int
 cdcmd(int argc, char **argv)
 {
 	const char *target;
 	const char *search;
 	char *cand;
 	struct stat sb;
 	int opt, phys, announce = 0;
 
 	optreset = 1; optind = 1; opterr = 0; /* initialize getopt */
 	phys = Pflag;
 	while ((opt = getopt(argc, argv, "LP")) != -1) {
 		if (opt == 'L')
 			phys = 0;
 		else if (opt == 'P')
 			phys = 1;
 		else
 			error("unknown option: -%c", optopt);
 	}
 	argc -= optind;
 	argv += optind;
 
 	if (argc > 1)
 		error("too many arguments");
 
 	target = *argv;
 	if (target == NULL) {
 		target = bltinlookup("HOME", 1);
 		if (target == NULL)
 			error("HOME not set");
 	}
 	if (*target == '\0')
 		target = ".";
 	if (strcmp(target, "-") == 0) {
 		target = prevdir != NULL ? prevdir : curdir;
 		if (target != NULL)
 			announce = 1;
 		else
 			target = ".";
 	}
 
 	/* CDPATH is only consulted for operands that are not absolute. */
 	search = (*target == '/') ? NULL : bltinlookup("CDPATH", 1);
 	if (search == NULL)
 		search = nullstr;
 
 	while ((cand = padvance(&search, target)) != NULL) {
 		if (stat(cand, &sb) < 0 || !S_ISDIR(sb.st_mode))
 			continue;
 		if (!announce) {
 			/*
 			 * XXX - rethink
 			 * Print when the candidate (minus a leading "./")
 			 * differs from what the user typed.
 			 */
 			const char *shown = cand;
 
 			if (cand[0] == '.' && cand[1] == '/' && cand[2] != '\0')
 				shown = cand + 2;
 			announce = strcmp(shown, target);
 		}
 		if (docd(cand, announce, phys) >= 0)
 			return 0;
 	}
 	error("can't cd to %s", target);
 	/*NOTREACHED*/
 	return 0;
 }
 
 
 /*
  * Perform the directory change for cd.  A logical change is attempted
  * first unless "phys" is set; if it fails, a physical change is tried.
  * When "print" is nonzero, the shell is interactive (iflag) and curdir
  * is known, the new directory name is written out.  Returns 0 on
  * success and -1 if the directory could not be changed.
  */
 static int
 docd(char *dest, int print, int phys)
 {
 	int changed;
 
 	TRACE(("docd(\"%s\", %d, %d) called\n", dest, print, phys));
 
 	if (phys)
 		changed = cdphysical(dest) >= 0;
 	else if (cdlogical(dest) >= 0)
 		changed = 1;
 	else
 		changed = cdphysical(dest) >= 0;	/* logical failed: fall back */
 	if (!changed)
 		return (-1);
 
 	if (print && iflag && curdir)
 		out1fmt("%s\n", curdir);
 
 	return 0;
 }
 
 /*
  * Logical directory change.  The components of dest are appended one at
  * a time to a stack string and, except right after a ".." component,
  * the path built so far is lstat()ed.  The directory to change to is
  * then computed by findcwd(), which is given NULL instead of dest when
  * an lstat() failed.  Returns 0 after a successful chdir() and
  * updatepwd(), or -1 if findcwd() or chdir() fails.
  */
 static int
 cdlogical(char *dest)
 {
 	char *p;
 	char *q;
 	char *component;
 	struct stat statb;
 	int first;
 	int badstat;
 
 	/*
 	 *  Check each component of the path. If we find a symlink or
 	 *  something we can't stat, clear curdir to force a getcwd()
 	 *  next time we get the value of the current directory.
 	 */
 	badstat = 0;
 	/* getcomponent() splits its input in place, so work on a copy. */
 	cdcomppath = stalloc(strlen(dest) + 1);
 	scopy(dest, cdcomppath);
 	STARTSTACKSTR(p);
 	if (*dest == '/') {
 		STPUTC('/', p);
 		cdcomppath++;
 	}
 	first = 1;
 	while ((q = getcomponent()) != NULL) {
 		/* Empty components and "." contribute nothing. */
 		if (q[0] == '\0' || (q[0] == '.' && q[1] == '\0'))
 			continue;
 		if (! first)
 			STPUTC('/', p);
 		first = 0;
 		component = q;
-		while (*q)
-			STPUTC(*q++, p);
+		STPUTS(q, p);
 		if (equal(component, ".."))
 			continue;
 		STACKSTRNUL(p);
 		if (lstat(stackblock(), &statb) < 0) {
 			badstat = 1;
 			break;
 		}
 	}
 
 	INTOFF;
 	if ((p = findcwd(badstat ? NULL : dest)) == NULL || chdir(p) < 0) {
 		INTON;
 		return (-1);
 	}
 	updatepwd(p);
 	INTON;
 	return (0);
 }
 
 /*
  * Physical directory change: chdir() straight to dest, then ask
  * findcwd(NULL) for the name of the directory we ended up in and hand
  * it to updatepwd().  Returns -1 when chdir() fails and 0 on success.
  * The whole sequence runs between INTOFF and INTON.
  */
 static int
 cdphysical(char *dest)
 {
 	char *p;
 
 	INTOFF;
-	if (chdir(dest) < 0 || (p = findcwd(NULL)) == NULL) {
+	if (chdir(dest) < 0) {
 		INTON;
 		return (-1);
 	}
+	p = findcwd(NULL);
+	if (p == NULL)
+		out2fmt_flush("cd: warning: failed to get name of current directory\n");
 	updatepwd(p);
 	INTON;
 	return (0);
 }
 
 /*
  * Split off and return the next '/'-separated component of the path
  * held in cdcomppath.  The separator is overwritten with a NUL, so the
  * string cdcomppath points to is modified.  cdcomppath is advanced past
  * the separator, or set to NULL when the last component has been
  * returned; a NULL cdcomppath on entry yields NULL.
  */
 static char *
 getcomponent(void)
 {
 	char *start = cdcomppath;
 	char *end;
 
 	if (start == NULL)
 		return NULL;
 	for (end = start; *end != '/' && *end != '\0'; end++)
 		continue;
 	if (*end == '/') {
 		*end = '\0';
 		cdcomppath = end + 1;
 	} else {
 		cdcomppath = NULL;
 	}
 	return start;
 }
 
 
 /*
  * Work out the name of the directory a cd to "dir" leads to.
  *
  * With dir == NULL, or when curdir is unknown, the answer comes from
  * getpwd2().  Otherwise the name is assembled textually in a stack
  * string: a relative dir starts from curdir (without a trailing '/'),
  * then each component of dir is applied -- ".." removes the last
  * component already assembled, empty components and "." are skipped,
  * and anything else is appended after a '/'.  An empty result becomes
  * "/".  Returns the assembled string via stackblock().
  */
 static char *
 findcwd(char *dir)
 {
 	char *new;
 	char *p;
 
 	/*
 	 * If our argument is NULL, we don't know the current directory
 	 * any more because we traversed a symbolic link or something
 	 * we couldn't stat().
 	 */
 	if (dir == NULL || curdir == NULL)
 		return getpwd2();
 	/* getcomponent() splits its input in place, so work on a copy. */
 	cdcomppath = stalloc(strlen(dir) + 1);
 	scopy(dir, cdcomppath);
 	STARTSTACKSTR(new);
 	if (*dir != '/') {
-		p = curdir;
-		while (*p)
-			STPUTC(*p++, new);
-		if (p[-1] == '/')
+		STPUTS(curdir, new);
+		if (STTOPC(new) == '/')
 			STUNPUTC(new);
 	}
 	while ((p = getcomponent()) != NULL) {
 		if (equal(p, "..")) {
 			/* Back up to, and drop, the most recent '/'. */
 			while (new > stackblock() && (STUNPUTC(new), *new) != '/');
 		} else if (*p != '\0' && ! equal(p, ".")) {
 			STPUTC('/', new);
-			while (*p)
-				STPUTC(*p++, new);
+			STPUTS(p, new);
 		}
 	}
 	if (new == stackblock())
 		STPUTC('/', new);
 	STACKSTRNUL(new);
 	return stackblock();
 }
 
 /*
  * Update curdir (the name of the current directory) in response to a
  * cd command.  We also call hashcd to let the routines in exec.c know
  * that the current directory has changed.
  *
  * The old prevdir is freed, the old curdir becomes prevdir, and both
  * PWD and OLDPWD are set (with VEXPORT) from the new values.
  */
 static void
 updatepwd(char *dir)
 {
 	hashcd();				/* update command hash table */
 
 	if (prevdir)
 		ckfree(prevdir);
 	prevdir = curdir;
-	curdir = savestr(dir);
+	curdir = dir ? savestr(dir) : NULL;
 	setvar("PWD", curdir, VEXPORT);
 	setvar("OLDPWD", prevdir, VEXPORT);
 }
 
 /*
  * The pwd builtin.  -L and -P select logical or physical mode; the
  * default comes from Pflag.  In logical mode the cached curdir is
  * printed when getpwd() can supply one; otherwise the name is obtained
  * from getpwd2(), and a failure there is reported through error().
  * Operands are rejected.  Returns 0.
  */
 int
 pwdcmd(int argc, char **argv)
 {
 	char *dir;
 	int opt, physical;
 
 	optreset = 1; optind = 1; opterr = 0; /* initialize getopt */
 	physical = Pflag;
 	while ((opt = getopt(argc, argv, "LP")) != -1) {
 		if (opt == 'L')
 			physical = 0;
 		else if (opt == 'P')
 			physical = 1;
 		else
 			error("unknown option: -%c", optopt);
 	}
 	argc -= optind;
 	argv += optind;
 
 	if (argc != 0)
 		error("too many arguments");
 
 	if (!physical && getpwd()) {
 		dir = curdir;
 	} else {
 		dir = getpwd2();
 		if (dir == NULL)
 			error(".: %s", strerror(errno));
 	}
 	out1str(dir);
 	out1c('\n');
 
 	return 0;
 }
 
 /*
  * Return the cached current directory, filling the cache from
  * getpwd2() first if it is empty.  Returns NULL when the directory
  * cannot be determined.
  */
 static char *
 getpwd(void)
 {
 	if (curdir == NULL) {
 		char *cwd = getpwd2();
 
 		if (cwd != NULL)
 			curdir = savestr(cwd);
 	}
 	return curdir;
 }
 
 #define MAXPWD 256
 
 /*
  * Ask getcwd() for the current directory, using a buffer obtained from
  * stalloc().  The buffer starts at MAXPWD bytes and is doubled for as
  * long as getcwd() fails with ERANGE.  Returns the buffer on success
  * and NULL on any other failure.
  */
 static char *
 getpwd2(void)
 {
 	char *buf;
 	int size = MAXPWD;
 
 	for (;;) {
 		buf = stalloc(size);
 		if (getcwd(buf, size) != NULL)
 			return buf;
 		stunalloc(buf);
 		if (errno != ERANGE)
 			return NULL;
 		size *= 2;
 	}
 }
 
 /*
  * Initialize PWD in a new shell.
  *
  * The value of PWD found by lookupvar() is adopted as curdir only if
  * it is absolute and stat() reports the same device and inode for it
  * as for ".".  getpwd() then fills in curdir if it is still unset; if
  * that fails and "warn" is nonzero a diagnostic is written.  Finally
  * PWD is set from curdir with VEXPORT.
  */
 void
 pwd_init(int warn)
 {
 	char *envpwd = lookupvar("PWD");
 	struct stat dotst, pwdst;
 	int usable;
 
 	usable = envpwd != NULL && *envpwd == '/' &&
 	    stat(".", &dotst) != -1 &&
 	    stat(envpwd, &pwdst) != -1 &&
 	    dotst.st_dev == pwdst.st_dev &&
 	    dotst.st_ino == pwdst.st_ino;
 	if (usable) {
 		if (curdir)
 			ckfree(curdir);
 		curdir = savestr(envpwd);
 	}
 	if (getpwd() == NULL && warn)
 		out2fmt_flush("sh: cannot determine working directory\n");
 	setvar("PWD", curdir, VEXPORT);
 }
Index: projects/binutils-2.17/bin/sh/eval.c
===================================================================
--- projects/binutils-2.17/bin/sh/eval.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/eval.c	(revision 215830)
@@ -1,1229 +1,1228 @@
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)eval.c	8.9 (Berkeley) 6/8/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <paths.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <sys/resource.h>
 #include <sys/wait.h> /* For WIFSIGNALED(status) */
 #include <errno.h>
 
 /*
  * Evaluate a command.
  */
 
 #include "shell.h"
 #include "nodes.h"
 #include "syntax.h"
 #include "expand.h"
 #include "parser.h"
 #include "jobs.h"
 #include "eval.h"
 #include "builtins.h"
 #include "options.h"
 #include "exec.h"
 #include "redir.h"
 #include "input.h"
 #include "output.h"
 #include "trap.h"
 #include "var.h"
 #include "memalloc.h"
 #include "error.h"
 #include "show.h"
 #include "mystring.h"
 #ifndef NO_HISTORY
 #include "myhistedit.h"
 #endif
 
 
 int evalskip;			/* set if we are skipping commands */
 static int skipcount;		/* number of levels to skip */
 MKINIT int loopnest;		/* current loop nesting level */
 int funcnest;			/* depth of function calls */
 static int builtin_flags;	/* evalcommand flags for builtins */
 
 
 char *commandname;
 struct strlist *cmdenviron;
 int exitstatus;			/* exit status of last command */
 int oexitstatus;		/* saved exit status */
 
 
 static void evalloop(union node *, int);
 static void evalfor(union node *, int);
 static void evalcase(union node *, int);
 static void evalsubshell(union node *, int);
 static void evalredir(union node *, int);
 static void expredir(union node *);
 static void evalpipe(union node *);
 static void evalcommand(union node *, int, struct backcmd *);
 static void prehash(union node *);
 
 
 /*
  * Called to reset things after an exception.
  */
 
 #ifdef mkinit
 INCLUDE "eval.h"
 
 RESET {
 	evalskip = 0;
 	loopnest = 0;
 	funcnest = 0;
 }
 
 SHELLPROC {
 	exitstatus = 0;
 }
 #endif
 
 
 
 /*
  * The eval command.
  *
  * With one argument that argument is evaluated as is; with several,
  * they are joined with single spaces into a stack string first.  The
  * string is passed to evalstring() together with the EV_TESTED bit of
  * builtin_flags.  With no arguments exitstatus is set to 0.  Returns
  * exitstatus.
  */
 
 int
 evalcmd(int argc, char **argv)
 {
         char *p;
         char *concat;
         char **ap;
 
         if (argc > 1) {
                 p = argv[1];
                 if (argc > 2) {
                         STARTSTACKSTR(concat);
                         ap = argv + 2;
                         for (;;) {
-                                while (*p)
-                                        STPUTC(*p++, concat);
+                                STPUTS(p, concat);
                                 if ((p = *ap++) == NULL)
                                         break;
                                 STPUTC(' ', concat);
                         }
                         STPUTC('\0', concat);
                         p = grabstackstr(concat);
                 }
                 evalstring(p, builtin_flags & EV_TESTED);
         } else
                 exitstatus = 0;
         return exitstatus;
 }
 
 
 /*
  * Parse and run the command(s) held in the string s.
  *
  * Each command returned by parsecmd() is evaluated with "flags" minus
  * EV_EXIT; EV_EXIT is passed through only for a command after which
  * preadateof() reports end of input.  If nothing was evaluated,
  * exitstatus is set to 0.  When the caller asked for EV_EXIT the shell
  * exits with exitstatus once the string is exhausted.
  */
 
 void
 evalstring(char *s, int flags)
 {
 	union node *tree;
 	struct stackmark smark;
 	const int exit_after = flags & EV_EXIT;
 	int ran_any = 0;
 
 	flags &= ~EV_EXIT;
 	setstackmark(&smark);
 	setinputstring(s, 1);
 	for (;;) {
 		tree = parsecmd(0);
 		if (tree == NEOF)
 			break;
 		if (tree != NULL) {
 			int treeflags = flags;
 
 			if (exit_after && preadateof())
 				treeflags |= EV_EXIT;
 			evaltree(tree, treeflags);
 			ran_any = 1;
 		}
 		popstackmark(&smark);
 	}
 	popfile();
 	popstackmark(&smark);
 	if (!ran_any)
 		exitstatus = 0;
 	if (exit_after)
 		exitshell(exitstatus);
 }
 
 
 /*
  * Evaluate a parse tree.  The value is left in the global variable
  * exitstatus.
  *
  * A NULL tree yields exit status 0.  Rather than recursing on the last
  * child of a node, the loop continues with it through "next".  On the
  * way out pending traps are run, and the shell exits if EV_EXIT is set
  * in flags, or if eflag is set and an untested (no EV_TESTED) subshell,
  * pipeline or simple command left a nonzero exitstatus.
  */
 
 void
 evaltree(union node *n, int flags)
 {
 	int do_etest;
 	union node *next;
 
 	do_etest = 0;
 	if (n == NULL) {
 		TRACE(("evaltree(NULL) called\n"));
 		exitstatus = 0;
 		goto out;
 	}
 	do {
 		next = NULL;
 #ifndef NO_HISTORY
 		displayhist = 1;	/* show history substitutions done with fc */
 #endif
 		TRACE(("evaltree(%p: %d) called\n", (void *)n, n->type));
 		switch (n->type) {
 		case NSEMI:
 			/* The first half must not exit the shell; the second inherits flags. */
 			evaltree(n->nbinary.ch1, flags & ~EV_EXIT);
 			if (evalskip)
 				goto out;
 			next = n->nbinary.ch2;
 			break;
 		case NAND:
 			/* Right side runs only if the left side succeeded. */
 			evaltree(n->nbinary.ch1, EV_TESTED);
 			if (evalskip || exitstatus != 0) {
 				goto out;
 			}
 			next = n->nbinary.ch2;
 			break;
 		case NOR:
 			/* Right side runs only if the left side failed. */
 			evaltree(n->nbinary.ch1, EV_TESTED);
 			if (evalskip || exitstatus == 0)
 				goto out;
 			next = n->nbinary.ch2;
 			break;
 		case NREDIR:
 			evalredir(n, flags);
 			break;
 		case NSUBSHELL:
 			evalsubshell(n, flags);
 			do_etest = !(flags & EV_TESTED);
 			break;
 		case NBACKGND:
 			evalsubshell(n, flags);
 			break;
 		case NIF: {
 			evaltree(n->nif.test, EV_TESTED);
 			if (evalskip)
 				goto out;
 			if (exitstatus == 0)
 				next = n->nif.ifpart;
 			else if (n->nif.elsepart)
 				next = n->nif.elsepart;
 			else
 				exitstatus = 0;	/* false test without an else part */
 			break;
 		}
 		case NWHILE:
 		case NUNTIL:
 			evalloop(n, flags & ~EV_EXIT);
 			break;
 		case NFOR:
 			evalfor(n, flags & ~EV_EXIT);
 			break;
 		case NCASE:
 			evalcase(n, flags);
 			break;
 		case NDEFUN:
 			defun(n->narg.text, n->narg.next);
 			exitstatus = 0;
 			break;
 		case NNOT:
 			evaltree(n->nnot.com, EV_TESTED);
 			exitstatus = !exitstatus;
 			break;
 
 		case NPIPE:
 			evalpipe(n);
 			do_etest = !(flags & EV_TESTED);
 			break;
 		case NCMD:
 			evalcommand(n, flags, (struct backcmd *)NULL);
 			do_etest = !(flags & EV_TESTED);
 			break;
 		default:
 			/* Unknown node type: just report it. */
 			out1fmt("Node type = %d\n", n->type);
 			flushout(&output);
 			break;
 		}
 		n = next;
 	} while (n != NULL);
 out:
 	if (pendingsigs)
 		dotrap();
 	if ((flags & EV_EXIT) || (eflag && exitstatus != 0 && do_etest))
 		exitshell(exitstatus);
 }
 
 
 /*
  * Evaluate a while or until loop.  n->nbinary.ch1 is the condition
  * (always evaluated with EV_TESTED) and n->nbinary.ch2 the body.  For
  * NWHILE the loop ends when the condition fails, otherwise when it
  * succeeds.  On return exitstatus is that of the last body evaluation,
  * 0 if the body never ran, or the current exitstatus when the loop is
  * left because of SKIPFUNC or SKIPFILE.
  */
 static void
 evalloop(union node *n, int flags)
 {
 	int status;
 
 	loopnest++;
 	status = 0;
 	for (;;) {
 		evaltree(n->nbinary.ch1, EV_TESTED);
 		if (evalskip) {
 			/* Also entered by goto from below when the body set evalskip. */
 skipping:	  if (evalskip == SKIPCONT && --skipcount <= 0) {
 				evalskip = 0;
 				continue;
 			}
 			if (evalskip == SKIPBREAK && --skipcount <= 0)
 				evalskip = 0;
 			if (evalskip == SKIPFUNC || evalskip == SKIPFILE)
 				status = exitstatus;
 			break;
 		}
 		if (n->type == NWHILE) {
 			if (exitstatus != 0)
 				break;
 		} else {
 			if (exitstatus == 0)
 				break;
 		}
 		evaltree(n->nbinary.ch2, flags);
 		status = exitstatus;
 		if (evalskip)
 			goto skipping;
 	}
 	loopnest--;
 	exitstatus = status;
 }
 
 
 
 static void
 evalfor(union node *n, int flags)
 {
 	struct arglist arglist;
 	union node *argp;
 	struct strlist *sp;
 	struct stackmark smark;
 
 	setstackmark(&smark);
 	arglist.lastp = &arglist.list;
 	for (argp = n->nfor.args ; argp ; argp = argp->narg.next) {
 		oexitstatus = exitstatus;
 		expandarg(argp, &arglist, EXP_FULL | EXP_TILDE);
 		if (evalskip)
 			goto out;
 	}
 	*arglist.lastp = NULL;
 
 	exitstatus = 0;
 	loopnest++;
 	for (sp = arglist.list ; sp ; sp = sp->next) {
 		setvar(n->nfor.var, sp->text, 0);
 		evaltree(n->nfor.body, flags);
 		if (evalskip) {
 			if (evalskip == SKIPCONT && --skipcount <= 0) {
 				evalskip = 0;
 				continue;
 			}
 			if (evalskip == SKIPBREAK && --skipcount <= 0)
 				evalskip = 0;
 			break;
 		}
 	}
 	loopnest--;
 out:
 	popstackmark(&smark);
 }
 
 
 
 static void
 evalcase(union node *n, int flags)
 {
 	union node *cp;
 	union node *patp;
 	struct arglist arglist;
 	struct stackmark smark;
 
 	setstackmark(&smark);
 	arglist.lastp = &arglist.list;
 	oexitstatus = exitstatus;
 	exitstatus = 0;
 	expandarg(n->ncase.expr, &arglist, EXP_TILDE);
 	for (cp = n->ncase.cases ; cp && evalskip == 0 ; cp = cp->nclist.next) {
 		for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) {
 			if (casematch(patp, arglist.list->text)) {
 				if (evalskip == 0) {
 					evaltree(cp->nclist.body, flags);
 				}
 				goto out;
 			}
 		}
 	}
 out:
 	popstackmark(&smark);
 }
 
 
 
 /*
  * Kick off a subshell to evaluate a tree.
  *
  * Used for both NSUBSHELL and NBACKGND nodes.  No fork is done when the
  * node is not a background one, EV_EXIT is set and have_traps() is
  * false: the tree is then evaluated in the current process.  Otherwise
  * forkshell() is called; where it returns 0 the redirections are
  * applied and the tree is evaluated with EV_EXIT, and for a foreground
  * subshell the other side waits for the job and stores its status in
  * exitstatus.
  */
 
 static void
 evalsubshell(union node *n, int flags)
 {
 	struct job *jp;
 	int backgnd = (n->type == NBACKGND);
 
 	expredir(n->nredir.redirect);
 	/* jp is only assigned when the first operand is false, i.e. when we fork. */
 	if ((!backgnd && flags & EV_EXIT && !have_traps()) ||
 			forkshell(jp = makejob(n, 1), n, backgnd) == 0) {
 		if (backgnd)
 			flags &=~ EV_TESTED;
 		redirect(n->nredir.redirect, 0);
 		evaltree(n->nredir.n, flags | EV_EXIT);	/* never returns */
 	} else if (! backgnd) {
 		INTOFF;
 		exitstatus = waitforjob(jp, (int *)NULL);
 		INTON;
 	}
 }
 
 
 /*
  * Evaluate a redirected compound command.
  *
  * The redirections are pushed with REDIR_PUSH, the command is
  * evaluated, and the redirections are popped again.  A local exception
  * handler is installed for the duration: on EXERROR or EXEXEC the
  * redirections are popped, and if the exception was raised while the
  * redirections were still being set up, exitstatus becomes 2 and the
  * function returns normally.  In every other case the exception is
  * passed on to the previously installed handler.
  */
 
 static void
 evalredir(union node *n, int flags)
 {
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	/* volatile: written after setjmp() and read after a longjmp() back here */
 	volatile int in_redirect = 1;
 
 	expredir(n->nredir.redirect);
 	savehandler = handler;
 	if (setjmp(jmploc.loc)) {
 		int e;
 
 		handler = savehandler;
 		e = exception;
 		if (e == EXERROR || e == EXEXEC) {
 			popredir();
 			if (in_redirect) {
 				exitstatus = 2;
 				return;
 			}
 		}
 		/* Not handled here: re-raise to the outer handler. */
 		longjmp(handler->loc, 1);
 	} else {
 		INTOFF;
 		handler = &jmploc;
 		redirect(n->nredir.redirect, REDIR_PUSH);
 		in_redirect = 0;
 		INTON;
 		evaltree(n->nredir.n, flags);
 	}
 	INTOFF;
 	handler = savehandler;
 	popredir();
 	INTON;
 }
 
 
 /*
  * Compute the names of the files in a redirection list.
  */
 
 static void
 expredir(union node *n)
 {
 	union node *redir;
 
 	for (redir = n ; redir ; redir = redir->nfile.next) {
 		struct arglist fn;
 		fn.lastp = &fn.list;
 		oexitstatus = exitstatus;
 		switch (redir->type) {
 		case NFROM:
 		case NTO:
 		case NFROMTO:
 		case NAPPEND:
 		case NCLOBBER:
 			expandarg(redir->nfile.fname, &fn, EXP_TILDE | EXP_REDIR);
 			redir->nfile.expfname = fn.list->text;
 			break;
 		case NFROMFD:
 		case NTOFD:
 			if (redir->ndup.vname) {
 				expandarg(redir->ndup.vname, &fn, EXP_TILDE | EXP_REDIR);
 				fixredir(redir, fn.list->text, 1);
 			}
 			break;
 		}
 	}
 }
 
 
 
 /*
  * Evaluate a pipeline.  All the processes in the pipeline are children
  * of the process creating the pipeline.  (This differs from some versions
  * of the shell, which make the last process in a pipeline the parent
  * of all the rest.)
  *
  * One job with an entry per command is created.  For each command but
  * the last a pipe is made; prevfd carries the read end of the previous
  * pipe forward so that the next child can use it as descriptor 0.
  * Unless the pipeline is a background one, exitstatus is set from
  * waitforjob().
  */
 
 static void
 evalpipe(union node *n)
 {
 	struct job *jp;
 	struct nodelist *lp;
 	int pipelen;
 	int prevfd;
 	int pip[2];
 
 	TRACE(("evalpipe(%p) called\n", (void *)n));
 	pipelen = 0;
 	for (lp = n->npipe.cmdlist ; lp ; lp = lp->next)
 		pipelen++;
 	INTOFF;
 	jp = makejob(n, pipelen);
 	prevfd = -1;
 	for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) {
 		prehash(lp->n);
 		pip[1] = -1;	/* stays -1 for the last command, which gets no pipe */
 		if (lp->next) {
 			if (pipe(pip) < 0) {
 				close(prevfd);
 				error("Pipe call failed: %s", strerror(errno));
 			}
 		}
 		if (forkshell(jp, lp->n, n->npipe.backgnd) == 0) {
 			/* forkshell() returned 0: wire up descriptors 0 and 1, then run the command. */
 			INTON;
 			if (prevfd > 0) {
 				dup2(prevfd, 0);
 				close(prevfd);
 			}
 			if (pip[1] >= 0) {
 				/* Keep pip[0] open only if it is fd 0 and fd 0 was just set up from prevfd. */
 				if (!(prevfd >= 0 && pip[0] == 0))
 					close(pip[0]);
 				if (pip[1] != 1) {
 					dup2(pip[1], 1);
 					close(pip[1]);
 				}
 			}
 			evaltree(lp->n, EV_EXIT);
 		}
 		/* The read end of this pipe becomes the next command's input. */
 		if (prevfd >= 0)
 			close(prevfd);
 		prevfd = pip[0];
 		close(pip[1]);
 	}
 	INTON;
 	if (n->npipe.backgnd == 0) {
 		INTOFF;
 		exitstatus = waitforjob(jp, (int *)NULL);
 		TRACE(("evalpipe:  job done exit status %d\n", exitstatus));
 		INTON;
 	}
 }
 
 
 
 /*
  * Execute a command inside back quotes.  If it's a builtin command, we
  * want to save its output in a block obtained from malloc.  Otherwise
  * we fork off a subprocess and get the output of the command via a pipe.
  * Should be called with interrupts off.
  *
  * "result" is first reset (fd -1, no buffer, no job).  A NULL tree just
  * sets exitstatus to 0.  A simple command (NCMD) is handed to
  * evalcommand() with EV_BACKCMD, which fills in "result".  Anything
  * else is run through forkshell() with its descriptor 1 connected to a
  * pipe; result->fd is then the read end and result->jp the job.
  */
 
 void
 evalbackcmd(union node *n, struct backcmd *result)
 {
 	int pip[2];
 	struct job *jp;
 	struct stackmark smark;		/* unnecessary */
 
 	setstackmark(&smark);
 	result->fd = -1;
 	result->buf = NULL;
 	result->nleft = 0;
 	result->jp = NULL;
 	if (n == NULL) {
 		exitstatus = 0;
 		goto out;
 	}
 	if (n->type == NCMD) {
 		/* Restore the status saved in oexitstatus before running the command. */
 		exitstatus = oexitstatus;
 		evalcommand(n, EV_BACKCMD, result);
 	} else {
 		exitstatus = 0;
 		if (pipe(pip) < 0)
 			error("Pipe call failed: %s", strerror(errno));
 		jp = makejob(n, 1);
 		if (forkshell(jp, n, FORK_NOJOB) == 0) {
 			/* forkshell() returned 0: send descriptor 1 into the pipe and run the tree. */
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
 				dup2(pip[1], 1);
 				close(pip[1]);
 			}
 			evaltree(n, EV_EXIT);
 		}
 		close(pip[1]);
 		result->fd = pip[0];
 		result->jp = jp;
 	}
 out:
 	popstackmark(&smark);
 	TRACE(("evalbackcmd done: fd=%d buf=%p nleft=%d jp=%p\n",
 		result->fd, result->buf, result->nleft, result->jp));
 }
 
 
 
 /*
  * Execute a simple command.
  */
 
 static void
 evalcommand(union node *cmd, int flags, struct backcmd *backcmd)
 {
 	struct stackmark smark;
 	union node *argp;
 	struct arglist arglist;
 	struct arglist varlist;
 	char **argv;
 	int argc;
 	char **envp;
 	int varflag;
 	struct strlist *sp;
 	int mode;
 	int pip[2];
 	struct cmdentry cmdentry;
 	struct job *jp;
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	char *savecmdname;
 	struct shparam saveparam;
 	struct localvar *savelocalvars;
 	struct parsefile *savetopfile;
 	volatile int e;
 	char *lastarg;
 	int realstatus;
 	int do_clearcmdentry;
 	const char *path = pathval();
 
 	/* First expand the arguments. */
 	TRACE(("evalcommand(%p, %d) called\n", (void *)cmd, flags));
 	setstackmark(&smark);
 	arglist.lastp = &arglist.list;
 	varlist.lastp = &varlist.list;
 	varflag = 1;
 	do_clearcmdentry = 0;
 	oexitstatus = exitstatus;
 	exitstatus = 0;
 	for (argp = cmd->ncmd.args ; argp ; argp = argp->narg.next) {
 		char *p = argp->narg.text;
 		if (varflag && is_name(*p)) {
 			do {
 				p++;
 			} while (is_in_name(*p));
 			if (*p == '=') {
 				expandarg(argp, &varlist, EXP_VARTILDE);
 				continue;
 			}
 		}
 		expandarg(argp, &arglist, EXP_FULL | EXP_TILDE);
 		varflag = 0;
 	}
 	*arglist.lastp = NULL;
 	*varlist.lastp = NULL;
 	expredir(cmd->ncmd.redirect);
 	argc = 0;
 	for (sp = arglist.list ; sp ; sp = sp->next)
 		argc++;
 	argv = stalloc(sizeof (char *) * (argc + 1));
 
 	for (sp = arglist.list ; sp ; sp = sp->next) {
 		TRACE(("evalcommand arg: %s\n", sp->text));
 		*argv++ = sp->text;
 	}
 	*argv = NULL;
 	lastarg = NULL;
 	if (iflag && funcnest == 0 && argc > 0)
 		lastarg = argv[-1];
 	argv -= argc;
 
 	/* Print the command if xflag is set. */
 	if (xflag) {
 		char sep = 0;
 		const char *p;
 		out2str(ps4val());
 		for (sp = varlist.list ; sp ; sp = sp->next) {
 			if (sep != 0)
 				out2c(' ');
 			p = strchr(sp->text, '=');
 			if (p != NULL) {
 				p++;
 				outbin(sp->text, p - sp->text, out2);
 				out2qstr(p);
 			} else
 				out2qstr(sp->text);
 			sep = ' ';
 		}
 		for (sp = arglist.list ; sp ; sp = sp->next) {
 			if (sep != 0)
 				out2c(' ');
 			/* Disambiguate command looking like assignment. */
 			if (sp == arglist.list &&
 					strchr(sp->text, '=') != NULL &&
 					strchr(sp->text, '\'') == NULL) {
 				out2c('\'');
 				out2str(sp->text);
 				out2c('\'');
 			} else
 				out2qstr(sp->text);
 			sep = ' ';
 		}
 		out2c('\n');
 		flushout(&errout);
 	}
 
 	/* Now locate the command. */
 	if (argc == 0) {
 		/* Variable assignment(s) without command */
 		cmdentry.cmdtype = CMDBUILTIN;
 		cmdentry.u.index = BLTINCMD;
 		cmdentry.special = 0;
 	} else {
 		static const char PATH[] = "PATH=";
 		int cmd_flags = 0, bltinonly = 0;
 
 		/*
 		 * Modify the command lookup path, if a PATH= assignment
 		 * is present
 		 */
 		for (sp = varlist.list ; sp ; sp = sp->next)
 			if (strncmp(sp->text, PATH, sizeof(PATH) - 1) == 0) {
 				path = sp->text + sizeof(PATH) - 1;
 				/*
 				 * On `PATH=... command`, we need to make
 				 * sure that the command isn't using the
 				 * non-updated hash table of the outer PATH
 				 * setting and we need to make sure that
 				 * the hash table isn't filled with items
 				 * from the temporary setting.
 				 *
 				 * It would be better to forbid using and
 				 * updating the table while this command
 				 * runs, but the command finding mechanism
 				 * is heavily integrated with hash handling,
 				 * so we just delete the hash before and after
 				 * the command runs. Partly deleting like
 				 * changepath() does doesn't seem worth the
 				 * bookkeeping effort, since most such runs add
 				 * directories in front of the new PATH.
 				 */
 				clearcmdentry(0);
 				do_clearcmdentry = 1;
 			}
 
 		for (;;) {
 			if (bltinonly) {
 				cmdentry.u.index = find_builtin(*argv, &cmdentry.special);
 				if (cmdentry.u.index < 0) {
 					cmdentry.u.index = BLTINCMD;
 					argv--;
 					argc++;
 					break;
 				}
 			} else
 				find_command(argv[0], &cmdentry, cmd_flags, path);
 			/* implement the bltin and command builtins here */
 			if (cmdentry.cmdtype != CMDBUILTIN)
 				break;
 			if (cmdentry.u.index == BLTINCMD) {
 				if (argc == 1)
 					break;
 				argv++;
 				argc--;
 				bltinonly = 1;
 			} else if (cmdentry.u.index == COMMANDCMD) {
 				if (argc == 1)
 					break;
 				if (!strcmp(argv[1], "-p")) {
 					if (argc == 2)
 						break;
 					if (argv[2][0] == '-') {
 						if (strcmp(argv[2], "--"))
 							break;
 						if (argc == 3)
 							break;
 						argv += 3;
 						argc -= 3;
 					} else {
 						argv += 2;
 						argc -= 2;
 					}
 					path = _PATH_STDPATH;
 					clearcmdentry(0);
 					do_clearcmdentry = 1;
 				} else if (!strcmp(argv[1], "--")) {
 					if (argc == 2)
 						break;
 					argv += 2;
 					argc -= 2;
 				} else if (argv[1][0] == '-')
 					break;
 				else {
 					argv++;
 					argc--;
 				}
 				cmd_flags |= DO_NOFUNC;
 				bltinonly = 0;
 			} else
 				break;
 		}
 		/*
 		 * Special builtins lose their special properties when
 		 * called via 'command'.
 		 */
 		if (cmd_flags & DO_NOFUNC)
 			cmdentry.special = 0;
 	}
 
 	/* Fork off a child process if necessary. */
 	if (cmd->ncmd.backgnd
 	 || ((cmdentry.cmdtype == CMDNORMAL || cmdentry.cmdtype == CMDUNKNOWN)
 	    && ((flags & EV_EXIT) == 0 || have_traps()))
 	 || ((flags & EV_BACKCMD) != 0
 	    && (cmdentry.cmdtype != CMDBUILTIN
 		 || cmdentry.u.index == CDCMD
 		 || cmdentry.u.index == DOTCMD
 		 || cmdentry.u.index == EVALCMD))) {
 		jp = makejob(cmd, 1);
 		mode = cmd->ncmd.backgnd;
 		if (flags & EV_BACKCMD) {
 			mode = FORK_NOJOB;
 			if (pipe(pip) < 0)
 				error("Pipe call failed: %s", strerror(errno));
 		}
 		if (forkshell(jp, cmd, mode) != 0)
 			goto parent;	/* at end of routine */
 		if (flags & EV_BACKCMD) {
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
 				dup2(pip[1], 1);
 				close(pip[1]);
 			}
 		}
 		flags |= EV_EXIT;
 	}
 
 	/* This is the child process if a fork occurred. */
 	/* Execute the command. */
 	if (cmdentry.cmdtype == CMDFUNCTION) {
 #ifdef DEBUG
 		trputs("Shell function:  ");  trargs(argv);
 #endif
 		saveparam = shellparam;
 		shellparam.malloc = 0;
 		shellparam.reset = 1;
 		shellparam.nparam = argc - 1;
 		shellparam.p = argv + 1;
 		shellparam.optnext = NULL;
 		INTOFF;
 		savelocalvars = localvars;
 		localvars = NULL;
 		reffunc(cmdentry.u.func);
 		savehandler = handler;
 		if (setjmp(jmploc.loc)) {
 			if (exception == EXSHELLPROC)
 				freeparam(&saveparam);
 			else {
 				freeparam(&shellparam);
 				shellparam = saveparam;
 				if (exception == EXERROR || exception == EXEXEC)
 					popredir();
 			}
 			unreffunc(cmdentry.u.func);
 			poplocalvars();
 			localvars = savelocalvars;
 			funcnest--;
 			handler = savehandler;
 			longjmp(handler->loc, 1);
 		}
 		handler = &jmploc;
 		funcnest++;
 		redirect(cmd->ncmd.redirect, REDIR_PUSH);
 		INTON;
 		for (sp = varlist.list ; sp ; sp = sp->next)
 			mklocal(sp->text);
 		exitstatus = oexitstatus;
 		if (flags & EV_TESTED)
 			evaltree(getfuncnode(cmdentry.u.func), EV_TESTED);
 		else
 			evaltree(getfuncnode(cmdentry.u.func), 0);
 		INTOFF;
 		unreffunc(cmdentry.u.func);
 		poplocalvars();
 		localvars = savelocalvars;
 		freeparam(&shellparam);
 		shellparam = saveparam;
 		handler = savehandler;
 		funcnest--;
 		popredir();
 		INTON;
 		if (evalskip == SKIPFUNC) {
 			evalskip = 0;
 			skipcount = 0;
 		}
 		if (flags & EV_EXIT)
 			exitshell(exitstatus);
 	} else if (cmdentry.cmdtype == CMDBUILTIN) {
 #ifdef DEBUG
 		trputs("builtin command:  ");  trargs(argv);
 #endif
 		mode = (cmdentry.u.index == EXECCMD)? 0 : REDIR_PUSH;
 		if (flags == EV_BACKCMD) {
 			memout.nleft = 0;
 			memout.nextc = memout.buf;
 			memout.bufsize = 64;
 			mode |= REDIR_BACKQ;
 			cmdentry.special = 0;
 		}
 		savecmdname = commandname;
 		savetopfile = getcurrentfile();
 		cmdenviron = varlist.list;
 		e = -1;
 		savehandler = handler;
 		if (setjmp(jmploc.loc)) {
 			e = exception;
 			exitstatus = (e == EXINT)? SIGINT+128 : 2;
 			goto cmddone;
 		}
 		handler = &jmploc;
 		redirect(cmd->ncmd.redirect, mode);
 		/*
 		 * If there is no command word, redirection errors should
 		 * not be fatal but assignment errors should.
 		 */
 		if (argc == 0 && !(flags & EV_BACKCMD))
 			cmdentry.special = 1;
 		if (cmdentry.special)
 			listsetvar(cmdenviron);
 		if (argc > 0)
 			bltinsetlocale();
 		commandname = argv[0];
 		argptr = argv + 1;
 		nextopt_optptr = NULL;		/* initialize nextopt */
 		builtin_flags = flags;
 		exitstatus = (*builtinfunc[cmdentry.u.index])(argc, argv);
 		flushall();
 cmddone:
 		if (argc > 0)
 			bltinunsetlocale();
 		cmdenviron = NULL;
 		out1 = &output;
 		out2 = &errout;
 		freestdout();
 		if (e != EXSHELLPROC) {
 			commandname = savecmdname;
 			if (flags & EV_EXIT) {
 				exitshell(exitstatus);
 			}
 		}
 		handler = savehandler;
 		if (flags == EV_BACKCMD) {
 			backcmd->buf = memout.buf;
 			backcmd->nleft = memout.nextc - memout.buf;
 			memout.buf = NULL;
 		}
 		if (cmdentry.u.index != EXECCMD &&
 				(e == -1 || e == EXERROR || e == EXEXEC))
 			popredir();
 		if (e != -1) {
 			if ((e != EXERROR && e != EXEXEC)
 			    || cmdentry.special)
 				exraise(e);
 			popfilesupto(savetopfile);
 			if (flags != EV_BACKCMD)
 				FORCEINTON;
 		}
 	} else {
 #ifdef DEBUG
 		trputs("normal command:  ");  trargs(argv);
 #endif
 		redirect(cmd->ncmd.redirect, 0);
 		for (sp = varlist.list ; sp ; sp = sp->next)
 			setvareq(sp->text, VEXPORT|VSTACK);
 		envp = environment();
 		shellexec(argv, envp, path, cmdentry.u.index);
 		/*NOTREACHED*/
 	}
 	goto out;
 
 parent:	/* parent process gets here (if we forked) */
 	if (mode == FORK_FG) {	/* argument to fork */
 		INTOFF;
 		exitstatus = waitforjob(jp, &realstatus);
 		INTON;
 		if (iflag && loopnest > 0 && WIFSIGNALED(realstatus)) {
 			evalskip = SKIPBREAK;
 			skipcount = loopnest;
 		}
 	} else if (mode == FORK_NOJOB) {
 		backcmd->fd = pip[0];
 		close(pip[1]);
 		backcmd->jp = jp;
 	}
 
 out:
 	if (lastarg)
 		setvar("_", lastarg, 0);
 	if (do_clearcmdentry)
 		clearcmdentry(0);
 	popstackmark(&smark);
 }
 
 
 
 /*
  * Look up a simple command ahead of time.  This is done before we fork
  * so that the location of the command is available in the parent as
  * well as the child.  Only names accepted by goodname() are looked up;
  * that is an overly conservative check that the name will not be
  * subject to expansion.  Anything that is not an NCMD node with at
  * least one argument is ignored.
  */
 
 static void
 prehash(union node *n)
 {
 	struct cmdentry entry;
 	char *name;
 
 	if (n == NULL || n->type != NCMD || n->ncmd.args == NULL)
 		return;
 	name = n->ncmd.args->narg.text;
 	if (!goodname(name))
 		return;
 	/* The filled-in entry is not used afterwards. */
 	find_command(name, &entry, 0, pathval());
 }
 
 
 
 /*
  * Builtin commands.  Builtin commands whose functions are closely
  * tied to evaluation are implemented here.
  */
 
 /*
  * Handler used when there is no command word, when bltin is given no
  * arguments, or when bltin is given a name that is not a builtin.
  *
  * With an argument present, report it as "not found" and return 127.
  * Otherwise return the current exitstatus unchanged.
  */
 
 int
 bltincmd(int argc, char **argv)
 {
 	if (argc <= 1) {
 		/*
 		 * Preserve exitstatus of a previous possible redirection
 		 * as POSIX mandates
 		 */
 		return exitstatus;
 	}
 	out2fmt_flush("%s: not found\n", argv[1]);
 	return 127;
 }
 
 
 /*
  * The break and continue builtins.  Break, continue, and return all
  * work by setting the evalskip flag; the evaluation routines check the
  * flag and, when it is set, skip commands instead of executing them.
  * skipcount holds the number of loops to break/continue, or the number
  * of function levels to return (the latter is always 1).
  *
  * The requested count is clamped to the current loop nesting depth.
  * Breaking out of more loops than exist should probably be an error,
  * but it isn't in the standard shell so it is not one here either.
  * The builtin is told apart by the first letter of argv[0]: 'c' means
  * continue, anything else means break.  Always returns 0.
  */
 
 int
 breakcmd(int argc, char **argv)
 {
 	int levels;
 
 	if (argc > 1)
 		levels = number(argv[1]);
 	else
 		levels = 1;
 
 	if (levels > loopnest)
 		levels = loopnest;
 	if (levels <= 0)
 		return 0;
 
 	if (**argv == 'c')
 		evalskip = SKIPCONT;
 	else
 		evalskip = SKIPBREAK;
 	skipcount = levels;
 	return 0;
 }
 
 /*
  * The `command' builtin, as far as it is handled outside evalcommand():
  * option parsing and the -v / -V query forms.
  *
  *	-p	search using _PATH_STDPATH instead of the PATH value
  *	-v	describe the single operand (TYPECMD_SMALLV)
  *	-V	describe the single operand (TYPECMD_BIGV)
  *
  * An unknown option, a -v/-V call without exactly one operand, or any
  * operand left over without -v/-V raises an error.
  */
 int
 commandcmd(int argc, char **argv)
 {
 	const char *lookup_path = bltinlookup("PATH", 1);
 	int mode = -1;
 	int opt;
 
 	optreset = 1;
 	optind = 1;
 	opterr = 0;
 	for (;;) {
 		opt = getopt(argc, argv, "pvV");
 		if (opt == -1)
 			break;
 		if (opt == 'p')
 			lookup_path = _PATH_STDPATH;
 		else if (opt == 'v')
 			mode = TYPECMD_SMALLV;
 		else if (opt == 'V')
 			mode = TYPECMD_BIGV;
 		else
 			error("unknown option: -%c", optopt);
 	}
 	argv += optind;
 	argc -= optind;
 
 	if (mode == -1) {
 		if (argc != 0)
 			error("commandcmd bad call");
 		/*
 		 * Do nothing successfully if no command was specified;
 		 * ksh also does this.
 		 */
 		return 0;
 	}
 
 	if (argc != 1)
 		error("wrong number of arguments");
 	return typecmd_impl(2, argv - 1, mode, lookup_path);
 }
 
 
 /*
  * The return builtin.  The status is the numeric argument when one is
  * given, otherwise oexitstatus.  Inside a function (funcnest != 0) the
  * function is left; otherwise the rest of the current file is skipped.
  */
 
 int
 returncmd(int argc, char **argv)
 {
 	int status;
 
 	if (argc > 1)
 		status = number(argv[1]);
 	else
 		status = oexitstatus;
 
 	evalskip = funcnest ? SKIPFUNC : SKIPFILE;
 	skipcount = 1;
 	return status;
 }
 
 
 /* The false builtin: ignores its arguments and always returns 1. */
 int
 falsecmd(int argc __unused, char **argv __unused)
 {
 	return (1);
 }
 
 
 /* The true builtin: ignores its arguments and always returns 0. */
 int
 truecmd(int argc __unused, char **argv __unused)
 {
 	return (0);
 }
 
 
 /*
  * The exec builtin.  With a command operand, the variable assignments
  * of this command (cmdenviron) are exported and the command is run via
  * shellexec().  Without an operand nothing is executed and 0 is
  * returned.
  */
 int
 execcmd(int argc, char **argv)
 {
 	struct strlist *assign;
 
 	/*
 	 * Because we have historically not supported any options,
 	 * only treat "--" specially.
 	 */
 	if (argc > 1 && strcmp(argv[1], "--") == 0) {
 		argc--;
 		argv++;
 	}
 	if (argc <= 1)
 		return 0;
 
 	iflag = 0;		/* exit on error */
 	mflag = 0;
 	optschanged();
 	for (assign = cmdenviron; assign != NULL; assign = assign->next)
 		setvareq(assign->text, VEXPORT|VSTACK);
 	shellexec(argv + 1, environment(), pathval(), 0);
 
 	return 0;
 }
 
 
 /*
  * The times builtin.  Prints user and system CPU time, as minutes and
  * seconds, for the shell itself on the first line and for its children
  * on the second.  Returns 1 without printing anything if either
  * getrusage() call fails, 0 otherwise.
  */
 int
 timescmd(int argc __unused, char **argv __unused)
 {
 	struct rusage ru;
 	long mins[4];		/* self user, self sys, child user, child sys */
 	double secs[4];		/* same order as mins[] */
 	int i;
 
 	for (i = 0; i < 2; i++) {
 		if (getrusage(i == 0 ? RUSAGE_SELF : RUSAGE_CHILDREN, &ru) < 0)
 			return 1;
 		mins[2 * i] = ru.ru_utime.tv_sec / 60;
 		secs[2 * i] = ru.ru_utime.tv_sec % 60 +
 		    ru.ru_utime.tv_usec / 1000000.;
 		mins[2 * i + 1] = ru.ru_stime.tv_sec / 60;
 		secs[2 * i + 1] = ru.ru_stime.tv_sec % 60 +
 		    ru.ru_stime.tv_usec / 1000000.;
 	}
 	out1fmt("%ldm%.3fs %ldm%.3fs\n%ldm%.3fs %ldm%.3fs\n", mins[0],
 	    secs[0], mins[1], secs[1], mins[2], secs[2], mins[3], secs[3]);
 	return 0;
 }
Index: projects/binutils-2.17/bin/sh/expand.c
===================================================================
--- projects/binutils-2.17/bin/sh/expand.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/expand.c	(revision 215830)
@@ -1,1598 +1,1597 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 1997-2005
  *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <dirent.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <limits.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
  * backquotes, shell variables, and file metacharacters.
  */
 
 #include "shell.h"
 #include "main.h"
 #include "nodes.h"
 #include "eval.h"
 #include "expand.h"
 #include "syntax.h"
 #include "parser.h"
 #include "jobs.h"
 #include "options.h"
 #include "var.h"
 #include "input.h"
 #include "output.h"
 #include "memalloc.h"
 #include "error.h"
 #include "mystring.h"
 #include "arith.h"
 #include "show.h"
 
 /*
  * Structure specifying which parts of the string should be searched
  * for IFS characters.  Regions are kept in a singly linked list; the
  * offsets are relative to the start of the stack string (callers pass
  * values of the form "expdest - stackblock()").
  */
 
 struct ifsregion {
 	struct ifsregion *next;	/* next region in list */
 	int begoff;		/* offset of start of region */
 	int endoff;		/* offset of end of region */
 	int inquotes;		/* search for nul bytes only */
 };
 
 
 static char *expdest;			/* output of current string */
 static struct nodelist *argbackq;	/* list of back quote expressions */
 static struct ifsregion ifsfirst;	/* first struct in list of ifs regions */
 static struct ifsregion *ifslastp;	/* last struct in list */
 static struct arglist exparg;		/* holds expanded arg list */
 
 static void argstr(char *, int);
 static char *exptilde(char *, int);
 static void expbackq(union node *, int, int);
 static int subevalvar(char *, char *, int, int, int, int, int);
 static char *evalvar(char *, int);
 static int varisset(char *, int);
 static void varvalue(char *, int, int, int);
 static void recordregion(int, int, int);
 static void removerecordregions(int);
 static void ifsbreakup(char *, struct arglist *);
 static void expandmeta(struct strlist *, int);
 static void expmeta(char *, char *);
 static void addfname(char *);
 static struct strlist *expsort(struct strlist *);
 static struct strlist *msort(struct strlist *, int);
 static char *cvtnum(int, char *);
 static int collate_range_cmp(int, int);
 
 /*
  * Compare two single characters according to the collating order of
  * the current locale.  Each character is placed in its own
  * one-character string and the two strings are handed to strcoll();
  * the result is strcoll()'s (negative, zero or positive).
  */
 static int
 collate_range_cmp(int c1, int c2)
 {
 	char left[2], right[2];
 
 	left[0] = c1;
 	left[1] = '\0';
 	right[0] = c2;
 	right[1] = '\0';
 	return (strcoll(left, right));
 }
 
 /*
  * Expand shell variables and backquotes inside a here document.
  *	arg	the document
  *	fd	where to write the expanded version
  *
  * herefd is set to fd before expanding; whatever is left in the stack
  * string afterwards is written to fd.
  */
 
 void
 expandhere(union node *arg, int fd)
 {
 	char *start;
 
 	herefd = fd;
 	expandarg(arg, NULL, 0);
 	start = stackblock();
 	xwrite(fd, start, expdest - start);
 }
 
 
 /*
  * Perform expansions on an argument, placing the resulting list of arguments
  * in arglist.  Parameter expansion, command substitution and arithmetic
  * expansion are always performed; additional expansions can be requested
  * via flag (EXP_*).
  * The result is left in the stack string.
  * When arglist is NULL, perform here document expansion.  A partial result
  * may be written to herefd, which is then not included in the stack string.
  *
  * Caution: this function uses global state and is not reentrant.
  * However, a new invocation after an interrupted invocation is safe
  * and will reset the global state for the new call.
  */
 void
 expandarg(union node *arg, struct arglist *arglist, int flag)
 {
 	struct strlist *sp;
 	char *p;
 
 	argbackq = arg->narg.backquote;
 	STARTSTACKSTR(expdest);
 	ifsfirst.next = NULL;
 	ifslastp = NULL;
 	argstr(arg->narg.text, flag);
 	if (arglist == NULL) {
 		return;			/* here document expanded */
 	}
 	STPUTC('\0', expdest);
 	p = grabstackstr(expdest);
 	exparg.lastp = &exparg.list;
 	/*
 	 * TODO - EXP_REDIR
 	 */
 	if (flag & EXP_FULL) {
 		ifsbreakup(p, &exparg);
 		*exparg.lastp = NULL;
 		exparg.lastp = &exparg.list;
 		expandmeta(exparg.list, flag);
 	} else {
 		if (flag & EXP_REDIR) /*XXX - for now, just remove escapes */
 			rmescapes(p);
 		sp = (struct strlist *)stalloc(sizeof (struct strlist));
 		sp->text = p;
 		*exparg.lastp = sp;
 		exparg.lastp = &sp->next;
 	}
 	while (ifsfirst.next != NULL) {
 		struct ifsregion *ifsp;
 		INTOFF;
 		ifsp = ifsfirst.next->next;
 		ckfree(ifsfirst.next);
 		ifsfirst.next = ifsp;
 		INTON;
 	}
 	*exparg.lastp = NULL;
 	if (exparg.list) {
 		*arglist->lastp = exparg.list;
 		arglist->lastp = exparg.lastp;
 	}
 }
 
 
 
 /*
  * Perform parameter expansion, command substitution and arithmetic
  * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
  * Processing ends at a CTLENDVAR character as well as '\0'.
  * This is used to expand word in ${var+word} etc.
  * If EXP_FULL, EXP_CASE or EXP_REDIR are set, keep and/or generate CTLESC
  * characters to allow for further processing.
  * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
  *
  * p is the text to expand and flag a mask of EXP_* bits.  EXP_SPLIT_LIT
  * and EXP_LIT_QUOTED are consumed here and stripped from flag before it
  * is handed on: with EXP_SPLIT_LIT set, every literal character that is
  * copied while not inside quotes is recorded as an IFS region.
  * The output is appended to the stack string at expdest.
  */
 static void
 argstr(char *p, int flag)
 {
 	char c;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);	/* do CTLESC */
 	int firsteq = 1;	/* cleared once a '~' after '=' was expanded */
 	int split_lit;
 	int lit_quoted;
 
 	split_lit = flag & EXP_SPLIT_LIT;
 	lit_quoted = flag & EXP_LIT_QUOTED;
 	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
 	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
 		p = exptilde(p, flag);
 	for (;;) {
 		/*
 		 * NOTE(review): presumably reserves room for the at most
 		 * two bytes that the USTPUTC calls below append per
 		 * iteration -- confirm against the macro definitions.
 		 */
+		CHECKSTRSPACE(2, expdest);
 		switch (c = *p++) {
 		case '\0':
 		case CTLENDVAR:
 			goto breakloop;
 		case CTLQUOTEMARK:
 			lit_quoted = 1;
 			/* "$@" syntax adherence hack */
 			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
 				break;
 			if ((flag & EXP_FULL) != 0)
-				STPUTC(c, expdest);
+				USTPUTC(c, expdest);
 			break;
 		case CTLQUOTEEND:
 			lit_quoted = 0;
 			break;
 		case CTLESC:
 			/* copy the escaped character, keeping CTLESC if asked */
 			if (quotes)
-				STPUTC(c, expdest);
+				USTPUTC(c, expdest);
 			c = *p++;
-			STPUTC(c, expdest);
+			USTPUTC(c, expdest);
 			if (split_lit && !lit_quoted)
 				recordregion(expdest - stackblock() -
 				    (quotes ? 2 : 1),
 				    expdest - stackblock(), 0);
 			break;
 		case CTLVAR:
 			p = evalvar(p, flag);
 			break;
 		case CTLBACKQ:
 		case CTLBACKQ|CTLQUOTE:
 			/* consume the next pending command substitution */
 			expbackq(argbackq->n, c & CTLQUOTE, flag);
 			argbackq = argbackq->next;
 			break;
 		case CTLENDARI:
 			expari(flag);
 			break;
 		case ':':
 		case '=':
 			/*
 			 * sort of a hack - expand tildes in variable
 			 * assignments (after the first '=' and after ':'s).
 			 */
-			STPUTC(c, expdest);
+			USTPUTC(c, expdest);
 			if (split_lit && !lit_quoted)
 				recordregion(expdest - stackblock() - 1,
 				    expdest - stackblock(), 0);
 			if (flag & EXP_VARTILDE && *p == '~' &&
 			    (c != '=' || firsteq)) {
 				if (c == '=')
 					firsteq = 0;
 				p = exptilde(p, flag);
 			}
 			break;
 		default:
 			/* ordinary character: copy it through */
-			STPUTC(c, expdest);
+			USTPUTC(c, expdest);
 			if (split_lit && !lit_quoted)
 				recordregion(expdest - stackblock() - 1,
 				    expdest - stackblock(), 0);
 		}
 	}
 breakloop:;
 }
 
 /*
  * Perform tilde expansion, placing the result in the stack string and
  * returning the next position in the input string to process.
  *
  * p points at the '~'.  The tilde prefix ends at '/', CTLENDVAR, the
  * end of the string, or (with EXP_VARTILDE) at ':'.  If any CTL*
  * character other than CTLENDVAR shows up first, or no non-empty home
  * directory can be found, nothing is written and p is returned
  * unchanged.  With EXP_FULL, EXP_CASE or EXP_REDIR set, characters of
  * the directory that are CCTL in SQSYNTAX are preceded by CTLESC.
  */
 static char *
 exptilde(char *p, int flag)
 {
 	char *tilde = p;
 	char *end, *dir;
 	char saved;
 	struct passwd *pw;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 
 	/* Find the end of the tilde prefix. */
 	for (end = p; (saved = *end) != '\0'; end++) {
 		switch(saved) {
 		case CTLESC: /* This means CTL* are always considered quoted. */
 		case CTLVAR:
 		case CTLBACKQ:
 		case CTLBACKQ | CTLQUOTE:
 		case CTLARI:
 		case CTLENDARI:
 		case CTLQUOTEMARK:
 			return (tilde);
 		case ':':
 			if (flag & EXP_VARTILDE)
 				goto done;
 			break;
 		case '/':
 		case CTLENDVAR:
 			goto done;
 		}
 	}
 done:
 	/* Terminate the name temporarily for the lookup below. */
 	*end = '\0';
 	if (tilde[1] == '\0')
 		dir = lookupvar("HOME");
 	else {
 		pw = getpwnam(tilde + 1);
 		dir = (pw != NULL) ? pw->pw_dir : NULL;
 	}
 	*end = saved;
 	if (dir == NULL || *dir == '\0')
 		return (tilde);
 
 	for (; *dir != '\0'; dir++) {
 		if (quotes && SQSYNTAX[(int)*dir] == CCTL)
 			STPUTC(CTLESC, expdest);
 		STPUTC(*dir, expdest);
 	}
 	return (end);
 }
 
 
 static void
 removerecordregions(int endoff)
 {
 	if (ifslastp == NULL)
 		return;
 
 	if (ifsfirst.endoff > endoff) {
 		while (ifsfirst.next != NULL) {
 			struct ifsregion *ifsp;
 			INTOFF;
 			ifsp = ifsfirst.next->next;
 			ckfree(ifsfirst.next);
 			ifsfirst.next = ifsp;
 			INTON;
 		}
 		if (ifsfirst.begoff > endoff)
 			ifslastp = NULL;
 		else {
 			ifslastp = &ifsfirst;
 			ifsfirst.endoff = endoff;
 		}
 		return;
 	}
 
 	ifslastp = &ifsfirst;
 	while (ifslastp->next && ifslastp->next->begoff < endoff)
 		ifslastp=ifslastp->next;
 	while (ifslastp->next != NULL) {
 		struct ifsregion *ifsp;
 		INTOFF;
 		ifsp = ifslastp->next->next;
 		ckfree(ifslastp->next);
 		ifslastp->next = ifsp;
 		INTON;
 	}
 	if (ifslastp->endoff > endoff)
 		ifslastp->endoff = endoff;
 }
 
 /*
  * Expand arithmetic expression.  Backup to start of expression,
  * evaluate, place result in (backed up) result, adjust string position.
  *
  * The expression text is expected to sit at the end of the stack
  * string, introduced by a CTLARI byte.  flag is the EXP_* mask of the
  * surrounding expansion: if EXP_FULL, EXP_CASE or EXP_REDIR is set, the
  * escapes are removed from the expression before it is evaluated.
  * The formatted result overwrites the expression, starting at the
  * CTLARI byte, and the stack string is shortened to end right after it.
  */
 void
 expari(int flag)
 {
 	char *p, *q, *start;
 	arith_t result;
 	int begoff;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 	int quoted;
 
 	/*
 	 * This routine is slightly over-complicated for
 	 * efficiency.  First we make sure there is
 	 * enough space for the result, which may be bigger
 	 * than the expression.  Next we
 	 * scan backwards looking for the start of arithmetic.  If the
 	 * next previous character is a CTLESC character, then we
 	 * have to rescan starting from the beginning since CTLESC
 	 * characters have to be processed left to right.
 	 */
 	CHECKSTRSPACE(DIGITS(result) - 2, expdest);
 	USTPUTC('\0', expdest);
 	start = stackblock();
 	p = expdest - 2;
 	while (p >= start && *p != CTLARI)
 		--p;
 	if (p < start || *p != CTLARI)
 		error("missing CTLARI (shouldn't happen)");
 	if (p > start && *(p - 1) == CTLESC)
 		for (p = start; *p != CTLARI; p++)
 			if (*p == CTLESC)
 				p++;
 
 	/* the byte after CTLARI tells whether the expansion was quoted */
 	if (p[1] == '"')
 		quoted=1;
 	else
 		quoted=0;
 	begoff = p - start;
 	removerecordregions(begoff);
 	if (quotes)
 		rmescapes(p+2);
 	/* keep the stack string claimed while arith() runs */
 	q = grabstackstr(expdest);
 	result = arith(p+2);
 	ungrabstackstr(q, expdest);
 	fmtstr(p, DIGITS(result), ARITH_FORMAT_STR, result);
 	/* advance p to one past the terminating NUL of the result */
 	while (*p++)
 		;
 	/* an unquoted result is subject to field splitting */
 	if (quoted == 0)
 		recordregion(begoff, p - 1 - start, 0);
 	/* result is reused: number of bytes to drop from the string's end */
 	result = expdest - p + 1;
 	STADJUST(-result, expdest);
 }
 
 
 /*
  * Perform command substitution.
  *
  * cmd is evaluated through evalbackcmd() and its output is appended to
  * the stack string.  NUL bytes in the output are dropped, and newlines
  * are only copied once a later non-newline character shows up, so
  * trailing newlines never reach the result.
  *
  *	quoted	nonzero for a quoted substitution: DQSYNTAX instead of
  *		BASESYNTAX decides which characters count as CCTL, and the
  *		output is not recorded as a region for IFS splitting
  *	flag	EXP_* mask; with EXP_FULL, EXP_CASE or EXP_REDIR set,
  *		CCTL characters in the output are preceded by CTLESC
  *
  * If the command left a job behind (in.jp), it is waited for and
  * exitstatus is set from it.  Runs with interrupts off (INTOFF/INTON).
  */
 static void
 expbackq(union node *cmd, int quoted, int flag)
 {
 	struct backcmd in;
 	int i;
 	char buf[128];
 	char *p;
 	char *dest = expdest;
 	struct ifsregion saveifs, *savelastp;
 	struct nodelist *saveargbackq;
 	char lastc;
 	int startloc = dest - stackblock();
 	char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
 	int saveherefd;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 	int nnl;	/* newlines seen but not yet copied */
 
 	INTOFF;
 	/* save the expansion state around the evaluation of cmd */
 	saveifs = ifsfirst;
 	savelastp = ifslastp;
 	saveargbackq = argbackq;
 	saveherefd = herefd;
 	herefd = -1;
 	p = grabstackstr(dest);
 	evalbackcmd(cmd, &in);
 	ungrabstackstr(p, dest);
 	ifsfirst = saveifs;
 	ifslastp = savelastp;
 	argbackq = saveargbackq;
 	herefd = saveherefd;
 
 	p = in.buf;
 	lastc = '\0';
 	nnl = 0;
 	/* Don't copy trailing newlines */
 	for (;;) {
 		/* in.buf is consumed first; after that, refill from in.fd */
 		if (--in.nleft < 0) {
 			if (in.fd < 0)
 				break;
 			while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
 			TRACE(("expbackq: read returns %d\n", i));
 			if (i <= 0)
 				break;
 			p = buf;
 			in.nleft = i - 1;
 		}
 		lastc = *p++;
 		if (lastc != '\0') {
 			if (quotes && syntax[(int)lastc] == CCTL)
 				STPUTC(CTLESC, dest);
 			if (lastc == '\n') {
 				nnl++;
 			} else {
 				/* flush the newlines that were held back */
 				while (nnl > 0) {
 					nnl--;
 					STPUTC('\n', dest);
 				}
 				STPUTC(lastc, dest);
 			}
 		}
 	}
 
 	if (in.fd >= 0)
 		close(in.fd);
 	if (in.buf)
 		ckfree(in.buf);
 	if (in.jp)
 		exitstatus = waitforjob(in.jp, (int *)NULL);
 	if (quoted == 0)
 		recordregion(startloc, dest - stackblock(), 0);
 	TRACE(("expbackq: size=%td: \"%.*s\"\n",
 		((dest - stackblock()) - startloc),
 		(int)((dest - stackblock()) - startloc),
 		stackblock() + startloc));
 	expdest = dest;
 	INTON;
 }
 
 
 
 /*
  * Expand the word of a ${var<op>word} substitution and apply the
  * operation selected by subtype.
  *
  *	p		input text of the word (up to CTLENDVAR)
  *	str		variable name for VSASSIGN/VSQUESTION; NULL for the
  *			trim operations, in which case the expanded pattern
  *			is found at offset strloc in the stack string
  *	strloc		see str
  *	subtype		VSASSIGN, VSQUESTION or one of the VSTRIM* values;
  *			anything else aborts
  *	startloc	offset in the stack string where the value of the
  *			variable starts
  *	varflags	variable flags; only VSNUL is looked at here, to
  *			word the VSQUESTION error message
  *	quotes		passed to patmatch(); when nonzero, CTLESC bytes in
  *			the value are stepped over together with the
  *			character that follows them
  *
  * Returns 1 when the stack string was changed (assignment done, or a
  * trim pattern matched) and 0 when a trim pattern did not match.
  * VSQUESTION does not return normally: it raises an error.
  */
 static int
 subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
   int varflags, int quotes)
 {
 	char *startp;
 	char *loc = NULL;
 	char *q;
 	int c = 0;
 	int saveherefd = herefd;
 	struct nodelist *saveargbackq = argbackq;
 	int amount;
 
 	/* expand the word onto the end of the stack string */
 	herefd = -1;
 	argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
 	    subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
 	    EXP_CASE : 0) | EXP_TILDE);
 	STACKSTRNUL(expdest);
 	herefd = saveherefd;
 	argbackq = saveargbackq;
 	startp = stackblock() + startloc;
 	if (str == NULL)
 	    str = stackblock() + strloc;
 
 	switch (subtype) {
 	case VSASSIGN:
 		/* assign the expanded word, then drop it from the string */
 		setvar(str, startp, 0);
 		amount = startp - expdest;
 		STADJUST(amount, expdest);
 		varflags &= ~VSNUL;
 		/* NOTE(review): c is still 0 on this path, so this never runs */
 		if (c != 0)
 			*loc = c;
 		return 1;
 
 	case VSQUESTION:
 		/* a non-empty word is printed as the message */
 		if (*p != CTLENDVAR) {
 			outfmt(out2, "%s\n", startp);
 			error((char *)NULL);
 		}
 		error("%.*s: parameter %snot set", (int)(p - str - 1),
 		      str, (varflags & VSNUL) ? "null or "
 					      : nullstr);
 		return 0;
 
 	case VSTRIMLEFT:
 		/* try prefixes of the value, shortest first */
 		for (loc = startp; loc < str; loc++) {
 			c = *loc;
 			*loc = '\0';
 			if (patmatch(str, startp, quotes)) {
 				*loc = c;
 				goto recordleft;
 			}
 			*loc = c;
 			if (quotes && *loc == CTLESC)
 				loc++;
 		}
 		return 0;
 
 	case VSTRIMLEFTMAX:
 		/* try prefixes of the value, longest first */
 		for (loc = str - 1; loc >= startp;) {
 			c = *loc;
 			*loc = '\0';
 			if (patmatch(str, startp, quotes)) {
 				*loc = c;
 				goto recordleft;
 			}
 			*loc = c;
 			loc--;
 			/* rescan from the start to avoid splitting a CTLESC pair */
 			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
 				for (q = startp; q < loc; q++)
 					if (*q == CTLESC)
 						q++;
 				if (q > loc)
 					loc--;
 			}
 		}
 		return 0;
 
 	case VSTRIMRIGHT:
 		/* try suffixes of the value, shortest first */
 		for (loc = str - 1; loc >= startp;) {
 			if (patmatch(str, loc, quotes)) {
 				/* cut the stack string off where the suffix starts */
 				amount = loc - expdest;
 				STADJUST(amount, expdest);
 				return 1;
 			}
 			loc--;
 			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
 				for (q = startp; q < loc; q++)
 					if (*q == CTLESC)
 						q++;
 				if (q > loc)
 					loc--;
 			}
 		}
 		return 0;
 
 	case VSTRIMRIGHTMAX:
 		/* try suffixes of the value, longest first */
 		for (loc = startp; loc < str - 1; loc++) {
 			if (patmatch(str, loc, quotes)) {
 				amount = loc - expdest;
 				STADJUST(amount, expdest);
 				return 1;
 			}
 			if (quotes && *loc == CTLESC)
 				loc++;
 		}
 		return 0;
 
 
 	default:
 		abort();
 	}
 
 recordleft:
 	/* a prefix matched: move what follows it down to startp */
 	amount = ((str - 1) - (loc - startp)) - expdest;
 	STADJUST(amount, expdest);
 	while (loc != str - 1)
 		*startp++ = *loc++;
 	return 1;
 }
 
 
 /*
  * Expand a variable, and return a pointer to the next character in the
  * input string.
  *
  * On entry p points at the flags byte that follows CTLVAR; the name
  * comes next and is terminated by '='.  flag is the EXP_* mask of the
  * surrounding expansion.  The value (or the result of the substitution
  * operation encoded in the flags byte) is appended to the stack string,
  * and where appropriate the new text is recorded as a region for IFS
  * splitting.
  */
 
 static char *
 evalvar(char *p, int flag)
 {
 	int subtype;
 	int varflags;
 	char *var;
 	char *val;
 	int patloc;
 	int c;
 	int set;
 	int special;
 	int startloc;
 	int varlen;
 	int easy;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 
 	varflags = (unsigned char)*p++;
 	subtype = varflags & VSTYPE;
 	var = p;
 	/* names not starting with a name character are handled specially */
 	special = 0;
 	if (! is_name(*p))
 		special = 1;
 	/* NOTE(review): relies on an '=' always following the name */
 	p = strchr(p, '=') + 1;
 again: /* jump here after setting a variable with ${var=text} */
 	if (varflags & VSLINENO) {
 		set = 1;
 		special = 0;
 		val = var;
 		p[-1] = '\0';	/* temporarily overwrite '=' to have \0
 				   terminated string */
 	} else if (special) {
 		set = varisset(var, varflags & VSNUL);
 		val = NULL;
 	} else {
 		val = bltinlookup(var, 1);
 		/* with VSNUL an empty value counts as unset */
 		if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
 			val = NULL;
 			set = 0;
 		} else
 			set = 1;
 	}
 	varlen = 0;
 	startloc = expdest - stackblock();
 	/* with uflag, using an unset parameter (other than @ and *) fails */
 	if (!set && uflag && *var != '@' && *var != '*') {
 		switch (subtype) {
 		case VSNORMAL:
 		case VSTRIMLEFT:
 		case VSTRIMLEFTMAX:
 		case VSTRIMRIGHT:
 		case VSTRIMRIGHTMAX:
 		case VSLENGTH:
 			error("%.*s: parameter not set", (int)(p - var - 1),
 			    var);
 		}
 	}
 	if (set && subtype != VSPLUS) {
 		/* insert the value of the variable */
 		if (special) {
 			varvalue(var, varflags & VSQUOTE, subtype, flag);
 			if (subtype == VSLENGTH) {
 				/* only the length is wanted: measure, then remove */
 				varlen = expdest - stackblock() - startloc;
 				STADJUST(-varlen, expdest);
 			}
 		} else {
 			char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
 								  : BASESYNTAX;
 
 			if (subtype == VSLENGTH) {
 				for (;*val; val++)
 					varlen++;
 			}
 			else {
 				while (*val) {
 					if (quotes &&
 					    syntax[(int)*val] == CCTL)
 						STPUTC(CTLESC, expdest);
 					STPUTC(*val++, expdest);
 				}
 
 			}
 		}
 	}
 
 	/* ${var+word} uses the word exactly when the others would not */
 	if (subtype == VSPLUS)
 		set = ! set;
 
 	/* easy: the inserted text may be recorded as an IFS region */
 	easy = ((varflags & VSQUOTE) == 0 ||
 		(*var == '@' && shellparam.nparam != 1));
 
 
 	switch (subtype) {
 	case VSLENGTH:
 		expdest = cvtnum(varlen, expdest);
 		goto record;
 
 	case VSNORMAL:
 		if (!easy)
 			break;
 record:
 		recordregion(startloc, expdest - stackblock(),
 			     varflags & VSQUOTE);
 		break;
 
 	case VSPLUS:
 	case VSMINUS:
 		if (!set) {
 			/* expand the alternative word in place of the value */
 			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
 			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
 			break;
 		}
 		if (easy)
 			goto record;
 		break;
 
 	case VSTRIMLEFT:
 	case VSTRIMLEFTMAX:
 	case VSTRIMRIGHT:
 	case VSTRIMRIGHTMAX:
 		if (!set)
 			break;
 		/*
 		 * Terminate the string and start recording the pattern
 		 * right after it
 		 */
 		STPUTC('\0', expdest);
 		patloc = expdest - stackblock();
 		if (subevalvar(p, NULL, patloc, subtype,
 		    startloc, varflags, quotes) == 0) {
 			/* no match: drop the pattern and the NUL before it */
 			int amount = (expdest - stackblock() - patloc) + 1;
 			STADJUST(-amount, expdest);
 		}
 		/* Remove any recorded regions beyond start of variable */
 		removerecordregions(startloc);
 		goto record;
 
 	case VSASSIGN:
 	case VSQUESTION:
 		if (!set) {
 			if (subevalvar(p, var, 0, subtype, startloc, varflags,
 			    quotes)) {
 				varflags &= ~VSNUL;
 				/*
 				 * Remove any recorded regions beyond
 				 * start of variable
 				 */
 				removerecordregions(startloc);
 				goto again;
 			}
 			break;
 		}
 		if (easy)
 			goto record;
 		break;
 
 	case VSERROR:
 		c = p - var - 1;
 		error("${%.*s%s}: Bad substitution", c, var,
 		    (c > 0 && *p != CTLENDVAR) ? "..." : "");
 
 	default:
 		abort();
 	}
 	p[-1] = '=';	/* recover overwritten '=' */
 
 	if (subtype != VSNORMAL) {	/* skip to end of alternative */
 		int nesting = 1;
 		for (;;) {
 			if ((c = *p++) == CTLESC)
 				p++;
 			else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
 				/* step past a backquote the word did not consume */
 				if (set)
 					argbackq = argbackq->next;
 			} else if (c == CTLVAR) {
 				if ((*p++ & VSTYPE) != VSNORMAL)
 					nesting++;
 			} else if (c == CTLENDVAR) {
 				if (--nesting == 0)
 					break;
 			}
 		}
 	}
 	return p;
 }
 
 
 
 /*
  * Test whether a specialized variable is set.
  */
 
 static int
 varisset(char *name, int nulok)
 {
 	char first = *name;
 
 	/* $! is set once a background pid is recorded. */
 	if (first == '!')
 		return backgndpidset();
 
 	if (first == '@' || first == '*') {
 		char **argp;
 
 		/* No positional parameters at all: unset. */
 		if (*shellparam.p == NULL)
 			return 0;
 		if (!nulok)
 			return 1;
 		/* With nulok, at least one parameter must be non-empty. */
 		for (argp = shellparam.p; *argp != NULL; argp++) {
 			if (**argp != '\0')
 				return 1;
 		}
 		return 0;
 	}
 
 	if (is_digit(first)) {
 		int idx = atoi(name);
 		char *value;
 
 		if (idx > shellparam.nparam)
 			return 0;
 		/* Index 0 names arg0; the rest are positional parameters. */
 		value = (idx == 0) ? arg0 : shellparam.p[idx - 1];
 		if (nulok && (value == NULL || *value == '\0'))
 			return 0;
 	}
 	return 1;
 }
 
 
 
 /*
  * Add the value of a specialized variable to the stack string.
  */
 
 static void
 varvalue(char *name, int quoted, int subtype, int flag)
 {
 	int num;
 	char *p;
 	int i;
 	char sep;
 	char **ap;
 	char const *syntax;
 
 /*
  * Append the NUL-terminated string p to expdest.  When flag has EXP_FULL
  * or EXP_CASE set and subtype is not VSLENGTH, every character classified
  * as CCTL in the selected syntax table (DQSYNTAX if quoted, BASESYNTAX
  * otherwise) is preceded by CTLESC; otherwise the string is copied as is.
  */
 #define STRTODEST(p) \
 	do {\
 	if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH) { \
 		syntax = quoted? DQSYNTAX : BASESYNTAX; \
 		while (*p) { \
 			if (syntax[(int)*p] == CCTL) \
 				STPUTC(CTLESC, expdest); \
 			STPUTC(*p++, expdest); \
 		} \
 	} else \
-		while (*p) \
-			STPUTC(*p++, expdest); \
+		STPUTS(p, expdest); \
 	} while (0)
 
 
 	/* The first character of name selects the special parameter. */
 	switch (*name) {
 	case '$':
 		num = rootpid;
 		goto numvar;
 	case '?':
 		num = oexitstatus;
 		goto numvar;
 	case '#':
 		num = shellparam.nparam;
 		goto numvar;
 	case '!':
 		num = backgndpidval();
 numvar:
 		/* All numeric parameters are emitted in decimal via cvtnum(). */
 		expdest = cvtnum(num, expdest);
 		break;
 	case '-':
 		/* Emit the letter of every option whose value is non-zero. */
 		for (i = 0 ; i < NOPTS ; i++) {
 			if (optlist[i].val)
 				STPUTC(optlist[i].letter, expdest);
 		}
 		break;
 	case '@':
 		if (flag & EXP_FULL && quoted) {
 			/*
 			 * Quoted $@ under EXP_FULL: parameters are separated
 			 * by NUL bytes rather than by an IFS character.
 			 */
 			for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
 				STRTODEST(p);
 				if (*ap)
 					STPUTC('\0', expdest);
 			}
 			break;
 		}
 		/* FALLTHROUGH */
 	case '*':
 		/*
 		 * Join the parameters with the first character of IFS, or a
 		 * space when IFS is unset.  A NUL separator (empty IFS) means
 		 * nothing is inserted between parameters.
 		 */
 		if (ifsset())
 			sep = ifsval()[0];
 		else
 			sep = ' ';
 		for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
 			STRTODEST(p);
 			if (*ap && sep)
 				STPUTC(sep, expdest);
 		}
 		break;
 	case '0':
 		p = arg0;
 		STRTODEST(p);
 		break;
 	default:
 		/* Positional parameter; out-of-range numbers add nothing. */
 		if (is_digit(*name)) {
 			num = atoi(name);
 			if (num > 0 && num <= shellparam.nparam) {
 				p = shellparam.p[num - 1];
 				STRTODEST(p);
 			}
 		}
 		break;
 	}
 }
 
 
 
 /*
  * Record the fact that we have to scan this region of the
  * string for IFS characters.
  */
 
 static void
 recordregion(int start, int end, int inquotes)
 {
 	struct ifsregion *ifsp;
 
 	if (ifslastp == NULL) {
 		ifsp = &ifsfirst;
 	} else {
 		if (ifslastp->endoff == start
 		    && ifslastp->inquotes == inquotes) {
 			/* extend previous area */
 			ifslastp->endoff = end;
 			return;
 		}
 		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
 		ifslastp->next = ifsp;
 	}
 	ifslastp = ifsp;
 	ifslastp->next = NULL;
 	ifslastp->begoff = start;
 	ifslastp->endoff = end;
 	ifslastp->inquotes = inquotes;
 }
 
 
 
 /*
  * Break the argument string into pieces based upon IFS and add the
  * strings to the argument list.  The regions of the string to be
  * searched for IFS characters have been stored by recordregion.
  * CTLESC characters are preserved but have little effect in this pass
  * other than escaping CTL* characters.  In particular, they do not escape
  * IFS characters: that should be done with the ifsregion mechanism.
  * CTLQUOTEMARK characters are used to preserve empty quoted strings.
  * This pass treats them as a regular character, making the string non-empty.
  * Later, they are removed along with the other CTL* characters.
  */
 static void
 ifsbreakup(char *string, struct arglist *arglist)
 {
 	struct ifsregion *ifsp;
 	struct strlist *sp;
 	char *start;		/* beginning of the argument being built */
 	char *p;		/* scan position */
 	char *q;		/* position of p before skipping a CTLESC */
 	const char *ifs;
 	const char *ifsspc;	/* non-NULL if the separator is IFS whitespace */
 	int had_param_ch = 0;	/* set when a non-separator character was seen */
 
 	start = string;
 
 	if (ifslastp == NULL) {
 		/* Return entire argument, IFS doesn't apply to any of it */
 		sp = (struct strlist *)stalloc(sizeof *sp);
 		sp->text = start;
 		*arglist->lastp = sp;
 		arglist->lastp = &sp->next;
 		return;
 	}
 
 	/* An unset IFS behaves as space, tab, newline. */
 	ifs = ifsset() ? ifsval() : " \t\n";
 
 	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
 		p = string + ifsp->begoff;
 		while (p < string + ifsp->endoff) {
 			/*
 			 * Remember where this character starts so that a
 			 * preceding CTLESC is overwritten too when the
 			 * argument is terminated at q.
 			 */
 			q = p;
 			if (*p == CTLESC)
 				p++;
 			if (ifsp->inquotes) {
 				/* Only NULs (should be from "$@") end args */
 				had_param_ch = 1;
 				if (*p != 0) {
 					p++;
 					continue;
 				}
 				ifsspc = NULL;
 			} else {
 				if (!strchr(ifs, *p)) {
 					had_param_ch = 1;
 					p++;
 					continue;
 				}
 				ifsspc = strchr(" \t\n", *p);
 
 				/* Ignore IFS whitespace at start */
 				if (q == start && ifsspc != NULL) {
 					p++;
 					start = p;
 					continue;
 				}
 				had_param_ch = 0;
 			}
 
 			/* Save this argument... */
 			*q = '\0';
 			sp = (struct strlist *)stalloc(sizeof *sp);
 			sp->text = start;
 			*arglist->lastp = sp;
 			arglist->lastp = &sp->next;
 			p++;
 
 			if (ifsspc != NULL) {
 				/* Ignore further trailing IFS whitespace */
 				for (; p < string + ifsp->endoff; p++) {
 					q = p;
 					if (*p == CTLESC)
 						p++;
 					/* Not an IFS character: back up and stop. */
 					if (strchr(ifs, *p) == NULL) {
 						p = q;
 						break;
 					}
 					/*
 					 * A non-whitespace IFS character is
 					 * consumed once and ends the run.
 					 */
 					if (strchr(" \t\n", *p) == NULL) {
 						p++;
 						break;
 					}
 				}
 			}
 			start = p;
 		}
 	}
 
 	/*
 	 * Save anything left as an argument.
 	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
 	 * generating 2 arguments, the second of which is empty.
 	 * Some recent clarification of the Posix spec say that it
 	 * should only generate one....
 	 */
 	if (had_param_ch || *start != 0) {
 		sp = (struct strlist *)stalloc(sizeof *sp);
 		sp->text = start;
 		*arglist->lastp = sp;
 		arglist->lastp = &sp->next;
 	}
 }
 
 
 static char expdir[PATH_MAX];
 #define expdir_end (expdir + sizeof(expdir))
 
 /*
  * Perform pathname generation and remove control characters.
  * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
  * The results are stored in the list exparg.
  */
 static void
 expandmeta(struct strlist *str, int flag __unused)
 {
 	struct strlist **savelastp;
 	struct strlist *sp;
 	char *p;
 	int hasmeta;
 	/* TODO - EXP_REDIR */
 
 	for (; str != NULL; str = str->next) {
 		/* Quick scan for *, ? or [; skipped entirely when fflag is set. */
 		hasmeta = 0;
 		if (!fflag) {
 			for (p = str->text; *p != '\0'; p++) {
 				if (*p == '*' || *p == '?' || *p == '[') {
 					hasmeta = 1;
 					break;
 				}
 			}
 		}
 
 		if (hasmeta) {
 			savelastp = exparg.lastp;
 			INTOFF;
 			expmeta(expdir, str->text);
 			INTON;
 			if (exparg.lastp != savelastp) {
 				/* Matches were appended: sort them, find the tail. */
 				*exparg.lastp = NULL;
 				*savelastp = sp = expsort(*savelastp);
 				while (sp->next != NULL)
 					sp = sp->next;
 				exparg.lastp = &sp->next;
 				continue;
 			}
 		}
 
 		/* No pattern or no match: keep the word, minus its escapes. */
 		*exparg.lastp = str;
 		rmescapes(str->text);
 		exparg.lastp = &str->next;
 	}
 }
 
 
 /*
  * Do metacharacter (i.e. *, ?, [...]) expansion.
  */
 
 /*
  * enddir points into expdir just past the directory prefix built so far;
  * name is the remaining (still escaped) pattern.  Matches are appended to
  * exparg through addfname().  The function recurses once per matching
  * directory entry when more pattern components follow.
  */
 static void
 expmeta(char *enddir, char *name)
 {
 	char *p;
 	char *q;
 	char *start;
 	char *endname;
 	int metaflag;
 	struct stat statb;
 	DIR *dirp;
 	struct dirent *dp;
 	int atend;
 	int matchdot;
 	int esc;
 
 	/*
 	 * Find the first path component that contains a metacharacter.
 	 * start is left at the beginning of that component and p at the
 	 * '/' (or its CTLESC) that ends it, or at the terminating NUL.
 	 */
 	metaflag = 0;
 	start = name;
 	for (p = name; esc = 0, *p; p += esc + 1) {
 		if (*p == '*' || *p == '?')
 			metaflag = 1;
 		else if (*p == '[') {
 			/* '[' only counts if a closing ']' precedes '/' or NUL. */
 			q = p + 1;
 			if (*q == '!' || *q == '^')
 				q++;
 			for (;;) {
 				while (*q == CTLQUOTEMARK)
 					q++;
 				if (*q == CTLESC)
 					q++;
 				if (*q == '/' || *q == '\0')
 					break;
 				if (*++q == ']') {
 					metaflag = 1;
 					break;
 				}
 			}
 		} else if (*p == '\0')
 			break;
 		else if (*p == CTLQUOTEMARK)
 			continue;
 		else {
 			/* esc is 1 when the character at p is CTLESC-escaped. */
 			if (*p == CTLESC)
 				esc++;
 			if (p[esc] == '/') {
 				if (metaflag)
 					break;
 				start = p + esc + 1;
 			}
 		}
 	}
 	if (metaflag == 0) {	/* we've reached the end of the file name */
 		/*
 		 * No metacharacters remain.  Copy the rest of the name into
 		 * expdir with CTLQUOTEMARK and CTLESC stripped.  At the top
 		 * level the name is added unconditionally; in a recursive
 		 * call it is added only if lstat() succeeds.
 		 */
 		if (enddir != expdir)
 			metaflag++;
 		for (p = name ; ; p++) {
 			if (*p == CTLQUOTEMARK)
 				continue;
 			if (*p == CTLESC)
 				p++;
 			*enddir++ = *p;
 			if (*p == '\0')
 				break;
 			/* Give up silently when expdir is full. */
 			if (enddir == expdir_end)
 				return;
 		}
 		if (metaflag == 0 || lstat(expdir, &statb) >= 0)
 			addfname(expdir);
 		return;
 	}
 	endname = p;
 	if (start != name) {
 		/* Copy the literal leading components into expdir. */
 		p = name;
 		while (p < start) {
 			while (*p == CTLQUOTEMARK)
 				p++;
 			if (*p == CTLESC)
 				p++;
 			*enddir++ = *p++;
 			if (enddir == expdir_end)
 				return;
 		}
 	}
 	/* Choose the directory to read: ".", "/" or the prefix in expdir. */
 	if (enddir == expdir) {
 		p = ".";
 	} else if (enddir == expdir + 1 && *expdir == '/') {
 		p = "/";
 	} else {
 		p = expdir;
 		/* Temporarily replace the trailing '/' with NUL for opendir(). */
 		enddir[-1] = '\0';
 	}
 	if ((dirp = opendir(p)) == NULL)
 		return;
 	if (enddir != expdir)
 		enddir[-1] = '/';
 	if (*endname == 0) {
 		atend = 1;
 	} else {
 		/*
 		 * Terminate the current component in place and point endname
 		 * at the remainder of the pattern; the overwritten character
 		 * is restored before returning.
 		 */
 		atend = 0;
 		*endname = '\0';
 		endname += esc + 1;
 	}
 	/* Entries starting with '.' match only if the pattern starts with '.'. */
 	matchdot = 0;
 	p = start;
 	while (*p == CTLQUOTEMARK)
 		p++;
 	if (*p == CTLESC)
 		p++;
 	if (*p == '.')
 		matchdot++;
 	while (! int_pending() && (dp = readdir(dirp)) != NULL) {
 		if (dp->d_name[0] == '.' && ! matchdot)
 			continue;
 		if (patmatch(start, dp->d_name, 0)) {
 			/* Skip entries that would not fit in expdir. */
 			if (enddir + dp->d_namlen + 1 > expdir_end)
 				continue;
 			memcpy(enddir, dp->d_name, dp->d_namlen + 1);
 			if (atend)
 				addfname(expdir);
 			else {
 				if (enddir + dp->d_namlen + 2 > expdir_end)
 					continue;
 				enddir[dp->d_namlen] = '/';
 				enddir[dp->d_namlen + 1] = '\0';
 				expmeta(enddir + dp->d_namlen + 1, endname);
 			}
 		}
 	}
 	closedir(dirp);
 	/* Restore the character overwritten when the component was cut. */
 	if (! atend)
 		endname[-esc - 1] = esc ? CTLESC : '/';
 }
 
 
 /*
  * Add a file name to the list.
  */
 
 static void
 addfname(char *name)
 {
 	struct strlist *entry;
 	char *copy;
 
 	/* Copy the name into stalloc'ed storage, then append a node for it. */
 	copy = stalloc(strlen(name) + 1);
 	scopy(name, copy);
 	entry = (struct strlist *)stalloc(sizeof *entry);
 	entry->text = copy;
 	*exparg.lastp = entry;
 	exparg.lastp = &entry->next;
 }
 
 
 /*
  * Sort the results of file name expansion.  It calculates the number of
  * strings to sort and then calls msort (short for merge sort) to do the
  * work.
  */
 
 static struct strlist *
 expsort(struct strlist *str)
 {
 	struct strlist *node = str;
 	int count = 0;
 
 	/* msort() needs the list length up front. */
 	while (node != NULL) {
 		count++;
 		node = node->next;
 	}
 	return msort(str, count);
 }
 
 
 /*
  * Merge sort a list of len nodes by strcmp() on their text and return
  * the new head.
  */
 static struct strlist *
 msort(struct strlist *list, int len)
 {
 	struct strlist *left, *right;
 	struct strlist *cut = NULL;
 	struct strlist **tailp;
 	int half;
 	int i;
 
 	if (len <= 1)
 		return list;
 
 	/* Walk to the midpoint and split the list there. */
 	half = len >> 1;
 	right = list;
 	for (i = 0; i < half; i++) {
 		cut = right;
 		right = right->next;
 	}
 	cut->next = NULL;
 
 	left = msort(list, half);
 	right = msort(right, len - half);
 
 	/*
 	 * Merge.  A node from the second half is taken only when it is
 	 * strictly smaller, so equal strings keep their original order.
 	 */
 	tailp = &list;
 	while (left != NULL && right != NULL) {
 		if (strcmp(right->text, left->text) < 0) {
 			*tailp = right;
 			tailp = &right->next;
 			right = right->next;
 		} else {
 			*tailp = left;
 			tailp = &left->next;
 			left = left->next;
 		}
 	}
 	/* Exactly one side still has nodes; append it whole. */
 	*tailp = (left != NULL) ? left : right;
 	return list;
 }
 
 
 
 /*
  * Returns true if the pattern matches the string.
  */
 
 /*
  * pattern may contain CTLESC (next character is literal) and CTLQUOTEMARK
  * (ignored) in addition to the metacharacters *, ? and [...].  When
  * squoted is non-zero, CTLESC characters in string are skipped before the
  * character they precede is compared.  Returns 1 on a match, 0 otherwise.
  */
 int
 patmatch(const char *pattern, const char *string, int squoted)
 {
 	const char *p, *q;
 	char c;
 
 	p = pattern;
 	q = string;
 	for (;;) {
 		switch (c = *p++) {
 		case '\0':
 			goto breakloop;
 		case CTLESC:
 			/* Escaped pattern character: must match literally. */
 			if (squoted && *q == CTLESC)
 				q++;
 			if (*q++ != *p++)
 				return 0;
 			break;
 		case CTLQUOTEMARK:
 			continue;
 		case '?':
 			/* Any single character, but not the end of string. */
 			if (squoted && *q == CTLESC)
 				q++;
 			if (*q++ == '\0')
 				return 0;
 			break;
 		case '*':
 			c = *p;
 			while (c == CTLQUOTEMARK || c == '*')
 				c = *++p;
 			/*
 			 * If the pattern continues with an ordinary character,
 			 * skip ahead in the string to its first occurrence
 			 * before trying the recursive match.
 			 */
 			if (c != CTLESC &&  c != CTLQUOTEMARK &&
 			    c != '?' && c != '*' && c != '[') {
 				while (*q != c) {
 					if (squoted && *q == CTLESC &&
 					    q[1] == c)
 						break;
 					if (*q == '\0')
 						return 0;
 					if (squoted && *q == CTLESC)
 						q++;
 					q++;
 				}
 			}
 			/* Try the rest of the pattern at each remaining position. */
 			do {
 				if (patmatch(p, q, squoted))
 					return 1;
 				if (squoted && *q == CTLESC)
 					q++;
 			} while (*q++ != '\0');
 			return 0;
 		case '[': {
 			const char *endp;
 			int invert, found;
 			char chr;
 
 			/*
 			 * Look for the closing ']'.  Without one, the '[' is
 			 * matched as an ordinary character via dft.
 			 */
 			endp = p;
 			if (*endp == '!' || *endp == '^')
 				endp++;
 			for (;;) {
 				while (*endp == CTLQUOTEMARK)
 					endp++;
 				if (*endp == '\0')
 					goto dft;		/* no matching ] */
 				if (*endp == CTLESC)
 					endp++;
 				if (*++endp == ']')
 					break;
 			}
 			invert = 0;
 			if (*p == '!' || *p == '^') {
 				invert++;
 				p++;
 			}
 			found = 0;
 			chr = *q++;
 			if (squoted && chr == CTLESC)
 				chr = *q++;
 			if (chr == '\0')
 				return 0;
 			c = *p++;
 			do {
 				if (c == CTLQUOTEMARK)
 					continue;
 				if (c == CTLESC)
 					c = *p++;
 				if (*p == '-' && p[1] != ']') {
 					/* Range c-*p, compared with collate_range_cmp(). */
 					p++;
 					while (*p == CTLQUOTEMARK)
 						p++;
 					if (*p == CTLESC)
 						p++;
 					if (   collate_range_cmp(chr, c) >= 0
 					    && collate_range_cmp(chr, *p) <= 0
 					   )
 						found = 1;
 					p++;
 				} else {
 					if (chr == c)
 						found = 1;
 				}
 			} while ((c = *p++) != ']');
 			/* Fail if membership disagrees with the inversion flag. */
 			if (found == invert)
 				return 0;
 			break;
 		}
 dft:	        default:
 			if (squoted && *q == CTLESC)
 				q++;
 			if (*q++ != c)
 				return 0;
 			break;
 		}
 	}
 breakloop:
 	/* The pattern is exhausted; the string must be as well. */
 	if (*q != '\0')
 		return 0;
 	return 1;
 }
 
 
 
 /*
  * Remove any CTLESC and CTLQUOTEMARK characters from a string.
  */
 
 void
 rmescapes(char *str)
 {
 	char *src, *dst;
 
 	/* Find the first control character; nothing to do if there is none. */
 	for (src = str;
 	    *src != CTLESC && *src != CTLQUOTEMARK && *src != CTLQUOTEEND;
 	    src++) {
 		if (*src == '\0')
 			return;
 	}
 
 	/* Compact the remainder of the string in place. */
 	for (dst = src; *src != '\0'; ) {
 		if (*src == CTLQUOTEMARK || *src == CTLQUOTEEND) {
 			src++;
 			continue;
 		}
 		/* A CTLESC is dropped and the character after it is kept. */
 		if (*src == CTLESC)
 			src++;
 		*dst++ = *src++;
 	}
 	*dst = '\0';
 }
 
 
 
 /*
  * See if a pattern matches in a case statement.
  */
 
 int
 casematch(union node *pattern, const char *val)
 {
 	struct stackmark smark;
 	int result;
 	char *p;
 
 	setstackmark(&smark);
 	argbackq = pattern->narg.backquote;
 	STARTSTACKSTR(expdest);
 	ifslastp = NULL;
 	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
 	STPUTC('\0', expdest);
 	p = grabstackstr(expdest);
 	result = patmatch(p, val, 0);
 	popstackmark(&smark);
 	return result;
 }
 
 /*
  * Our own itoa().
  */
 
 static char *
 cvtnum(int num, char *buf)
 {
 	char temp[32];
 	int neg = num < 0;
 	/*
 	 * Convert the magnitude as an unsigned value.  With a negative
 	 * int, num % 10 is negative and would produce characters below
 	 * '0'; the unsigned negation is also well defined for INT_MIN.
 	 */
 	unsigned int mag = neg ? 0U - (unsigned int)num : (unsigned int)num;
 	char *p = temp + 31;
 
 	temp[31] = '\0';
 
 	/* Digits are produced least significant first, filling temp backwards. */
 	do {
 		*--p = mag % 10 + '0';
 	} while ((mag /= 10) != 0);
 
 	if (neg)
 		*--p = '-';
 
-	while (*p)
-		STPUTC(*p++, buf);
+	STPUTS(p, buf);
 	return buf;
 }
 
 /*
  * Do most of the work for wordexp(3).
  */
 
 int
 wordexpcmd(int argc, char **argv)
 {
 	size_t total = 0;
 	int n;
 
 	/* Word count, then combined length, each as eight hex digits. */
 	out1fmt("%08x", argc - 1);
 	for (n = 1; n < argc; n++)
 		total += strlen(argv[n]);
 	out1fmt("%08x", (int)total);
 
 	/* Then every word, including its terminating NUL. */
 	n = 1;
 	while (n < argc) {
 		outbin(argv[n], strlen(argv[n]) + 1, out1);
 		n++;
 	}
 	return (0);
 }
Index: projects/binutils-2.17/bin/sh/histedit.c
===================================================================
--- projects/binutils-2.17/bin/sh/histedit.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/histedit.c	(revision 215830)
@@ -1,513 +1,512 @@
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)histedit.c	8.2 (Berkeley) 5/4/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <limits.h>
 #include <paths.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 /*
  * Editline and history functions (and glue).
  */
 #include "shell.h"
 #include "parser.h"
 #include "var.h"
 #include "options.h"
 #include "main.h"
 #include "output.h"
 #include "mystring.h"
 #ifndef NO_HISTORY
 #include "myhistedit.h"
 #include "error.h"
 #include "eval.h"
 #include "memalloc.h"
 
 #define MAXHISTLOOPS	4	/* max recursions through fc */
 #define DEFEDITOR	"ed"	/* default editor *should* be $EDITOR */
 
 History *hist;	/* history cookie */
 EditLine *el;	/* editline cookie */
 int displayhist;
 static FILE *el_in, *el_out, *el_err;
 
 static char *fc_replace(const char *, char *, char *);
 
 /*
  * Set history and editing status.  Called whenever the status may
  * have changed (figures out what to do).
  */
 void
 histedit(void)
 {
 
 /* Line editing is wanted when either Eflag or Vflag is set. */
 #define editing (Eflag || Vflag)
 
 	if (iflag) {
 		if (!hist) {
 			/*
 			 * turn history on
 			 */
 			INTOFF;
 			hist = history_init();
 			INTON;
 
 			if (hist != NULL)
 				sethistsize(histsizeval());
 			else
 				out2fmt_flush("sh: can't initialize history\n");
 		}
 		if (editing && !el && isatty(0)) { /* && isatty(2) ??? */
 			/*
 			 * turn editing on
 			 */
 			char *term;
 
 			INTOFF;
 			/*
 			 * The streams are opened once and reused on later calls.
 			 * NOTE(review): el_err is opened on fd 1 and el_out on
 			 * fd 2, which looks swapped relative to the names --
 			 * confirm this is intentional.
 			 */
 			if (el_in == NULL)
 				el_in = fdopen(0, "r");
 			if (el_err == NULL)
 				el_err = fdopen(1, "w");
 			if (el_out == NULL)
 				el_out = fdopen(2, "w");
 			if (el_in == NULL || el_err == NULL || el_out == NULL)
 				goto bad;
 			/* Export the shell's TERM value to the environment. */
 			term = lookupvar("TERM");
 			if (term)
 				setenv("TERM", term, 1);
 			else
 				unsetenv("TERM");
 			el = el_init(arg0, el_in, el_out, el_err);
 			if (el != NULL) {
 				if (hist)
 					el_set(el, EL_HIST, history, hist);
 				el_set(el, EL_PROMPT, getprompt);
 				el_set(el, EL_ADDFN, "sh-complete",
 				    "Filename completion",
 				    _el_fn_sh_complete);
 			} else {
 bad:
 				out2fmt_flush("sh: can't initialize editing\n");
 			}
 			INTON;
 		} else if (!editing && el) {
 			/* Editing was switched off: tear the editor down. */
 			INTOFF;
 			el_end(el);
 			el = NULL;
 			INTON;
 		}
 		if (el) {
 			/* Select the key map; Vflag takes precedence over Eflag. */
 			if (Vflag)
 				el_set(el, EL_EDITOR, "vi");
 			else if (Eflag)
 				el_set(el, EL_EDITOR, "emacs");
 			el_set(el, EL_BIND, "^I", "sh-complete", NULL);
 			el_source(el, NULL);
 		}
 	} else {
 		/* Not interactive: release both editor and history state. */
 		INTOFF;
 		if (el) {	/* no editing if not interactive */
 			el_end(el);
 			el = NULL;
 		}
 		if (hist) {
 			history_end(hist);
 			hist = NULL;
 		}
 		INTON;
 	}
 }
 
 
 /*
  * Set the history size from the string hs.  A NULL, empty or negative
  * value selects the default of 100 entries.  Does nothing while history
  * is not initialized.
  */
 void
 sethistsize(const char *hs)
 {
 	int histsize;
 	HistEvent he;
 
 	if (hist != NULL) {
 		if (hs == NULL || *hs == '\0' ||
 		   (histsize = atoi(hs)) < 0)
 			histsize = 100;
 		history(hist, &he, H_SETSIZE, histsize);
 		/* Request unique history entries (H_SETUNIQUE on). */
 		history(hist, &he, H_SETUNIQUE, 1);
 	}
 }
 
 /*
  * Pass a new terminal type to the line editor.  Only acts when rootshell
  * is set, the editor is active and term is non-NULL.
  */
 void
 setterm(const char *term)
 {
 	if (!rootshell || el == NULL || term == NULL)
 		return;
 	el_set(el, EL_TERMINAL, term);
 }
 
 /*
  * The fc builtin: list history events (-l), re-execute them directly
  * (-s, or an editor of "-"), or write them to a temporary file, run an
  * editor on it and then read the edited file back as commands.
  *
  * Options: -e editor, -l list, -n omit event numbers when listing,
  * -r reverse the order, -s re-execute without editing.  An optional
  * old=new operand replaces the first occurrence of old in each event.
  * Always returns 0; failures are reported through error().
  */
 int
 histcmd(int argc, char **argv)
 {
 	int ch;
 	const char *editor = NULL;
 	HistEvent he;
 	int lflg = 0, nflg = 0, rflg = 0, sflg = 0;
 	int i, retval;
 	const char *firststr, *laststr;
 	int first, last, direction;
 	char *pat = NULL, *repl = NULL;
 	static int active = 0;
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	char editfilestr[PATH_MAX];
 	char *volatile editfile;
 	FILE *efp = NULL;
 	int oldhistnum;
 
 	if (hist == NULL)
 		error("history not active");
 
 	if (argc == 1)
 		error("missing history argument");
 
 	optreset = 1; optind = 1; /* initialize getopt */
 	opterr = 0;
 	/* Stop option parsing at an argument like -12, which is an event. */
 	while (not_fcnumber(argv[optind]) &&
 	      (ch = getopt(argc, argv, ":e:lnrs")) != -1)
 		switch ((char)ch) {
 		case 'e':
 			editor = optarg;
 			break;
 		case 'l':
 			lflg = 1;
 			break;
 		case 'n':
 			nflg = 1;
 			break;
 		case 'r':
 			rflg = 1;
 			break;
 		case 's':
 			sflg = 1;
 			break;
 		case ':':
 			error("option -%c expects argument", optopt);
 		case '?':
 		default:
 			error("unknown option: -%c", optopt);
 		}
 	argc -= optind, argv += optind;
 
 	/*
 	 * If executing...
 	 */
 	if (lflg == 0 || editor || sflg) {
 		lflg = 0;	/* ignore */
 		editfile = NULL;
 		/*
 		 * Catch interrupts to reset active counter and
 		 * cleanup temp files.
 		 */
 		savehandler = handler;
 		if (setjmp(jmploc.loc)) {
 			active = 0;
 			if (editfile)
 				unlink(editfile);
 			handler = savehandler;
 			longjmp(handler->loc, 1);
 		}
 		handler = &jmploc;
 		if (++active > MAXHISTLOOPS) {
 			active = 0;
 			displayhist = 0;
 			error("called recursively too many times");
 		}
 		/*
 		 * Set editor.
 		 */
 		if (sflg == 0) {
 			if (editor == NULL &&
 			    (editor = bltinlookup("FCEDIT", 1)) == NULL &&
 			    (editor = bltinlookup("EDITOR", 1)) == NULL)
 				editor = DEFEDITOR;
 			if (editor[0] == '-' && editor[1] == '\0') {
 				sflg = 1;	/* no edit */
 				editor = NULL;
 			}
 		}
 	}
 
 	/*
 	 * If executing, parse [old=new] now
 	 */
 	if (lflg == 0 && argc > 0 &&
 	     ((repl = strchr(argv[0], '=')) != NULL)) {
 		pat = argv[0];
 		*repl++ = '\0';
 		argc--, argv++;
 	}
 	/*
 	 * determine [first] and [last]
 	 */
 	switch (argc) {
 	case 0:
 		firststr = lflg ? "-16" : "-1";
 		laststr = "-1";
 		break;
 	case 1:
 		firststr = argv[0];
 		laststr = lflg ? "-1" : argv[0];
 		break;
 	case 2:
 		firststr = argv[0];
 		laststr = argv[1];
 		break;
 	default:
 		error("too many arguments");
 	}
 	/*
 	 * Turn into event numbers.
 	 */
 	first = str_to_event(firststr, 0);
 	last = str_to_event(laststr, 1);
 
 	if (rflg) {
 		i = last;
 		last = first;
 		first = i;
 	}
 	/*
 	 * XXX - this should not depend on the event numbers
 	 * always increasing.  Add sequence numbers or offset
 	 * to the history element in next (diskbased) release.
 	 */
 	direction = first < last ? H_PREV : H_NEXT;
 
 	/*
 	 * If editing, grab a temp file.
 	 */
 	if (editor) {
 		int fd;
 		INTOFF;		/* easier */
 		snprintf(editfilestr, sizeof(editfilestr), "%s/_shXXXXXX",
 		    _PATH_TMP);
 		/*
 		 * editfile is still NULL at this point, so the message must
 		 * name the template buffer instead.
 		 */
 		if ((fd = mkstemp(editfilestr)) < 0)
 			error("can't create temporary file %s", editfilestr);
 		editfile = editfilestr;
 		if ((efp = fdopen(fd, "w")) == NULL) {
 			close(fd);
 			error("Out of space");
 		}
 	}
 
 	/*
 	 * Loop through selected history events.  If listing or executing,
 	 * do it now.  Otherwise, put into temp file and call the editor
 	 * after.
 	 *
 	 * The history interface needs rethinking, as the following
 	 * convolutions will demonstrate.
 	 */
 	history(hist, &he, H_FIRST);
 	retval = history(hist, &he, H_NEXT_EVENT, first);
 	for (;retval != -1; retval = history(hist, &he, direction)) {
 		if (lflg) {
 			if (!nflg)
 				out1fmt("%5d ", he.num);
 			out1str(he.str);
 		} else {
 			char *s = pat ?
 			   fc_replace(he.str, pat, repl) : (char *)he.str;
 
 			if (sflg) {
 				if (displayhist) {
 					out2str(s);
 					flushout(out2);
 				}
 				evalstring(s, 0);
 				if (displayhist && hist) {
 					/*
 					 *  XXX what about recursive and
 					 *  relative histnums.
 					 */
 					oldhistnum = he.num;
 					history(hist, &he, H_ENTER, s);
 					/*
 					 * XXX H_ENTER moves the internal
 					 * cursor, set it back to the current
 					 * entry.
 					 */
 					retval = history(hist, &he,
 					    H_NEXT_EVENT, oldhistnum);
 				}
 			} else
 				fputs(s, efp);
 		}
 		/*
 		 * At end?  (if we were to lose last, we'd sure be
 		 * messed up).
 		 */
 		if (he.num == last)
 			break;
 	}
 	if (editor) {
 		char *editcmd;
 
 		fclose(efp);
 		/* Room for "editor file" plus the space and the NUL. */
 		editcmd = stalloc(strlen(editor) + strlen(editfile) + 2);
 		sprintf(editcmd, "%s %s", editor, editfile);
 		evalstring(editcmd, 0);	/* XXX - should use no JC command */
 		INTON;
 		readcmdfile(editfile);	/* XXX - should read back - quick tst */
 		unlink(editfile);
 	}
 
 	if (lflg == 0 && active > 0)
 		--active;
 	if (displayhist)
 		displayhist = 0;
 	return 0;
 }
 
 /*
  * Build a copy of s on the stack in which the first occurrence of p is
  * replaced by r, and return it.  Note that p is modified: after the first
  * match its first byte is set to NUL so that no later position matches.
  */
 static char *
 fc_replace(const char *s, char *p, char *r)
 {
 	char *dest;
 	int plen = strlen(p);
 
 	STARTSTACKSTR(dest);
 	while (*s) {
 		/* Cheap first-character test before the full comparison. */
 		if (*s == *p && strncmp(s, p, plen) == 0) {
-			while (*r)
-				STPUTC(*r++, dest);
+			STPUTS(r, dest);
 			s += plen;
 			*p = '\0';	/* so no more matches */
 		} else
 			STPUTC(*s++, dest);
 	}
 	STPUTC('\0', dest);
 	dest = grabstackstr(dest);
 
 	return (dest);
 }
 
 /*
  * Return non-zero unless s is a number with an optional leading '-'.
  * A NULL s yields 0.
  */
 int
 not_fcnumber(const char *s)
 {
 	const char *digits;
 
 	if (s == NULL)
 		return (0);
 	digits = (*s == '-') ? s + 1 : s;
 	return (!is_number(digits));
 }
 
 int
 str_to_event(const char *str, int last)
 {
 	HistEvent he;
 	const char *s = str;
 	int relative = 0;
 	int i, retval;
 
 	retval = history(hist, &he, H_FIRST);
 	switch (*s) {
 	case '-':
 		relative = 1;
 		/*FALLTHROUGH*/
 	case '+':
 		s++;
 	}
 	if (is_number(s)) {
 		i = atoi(s);
 		if (relative) {
 			while (retval != -1 && i--) {
 				retval = history(hist, &he, H_NEXT);
 			}
 			if (retval == -1)
 				retval = history(hist, &he, H_LAST);
 		} else {
 			retval = history(hist, &he, H_NEXT_EVENT, i);
 			if (retval == -1) {
 				/*
 				 * the notion of first and last is
 				 * backwards to that of the history package
 				 */
 				retval = history(hist, &he, last ? H_FIRST : H_LAST);
 			}
 		}
 		if (retval == -1)
 			error("history number %s not found (internal error)",
 			       str);
 	} else {
 		/*
 		 * pattern
 		 */
 		retval = history(hist, &he, H_PREV_STR, str);
 		if (retval == -1)
 			error("history pattern not found: %s", str);
 	}
 	return (he.num);
 }
 
 /*
  * The bind builtin: hand the arguments to el_parse().  Raises an error
  * when line editing is not active.
  */
 int
 bindcmd(int argc, char **argv)
 {
 	const char **args = (const char **)argv;
 
 	if (el == NULL)
 		error("line editing is disabled");
 	return (el_parse(el, argc, args));
 }
 
 #else
 #include "error.h"
 
 /*
  * Stub used when the shell is built with NO_HISTORY defined: always
  * reports an error.
  */
 int
 histcmd(int argc, char **argv)
 {
 
 	error("not compiled with history support");
 	/*NOTREACHED*/
 	return (0);
 }
 
 /*
  * Stub used when the shell is built with NO_HISTORY defined: always
  * reports an error.
  */
 int
 bindcmd(int argc, char **argv)
 {
 
 	error("not compiled with line editing support");
 	return (0);
 }
 #endif
Index: projects/binutils-2.17/bin/sh/memalloc.c
===================================================================
--- projects/binutils-2.17/bin/sh/memalloc.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/memalloc.c	(revision 215830)
@@ -1,342 +1,359 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)memalloc.c	8.3 (Berkeley) 5/4/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include "shell.h"
 #include "output.h"
 #include "memalloc.h"
 #include "error.h"
 #include "mystring.h"
 #include "expand.h"
 #include <stdlib.h>
 #include <unistd.h>
 
 /*
  * Like malloc, but returns an error when out of space.
  */
 
 pointer
 ckmalloc(size_t nbytes)
 {
 	pointer mem;
 
 	/* The allocator call is bracketed by INTOFF/INTON. */
 	INTOFF;
 	mem = malloc(nbytes);
 	INTON;
 	if (mem != NULL)
 		return mem;
 	error("Out of space");
 	return mem;
 }
 
 
 /*
  * Same for realloc.
  */
 
 pointer
 ckrealloc(pointer p, int nbytes)
 {
 	pointer resized;
 
 	/* The allocator call is bracketed by INTOFF/INTON. */
 	INTOFF;
 	resized = realloc(p, nbytes);
 	INTON;
 	if (resized == NULL)
 		error("Out of space");
 	return resized;
 }
 
 /*
  * Counterpart of ckmalloc()/ckrealloc(): free(3) bracketed by
  * INTOFF/INTON.
  */
 void
 ckfree(pointer p)
 {
 	INTOFF;
 	free(p);
 	INTON;
 }
 
 
 /*
  * Make a copy of a string in safe storage.
  */
 
 char *
 savestr(const char *s)
 {
 	size_t size = strlen(s) + 1;	/* include the terminating NUL */
 	char *copy = ckmalloc(size);
 
 	scopy(s, copy);
 	return copy;
 }
 
 
 /*
  * Parse trees for commands are allocated in lifo order, so we use a stack
  * to make this more efficient, and also to avoid all sorts of exception
  * handling code to handle interrupts in the middle of a parse.
  *
  * The size 496 was chosen because with 16-byte alignment the total size
  * for the allocated block is 512.
  */
 
 #define MINSIZE 496		/* minimum size of a block. */
 
 
 /*
  * Header of one block of the stack allocator.  Blocks are chained through
  * prev, newest first; the usable space starts after the header.
  */
 struct stack_block {
 	struct stack_block *prev;
 	/* Data follows */
 };
 /* Address of the data area of block sp: just past the aligned header. */
 #define SPACE(sp)	((char*)(sp) + ALIGN(sizeof(struct stack_block)))
 
 static struct stack_block *stackp;
 static struct stackmark *markp;
 char *stacknxt;
 int stacknleft;
 int sstrnleft;
 int herefd = -1;
 
 
 /*
  * Push a new block with room for at least nbytes (never less than
  * MINSIZE) and make it the current allocation block.
  */
 static void
 stnewblock(int nbytes)
 {
 	struct stack_block *blk;
 	int total;
 
 	if (nbytes < MINSIZE)
 		nbytes = MINSIZE;
 	/* Aligned header plus aligned payload. */
 	total = ALIGN(sizeof(struct stack_block)) + ALIGN(nbytes);
 
 	INTOFF;
 	blk = ckmalloc(total);
 	blk->prev = stackp;
 	stackp = blk;
 	stacknxt = SPACE(blk);
 	stacknleft = total - (stacknxt - (char*)blk);
 	INTON;
 }
 
 
 /*
  * Allocate nbytes (rounded up with ALIGN) from the top of the stack,
  * starting a new block when the current one is too small.
  */
 pointer
 stalloc(int nbytes)
 {
 	char *chunk;
 	int need = ALIGN(nbytes);
 
 	if (stacknleft < need)
 		stnewblock(need);
 	chunk = stacknxt;
 	stacknxt = chunk + need;
 	stacknleft -= need;
 	return chunk;
 }
 
 
 /*
  * Give back everything from p up to the current top of the stack.
  * A NULL p is treated as a fatal internal error.
  */
 void
 stunalloc(pointer p)
 {
 	char *base;
 
 	if (p == NULL) {		/*DEBUG */
 		write(STDERR_FILENO, "stunalloc\n", 10);
 		abort();
 	}
 	base = (char *)p;
 	stacknleft += stacknxt - base;
 	stacknxt = base;
 }
 
 
 
 void
 setstackmark(struct stackmark *mark)
 {
 	/* Snapshot the current allocator state. */
 	mark->stacknleft = stacknleft;
 	mark->stacknxt = stacknxt;
 	mark->stackp = stackp;
 
 	/* Push the mark onto the list headed by markp. */
 	mark->marknext = markp;
 	markp = mark;
 }
 
 
 void
 popstackmark(struct stackmark *mark)
 {
 	struct stack_block *doomed;
 
 	INTOFF;
 	markp = mark->marknext;
 	/* Free every block pushed since the mark was set. */
 	while (stackp != mark->stackp) {
 		doomed = stackp;
 		stackp = doomed->prev;
 		ckfree(doomed);
 	}
 	/* Restore the allocation pointer and remaining space. */
 	stacknleft = mark->stacknleft;
 	stacknxt = mark->stacknxt;
 	INTON;
 }
 
 
 /*
  * When the parser reads in a string, it wants to stick the string on the
  * stack and only adjust the stack pointer when it knows how big the
  * string is.  Stackblock (defined in stack.h) returns a pointer to a block
  * of space on top of the stack and stackblocklen returns the length of
  * this block.  Growstackblock will grow this space by at least one byte,
  * possibly moving it (like realloc).  Grabstackblock actually allocates the
  * part of the block that has been used.
  */
 
 void
 growstackblock(void)
 {
 	char *p;
 	int newlen;
 	char *oldspace;
 	int oldlen;
 	struct stack_block *sp;
 	struct stack_block *oldstackp;
 	struct stackmark *xmark;
 
 	/* Roughly double the free space; an empty block starts at MINSIZE. */
 	newlen = (stacknleft == 0) ? MINSIZE : stacknleft * 2 + 100;
 	newlen = ALIGN(newlen);
 	oldspace = stacknxt;
 	oldlen = stacknleft;
 
 	if (stackp != NULL && stacknxt == SPACE(stackp)) {
 		/*
 		 * Nothing has been allocated from the current block yet, so
 		 * the block itself can be resized with realloc.  It is
 		 * unlinked first and relinked under its new address.
 		 */
 		INTOFF;
 		oldstackp = stackp;
 		stackp = oldstackp->prev;
 		sp = ckrealloc((pointer)oldstackp, newlen);
 		sp->prev = stackp;
 		stackp = sp;
 		stacknxt = SPACE(sp);
 		stacknleft = newlen - (stacknxt - (char*)sp);
 
 		/*
 		 * Stack marks pointing to the start of the old block
 		 * must be relocated to point to the new block
 		 */
 		xmark = markp;
 		while (xmark != NULL && xmark->stackp == oldstackp) {
 			xmark->stackp = stackp;
 			xmark->stacknxt = stacknxt;
 			xmark->stacknleft = stacknleft;
 			xmark = xmark->marknext;
 		}
 		INTON;
 	} else {
 		/*
 		 * Otherwise obtain a larger area with stalloc(), copy the old
 		 * contents into it, and immediately release it again so that
 		 * it remains the unallocated top of the stack.
 		 */
 		p = stalloc(newlen);
 		if (oldlen != 0)
 			memcpy(p, oldspace, oldlen);
 		stunalloc(p);
 	}
 }
 
 
 
 /*
  * Claim the first len bytes (rounded up with ALIGN) of the current
  * stack block as allocated.
  */
 void
 grabstackblock(int len)
 {
 	int used = ALIGN(len);
 
 	stacknxt += used;
 	stacknleft -= used;
 }
 
 
 
 /*
  * The following routines are somewhat easier to use than the above.
  * The user declares a variable of type STACKSTR, which may be declared
  * to be a register.  The macro STARTSTACKSTR initializes things.  Then
  * the user uses the macro STPUTC to add characters to the string.  In
  * effect, STPUTC(c, p) is the same as *p++ = c except that the stack is
  * grown as necessary.  When the user is done, she can just leave the
  * string there and refer to it using stackblock().  Or she can allocate
  * the space for it using grabstackstr().  If it is necessary to allow
  * someone else to use the stack temporarily and then continue to grow
  * the string, the user should use grabstack to allocate the space, and
  * then call ungrabstr(p) to return to the previous mode of operation.
  *
  * USTPUTC is like STPUTC except that it doesn't check for overflow.
  * CHECKSTACKSPACE can be called before USTPUTC to ensure that there
  * is space for at least one character.
  */
 
 /*
  * Grow the stack block that holds a string of which n bytes are in use;
  * returns the new position for the next character.
  */
 static char *
 growstrstackblock(int n)
 {
 	char *base;
 
 	growstackblock();
 	base = stackblock();
 	sstrnleft = stackblocksize() - n;
 	return base + n;
 }
 
 /*
  * Make room for more characters in a stack string whose block is full.
  */
 char *
 growstackstr(void)
 {
 	int len = stackblocksize();
 
 	if (herefd < 0 || len < 1024)
 		return growstrstackblock(len);
 
 	/*
 	 * herefd is open and at least 1024 bytes are buffered: write the
 	 * block out and reuse it from the start instead of growing it.
 	 */
 	xwrite(herefd, stackblock(), len);
 	sstrnleft = len;
 	return stackblock();
 }
 
 
 /*
  * Called from CHECKSTRSPACE.
  */
 
 char *
 makestrspace(void)
 {
 	/* Bytes already used by the string = block size - space left. */
 	return growstrstackblock(stackblocksize() - sstrnleft);
 }
 
 
 
 /*
  * Undo a grabstackstr(): s is the start of the string and p the position
  * at which more characters will be appended.
  */
 void
 ungrabstackstr(char *s, char *p)
 {
 	/* Return the space from s to the current top to the free area. */
 	stacknleft = stacknleft + (stacknxt - s);
 	/* The part of it already holding the string is not available. */
 	sstrnleft = stacknleft - (p - s);
 	stacknxt = s;
 }
+
+
+/*
+ * Append len bytes from data to the stack string whose current position
+ * is p, one STPUTC at a time, and return the updated position.
+ */
+char *
+stputbin(const char *data, int len, char *p)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		STPUTC(data[i], p);
+	return (p);
+}
+
+/*
+ * Append the NUL-terminated string data (without its NUL) to the stack
+ * string at p and return the updated position.
+ */
+char *
+stputs(const char *data, char *p)
+{
+	return (stputbin(data, strlen(data), p));
+}
Index: projects/binutils-2.17/bin/sh/memalloc.h
===================================================================
--- projects/binutils-2.17/bin/sh/memalloc.h	(revision 215829)
+++ projects/binutils-2.17/bin/sh/memalloc.h	(revision 215830)
@@ -1,84 +1,88 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)memalloc.h	8.2 (Berkeley) 5/4/95
  * $FreeBSD$
  */
 
 #include <string.h>
 
 struct stackmark {
 	struct stack_block *stackp;
 	char *stacknxt;
 	int stacknleft;
         struct stackmark *marknext;
 };
 
 
 extern char *stacknxt;
 extern int stacknleft;
 extern int sstrnleft;
 extern int herefd;
 
 pointer ckmalloc(size_t);
 pointer ckrealloc(pointer, int);
 void ckfree(pointer);
 char *savestr(const char *);
 pointer stalloc(int);
 void stunalloc(pointer);
 void setstackmark(struct stackmark *);
 void popstackmark(struct stackmark *);
 void growstackblock(void);
 void grabstackblock(int);
 char *growstackstr(void);
 char *makestrspace(void);
 void ungrabstackstr(char *, char *);
+char *stputbin(const char *data, int len, char *p);
+char *stputs(const char *data, char *p);
 
 
 
 #define stackblock() stacknxt
 #define stackblocksize() stacknleft
 #define STARTSTACKSTR(p)	p = stackblock(), sstrnleft = stackblocksize()
 #define STPUTC(c, p)	(--sstrnleft >= 0? (*p++ = (c)) : (p = growstackstr(), --sstrnleft, *p++ = (c)))
 #define CHECKSTRSPACE(n, p)	{ if (sstrnleft < n) p = makestrspace(); }
 #define USTPUTC(c, p)	(--sstrnleft, *p++ = (c))
 /*
  * STACKSTRNUL's use is where we want to be able to turn a stack
  * (non-sentinel, character counting string) into a C string,
  * and later pretend the NUL is not there.
  * Note: Because of STACKSTRNUL's semantics, STACKSTRNUL cannot be used
  * on a stack that will grabstackstr()ed.
  */
 #define STACKSTRNUL(p)	(sstrnleft == 0? (p = growstackstr(), *p = '\0') : (*p = '\0'))
 #define STUNPUTC(p)	(++sstrnleft, --p)
 #define STTOPC(p)	p[-1]
 #define STADJUST(amount, p)	(p += (amount), sstrnleft -= (amount))
 #define grabstackstr(p)	stalloc(stackblocksize() - sstrnleft)
+#define STPUTBIN(s, len, p)	p = stputbin((s), (len), p)
+#define STPUTS(s, p)	p = stputs((s), p)
Index: projects/binutils-2.17/bin/sh/miscbltin.c
===================================================================
--- projects/binutils-2.17/bin/sh/miscbltin.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/miscbltin.c	(revision 215830)
@@ -1,501 +1,502 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)miscbltin.c	8.4 (Berkeley) 5/4/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Miscellaneous builtins.
  */
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <unistd.h>
 #include <ctype.h>
 #include <errno.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <termios.h>
 
 #include "shell.h"
 #include "options.h"
 #include "var.h"
 #include "output.h"
 #include "memalloc.h"
 #include "error.h"
 #include "mystring.h"
 
 #undef eflag
 
 int readcmd(int, char **);
 int umaskcmd(int, char **);
 int ulimitcmd(int, char **);
 
 /*
  * The read builtin.  The -r option causes backslashes to be treated like
  * ordinary characters.
  *
  * This uses unbuffered input, which may be avoidable in some cases.
  *
  * Note that if IFS=' :' then read x y should work so that:
  * 'a b'	x='a', y='b'
  * ' a b '	x='a', y='b'
  * ':b'		x='',  y='b'
  * ':'		x='',  y=''
  * '::'		x='',  y=''
  * ': :'	x='',  y=''
  * ':::'	x='',  y='::'
  * ':b c:'	x='',  y='b c:'
  */
 
 int
 readcmd(int argc __unused, char **argv __unused)
 {
 	char **ap;
 	int backslash;
 	char c;
 	int rflag;
 	char *prompt;
 	const char *ifs;
 	char *p;
 	int startword;
 	int status;
 	int i;
 	int is_ifs;
 	int saveall = 0;
 	struct timeval tv;
 	char *tvptr;
 	fd_set ifds;
 
 	rflag = 0;
 	prompt = NULL;
 	tv.tv_sec = -1;
 	tv.tv_usec = 0;
 	while ((i = nextopt("erp:t:")) != '\0') {
 		switch(i) {
 		case 'p':
 			prompt = shoptarg;
 			break;
 		case 'e':
 			break;
 		case 'r':
 			rflag = 1;
 			break;
 		case 't':
 			tv.tv_sec = strtol(shoptarg, &tvptr, 0);
 			if (tvptr == shoptarg)
 				error("timeout value");
 			switch(*tvptr) {
 			case 0:
 			case 's':
 				break;
 			case 'h':
 				tv.tv_sec *= 60;
 				/* FALLTHROUGH */
 			case 'm':
 				tv.tv_sec *= 60;
 				break;
 			default:
 				error("timeout unit");
 			}
 			break;
 		}
 	}
 	if (prompt && isatty(0)) {
 		out2str(prompt);
 		flushall();
 	}
 	if (*(ap = argptr) == NULL)
 		error("arg count");
 	if ((ifs = bltinlookup("IFS", 1)) == NULL)
 		ifs = " \t\n";
 
 	if (tv.tv_sec >= 0) {
 		/*
 		 * Wait for something to become available.
 		 */
 		FD_ZERO(&ifds);
 		FD_SET(0, &ifds);
 		status = select(1, &ifds, NULL, NULL, &tv);
 		/*
 		 * If there's nothing ready, return an error.
 		 */
 		if (status <= 0)
 			return(1);
 	}
 
 	status = 0;
 	startword = 2;
 	backslash = 0;
 	STARTSTACKSTR(p);
 	for (;;) {
 		if (read(STDIN_FILENO, &c, 1) != 1) {
 			status = 1;
 			break;
 		}
 		if (c == '\0')
 			continue;
+		CHECKSTRSPACE(1, p);
 		if (backslash) {
 			backslash = 0;
 			startword = 0;
 			if (c != '\n')
-				STPUTC(c, p);
+				USTPUTC(c, p);
 			continue;
 		}
 		if (!rflag && c == '\\') {
 			backslash++;
 			continue;
 		}
 		if (c == '\n')
 			break;
 		if (strchr(ifs, c))
 			is_ifs = strchr(" \t\n", c) ? 1 : 2;
 		else
 			is_ifs = 0;
 
 		if (startword != 0) {
 			if (is_ifs == 1) {
 				/* Ignore leading IFS whitespace */
 				if (saveall)
-					STPUTC(c, p);
+					USTPUTC(c, p);
 				continue;
 			}
 			if (is_ifs == 2 && startword == 1) {
 				/* Only one non-whitespace IFS per word */
 				startword = 2;
 				if (saveall)
-					STPUTC(c, p);
+					USTPUTC(c, p);
 				continue;
 			}
 		}
 
 		if (is_ifs == 0) {
 			/* append this character to the current variable */
 			startword = 0;
 			if (saveall)
 				/* Not just a spare terminator */
 				saveall++;
-			STPUTC(c, p);
+			USTPUTC(c, p);
 			continue;
 		}
 
 		/* end of variable... */
 		startword = is_ifs;
 
 		if (ap[1] == NULL) {
 			/* Last variable needs all IFS chars */
 			saveall++;
-			STPUTC(c, p);
+			USTPUTC(c, p);
 			continue;
 		}
 
 		STACKSTRNUL(p);
 		setvar(*ap, stackblock(), 0);
 		ap++;
 		STARTSTACKSTR(p);
 	}
 	STACKSTRNUL(p);
 
 	/* Remove trailing IFS chars */
 	for (; stackblock() <= --p; *p = 0) {
 		if (!strchr(ifs, *p))
 			break;
 		if (strchr(" \t\n", *p))
 			/* Always remove whitespace */
 			continue;
 		if (saveall > 1)
 			/* Don't remove non-whitespace unless it was naked */
 			break;
 	}
 	setvar(*ap, stackblock(), 0);
 
 	/* Set any remaining args to "" */
 	while (*++ap != NULL)
 		setvar(*ap, nullstr, 0);
 	return status;
 }
 
 
 
 int
 umaskcmd(int argc __unused, char **argv __unused)
 {
 	char *ap;
 	int mask;
 	int i;
 	int symbolic_mode = 0;
 
 	while ((i = nextopt("S")) != '\0') {
 		symbolic_mode = 1;
 	}
 
 	INTOFF;
 	mask = umask(0);
 	umask(mask);
 	INTON;
 
 	if ((ap = *argptr) == NULL) {
 		if (symbolic_mode) {
 			char u[4], g[4], o[4];
 
 			i = 0;
 			if ((mask & S_IRUSR) == 0)
 				u[i++] = 'r';
 			if ((mask & S_IWUSR) == 0)
 				u[i++] = 'w';
 			if ((mask & S_IXUSR) == 0)
 				u[i++] = 'x';
 			u[i] = '\0';
 
 			i = 0;
 			if ((mask & S_IRGRP) == 0)
 				g[i++] = 'r';
 			if ((mask & S_IWGRP) == 0)
 				g[i++] = 'w';
 			if ((mask & S_IXGRP) == 0)
 				g[i++] = 'x';
 			g[i] = '\0';
 
 			i = 0;
 			if ((mask & S_IROTH) == 0)
 				o[i++] = 'r';
 			if ((mask & S_IWOTH) == 0)
 				o[i++] = 'w';
 			if ((mask & S_IXOTH) == 0)
 				o[i++] = 'x';
 			o[i] = '\0';
 
 			out1fmt("u=%s,g=%s,o=%s\n", u, g, o);
 		} else {
 			out1fmt("%.4o\n", mask);
 		}
 	} else {
 		if (isdigit(*ap)) {
 			mask = 0;
 			do {
 				if (*ap >= '8' || *ap < '0')
 					error("Illegal number: %s", *argptr);
 				mask = (mask << 3) + (*ap - '0');
 			} while (*++ap != '\0');
 			umask(mask);
 		} else {
 			void *set;
 			INTOFF;
 			if ((set = setmode (ap)) == 0)
 				error("Illegal number: %s", ap);
 
 			mask = getmode (set, ~mask & 0777);
 			umask(~mask & 0777);
 			free(set);
 			INTON;
 		}
 	}
 	return 0;
 }
 
 /*
  * ulimit builtin
  *
  * This code, originally by Doug Gwyn, Doug Kingston, Eric Gisin, and
  * Michael Rendell was ripped from pdksh 5.0.8 and hacked for use with
  * ash by J.T. Conklin.
  *
  * Public domain.
  */
 
 struct limits {
 	const char *name;
 	const char *units;
 	int	cmd;
 	int	factor;	/* multiply by to get rlim_{cur,max} values */
 	char	option;
 };
 
 static const struct limits limits[] = {
 #ifdef RLIMIT_CPU
 	{ "cpu time",		"seconds",	RLIMIT_CPU,	   1, 't' },
 #endif
 #ifdef RLIMIT_FSIZE
 	{ "file size",		"512-blocks",	RLIMIT_FSIZE,	 512, 'f' },
 #endif
 #ifdef RLIMIT_DATA
 	{ "data seg size",	"kbytes",	RLIMIT_DATA,	1024, 'd' },
 #endif
 #ifdef RLIMIT_STACK
 	{ "stack size",		"kbytes",	RLIMIT_STACK,	1024, 's' },
 #endif
 #ifdef  RLIMIT_CORE
 	{ "core file size",	"512-blocks",	RLIMIT_CORE,	 512, 'c' },
 #endif
 #ifdef RLIMIT_RSS
 	{ "max memory size",	"kbytes",	RLIMIT_RSS,	1024, 'm' },
 #endif
 #ifdef RLIMIT_MEMLOCK
 	{ "locked memory",	"kbytes",	RLIMIT_MEMLOCK, 1024, 'l' },
 #endif
 #ifdef RLIMIT_NPROC
 	{ "max user processes",	(char *)0,	RLIMIT_NPROC,      1, 'u' },
 #endif
 #ifdef RLIMIT_NOFILE
 	{ "open files",		(char *)0,	RLIMIT_NOFILE,     1, 'n' },
 #endif
 #ifdef RLIMIT_VMEM
 	{ "virtual mem size",	"kbytes",	RLIMIT_VMEM,	1024, 'v' },
 #endif
 #ifdef RLIMIT_SWAP
 	{ "swap limit",		"kbytes",	RLIMIT_SWAP,	1024, 'w' },
 #endif
 #ifdef RLIMIT_SBSIZE
 	{ "sbsize",		"bytes",	RLIMIT_SBSIZE,	   1, 'b' },
 #endif
 #ifdef RLIMIT_NPTS
 	{ "pseudo-terminals",	(char *)0,	RLIMIT_NPTS,	   1, 'p' },
 #endif
 	{ (char *) 0,		(char *)0,	0,		   0, '\0' }
 };
 
 int
 ulimitcmd(int argc __unused, char **argv __unused)
 {
 	int	c;
 	rlim_t val = 0;
 	enum { SOFT = 0x1, HARD = 0x2 }
 			how = SOFT | HARD;
 	const struct limits	*l;
 	int		set, all = 0;
 	int		optc, what;
 	struct rlimit	limit;
 
 	what = 'f';
 	while ((optc = nextopt("HSatfdsmcnuvlbpw")) != '\0')
 		switch (optc) {
 		case 'H':
 			how = HARD;
 			break;
 		case 'S':
 			how = SOFT;
 			break;
 		case 'a':
 			all = 1;
 			break;
 		default:
 			what = optc;
 		}
 
 	for (l = limits; l->name && l->option != what; l++)
 		;
 	if (!l->name)
 		error("internal error (%c)", what);
 
 	set = *argptr ? 1 : 0;
 	if (set) {
 		char *p = *argptr;
 
 		if (all || argptr[1])
 			error("too many arguments");
 		if (strcmp(p, "unlimited") == 0)
 			val = RLIM_INFINITY;
 		else {
 			val = 0;
 
 			while ((c = *p++) >= '0' && c <= '9')
 			{
 				val = (val * 10) + (long)(c - '0');
 				if (val < 0)
 					break;
 			}
 			if (c)
 				error("bad number");
 			val *= l->factor;
 		}
 	}
 	if (all) {
 		for (l = limits; l->name; l++) {
 			char optbuf[40];
 			if (getrlimit(l->cmd, &limit) < 0)
 				error("can't get limit: %s", strerror(errno));
 			if (how & SOFT)
 				val = limit.rlim_cur;
 			else if (how & HARD)
 				val = limit.rlim_max;
 
 			if (l->units)
 				snprintf(optbuf, sizeof(optbuf),
 					"(%s, -%c) ", l->units, l->option);
 			else
 				snprintf(optbuf, sizeof(optbuf),
 					"(-%c) ", l->option);
 			out1fmt("%-18s %18s ", l->name, optbuf);
 			if (val == RLIM_INFINITY)
 				out1fmt("unlimited\n");
 			else
 			{
 				val /= l->factor;
 				out1fmt("%jd\n", (intmax_t)val);
 			}
 		}
 		return 0;
 	}
 
 	if (getrlimit(l->cmd, &limit) < 0)
 		error("can't get limit: %s", strerror(errno));
 	if (set) {
 		if (how & SOFT)
 			limit.rlim_cur = val;
 		if (how & HARD)
 			limit.rlim_max = val;
 		if (setrlimit(l->cmd, &limit) < 0)
 			error("bad limit: %s", strerror(errno));
 	} else {
 		if (how & SOFT)
 			val = limit.rlim_cur;
 		else if (how & HARD)
 			val = limit.rlim_max;
 
 		if (val == RLIM_INFINITY)
 			out1fmt("unlimited\n");
 		else
 		{
 			val /= l->factor;
 			out1fmt("%jd\n", (intmax_t)val);
 		}
 	}
 	return 0;
 }
Index: projects/binutils-2.17/bin/sh/parser.c
===================================================================
--- projects/binutils-2.17/bin/sh/parser.c	(revision 215829)
+++ projects/binutils-2.17/bin/sh/parser.c	(revision 215830)
@@ -1,1839 +1,1838 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stdlib.h>
 #include <unistd.h>
 #include <stdio.h>
 
 #include "shell.h"
 #include "parser.h"
 #include "nodes.h"
 #include "expand.h"	/* defines rmescapes() */
 #include "syntax.h"
 #include "options.h"
 #include "input.h"
 #include "output.h"
 #include "var.h"
 #include "error.h"
 #include "memalloc.h"
 #include "mystring.h"
 #include "alias.h"
 #include "show.h"
 #include "eval.h"
 #include "exec.h"	/* to check for special builtins */
 #ifndef NO_HISTORY
 #include "myhistedit.h"
 #endif
 
 /*
  * Shell command parser.
  */
 
 #define	EOFMARKLEN	79
 #define	PROMPTLEN	128
 
 /* values of checkkwd variable */
 #define CHKALIAS	0x1
 #define CHKKWD		0x2
 #define CHKNL		0x4
 
 /* values returned by readtoken */
 #include "token.h"
 
 
 
 struct heredoc {
 	struct heredoc *next;	/* next here document in list */
 	union node *here;		/* redirection node */
 	char *eofmark;		/* string indicating end of input */
 	int striptabs;		/* if set, strip leading tabs */
 };
 
 struct parser_temp {
 	struct parser_temp *next;
 	void *data;
 };
 
 
 static struct heredoc *heredoclist;	/* list of here documents to read */
 static int doprompt;		/* if set, prompt the user */
 static int needprompt;		/* true if interactive and at start of line */
 static int lasttoken;		/* last token read */
 MKINIT int tokpushback;		/* last token pushed back */
 static char *wordtext;		/* text of last word returned by readtoken */
 MKINIT int checkkwd;            /* 1 == check for kwds, 2 == also eat newlines */
 static struct nodelist *backquotelist;
 static union node *redirnode;
 static struct heredoc *heredoc;
 static int quoteflag;		/* set if (part of) last token was quoted */
 static int startlinno;		/* line # where last token started */
 static int funclinno;		/* line # where the current function started */
 static struct parser_temp *parser_temp;
 
 
 static union node *list(int, int);
 static union node *andor(void);
 static union node *pipeline(void);
 static union node *command(void);
 static union node *simplecmd(union node **, union node *);
 static union node *makename(void);
 static void parsefname(void);
 static void parseheredoc(void);
 static int peektoken(void);
 static int readtoken(void);
 static int xxreadtoken(void);
 static int readtoken1(int, char const *, char *, int);
 static int noexpand(char *);
 static void synexpect(int) __dead2;
 static void synerror(const char *) __dead2;
 static void setprompt(int);
 
 
 static void *
 parser_temp_alloc(size_t len)
 {
 	struct parser_temp *t;
 
 	INTOFF;
 	t = ckmalloc(sizeof(*t));
 	t->data = NULL;
 	t->next = parser_temp;
 	parser_temp = t;
 	t->data = ckmalloc(len);
 	INTON;
 	return t->data;
 }
 
 
 static void *
 parser_temp_realloc(void *ptr, size_t len)
 {
 	struct parser_temp *t;
 
 	INTOFF;
 	t = parser_temp;
 	if (ptr != t->data)
 		error("bug: parser_temp_realloc misused");
 	t->data = ckrealloc(t->data, len);
 	INTON;
 	return t->data;
 }
 
 
 static void
 parser_temp_free_upto(void *ptr)
 {
 	struct parser_temp *t;
 	int done = 0;
 
 	INTOFF;
 	while (parser_temp != NULL && !done) {
 		t = parser_temp;
 		parser_temp = t->next;
 		done = t->data == ptr;
 		ckfree(t->data);
 		ckfree(t);
 	}
 	INTON;
 	if (!done)
 		error("bug: parser_temp_free_upto misused");
 }
 
 
 static void
 parser_temp_free_all(void)
 {
 	struct parser_temp *t;
 
 	INTOFF;
 	while (parser_temp != NULL) {
 		t = parser_temp;
 		parser_temp = t->next;
 		ckfree(t->data);
 		ckfree(t);
 	}
 	INTON;
 }
 
 
 /*
  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
  * valid parse tree indicating a blank line.)
  */
 
 union node *
 parsecmd(int interact)
 {
 	int t;
 
 	/* This assumes the parser is not re-entered,
 	 * which could happen if we add command substitution on PS1/PS2.
 	 */
 	parser_temp_free_all();
 	heredoclist = NULL;
 
 	tokpushback = 0;
 	doprompt = interact;
 	if (doprompt)
 		setprompt(1);
 	else
 		setprompt(0);
 	needprompt = 0;
 	t = readtoken();
 	if (t == TEOF)
 		return NEOF;
 	if (t == TNL)
 		return NULL;
 	tokpushback++;
 	return list(1, 1);
 }
 
 
 static union node *
 list(int nlflag, int erflag)
 {
 	union node *ntop, *n1, *n2, *n3;
 	int tok;
 
 	checkkwd = CHKNL | CHKKWD | CHKALIAS;
 	if (!nlflag && !erflag && tokendlist[peektoken()])
 		return NULL;
 	ntop = n1 = NULL;
 	for (;;) {
 		n2 = andor();
 		tok = readtoken();
 		if (tok == TBACKGND) {
 			if (n2->type == NCMD || n2->type == NPIPE) {
 				n2->ncmd.backgnd = 1;
 			} else if (n2->type == NREDIR) {
 				n2->type = NBACKGND;
 			} else {
 				n3 = (union node *)stalloc(sizeof (struct nredir));
 				n3->type = NBACKGND;
 				n3->nredir.n = n2;
 				n3->nredir.redirect = NULL;
 				n2 = n3;
 			}
 		}
 		if (ntop == NULL)
 			ntop = n2;
 		else if (n1 == NULL) {
 			n1 = (union node *)stalloc(sizeof (struct nbinary));
 			n1->type = NSEMI;
 			n1->nbinary.ch1 = ntop;
 			n1->nbinary.ch2 = n2;
 			ntop = n1;
 		}
 		else {
 			n3 = (union node *)stalloc(sizeof (struct nbinary));
 			n3->type = NSEMI;
 			n3->nbinary.ch1 = n1->nbinary.ch2;
 			n3->nbinary.ch2 = n2;
 			n1->nbinary.ch2 = n3;
 			n1 = n3;
 		}
 		switch (tok) {
 		case TBACKGND:
 		case TSEMI:
 			tok = readtoken();
 			/* FALLTHROUGH */
 		case TNL:
 			if (tok == TNL) {
 				parseheredoc();
 				if (nlflag)
 					return ntop;
 			} else if (tok == TEOF && nlflag) {
 				parseheredoc();
 				return ntop;
 			} else {
 				tokpushback++;
 			}
 			checkkwd = CHKNL | CHKKWD | CHKALIAS;
 			if (!nlflag && !erflag && tokendlist[peektoken()])
 				return ntop;
 			break;
 		case TEOF:
 			if (heredoclist)
 				parseheredoc();
 			else
 				pungetc();		/* push back EOF on input */
 			return ntop;
 		default:
 			if (nlflag || erflag)
 				synexpect(-1);
 			tokpushback++;
 			return ntop;
 		}
 	}
 }
 
 
 
 static union node *
 andor(void)
 {
 	union node *n1, *n2, *n3;
 	int t;
 
 	n1 = pipeline();
 	for (;;) {
 		if ((t = readtoken()) == TAND) {
 			t = NAND;
 		} else if (t == TOR) {
 			t = NOR;
 		} else {
 			tokpushback++;
 			return n1;
 		}
 		n2 = pipeline();
 		n3 = (union node *)stalloc(sizeof (struct nbinary));
 		n3->type = t;
 		n3->nbinary.ch1 = n1;
 		n3->nbinary.ch2 = n2;
 		n1 = n3;
 	}
 }
 
 
 
 static union node *
 pipeline(void)
 {
 	union node *n1, *n2, *pipenode;
 	struct nodelist *lp, *prev;
 	int negate, t;
 
 	negate = 0;
 	checkkwd = CHKNL | CHKKWD | CHKALIAS;
 	TRACE(("pipeline: entered\n"));
 	while (readtoken() == TNOT)
 		negate = !negate;
 	tokpushback++;
 	n1 = command();
 	if (readtoken() == TPIPE) {
 		pipenode = (union node *)stalloc(sizeof (struct npipe));
 		pipenode->type = NPIPE;
 		pipenode->npipe.backgnd = 0;
 		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
 		pipenode->npipe.cmdlist = lp;
 		lp->n = n1;
 		do {
 			prev = lp;
 			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
 			checkkwd = CHKNL | CHKKWD | CHKALIAS;
 			t = readtoken();
 			tokpushback++;
 			if (t == TNOT)
 				lp->n = pipeline();
 			else
 				lp->n = command();
 			prev->next = lp;
 		} while (readtoken() == TPIPE);
 		lp->next = NULL;
 		n1 = pipenode;
 	}
 	tokpushback++;
 	if (negate) {
 		n2 = (union node *)stalloc(sizeof (struct nnot));
 		n2->type = NNOT;
 		n2->nnot.com = n1;
 		return n2;
 	} else
 		return n1;
 }
 
 
 
 static union node *
 command(void)
 {
 	union node *n1, *n2;
 	union node *ap, **app;
 	union node *cp, **cpp;
 	union node *redir, **rpp;
 	int t;
 
 	checkkwd = CHKNL | CHKKWD | CHKALIAS;
 	redir = NULL;
 	n1 = NULL;
 	rpp = &redir;
 
 	/* Check for redirection which may precede command */
 	while (readtoken() == TREDIR) {
 		*rpp = n2 = redirnode;
 		rpp = &n2->nfile.next;
 		parsefname();
 	}
 	tokpushback++;
 
 	switch (readtoken()) {
 	case TIF:
 		n1 = (union node *)stalloc(sizeof (struct nif));
 		n1->type = NIF;
 		if ((n1->nif.test = list(0, 0)) == NULL)
 			synexpect(-1);
 		if (readtoken() != TTHEN)
 			synexpect(TTHEN);
 		n1->nif.ifpart = list(0, 0);
 		n2 = n1;
 		while (readtoken() == TELIF) {
 			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
 			n2 = n2->nif.elsepart;
 			n2->type = NIF;
 			if ((n2->nif.test = list(0, 0)) == NULL)
 				synexpect(-1);
 			if (readtoken() != TTHEN)
 				synexpect(TTHEN);
 			n2->nif.ifpart = list(0, 0);
 		}
 		if (lasttoken == TELSE)
 			n2->nif.elsepart = list(0, 0);
 		else {
 			n2->nif.elsepart = NULL;
 			tokpushback++;
 		}
 		if (readtoken() != TFI)
 			synexpect(TFI);
 		checkkwd = CHKKWD | CHKALIAS;
 		break;
 	case TWHILE:
 	case TUNTIL: {
 		int got;
 		n1 = (union node *)stalloc(sizeof (struct nbinary));
 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
 		if ((n1->nbinary.ch1 = list(0, 0)) == NULL)
 			synexpect(-1);
 		if ((got=readtoken()) != TDO) {
 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
 			synexpect(TDO);
 		}
 		n1->nbinary.ch2 = list(0, 0);
 		if (readtoken() != TDONE)
 			synexpect(TDONE);
 		checkkwd = CHKKWD | CHKALIAS;
 		break;
 	}
 	case TFOR:
 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
 			synerror("Bad for loop variable");
 		n1 = (union node *)stalloc(sizeof (struct nfor));
 		n1->type = NFOR;
 		n1->nfor.var = wordtext;
 		while (readtoken() == TNL)
 			;
 		if (lasttoken == TWORD && ! quoteflag && equal(wordtext, "in")) {
 			app = &ap;
 			while (readtoken() == TWORD) {
 				n2 = (union node *)stalloc(sizeof (struct narg));
 				n2->type = NARG;
 				n2->narg.text = wordtext;
 				n2->narg.backquote = backquotelist;
 				*app = n2;
 				app = &n2->narg.next;
 			}
 			*app = NULL;
 			n1->nfor.args = ap;
 			if (lasttoken != TNL && lasttoken != TSEMI)
 				synexpect(-1);
 		} else {
 			static char argvars[5] = {
 				CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
 			};
 			n2 = (union node *)stalloc(sizeof (struct narg));
 			n2->type = NARG;
 			n2->narg.text = argvars;
 			n2->narg.backquote = NULL;
 			n2->narg.next = NULL;
 			n1->nfor.args = n2;
 			/*
 			 * Newline or semicolon here is optional (but note
 			 * that the original Bourne shell only allowed NL).
 			 */
 			if (lasttoken != TNL && lasttoken != TSEMI)
 				tokpushback++;
 		}
 		checkkwd = CHKNL | CHKKWD | CHKALIAS;
 		if ((t = readtoken()) == TDO)
 			t = TDONE;
 		else if (t == TBEGIN)
 			t = TEND;
 		else
 			synexpect(-1);
 		n1->nfor.body = list(0, 0);
 		if (readtoken() != t)
 			synexpect(t);
 		checkkwd = CHKKWD | CHKALIAS;
 		break;
 	case TCASE:
 		n1 = (union node *)stalloc(sizeof (struct ncase));
 		n1->type = NCASE;
 		if (readtoken() != TWORD)
 			synexpect(TWORD);
 		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
 		n2->type = NARG;
 		n2->narg.text = wordtext;
 		n2->narg.backquote = backquotelist;
 		n2->narg.next = NULL;
 		while (readtoken() == TNL);
 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
 			synerror("expecting \"in\"");
 		cpp = &n1->ncase.cases;
 		checkkwd = CHKNL | CHKKWD, readtoken();
 		while (lasttoken != TESAC) {
 			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
 			cp->type = NCLIST;
 			app = &cp->nclist.pattern;
 			if (lasttoken == TLP)
 				readtoken();
 			for (;;) {
 				*app = ap = (union node *)stalloc(sizeof (struct narg));
 				ap->type = NARG;
 				ap->narg.text = wordtext;
 				ap->narg.backquote = backquotelist;
 				checkkwd = CHKNL | CHKKWD;
 				if (readtoken() != TPIPE)
 					break;
 				app = &ap->narg.next;
 				readtoken();
 			}
 			ap->narg.next = NULL;
 			if (lasttoken != TRP)
 				synexpect(TRP);
 			cp->nclist.body = list(0, 0);
 
 			checkkwd = CHKNL | CHKKWD | CHKALIAS;
 			if ((t = readtoken()) != TESAC) {
 				if (t != TENDCASE)
 					synexpect(TENDCASE);
 				else
 					checkkwd = CHKNL | CHKKWD, readtoken();
 			}
 			cpp = &cp->nclist.next;
 		}
 		*cpp = NULL;
 		checkkwd = CHKKWD | CHKALIAS;
 		break;
 	case TLP:
 		n1 = (union node *)stalloc(sizeof (struct nredir));
 		n1->type = NSUBSHELL;
 		n1->nredir.n = list(0, 0);
 		n1->nredir.redirect = NULL;
 		if (readtoken() != TRP)
 			synexpect(TRP);
 		checkkwd = CHKKWD | CHKALIAS;
 		break;
 	case TBEGIN:
 		n1 = list(0, 0);
 		if (readtoken() != TEND)
 			synexpect(TEND);
 		checkkwd = CHKKWD | CHKALIAS;
 		break;
 	/* Handle an empty command like other simple commands.  */
 	case TBACKGND:
 	case TSEMI:
 	case TAND:
 	case TOR:
 		/*
 		 * An empty command before a ; doesn't make much sense, and
 		 * should certainly be disallowed in the case of `if ;'.
 		 */
 		if (!redir)
 			synexpect(-1);
 	case TNL:
 	case TEOF:
 	case TWORD:
 	case TRP:
 		tokpushback++;
 		n1 = simplecmd(rpp, redir);
 		return n1;
 	default:
 		synexpect(-1);
 	}
 
 	/* Now check for redirection which may follow command */
 	while (readtoken() == TREDIR) {
 		*rpp = n2 = redirnode;
 		rpp = &n2->nfile.next;
 		parsefname();
 	}
 	tokpushback++;
 	*rpp = NULL;
 	if (redir) {
 		if (n1->type != NSUBSHELL) {
 			n2 = (union node *)stalloc(sizeof (struct nredir));
 			n2->type = NREDIR;
 			n2->nredir.n = n1;
 			n1 = n2;
 		}
 		n1->nredir.redirect = redir;
 	}
 
 	return n1;
 }
 
 
 /*
  * Parse a simple command (words and redirections) or a function
  * definition of the form "name() command".
  *
  * rpp   - address of the link where the next redirection node is to be
  *         stored; ignored (replaced by the address of the local list)
  *         when redir is NULL.
  * redir - redirections collected so far, or NULL if there are none.
  *
  * Returns an NCMD node holding the argument and redirection lists, or,
  * for a function definition, the name node retyped to NDEFUN with the
  * function body linked through narg.next.
  */
 static union node *
 simplecmd(union node **rpp, union node *redir)
 {
 	union node *args, **app;
 	union node **orig_rpp;
 	union node *n = NULL;
 	const char *name;
 	size_t namelen;
 	int special;
 
 	/*
 	 * If we don't have any redirections already, then we must reset
 	 * rpp to be the address of the local redir variable.
 	 */
 	if (redir == NULL)
 		rpp = &redir;
 
 	args = NULL;
 	app = &args;
 	/*
 	 * We save the incoming value, because we need this for shell
 	 * functions.  There can not be a redirect or an argument between
 	 * the function name and the open parenthesis.
 	 */
 	orig_rpp = rpp;
 
 	for (;;) {
 		if (readtoken() == TWORD) {
 			n = (union node *)stalloc(sizeof (struct narg));
 			n->type = NARG;
 			n->narg.text = wordtext;
 			n->narg.backquote = backquotelist;
 			*app = n;
 			app = &n->narg.next;
 		} else if (lasttoken == TREDIR) {
 			*rpp = n = redirnode;
 			rpp = &n->nfile.next;
 			parsefname();	/* read name of redirection file */
 		} else if (lasttoken == TLP && args != NULL &&
 		    app == &args->narg.next && rpp == orig_rpp) {
 			/*
 			 * We have a function: exactly one word was read (the
 			 * args != NULL test also keeps us from forming a
 			 * member address from a null pointer) and no
 			 * redirection was parsed by this call, so n is the
 			 * name node.
 			 */
 			if (readtoken() != TRP)
 				synexpect(TRP);
 			funclinno = plinno;
 			/*
 			 * - Require plain text.
 			 * - Functions with '/' cannot be called.
 			 * - Reject name=().
 			 * - Reject ksh extended glob patterns.
 			 * - Reject an empty name, so that the last-character
 			 *   test never indexes before the start of the text.
 			 */
 			name = n->narg.text;
 			namelen = strlen(name);
 			if (!noexpand(n->narg.text) || quoteflag ||
 			    namelen == 0 ||
 			    strchr(name, '/') != NULL ||
 			    strchr("!%*+-=?@}~", name[namelen - 1]) != NULL)
 				synerror("Bad function name");
 			rmescapes(n->narg.text);
 			if (find_builtin(n->narg.text, &special) >= 0 &&
 			    special)
 				synerror("Cannot override a special builtin with a function");
 			n->type = NDEFUN;
 			n->narg.next = command();
 			funclinno = 0;
 			return n;
 		} else {
 			/* Not part of this command: leave it for the caller. */
 			tokpushback++;
 			break;
 		}
 	}
 	*app = NULL;
 	*rpp = NULL;
 	n = (union node *)stalloc(sizeof (struct ncmd));
 	n->type = NCMD;
 	n->ncmd.backgnd = 0;
 	n->ncmd.args = args;
 	n->ncmd.redirect = redir;
 	return n;
 }
 
 /*
  * Build a stand-alone NARG node for the word the lexer has just read:
  * its text and backquote list are taken from wordtext and backquotelist,
  * and it has no successor.
  */
 static union node *
 makename(void)
 {
 	union node *word;
 
 	word = (union node *)stalloc(sizeof (struct narg));
 	word->narg.backquote = backquotelist;
 	word->narg.text = wordtext;
 	word->narg.next = NULL;
 	word->type = NARG;
 	return word;
 }
 
 /*
  * Fill in the target of a descriptor-duplication redirection node.
  *
  * text is the word following the operator.  A single digit selects that
  * descriptor and a lone "-" stores -1 in dupfd.  Any other text is a
  * syntax error when err is non-zero; when err is zero it is recorded
  * as a name node in vname instead (vname is cleared first in that mode).
  */
 void
 fixredir(union node *n, const char *text, int err)
 {
 	TRACE(("Fix redir %s %d\n", text, err));
 	if (err == 0)
 		n->ndup.vname = NULL;
 
 	if (is_digit(text[0]) && text[1] == '\0') {
 		n->ndup.dupfd = digit_val(text[0]);
 		return;
 	}
 	if (text[0] == '-' && text[1] == '\0') {
 		n->ndup.dupfd = -1;
 		return;
 	}
 
 	if (err != 0)
 		synerror("Bad fd number");
 	else
 		n->ndup.vname = makename();
 }
 
 
 /*
  * Read the word that follows a redirection operator and attach it to
  * redirnode: as the dup target for NTOFD/NFROMFD, as the end marker of
  * a here document for NHERE, and as the file name otherwise.
  */
 static void
 parsefname(void)
 {
 	union node *redir = redirnode;
 	struct heredoc *doc, *tail;
 	int marklen;
 
 	if (readtoken() != TWORD)
 		synexpect(-1);
 
 	if (redir->type == NTOFD || redir->type == NFROMFD) {
 		fixredir(redir, wordtext, 0);
 		return;
 	}
 	if (redir->type != NHERE) {
 		redir->nfile.fname = makename();
 		return;
 	}
 
 	/* Here document: validate the marker and queue the document. */
 	doc = heredoc;
 	if (quoteflag == 0)
 		redir->type = NXHERE;
 	TRACE(("Here document %d\n", redir->type));
 	if (doc->striptabs) {
 		while (*wordtext == '\t')
 			wordtext++;
 	}
 	if (!noexpand(wordtext) ||
 	    (marklen = strlen(wordtext)) == 0 || marklen > EOFMARKLEN)
 		synerror("Illegal eof marker for << redirection");
 	rmescapes(wordtext);
 	doc->eofmark = wordtext;
 	doc->next = NULL;
 
 	/* Append to the end of the pending here-document list. */
 	if (heredoclist == NULL) {
 		heredoclist = doc;
 		return;
 	}
 	tail = heredoclist;
 	while (tail->next != NULL)
 		tail = tail->next;
 	tail->next = doc;
 }
 
 
 /*
  * Input any here documents.
  *
  * Each entry is taken off heredoclist in turn, its body is read with
  * readtoken1 (single-quote syntax if the redirection is still NHERE,
  * double-quote syntax otherwise) and stored as an NARG node in the
  * redirection's nhere.doc field.
  */
 
 static void
 parseheredoc(void)
 {
 	struct heredoc *doc;
 	union node *body;
 	const char *syntax;
 
 	for (doc = heredoclist; doc != NULL; doc = heredoclist) {
 		heredoclist = doc->next;
 		if (needprompt) {
 			setprompt(2);
 			needprompt = 0;
 		}
 		syntax = (doc->here->type == NHERE) ? SQSYNTAX : DQSYNTAX;
 		readtoken1(pgetc(), syntax, doc->eofmark, doc->striptabs);
 
 		body = (union node *)stalloc(sizeof (struct narg));
 		body->narg.type = NARG;
 		body->narg.next = NULL;
 		body->narg.text = wordtext;
 		body->narg.backquote = backquotelist;
 		doc->here->nhere.doc = body;
 	}
 }
 
 /*
  * Read the next token and immediately push it back, so that the
  * following read sees the same token again.  Returns the token.
  */
 static int
 peektoken(void)
 {
 	const int tok = readtoken();
 
 	tokpushback++;
 	return (tok);
 }
 
 /*
  * Read a token, applying the checks requested in checkkwd: skipping
  * newlines (CHKNL), recognizing reserved words (CHKKWD) and expanding
  * aliases (CHKALIAS).  checkkwd is cleared afterwards unless the token
  * is TNOT.
  */
 static int
 readtoken(void)
 {
 	int tok;
 	struct alias *al;
 	const char * const *kw;
 #ifdef DEBUG
 	int alreadyseen = tokpushback;
 #endif
 
 	/* Each pass reads one token; only an alias expansion loops again. */
 	for (;;) {
 		tok = xxreadtoken();
 
 		/*
 		 * eat newlines
 		 */
 		if (checkkwd & CHKNL) {
 			while (tok == TNL) {
 				parseheredoc();
 				tok = xxreadtoken();
 			}
 		}
 
 		/*
 		 * only unquoted words can be keywords or aliases
 		 */
 		if (tok != TWORD || quoteflag)
 			break;
 
 		if (checkkwd & CHKKWD) {
 			for (kw = parsekwd; *kw != NULL; kw++) {
 				if (**kw == *wordtext && equal(*kw, wordtext)) {
 					lasttoken = tok = kw - parsekwd + KWDOFFSET;
 					TRACE(("keyword %s recognized\n", tokname[tok]));
 					goto out;
 				}
 			}
 		}
 
 		if ((checkkwd & CHKALIAS) == 0)
 			break;
 		al = lookupalias(wordtext, 1);
 		if (al == NULL)
 			break;
 		/* Re-read the token from the alias text. */
 		pushstring(al->val, strlen(al->val), al);
 	}
 out:
 	if (tok != TNOT)
 		checkkwd = 0;
 
 #ifdef DEBUG
 	if (!alreadyseen)
 	    TRACE(("token %s %s\n", tokname[tok], tok == TWORD ? wordtext : ""));
 	else
 	    TRACE(("reread token %s %s\n", tokname[tok], tok == TWORD ? wordtext : ""));
 #endif
 	return (tok);
 }
 
 
 /*
  * Read the next input token.
  * If the token is a word, we set backquotelist to the list of cmds in
  *	backquotes.  We set quoteflag to true if any part of the word was
  *	quoted.
  * If the token is TREDIR, then we set redirnode to a structure containing
  *	the redirection.
  * In all cases, the variable startlinno is set to the number of the line
  *	on which the token starts.
  *
  * [Change comment:  here documents and internal procedures]
  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
  *  word parsing code into a separate routine.  In this case, readtoken
  *  doesn't need to have any internal procedures, but parseword does.
  *  We could also make parseoperator in essence the main routine, and
  *  have parseword (readtoken1?) handle both words and redirection.]
  */
 
 #define RETURN(token)	return lasttoken = token
 
 /*
  * Low-level tokenizer: hands back a pushed-back token if there is one,
  * otherwise skips blanks, comments and backslash-newline pairs and
  * returns an operator token, or delegates to readtoken1 for anything
  * that starts a word.
  */
 static int
 xxreadtoken(void)
 {
 	int c;
 
 	if (tokpushback) {
 		tokpushback = 0;
 		return lasttoken;
 	}
 	if (needprompt) {
 		setprompt(2);
 		needprompt = 0;
 	}
 	startlinno = plinno;
 	for (;;) {	/* until token or start of word found */
 		c = pgetc_macro();
 		switch (c) {
 		case ' ':
 		case '\t':
 			continue;
 		case '#':
 			/* comment: discard up to, not including, the newline */
 			do {
 				c = pgetc();
 			} while (c != '\n' && c != PEOF);
 			pungetc();
 			continue;
 		case '\\':
 			if (pgetc() != '\n') {
 				/* ordinary backslash: it starts a word */
 				pungetc();
 				return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
 			}
 			/* line continuation */
 			startlinno = ++plinno;
 			setprompt(doprompt ? 2 : 0);
 			continue;
 		case '\n':
 			plinno++;
 			needprompt = doprompt;
 			RETURN(TNL);
 		case PEOF:
 			RETURN(TEOF);
 		case '&':
 			if (pgetc() != '&') {
 				pungetc();
 				RETURN(TBACKGND);
 			}
 			RETURN(TAND);
 		case '|':
 			if (pgetc() != '|') {
 				pungetc();
 				RETURN(TPIPE);
 			}
 			RETURN(TOR);
 		case ';':
 			if (pgetc() != ';') {
 				pungetc();
 				RETURN(TSEMI);
 			}
 			RETURN(TENDCASE);
 		case '(':
 			RETURN(TLP);
 		case ')':
 			RETURN(TRP);
 		default:
 			return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
 		}
 	}
 #undef RETURN
 }
 
 
 /*
  * Number of entries in the on-stack nesting array that readtoken1
  * starts with; deeper nesting makes it switch to parser_temp_alloc
  * storage.
  */
 #define MAXNEST_static 8
 /*
  * Lexer state for one nesting level inside readtoken1.  Level 0 is the
  * word itself; a new level is pushed for each ${var...} with an operator
  * and for each $((...)).
  */
 struct tokenstate
 {
 	const char *syntax; /* *SYNTAX */
 	int parenlevel; /* levels of parentheses in arithmetic */
 	/* What kind of construct opened this level. */
 	enum tokenstate_category
 	{
 		TSTATE_TOP,
 		TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */
 		TSTATE_VAR_NEW, /* other ${var...}, own dquote state */
 		TSTATE_ARITH
 	} category;
 };
 
 
 /*
  * Called to parse command substitutions.
  *
  * out      - current end of the stack string being built by the caller;
  *            everything before it is saved and copied back afterwards.
  * pbqlist  - list to which a new entry holding the parsed command tree
  *            is appended.
  * oldstyle - non-zero for `...`: the text up to the closing backquote
  *            is collected first and then re-read as input; zero for
  *            $(...): the command is parsed in place and must be
  *            followed by TRP.
  * dblquote - (oldstyle only) when non-zero, a backslash before '"' is
  *            dropped as well.
  * quoted   - selects CTLBACKQ | CTLQUOTE rather than CTLBACKQ as the
  *            marker appended to the output.
  *
  * Returns the new end of the caller's stack string.
  */
 
 static char *
 parsebackq(char *out, struct nodelist **pbqlist,
 		int oldstyle, int dblquote, int quoted)
 {
 	struct nodelist **nlpp;
 	union node *n;
 	char *volatile str;
 	struct jmploc jmploc;
 	struct jmploc *const savehandler = handler;
 	int savelen;
 	int saveprompt;
 	const int bq_startlinno = plinno;
 	char *volatile ostr = NULL;
 	struct parsefile *const savetopfile = getcurrentfile();
 	struct heredoc *const saveheredoclist = heredoclist;
 	struct heredoc *here;
 
 	str = NULL;
 	/*
 	 * Error path: undo everything done below (pushed input files,
 	 * temporary copies, here-document list, handler) before passing
 	 * the exception on.  EXERROR is reported as a syntax error on the
 	 * line where the substitution started.
 	 */
 	if (setjmp(jmploc.loc)) {
 		popfilesupto(savetopfile);
 		if (str)
 			ckfree(str);
 		if (ostr)
 			ckfree(ostr);
 		heredoclist = saveheredoclist;
 		handler = savehandler;
 		if (exception == EXERROR) {
 			startlinno = bq_startlinno;
 			synerror("Error in command substitution");
 		}
 		longjmp(handler->loc, 1);
 	}
 	INTOFF;
 	/* Save the caller's partial word; it is copied back after the parse. */
 	savelen = out - stackblock();
 	if (savelen > 0) {
 		str = ckmalloc(savelen);
 		memcpy(str, stackblock(), savelen);
 	}
 	handler = &jmploc;
 	heredoclist = NULL;
 	INTON;
         if (oldstyle) {
                 /* We must read until the closing backquote, giving special
                    treatment to some slashes, and then push the string and
                    reread it as input, interpreting it normally.  */
                 char *oout;
                 int c;
                 int olen;
 
 
                 STARTSTACKSTR(oout);
 		for (;;) {
 			if (needprompt) {
 				setprompt(2);
 				needprompt = 0;
 			}
 			/*
 			 * Each iteration stores at most two characters (a
 			 * backslash and the character itself); the final
 			 * NUL after "done" is covered by the same space.
 			 */
+			CHECKSTRSPACE(2, oout);
 			switch (c = pgetc()) {
 			case '`':
 				goto done;
 
 			case '\\':
                                 if ((c = pgetc()) == '\n') {
 					plinno++;
 					if (doprompt)
 						setprompt(2);
 					else
 						setprompt(0);
 					/*
 					 * If eating a newline, avoid putting
 					 * the newline into the new character
-					 * stream (via the STPUTC after the
+					 * stream (via the USTPUTC after the
 					 * switch).
 					 */
 					continue;
 				}
                                 if (c != '\\' && c != '`' && c != '$'
                                     && (!dblquote || c != '"'))
-                                        STPUTC('\\', oout);
+                                        USTPUTC('\\', oout);
 				break;
 
 			case '\n':
 				plinno++;
 				needprompt = doprompt;
 				break;
 
 			case PEOF:
 			        startlinno = plinno;
 				synerror("EOF in backquote substitution");
  				break;
 
 			default:
 				break;
 			}
-			STPUTC(c, oout);
+			USTPUTC(c, oout);
                 }
 done:
-                STPUTC('\0', oout);
+                USTPUTC('\0', oout);
                 olen = oout - stackblock();
 		INTOFF;
 		/* Copy the collected text and make it the current input. */
 		ostr = ckmalloc(olen);
 		memcpy(ostr, stackblock(), olen);
 		setinputstring(ostr, 1);
 		INTON;
         }
 	/* Append a new, still empty, entry to the end of *pbqlist. */
 	nlpp = pbqlist;
 	while (*nlpp)
 		nlpp = &(*nlpp)->next;
 	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
 	(*nlpp)->next = NULL;
 
 	/* Prompting is switched off while the collected text is re-read. */
 	if (oldstyle) {
 		saveprompt = doprompt;
 		doprompt = 0;
 	}
 
 	n = list(0, oldstyle);
 
 	if (oldstyle)
 		doprompt = saveprompt;
 	else {
 		if (readtoken() != TRP)
 			synexpect(TRP);
 	}
 
 	(*nlpp)->n = n;
         if (oldstyle) {
 		/*
 		 * Start reading from old file again, ignoring any pushed back
 		 * tokens left from the backquote parsing
 		 */
                 popfile();
 		tokpushback = 0;
 	}
 	/* Make the stack block large enough to take the saved word back. */
 	while (stackblocksize() <= savelen)
 		growstackblock();
 	STARTSTACKSTR(out);
 	INTOFF;
 	if (str) {
 		memcpy(out, str, savelen);
 		STADJUST(savelen, out);
 		ckfree(str);
 		str = NULL;
 	}
 	if (ostr) {
 		ckfree(ostr);
 		ostr = NULL;
 	}
 	/*
 	 * Put the saved here-document list back in front of any here
 	 * documents queued while parsing the substitution.
 	 */
 	here = saveheredoclist;
 	if (here != NULL) {
 		while (here->next != NULL)
 			here = here->next;
 		here->next = heredoclist;
 		heredoclist = saveheredoclist;
 	}
 	handler = savehandler;
 	INTON;
 	if (quoted)
 		USTPUTC(CTLBACKQ | CTLQUOTE, out);
 	else
 		USTPUTC(CTLBACKQ, out);
 	return out;
 }
 
 
 /*
  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
  * is not NULL, read a here document.  In the latter case, eofmark is the
  * word which marks the end of the document and striptabs is true if
  * leading tabs should be stripped from the document.  The argument firstc
  * is the first character of the input token or document.
  *
  * Because C does not have internal subroutines, I have simulated them
  * using goto's to implement the subroutine linkage.  The following macros
  * will run code that appears at the end of readtoken1.
  */
 
 #define CHECKEND()	{goto checkend; checkend_return:;}
 #define PARSEREDIR()	{goto parseredir; parseredir_return:;}
 #define PARSESUB()	{goto parsesub; parsesub_return:;}
 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
 
 /*
  * readtoken1: read one word, redirection operator or here-document body.
  *
  * firstc        - first character of the token or document.
  * initialsyntax - syntax table used at nesting level 0.
  * eofmark       - NULL for a word/redirection; otherwise the end marker
  *                 of the here document being read.
  * striptabs     - for here documents, strip leading tabs from each line.
  *
  * Returns TREDIR (with redirnode set) when the text turns out to be a
  * redirection operator, and TWORD otherwise, in which case wordtext,
  * quoteflag and backquotelist describe the word.  lasttoken is set to
  * the returned value.
  *
  * The labelled blocks after "end of readtoken routine" are entered only
  * through the CHECKEND/PARSEREDIR/PARSESUB/PARSEARITH macros and jump
  * back to the matching *_return label.
  */
 static int
 readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
 {
 	int c = firstc;
 	char *out;
 	int len;
 	char line[EOFMARKLEN + 1];
 	struct nodelist *bqlist;
 	int quotef;
 	int newvarnest;
 	int level;
 	int synentry;
 	/*
 	 * Nesting stack, indexed by level.  It starts out in state_static
 	 * and is moved to parser_temp_alloc storage (doubling maxnest)
 	 * when a new level would not fit.
 	 */
 	struct tokenstate state_static[MAXNEST_static];
 	int maxnest = MAXNEST_static;
 	struct tokenstate *state = state_static;
 
 	startlinno = plinno;
 	quotef = 0;
 	bqlist = NULL;
 	newvarnest = 0;
 	level = 0;
 	state[level].syntax = initialsyntax;
 	state[level].parenlevel = 0;
 	state[level].category = TSTATE_TOP;
 
 	STARTSTACKSTR(out);
 	loop: {	/* for each line, until end of word */
 		CHECKEND();	/* set c to PEOF if at end of here document */
 		for (;;) {	/* until end of line or end of word */
 			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
 
 			synentry = state[level].syntax[c];
 
 			switch(synentry) {
 			case CNL:	/* '\n' */
 				if (state[level].syntax == BASESYNTAX)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 				plinno++;
 				if (doprompt)
 					setprompt(2);
 				else
 					setprompt(0);
 				c = pgetc();
 				goto loop;		/* continue outer loop */
 			case CWORD:
 				USTPUTC(c, out);
 				break;
 			case CCTL:
 				if (eofmark == NULL || initialsyntax != SQSYNTAX)
 					USTPUTC(CTLESC, out);
 				USTPUTC(c, out);
 				break;
 			case CBACK:	/* backslash */
 				c = pgetc();
 				if (c == PEOF) {
 					USTPUTC('\\', out);
 					pungetc();
 				} else if (c == '\n') {
 					plinno++;
 					if (doprompt)
 						setprompt(2);
 					else
 						setprompt(0);
 				} else {
 					if (state[level].syntax == DQSYNTAX &&
 					    c != '\\' && c != '`' && c != '$' &&
 					    (c != '"' || (eofmark != NULL &&
 						newvarnest == 0)) &&
 					    (c != '}' || state[level].category != TSTATE_VAR_OLD))
 						USTPUTC('\\', out);
 					if ((eofmark == NULL ||
 					    newvarnest > 0) &&
 					    state[level].syntax == BASESYNTAX)
 						USTPUTC(CTLQUOTEMARK, out);
 					if (SQSYNTAX[c] == CCTL)
 						USTPUTC(CTLESC, out);
 					USTPUTC(c, out);
 					if ((eofmark == NULL ||
 					    newvarnest > 0) &&
 					    state[level].syntax == BASESYNTAX &&
 					    state[level].category == TSTATE_VAR_OLD)
 						USTPUTC(CTLQUOTEEND, out);
 					quotef++;
 				}
 				break;
 			case CSQUOTE:
 				USTPUTC(CTLQUOTEMARK, out);
 				state[level].syntax = SQSYNTAX;
 				break;
 			case CDQUOTE:
 				USTPUTC(CTLQUOTEMARK, out);
 				state[level].syntax = DQSYNTAX;
 				break;
 			case CENDQUOTE:
 				if (eofmark != NULL && newvarnest == 0)
 					USTPUTC(c, out);
 				else {
 					if (state[level].category == TSTATE_VAR_OLD)
 						USTPUTC(CTLQUOTEEND, out);
 					state[level].syntax = BASESYNTAX;
 					quotef++;
 				}
 				break;
 			case CVAR:	/* '$' */
 				PARSESUB();		/* parse substitution */
 				break;
 			case CENDVAR:	/* '}' */
 				/*
 				 * The brace closes the current ${...} only if
 				 * the quoting state is back to what it was
 				 * when that level was opened; otherwise it is
 				 * an ordinary character.
 				 */
 				if (level > 0 &&
 				    ((state[level].category == TSTATE_VAR_OLD &&
 				      state[level].syntax ==
 				      state[level - 1].syntax) ||
 				    (state[level].category == TSTATE_VAR_NEW &&
 				     state[level].syntax == BASESYNTAX))) {
 					if (state[level].category == TSTATE_VAR_NEW)
 						newvarnest--;
 					level--;
 					USTPUTC(CTLENDVAR, out);
 				} else {
 					USTPUTC(c, out);
 				}
 				break;
 			case CLP:	/* '(' in arithmetic */
 				state[level].parenlevel++;
 				USTPUTC(c, out);
 				break;
 			case CRP:	/* ')' in arithmetic */
 				if (state[level].parenlevel > 0) {
 					USTPUTC(c, out);
 					--state[level].parenlevel;
 				} else {
 					if (pgetc() == ')') {
 						if (level > 0 &&
 						    state[level].category == TSTATE_ARITH) {
 							level--;
 							USTPUTC(CTLENDARI, out);
 						} else
 							USTPUTC(')', out);
 					} else {
 						/*
 						 * unbalanced parens
 						 *  (don't 2nd guess - no error)
 						 */
 						pungetc();
 						USTPUTC(')', out);
 					}
 				}
 				break;
 			case CBQUOTE:	/* '`' */
 				out = parsebackq(out, &bqlist, 1,
 				    state[level].syntax == DQSYNTAX &&
 				    (eofmark == NULL || newvarnest > 0),
 				    state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX);
 				break;
 			case CEOF:
 				goto endword;		/* exit outer loop */
 			case CIGN:
 				break;
 			default:
 				if (level == 0)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 			}
 			c = pgetc_macro();
 		}
 	}
 endword:
 	if (state[level].syntax == ARISYNTAX)
 		synerror("Missing '))'");
 	if (state[level].syntax != BASESYNTAX && eofmark == NULL)
 		synerror("Unterminated quoted string");
 	if (state[level].category == TSTATE_VAR_OLD ||
 	    state[level].category == TSTATE_VAR_NEW) {
 		startlinno = plinno;
 		synerror("Missing '}'");
 	}
 	/* Release the nesting stack if it was moved off state_static. */
 	if (state != state_static)
 		parser_temp_free_upto(state);
 	USTPUTC('\0', out);
 	len = out - stackblock();
 	out = stackblock();
 	if (eofmark == NULL) {
 		/*
 		 * An unquoted word that is empty or a single digit and is
 		 * directly followed by '<' or '>' is the descriptor prefix
 		 * of a redirection operator (len counts the NUL).
 		 */
 		if ((c == '>' || c == '<')
 		 && quotef == 0
 		 && len <= 2
 		 && (*out == '\0' || is_digit(*out))) {
 			PARSEREDIR();
 			return lasttoken = TREDIR;
 		} else {
 			pungetc();
 		}
 	}
 	quoteflag = quotef;
 	backquotelist = bqlist;
 	grabstackblock(len);
 	wordtext = out;
 	return lasttoken = TWORD;
 /* end of readtoken routine */
 
 
 /*
  * Check to see whether we are at the end of the here document.  When this
  * is called, c is set to the first character of the next input line.  If
  * we are at the end of the here document, this routine sets the c to PEOF.
  */
 
 checkend: {
 	if (eofmark) {
 		if (striptabs) {
 			while (c == '\t')
 				c = pgetc();
 		}
 		if (c == *eofmark) {
 			if (pfgets(line, sizeof line) != NULL) {
 				char *p, *q;
 
 				/* c matched eofmark[0]; compare the rest. */
 				p = line;
 				for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
 				if (*p == '\n' && *q == '\0') {
 					c = PEOF;
 					plinno++;
 					needprompt = doprompt;
 				} else {
 					/* Not the marker: re-read the line as input. */
 					pushstring(line, strlen(line), NULL);
 				}
 			}
 		}
 	}
 	goto checkend_return;
 }
 
 
 /*
  * Parse a redirection operator.  The variable "out" points to a string
  * specifying the fd to be redirected.  The variable "c" contains the
  * first character of the redirection operator.
  */
 
 parseredir: {
 	char fd = *out;
 	union node *np;
 
 	np = (union node *)stalloc(sizeof (struct nfile));
 	if (c == '>') {
 		np->nfile.fd = 1;
 		c = pgetc();
 		if (c == '>')
 			np->type = NAPPEND;
 		else if (c == '&')
 			np->type = NTOFD;
 		else if (c == '|')
 			np->type = NCLOBBER;
 		else {
 			np->type = NTO;
 			pungetc();
 		}
 	} else {	/* c == '<' */
 		np->nfile.fd = 0;
 		c = pgetc();
 		if (c == '<') {
 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
 				np = (union node *)stalloc(sizeof (struct nhere));
 				np->nfile.fd = 0;
 			}
 			np->type = NHERE;
 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
 			heredoc->here = np;
 			if ((c = pgetc()) == '-') {
 				heredoc->striptabs = 1;
 			} else {
 				heredoc->striptabs = 0;
 				pungetc();
 			}
 		} else if (c == '&')
 			np->type = NFROMFD;
 		else if (c == '>')
 			np->type = NFROMTO;
 		else {
 			np->type = NFROM;
 			pungetc();
 		}
 	}
 	/* An explicit descriptor digit overrides the default of 0 or 1. */
 	if (fd != '\0')
 		np->nfile.fd = digit_val(fd);
 	redirnode = np;
 	goto parseredir_return;
 }
 
 
 /*
  * Parse a substitution.  At this point, we have read the dollar sign
  * and nothing else.
  */
 
 parsesub: {
 	char buf[10];
 	int subtype;
 	int typeloc;
 	int flags;
 	char *p;
 	static const char types[] = "}-+?=";
 	int bracketed_name = 0; /* used to handle ${[0-9]*} variables */
-	int i;
 	int linno;
 	int length;
 
 	c = pgetc();
 	if (c != '(' && c != '{' && (is_eof(c) || !is_name(c)) &&
 	    !is_special(c)) {
 		USTPUTC('$', out);
 		pungetc();
 	} else if (c == '(') {	/* $(command) or $((arith)) */
 		if (pgetc() == '(') {
 			PARSEARITH();
 		} else {
 			pungetc();
 			out = parsebackq(out, &bqlist, 0,
 			    state[level].syntax == DQSYNTAX &&
 			    (eofmark == NULL || newvarnest > 0),
 			    state[level].syntax == DQSYNTAX ||
 			    state[level].syntax == ARISYNTAX);
 		}
 	} else {
 		USTPUTC(CTLVAR, out);
 		/*
 		 * Remember where the type byte goes as an offset, not a
 		 * pointer; it is filled in once the operator is known.
 		 */
 		typeloc = out - stackblock();
 		USTPUTC(VSNORMAL, out);
 		subtype = VSNORMAL;
 		flags = 0;
 		if (c == '{') {
 			bracketed_name = 1;
 			c = pgetc();
 			if (c == '#') {
 				if ((c = pgetc()) == '}')
 					c = '#';
 				else
 					subtype = VSLENGTH;
 			}
 			else
 				subtype = 0;
 		}
 		if (!is_eof(c) && is_name(c)) {
 			length = 0;
 			do {
 				STPUTC(c, out);
 				c = pgetc();
 				length++;
 			} while (!is_eof(c) && is_in_name(c));
 			if (length == 6 &&
 			    strncmp(out - length, "LINENO", length) == 0) {
 				/* Replace the variable name with the
 				 * current line number. */
 				linno = plinno;
 				if (funclinno != 0)
 					linno -= funclinno - 1;
 				snprintf(buf, sizeof(buf), "%d", linno);
 				STADJUST(-6, out);
-				for (i = 0; buf[i] != '\0'; i++)
-					STPUTC(buf[i], out);
+				STPUTS(buf, out);
 				flags |= VSLINENO;
 			}
 		} else if (is_digit(c)) {
 			if (bracketed_name) {
 				do {
 					STPUTC(c, out);
 					c = pgetc();
 				} while (is_digit(c));
 			} else {
 				STPUTC(c, out);
 				c = pgetc();
 			}
 		} else {
 			if (! is_special(c)) {
 				subtype = VSERROR;
 				if (c == '}')
 					pungetc();
 				else if (c == '\n' || c == PEOF)
 					synerror("Unexpected end of line in substitution");
 				else
 					USTPUTC(c, out);
 			} else {
 				USTPUTC(c, out);
 				c = pgetc();
 			}
 		}
 		if (subtype == 0) {
 			switch (c) {
 			case ':':
 				flags |= VSNUL;
 				c = pgetc();
 				/*FALLTHROUGH*/
 			default:
 				p = strchr(types, c);
 				if (p == NULL) {
 					if (c == '\n' || c == PEOF)
 						synerror("Unexpected end of line in substitution");
 					if (flags == VSNUL)
 						STPUTC(':', out);
 					STPUTC(c, out);
 					subtype = VSERROR;
 				} else
 					subtype = p - types + VSNORMAL;
 				break;
 			case '%':
 			case '#':
 				{
 					int cc = c;
 					subtype = c == '#' ? VSTRIMLEFT :
 							     VSTRIMRIGHT;
 					c = pgetc();
 					/* A doubled operator selects the next subtype value. */
 					if (c == cc)
 						subtype++;
 					else
 						pungetc();
 					break;
 				}
 			}
 		} else if (subtype != VSERROR) {
 			pungetc();
 		}
 		STPUTC('=', out);
 		if (subtype != VSLENGTH && (state[level].syntax == DQSYNTAX ||
 		    state[level].syntax == ARISYNTAX))
 			flags |= VSQUOTE;
 		*(stackblock() + typeloc) = subtype | flags;
 		if (subtype != VSNORMAL) {
 			/* Grow the nesting stack if the new level would not fit. */
 			if (level + 1 >= maxnest) {
 				maxnest *= 2;
 				if (state == state_static) {
 					state = parser_temp_alloc(
 					    maxnest * sizeof(*state));
 					memcpy(state, state_static,
 					    MAXNEST_static * sizeof(*state));
 				} else
 					state = parser_temp_realloc(state,
 					    maxnest * sizeof(*state));
 			}
 			level++;
 			state[level].parenlevel = 0;
 			if (subtype == VSMINUS || subtype == VSPLUS ||
 			    subtype == VSQUESTION || subtype == VSASSIGN) {
 				/*
 				 * For operators that were in the Bourne shell,
 				 * inherit the double-quote state.
 				 */
 				state[level].syntax = state[level - 1].syntax;
 				state[level].category = TSTATE_VAR_OLD;
 			} else {
 				/*
 				 * The other operators take a pattern,
 				 * so go to BASESYNTAX.
 				 * Also, ' and " are now special, even
 				 * in here documents.
 				 */
 				state[level].syntax = BASESYNTAX;
 				state[level].category = TSTATE_VAR_NEW;
 				newvarnest++;
 			}
 		}
 	}
 	goto parsesub_return;
 }
 
 
 /*
  * Parse an arithmetic expansion (indicate start of one and set state)
  */
 parsearith: {
 
 	/* Grow the nesting stack if the new level would not fit. */
 	if (level + 1 >= maxnest) {
 		maxnest *= 2;
 		if (state == state_static) {
 			state = parser_temp_alloc(
 			    maxnest * sizeof(*state));
 			memcpy(state, state_static,
 			    MAXNEST_static * sizeof(*state));
 		} else
 			state = parser_temp_realloc(state,
 			    maxnest * sizeof(*state));
 	}
 	level++;
 	state[level].syntax = ARISYNTAX;
 	state[level].parenlevel = 0;
 	state[level].category = TSTATE_ARITH;
 	USTPUTC(CTLARI, out);
 	/*
 	 * The character after CTLARI records whether the expansion
 	 * started inside double quotes.
 	 */
 	if (state[level - 1].syntax == DQSYNTAX)
 		USTPUTC('"',out);
 	else
 		USTPUTC(' ',out);
 	goto parsearith_return;
 }
 
 } /* end of readtoken */
 
 
 
 #ifdef mkinit
 /*
  * Parser reset fragment: drop any pushed-back token and clear the
  * keyword-check flags.
  * NOTE(review): only seen when "mkinit" is defined; presumably this is
  * collected by the mkinit tool into the shell's reset routine - confirm.
  */
 RESET {
 	tokpushback = 0;
 	checkkwd = 0;
 }
 #endif
 
 /*
  * Returns true if the text contains nothing to expand (no dollar signs
  * or backquotes).
  */
 
 /*
  * Scan text and report whether it is free of expansion markers:
  * returns 0 at the first character that BASESYNTAX classifies as CCTL,
  * 1 if the end is reached.  CTLQUOTEMARK is ignored and the character
  * following a CTLESC is skipped without being examined.
  */
 static int
 noexpand(char *text)
 {
 	char *cur = text;
 	char ch;
 
 	for (;;) {
 		ch = *cur++;
 		if (ch == '\0')
 			return 1;
 		if (ch == CTLQUOTEMARK)
 			continue;
 		if (ch == CTLESC) {
 			cur++;		/* step over the escaped character */
 			continue;
 		}
 		if (BASESYNTAX[(int)ch] == CCTL)
 			return 0;
 	}
 }
 
 
 /*
  * Return true if the argument is a legal variable name (a letter or
  * underscore followed by zero or more letters, underscores, and digits).
  */
 
 /*
  * Returns 1 if name starts with a character accepted by is_name and
  * every following character is accepted by is_in_name; 0 otherwise.
  */
 int
 goodname(const char *name)
 {
 	const char *cur;
 
 	if (!is_name(*name))
 		return 0;
 	for (cur = name + 1; *cur != '\0'; cur++) {
 		if (!is_in_name(*cur))
 			return 0;
 	}
 	return 1;
 }
 
 
 /*
  * Report that the token just read (lasttoken) was not acceptable here.
  * token names the one token that would have been accepted; pass -1 when
  * several different tokens were possible, in which case no expectation
  * is mentioned in the message.
  */
 
 static void
 synexpect(int token)
 {
 	char msg[64];
 
 	if (token < 0)
 		fmtstr(msg, sizeof(msg), "%s unexpected", tokname[lasttoken]);
 	else
 		fmtstr(msg, sizeof(msg), "%s unexpected (expecting %s)",
 		    tokname[lasttoken], tokname[token]);
 	synerror(msg);
 }
 
 
 /*
  * Print a syntax error message on out2, prefixed with the command name
  * and the starting line number of the current token when a command name
  * is set, and then raise an error with no further message.
  */
 static void
 synerror(const char *msg)
 {
 	if (commandname != NULL)
 		outfmt(out2, "%s: %d: ", commandname, startlinno);
 	outfmt(out2, "Syntax error: %s\n", msg);
 	error((char *)NULL);
 }
 
 /*
  * Select which prompt is current and, unless history support is
  * compiled in and an editline handle (el) exists, write that prompt to
  * out2 immediately.
  */
 static void
 setprompt(int which)
 {
 	whichprompt = which;
 
 #ifndef NO_HISTORY
 	if (el != NULL)
 		return;
 #endif
 	out2str(getprompt(NULL));
 	flushout(out2);
 }
 
 /*
  * called by editline -- any expansions to the prompt
  *    should be added here.
  *
  * Expands the prompt format selected by whichprompt (0: empty, 1: PS1,
  * 2: PS2) into a static buffer of PROMPTLEN bytes and returns it; any
  * other value of whichprompt yields "??".  Output that does not fit is
  * truncated, and the result is always NUL-terminated.  The argument is
  * ignored.
  */
 char *
 getprompt(void *unused __unused)
 {
 	static char ps[PROMPTLEN];
 	char *fmt;
 	const char *pwd;
 	int i, trim;
 	static char internal_error[] = "??";
 
 	/*
 	 * Select prompt format.
 	 */
 	switch (whichprompt) {
 	case 0:
 		fmt = nullstr;
 		break;
 	case 1:
 		fmt = ps1val();
 		break;
 	case 2:
 		fmt = ps2val();
 		break;
 	default:
 		return internal_error;
 	}
 
 	/*
 	 * Format prompt string.
 	 *
 	 * Invariant: on entry to each iteration i <= PROMPTLEN - 2, so
 	 * ps[i] can be stored and the terminator still fits.  The
 	 * multi-character expansions leave i on their last character so
 	 * that the loop increment moves past it.
 	 */
 	for (i = 0; (i < PROMPTLEN - 1) && (*fmt != '\0'); i++, fmt++) {
 		if (*fmt == '\\')
 			switch (*++fmt) {
 
 				/*
 				 * Hostname.
 				 *
 				 * \h specifies just the local hostname,
 				 * \H specifies fully-qualified hostname.
 				 */
 			case 'h':
 			case 'H':
 				ps[i] = '\0';
 				gethostname(&ps[i], PROMPTLEN - i - 1);
 				/*
 				 * gethostname need not terminate a truncated
 				 * name, so terminate the buffer ourselves.
 				 */
 				ps[PROMPTLEN - 1] = '\0';
 				/* Skip to end of hostname. */
 				trim = (*fmt == 'h') ? '.' : '\0';
 				while ((ps[i] != '\0') && (ps[i] != trim))
 					i++;
 				--i;
 				break;
 
 				/*
 				 * Working directory.
 				 *
 				 * \W specifies just the final component,
 				 * \w specifies the entire path.
 				 */
 			case 'W':
 			case 'w':
 				pwd = lookupvar("PWD");
 				/* An empty PWD would embed a NUL in the prompt. */
 				if (pwd == NULL || *pwd == '\0')
 					pwd = "?";
 				if (*fmt == 'W' &&
 				    *pwd == '/' && pwd[1] != '\0')
 					strlcpy(&ps[i], strrchr(pwd, '/') + 1,
 					    PROMPTLEN - i);
 				else
 					strlcpy(&ps[i], pwd, PROMPTLEN - i);
 				/* Skip to end of path. */
 				while (ps[i] != '\0')
 					i++;
 				--i;
 				break;
 
 				/*
 				 * Superuser status.
 				 *
 				 * '$' for normal users, '#' for root.
 				 */
 			case '$':
 				ps[i] = (geteuid() != 0) ? '$' : '#';
 				break;
 
 				/*
 				 * A literal \.
 				 */
 			case '\\':
 				ps[i] = '\\';
 				break;
 
 				/*
 				 * Format ends in a lone backslash: keep it
 				 * and step back so that the loop increment
 				 * lands on the terminator instead of running
 				 * past the end of the format string.
 				 */
 			case '\0':
 				ps[i] = '\\';
 				fmt--;
 				break;
 
 				/*
 				 * Emit unrecognized formats verbatim, dropping
 				 * the second character if only one byte is
 				 * left before the terminator.
 				 */
 			default:
 				ps[i] = '\\';
 				if (i < PROMPTLEN - 2)
 					ps[++i] = *fmt;
 				break;
 			}
 		else
 			ps[i] = *fmt;
 	}
 	ps[i] = '\0';
 	return (ps);
 }
Index: projects/binutils-2.17/cddl/contrib/opensolaris
===================================================================
--- projects/binutils-2.17/cddl/contrib/opensolaris	(revision 215829)
+++ projects/binutils-2.17/cddl/contrib/opensolaris	(revision 215830)

Property changes on: projects/binutils-2.17/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/cddl/contrib/opensolaris:r215709-215824
Index: projects/binutils-2.17/contrib/bind9
===================================================================
--- projects/binutils-2.17/contrib/bind9	(revision 215829)
+++ projects/binutils-2.17/contrib/bind9	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/bind9
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/bind9:r215709-215824
Index: projects/binutils-2.17/contrib/binutils
===================================================================
--- projects/binutils-2.17/contrib/binutils	(revision 215829)
+++ projects/binutils-2.17/contrib/binutils	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/binutils
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/binutils:r215709-215824
Index: projects/binutils-2.17/contrib/bzip2
===================================================================
--- projects/binutils-2.17/contrib/bzip2	(revision 215829)
+++ projects/binutils-2.17/contrib/bzip2	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/bzip2
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/bzip2:r215709-215824
Index: projects/binutils-2.17/contrib/ee
===================================================================
--- projects/binutils-2.17/contrib/ee	(revision 215829)
+++ projects/binutils-2.17/contrib/ee	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/ee
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/ee:r215709-215824
Index: projects/binutils-2.17/contrib/expat
===================================================================
--- projects/binutils-2.17/contrib/expat	(revision 215829)
+++ projects/binutils-2.17/contrib/expat	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/expat
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/expat:r215709-215824
Index: projects/binutils-2.17/contrib/file
===================================================================
--- projects/binutils-2.17/contrib/file	(revision 215829)
+++ projects/binutils-2.17/contrib/file	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/file
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/file:r215709-215824
Index: projects/binutils-2.17/contrib/gdb
===================================================================
--- projects/binutils-2.17/contrib/gdb	(revision 215829)
+++ projects/binutils-2.17/contrib/gdb	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/gdb
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/gdb:r215709-215824
Index: projects/binutils-2.17/contrib/gdtoa
===================================================================
--- projects/binutils-2.17/contrib/gdtoa	(revision 215829)
+++ projects/binutils-2.17/contrib/gdtoa	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/gdtoa
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/gdtoa:r215709-215824
Index: projects/binutils-2.17/contrib/gnu-sort
===================================================================
--- projects/binutils-2.17/contrib/gnu-sort	(revision 215829)
+++ projects/binutils-2.17/contrib/gnu-sort	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/gnu-sort
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/gnu-sort:r215709-215824
Index: projects/binutils-2.17/contrib/groff
===================================================================
--- projects/binutils-2.17/contrib/groff	(revision 215829)
+++ projects/binutils-2.17/contrib/groff	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/groff
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/groff:r215709-215824
Index: projects/binutils-2.17/contrib/less
===================================================================
--- projects/binutils-2.17/contrib/less	(revision 215829)
+++ projects/binutils-2.17/contrib/less	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/less
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/less:r215709-215824
Index: projects/binutils-2.17/contrib/libpcap
===================================================================
--- projects/binutils-2.17/contrib/libpcap	(revision 215829)
+++ projects/binutils-2.17/contrib/libpcap	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/libpcap
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/libpcap:r215709-215824
Index: projects/binutils-2.17/contrib/llvm/tools/clang
===================================================================
--- projects/binutils-2.17/contrib/llvm/tools/clang	(revision 215829)
+++ projects/binutils-2.17/contrib/llvm/tools/clang	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/llvm/tools/clang:r215709-215824
Index: projects/binutils-2.17/contrib/llvm
===================================================================
--- projects/binutils-2.17/contrib/llvm	(revision 215829)
+++ projects/binutils-2.17/contrib/llvm	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/llvm:r215709-215824
Index: projects/binutils-2.17/contrib/ncurses
===================================================================
--- projects/binutils-2.17/contrib/ncurses	(revision 215829)
+++ projects/binutils-2.17/contrib/ncurses	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/ncurses
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/ncurses:r215709-215824
Index: projects/binutils-2.17/contrib/netcat
===================================================================
--- projects/binutils-2.17/contrib/netcat	(revision 215829)
+++ projects/binutils-2.17/contrib/netcat	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/netcat
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/netcat:r215709-215824
Index: projects/binutils-2.17/contrib/ntp
===================================================================
--- projects/binutils-2.17/contrib/ntp	(revision 215829)
+++ projects/binutils-2.17/contrib/ntp	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/ntp
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/ntp:r215709-215824
Index: projects/binutils-2.17/contrib/one-true-awk
===================================================================
--- projects/binutils-2.17/contrib/one-true-awk	(revision 215829)
+++ projects/binutils-2.17/contrib/one-true-awk	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/one-true-awk
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/one-true-awk:r215709-215824
Index: projects/binutils-2.17/contrib/openbsm
===================================================================
--- projects/binutils-2.17/contrib/openbsm	(revision 215829)
+++ projects/binutils-2.17/contrib/openbsm	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/openbsm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/openbsm:r215709-215824
Index: projects/binutils-2.17/contrib/openpam
===================================================================
--- projects/binutils-2.17/contrib/openpam	(revision 215829)
+++ projects/binutils-2.17/contrib/openpam	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/openpam
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/openpam:r215709-215824
Index: projects/binutils-2.17/contrib/pf
===================================================================
--- projects/binutils-2.17/contrib/pf	(revision 215829)
+++ projects/binutils-2.17/contrib/pf	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/pf
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/pf:r215709-215824
Index: projects/binutils-2.17/contrib/sendmail
===================================================================
--- projects/binutils-2.17/contrib/sendmail	(revision 215829)
+++ projects/binutils-2.17/contrib/sendmail	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/sendmail
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/sendmail:r215709-215824
Index: projects/binutils-2.17/contrib/tcpdump
===================================================================
--- projects/binutils-2.17/contrib/tcpdump	(revision 215829)
+++ projects/binutils-2.17/contrib/tcpdump	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/tcpdump
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/tcpdump:r215709-215824
Index: projects/binutils-2.17/contrib/tcsh
===================================================================
--- projects/binutils-2.17/contrib/tcsh	(revision 215829)
+++ projects/binutils-2.17/contrib/tcsh	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/tcsh
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/tcsh:r215709-215824
Index: projects/binutils-2.17/contrib/top/install-sh
===================================================================
--- projects/binutils-2.17/contrib/top/install-sh	(revision 215829)
+++ projects/binutils-2.17/contrib/top/install-sh	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/top/install-sh
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/top/install-sh:r215709-215824
Index: projects/binutils-2.17/contrib/top
===================================================================
--- projects/binutils-2.17/contrib/top	(revision 215829)
+++ projects/binutils-2.17/contrib/top	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/top
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/top:r215709-215824
Index: projects/binutils-2.17/contrib/tzcode/stdtime
===================================================================
--- projects/binutils-2.17/contrib/tzcode/stdtime	(revision 215829)
+++ projects/binutils-2.17/contrib/tzcode/stdtime	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/tzcode/stdtime
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/tzcode/stdtime:r215709-215824
Index: projects/binutils-2.17/contrib/tzcode/zic
===================================================================
--- projects/binutils-2.17/contrib/tzcode/zic	(revision 215829)
+++ projects/binutils-2.17/contrib/tzcode/zic	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/tzcode/zic
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/tzcode/zic:r215709-215824
Index: projects/binutils-2.17/contrib/tzdata
===================================================================
--- projects/binutils-2.17/contrib/tzdata	(revision 215829)
+++ projects/binutils-2.17/contrib/tzdata	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/tzdata
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/tzdata:r215709-215824
Index: projects/binutils-2.17/contrib/wpa
===================================================================
--- projects/binutils-2.17/contrib/wpa	(revision 215829)
+++ projects/binutils-2.17/contrib/wpa	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/wpa
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/wpa:r215709-215824
Index: projects/binutils-2.17/contrib/xz
===================================================================
--- projects/binutils-2.17/contrib/xz	(revision 215829)
+++ projects/binutils-2.17/contrib/xz	(revision 215830)

Property changes on: projects/binutils-2.17/contrib/xz
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/xz:r215709-215824
Index: projects/binutils-2.17/crypto/openssh
===================================================================
--- projects/binutils-2.17/crypto/openssh	(revision 215829)
+++ projects/binutils-2.17/crypto/openssh	(revision 215830)

Property changes on: projects/binutils-2.17/crypto/openssh
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/crypto/openssh:r215709-215824
Index: projects/binutils-2.17/crypto/openssl
===================================================================
--- projects/binutils-2.17/crypto/openssl	(revision 215829)
+++ projects/binutils-2.17/crypto/openssl	(revision 215830)

Property changes on: projects/binutils-2.17/crypto/openssl
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/crypto/openssl:r215709-215824
Index: projects/binutils-2.17/etc/defaults/rc.conf
===================================================================
--- projects/binutils-2.17/etc/defaults/rc.conf	(revision 215829)
+++ projects/binutils-2.17/etc/defaults/rc.conf	(revision 215830)
@@ -1,710 +1,712 @@
 #!/bin/sh
 
 # This is rc.conf - a file full of useful variables that you can set
 # to change the default startup behavior of your system.  You should
 # not edit this file!  Put any overrides into one of the ${rc_conf_files}
 # instead and you will be able to update these defaults later without
 # spamming your local configuration information.
 #
 # The ${rc_conf_files} files should only contain values which override
 # values set in this file.  This eases the upgrade path when defaults
 # are changed and new features are added.
 #
 # All arguments must be in double or single quotes.
 #
 # For a more detailed explanation of all the rc.conf variables, please
 # refer to the rc.conf(5) manual page.
 #
 # $FreeBSD$
 
 ##############################################################
 ###  Important initial Boot-time options  ####################
 ##############################################################
 
 rc_debug="NO"		# Set to YES to enable debugging output from rc.d
 rc_info="NO"		# Enables display of informational messages at boot.
 rc_startmsgs="YES" 	# Show "Starting foo:" messages at boot
 rcshutdown_timeout="30" # Seconds to wait before terminating rc.shutdown
 early_late_divider="FILESYSTEMS"	# Script that separates early/late
 			# stages of the boot process.  Make sure you know
 			# the ramifications if you change this.
 			# See rc.conf(5) for more details.
 
 swapfile="NO"		# Set to name of swapfile if aux swapfile desired.
 apm_enable="NO"		# Set to YES to enable APM BIOS functions (or NO).
 apmd_enable="NO"	# Run apmd to handle APM event from userland.
 apmd_flags=""		# Flags to apmd (if enabled).
 ddb_enable="NO"		# Set to YES to load ddb scripts at boot.
 ddb_config="/etc/ddb.conf"	# ddb(8) config file.
 devd_enable="YES" 	# Run devd, to trigger programs on device tree changes.
 devd_flags=""		# Additional flags for devd(8).
 kldxref_enable="NO"	# Build linker.hints files with kldxref(8).
 kldxref_clobber="NO"	# Overwrite old linker.hints at boot.
 kldxref_module_path=""	# Override kern.module_path. A ';'-delimited list.
 powerd_enable="NO" 	# Run powerd to lower our power usage.
 powerd_flags=""		# Flags to powerd (if enabled).
 tmpmfs="AUTO"		# Set to YES to always create an mfs /tmp, NO to never
 tmpsize="20m"		# Size of mfs /tmp if created
 tmpmfs_flags="-S"	# Extra mdmfs options for the mfs /tmp
 varmfs="AUTO"		# Set to YES to always create an mfs /var, NO to never
 varsize="32m"		# Size of mfs /var if created
 varmfs_flags="-S"	# Extra mount options for the mfs /var
 populate_var="AUTO"	# Set to YES to always (re)populate /var, NO to never
 cleanvar_enable="YES" 	# Clean the /var directory
 local_startup="/usr/local/etc/rc.d" # startup script dirs.
 script_name_sep=" "	# Change if your startup scripts' names contain spaces
 rc_conf_files="/etc/rc.conf /etc/rc.conf.local"
 
 # ZFS support
 zfs_enable="NO"		# Set to YES to automatically mount ZFS file systems
 
+gptboot_enable="YES"	# GPT boot success/failure reporting.
+
 # Experimental - test before enabling
 gbde_autoattach_all="NO" # YES automatically mounts gbde devices from fstab
 gbde_devices="NO" 	# Devices to automatically attach (list, or AUTO)
 gbde_attach_attempts="3" # Number of times to attempt attaching gbde devices
 gbde_lockdir="/etc"	# Where to look for gbde lockfiles
 
 # GELI disk encryption configuration.
 geli_devices=""		# List of devices to automatically attach in addition to
 			# GELI devices listed in /etc/fstab.
 geli_tries=""		# Number of times to attempt attaching geli device.
 			# If empty, kern.geom.eli.tries will be used.
 geli_default_flags=""	# Default flags for geli(8).
 geli_autodetach="YES"	# Automatically detach on last close.
 			# Providers are marked as such when all file systems are
 			# mounted.
 # Example use.
 #geli_devices="da1 mirror/home"
 #geli_da1_flags="-p -k /etc/geli/da1.keys"
 #geli_da1_autodetach="NO"
 #geli_mirror_home_flags="-k /etc/geli/home.keys"
 
 geli_swap_flags="-e aes -l 256 -s 4096 -d"	# Options for GELI-encrypted
 						# swap partitions.
 
 root_rw_mount="YES"	# Set to NO to inhibit remounting root read-write.
 fsck_y_enable="NO"	# Set to YES to do fsck -y if the initial preen fails.
 fsck_y_flags=""		# Additional flags for fsck -y
 background_fsck="YES"	# Attempt to run fsck in the background where possible.
 background_fsck_delay="60" # Time to wait (seconds) before starting the fsck.
 netfs_types="nfs:NFS nfs4:NFS4 smbfs:SMB portalfs:PORTAL nwfs:NWFS" # Net filesystems.
 extra_netfs_types="NO"	# List of network extra filesystem types for delayed
 			# mount at startup (or NO).
 
 ##############################################################
 ###  Network configuration sub-section  ######################
 ##############################################################
 
 ### Basic network and firewall/security options: ###
 hostname=""			# Set this!
 hostid_enable="YES"		# Set host UUID.
 hostid_file="/etc/hostid"	# File with hostuuid.
 nisdomainname="NO"		# Set to NIS domain if using NIS (or NO).
 dhclient_program="/sbin/dhclient"	# Path to dhcp client program.
 dhclient_flags=""		# Extra flags to pass to dhcp client.
 #dhclient_flags_fxp0=""		# Extra dhclient flags for fxp0 only
 background_dhclient="NO"	# Start dhcp client in the background.
 #background_dhclient_fxp0="YES"	# Start dhcp client on fxp0 in the background.
 synchronous_dhclient="NO"	# Start dhclient directly on configured
 				# interfaces during startup.
 defaultroute_delay="30"		# Time to wait for a default route on a DHCP interface.
 defaultroute_carrier_delay="5"	# Time to wait for carrier while waiting for a default route.
 wpa_supplicant_program="/usr/sbin/wpa_supplicant"
 wpa_supplicant_flags="-s"	# Extra flags to pass to wpa_supplicant
 wpa_supplicant_conf_file="/etc/wpa_supplicant.conf"
 #
 firewall_enable="NO"		# Set to YES to enable firewall functionality
 firewall_script="/etc/rc.firewall" # Which script to run to set up the firewall
 firewall_type="UNKNOWN"		# Firewall type (see /etc/rc.firewall)
 firewall_quiet="NO"		# Set to YES to suppress rule display
 firewall_logging="NO"		# Set to YES to enable events logging
 firewall_flags=""		# Flags passed to ipfw when type is a file
 firewall_coscripts=""		# List of executables/scripts to run after
 				# firewall starts/stops
 firewall_client_net="192.0.2.0/24" # IPv4 Network address for "client"
 				# firewall.
 #firewall_client_net_ipv6="2001:db8:2:1::/64" # IPv6 network prefix for
 				# "client" firewall.
 firewall_simple_iif="ed1"	# Inside network interface for "simple"
 				# firewall.
 firewall_simple_inet="192.0.2.16/28" # Inside network address for "simple"
 				# firewall.
 firewall_simple_oif="ed0"	# Outside network interface for "simple"
 				# firewall.
 firewall_simple_onet="192.0.2.0/28" # Outside network address for "simple"
 				# firewall.
 #firewall_simple_iif_ipv6="ed1"	# Inside IPv6 network interface for "simple"
 				# firewall.
 #firewall_simple_inet_ipv6="2001:db8:2:800::/56" # Inside IPv6 network prefix
 				# for "simple" firewall.
 #firewall_simple_oif_ipv6="ed0"	# Outside IPv6 network interface for "simple"
 				# firewall.
 #firewall_simple_onet_ipv6="2001:db8:2:0::/56" # Outside IPv6 network prefix
 				# for "simple" firewall.
 firewall_myservices=""		# List of TCP ports on which this host
 				# offers services for "workstation" firewall.
 firewall_allowservices=""	# List of IPs which have access to
 				# $firewall_myservices for "workstation"
 				# firewall.
 firewall_trusted=""		# List of IPs which have full access to this
 				# host for "workstation" firewall.
 firewall_logdeny="NO"		# Set to YES to log default denied incoming
 				# packets for "workstation" firewall.
 firewall_nologports="135-139,445 1026,1027 1433,1434" # List of TCP/UDP ports
 				# for which denied incoming packets are not
 				# logged for "workstation" firewall.
 firewall_nat_enable="NO"	# Enable kernel NAT (if firewall_enable == YES)
 firewall_nat_interface=""	# Public interface or IPaddress to use
 firewall_nat_flags=""		# Additional configuration parameters
 dummynet_enable="NO"		# Load the dummynet(4) module
 ip_portrange_first="NO"		# Set first dynamically allocated port
 ip_portrange_last="NO"		# Set last dynamically allocated port
 ike_enable="NO"			# Enable IKE daemon (usually racoon or isakmpd)
 ike_program="/usr/local/sbin/isakmpd"	# Path to IKE daemon
 ike_flags=""			# Additional flags for IKE daemon
 ipsec_enable="NO"		# Set to YES to run setkey on ipsec_file
 ipsec_file="/etc/ipsec.conf"	# Name of config file for setkey
 natd_program="/sbin/natd"	# path to natd, if you want a different one.
 natd_enable="NO"		# Enable natd (if firewall_enable == YES).
 natd_interface=""		# Public interface or IPaddress to use.
 natd_flags=""			# Additional flags for natd.
 ipfilter_enable="NO"		# Set to YES to enable ipfilter functionality
 ipfilter_program="/sbin/ipf"	# where the ipfilter program lives
 ipfilter_rules="/etc/ipf.rules"	# rules definition file for ipfilter, see
 				# /usr/src/contrib/ipfilter/rules for examples
 ipfilter_flags=""		# additional flags for ipfilter
 ipnat_enable="NO"		# Set to YES to enable ipnat functionality
 ipnat_program="/sbin/ipnat"	# where the ipnat program lives
 ipnat_rules="/etc/ipnat.rules"	# rules definition file for ipnat
 ipnat_flags=""			# additional flags for ipnat
 ipmon_enable="NO"		# Set to YES for ipmon; needs ipfilter or ipnat
 ipmon_program="/sbin/ipmon"	# where the ipfilter monitor program lives
 ipmon_flags="-Ds"		# typically "-Ds" or "-D /var/log/ipflog"
 ipfs_enable="NO"		# Set to YES to enable saving and restoring
 				# of state tables at shutdown and boot
 ipfs_program="/sbin/ipfs"	# where the ipfs program lives
 ipfs_flags=""			# additional flags for ipfs
 pf_enable="NO"			# Set to YES to enable packet filter (pf)
 pf_rules="/etc/pf.conf"		# rules definition file for pf
 pf_program="/sbin/pfctl"	# where the pfctl program lives
 pf_flags=""			# additional flags for pfctl
 pflog_enable="NO"		# Set to YES to enable packet filter logging
 pflog_logfile="/var/log/pflog"	# where pflogd should store the logfile
 pflog_program="/sbin/pflogd"	# where the pflogd program lives
 pflog_flags=""			# additional flags for pflogd
 ftpproxy_enable="NO"		# Set to YES to enable ftp-proxy(8) for pf
 ftpproxy_flags=""		# additional flags for ftp-proxy(8)
 pfsync_enable="NO"		# Expose pf state to other hosts for syncing
 pfsync_syncdev=""		# Interface for pfsync to work through
 pfsync_syncpeer=""		# IP address of pfsync peer host
 pfsync_ifconfig=""		# Additional options to ifconfig(8) for pfsync
 tcp_extensions="YES"		# Set to NO to turn off RFC1323 extensions.
 log_in_vain="0"			# >=1 to log connects to ports w/o listeners.
 tcp_keepalive="YES"		# Enable stale TCP connection timeout (or NO).
 tcp_drop_synfin="NO"		# Set to YES to drop TCP packets with SYN+FIN
 				# NOTE: this violates the TCP specification
 icmp_drop_redirect="NO" 	# Set to YES to ignore ICMP REDIRECT packets
 icmp_log_redirect="NO"		# Set to YES to log ICMP REDIRECT packets
 network_interfaces="auto"	# List of network interfaces (or "auto").
 cloned_interfaces=""		# List of cloned network interfaces to create.
 #cloned_interfaces="gif0 gif1 gif2 gif3" # Pre-cloning GENERIC config.
 ifconfig_lo0="inet 127.0.0.1"	# default loopback device configuration.
 #ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry.
 #ifconfig_ed0_ipx="ipx 0x00010010"	# Sample IPX address family entry.
 #ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64" # Sample IPv6 addr entry
 #ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64" # Sample IPv6 alias
 #ifconfig_fxp0_name="net0"	# Change interface name from fxp0 to net0.
 #vlans_fxp0="101 vlan0"		# vlan(4) interfaces for fxp0 device
 #create_args_vlan0="vlan 102"	# vlan tag for vlan0 device
 #wlans_ath0="wlan0"		# wlan(4) interfaces for ath0 device
 #wlandebug_wlan0="scan+auth+assoc"	# Set debug flags with wlandebug(8)
 #ipv4_addrs_fxp0="192.168.0.1/24 192.168.1.1-5/28" # example IPv4 address entry.
 #
 #autobridge_interfaces="bridge0"	# List of bridges to check
 #autobridge_bridge0="tap* vlan0"	# Interface glob to automatically add to the bridge
 #
 # If you have any sppp(4) interfaces above, you might also want to set
 # the following parameters.  Refer to spppcontrol(8) for their meaning.
 sppp_interfaces=""		# List of sppp interfaces.
 #sppp_interfaces="...0"		# example: sppp over ...
 #spppconfig_...0="authproto=chap myauthname=foo myauthsecret='top secret' hisauthname=some-gw hisauthsecret='another secret'"
 gif_interfaces=""		# List of GIF tunnels.
 #gif_interfaces="gif0 gif1"	# Examples typically for a router.
 				# Choose correct tunnel addrs.
 #gifconfig_gif0="10.1.1.1 10.1.2.1"	# Examples typically for a router.
 #gifconfig_gif1="10.1.1.2 10.1.2.2"	# Examples typically for a router.
 fec_interfaces=""		# List of Fast EtherChannels.
 #fec_interfaces="fec0 fec1"
 #fecconfig_fec0="fxp0 dc0"	# Examples typically for two NICs
 #fecconfig_fec1="em0 em1 bge0 bge1"	# Examples typically for four NICs
 
 # User ppp configuration.
 ppp_enable="NO"		# Start user-ppp (or NO).
 ppp_program="/usr/sbin/ppp"	# Path to user-ppp program.
 ppp_mode="auto"		# Choice of "auto", "ddial", "direct" or "dedicated".
 			# For details see man page for ppp(8). Default is auto.
 ppp_nat="YES"		# Use PPP's internal network address translation or NO.
 ppp_profile="papchap"	# Which profile to use from /etc/ppp/ppp.conf.
 ppp_user="root"		# Which user to run ppp as
 
 # Start multiple instances of ppp at boot time
 #ppp_profile="profile1 profile2 profile3"	# Which profiles to use
 #ppp_profile1_mode="ddial"	# Override ppp mode for profile1
 #ppp_profile2_nat="NO"		# Override nat mode for profile2
 # profile3 uses default ppp_mode and ppp_nat
 
 ### Network daemon (miscellaneous) ###
 hostapd_enable="NO"		# Run hostap daemon.
 syslogd_enable="YES"		# Run syslog daemon (or NO).
 syslogd_program="/usr/sbin/syslogd" # path to syslogd, if you want a different one.
 syslogd_flags="-s"		# Flags to syslogd (if enabled).
 inetd_enable="NO"		# Run the network daemon dispatcher (YES/NO).
 inetd_program="/usr/sbin/inetd"	# path to inetd, if you want a different one.
 inetd_flags="-wW -C 60"		# Optional flags to inetd
 hastd_enable="NO"		# Run the HAST daemon (YES/NO).
 hastd_program="/sbin/hastd"	# path to hastd, if you want a different one.
 hastd_flags=""			# Optional flags to hastd.
 #
 # named.  It may be possible to run named in a sandbox, man security for
 # details.
 #
 named_enable="NO"		# Run named, the DNS server (or NO).
 named_program="/usr/sbin/named" # Path to named, if you want a different one.
 named_conf="/etc/namedb/named.conf" 	# Path to the configuration file
 #named_flags=""			# Use this for flags OTHER than -u and -c
 named_pidfile="/var/run/named/pid" # Must set this in named.conf as well
 named_uid="bind" 		# User to run named as
 named_chrootdir="/var/named"	# Chroot directory (or "" not to auto-chroot it)
 named_chroot_autoupdate="YES"	# Automatically install/update chrooted
 				# components of named. See /etc/rc.d/named.
 named_symlink_enable="YES"	# Symlink the chrooted pid file
 named_wait="NO" 		# Wait for working name service before exiting
 named_wait_host="localhost" 	# Hostname to check if named_wait is enabled
 named_auto_forward="NO" 	# Set up forwarders from /etc/resolv.conf
 named_auto_forward_only="NO" 	# Do "forward only" instead of "forward first"
 
 #
 # kerberos. Do not run the admin daemons on slave servers
 #
 kerberos5_server_enable="NO"	# Run a kerberos 5 master server (or NO).
 kerberos5_server="/usr/libexec/kdc"	# path to kerberos 5 KDC
 kerberos5_server_flags="--detach"	# Additional flags to the kerberos 5 server
 kadmind5_server_enable="NO"	# Run kadmind (or NO)
 kadmind5_server="/usr/libexec/kadmind"	# path to kerberos 5 admin daemon
 kpasswdd_server_enable="NO"	# Run kpasswdd (or NO)
 kpasswdd_server="/usr/libexec/kpasswdd"	# path to kerberos 5 passwd daemon
 
 gssd_enable="NO"		# Run the gssd daemon (or NO).
 gssd_flags=""			# Flags for gssd.
 
 rwhod_enable="NO"		# Run the rwho daemon (or NO).
 rwhod_flags=""			# Flags for rwhod
 rarpd_enable="NO"		# Run rarpd (or NO).
 rarpd_flags="-a"		# Flags to rarpd.
 bootparamd_enable="NO"		# Run bootparamd (or NO).
 bootparamd_flags=""		# Flags to bootparamd
 pppoed_enable="NO"		# Run the PPP over Ethernet daemon.
 pppoed_provider="*"		# Provider and ppp(8) config file entry.
 pppoed_flags="-P /var/run/pppoed.pid"	# Flags to pppoed (if enabled).
 pppoed_interface="fxp0"		# The interface that pppoed runs on.
 sshd_enable="NO"		# Enable sshd
 sshd_program="/usr/sbin/sshd"	# path to sshd, if you want a different one.
 sshd_flags=""			# Additional flags for sshd.
 ftpd_enable="NO"		# Enable stand-alone ftpd.
 ftpd_program="/usr/libexec/ftpd" # Path to ftpd, if you want a different one.
 ftpd_flags=""			# Additional flags to stand-alone ftpd.
 
 ### Network daemon (NFS): All need rpcbind_enable="YES" ###
 amd_enable="NO"			# Run amd service with $amd_flags (or NO).
 amd_program="/usr/sbin/amd"	# path to amd, if you want a different one.
 amd_flags="-a /.amd_mnt -l syslog /host /etc/amd.map /net /etc/amd.map"
 amd_map_program="NO"		# Can be set to "ypcat -k amd.master"
 nfs_client_enable="NO"		# This host is an NFS client (or NO).
 nfs_access_cache="60"		# Client cache timeout in seconds
 nfs_server_enable="NO"		# This host is an NFS server (or NO).
 nfs_server_flags="-u -t -n 4"	# Flags to nfsd (if enabled).
 mountd_enable="NO"		# Run mountd (or NO).
 mountd_flags="-r"		# Flags to mountd (if NFS server enabled).
 weak_mountd_authentication="NO"	# Allow non-root mount requests to be served.
 nfs_reserved_port_only="NO"	# Provide NFS only on secure port (or NO).
 nfs_bufpackets=""		# bufspace (in packets) for client
 rpc_lockd_enable="NO"		# Run NFS rpc.lockd needed for client/server.
 rpc_lockd_flags=""		# Flags to rpc.lockd (if enabled).
 rpc_statd_enable="NO"		# Run NFS rpc.statd needed for client/server.
 rpc_statd_flags=""		# Flags to rpc.statd (if enabled).
 rpcbind_enable="NO"		# Run the portmapper service (YES/NO).
 rpcbind_program="/usr/sbin/rpcbind"	# path to rpcbind, if you want a different one.
 rpcbind_flags=""		# Flags to rpcbind (if enabled).
 rpc_ypupdated_enable="NO"	# Run if NIS master and SecureRPC (or NO).
 keyserv_enable="NO"		# Run the SecureRPC keyserver (or NO).
 keyserv_flags=""		# Flags to keyserv (if enabled).
 nfsv4_server_enable="NO"	# Enable support for NFSv4
 nfscbd_enable="NO"		# NFSv4 client side callback daemon
 nfscbd_flags=""			# Flags for nfscbd
 nfsuserd_enable="NO"		# NFSv4 user/group name mapping daemon
 nfsuserd_flags=""		# Flags for nfsuserd
 
 ### Network Time Services options: ###
 timed_enable="NO"		# Run the time daemon (or NO).
 timed_flags=""			# Flags to timed (if enabled).
 ntpdate_enable="NO"		# Run ntpdate to sync time on boot (or NO).
 ntpdate_program="/usr/sbin/ntpdate"	# path to ntpdate, if you want a different one.
 ntpdate_flags="-b"		# Flags to ntpdate (if enabled).
 ntpdate_config="/etc/ntp.conf"	# ntpdate(8) configuration file
 ntpdate_hosts=""		# Whitespace-separated list of ntpdate(8) servers.
 ntpd_enable="NO"		# Run ntpd Network Time Protocol (or NO).
 ntpd_program="/usr/sbin/ntpd"	# path to ntpd, if you want a different one.
 ntpd_config="/etc/ntp.conf"	# ntpd(8) configuration file
 ntpd_sync_on_start="NO"		# Sync time on ntpd startup, even if offset is high
 ntpd_flags="-p /var/run/ntpd.pid -f /var/db/ntpd.drift"
 				# Flags to ntpd (if enabled).
 
 ### Network Information Services (NIS) options: All need rpcbind_enable="YES" ###
 nis_client_enable="NO"		# We're an NIS client (or NO).
 nis_client_flags=""		# Flags to ypbind (if enabled).
 nis_ypset_enable="NO"		# Run ypset at boot time (or NO).
 nis_ypset_flags=""		# Flags to ypset (if enabled).
 nis_server_enable="NO"		# We're an NIS server (or NO).
 nis_server_flags=""		# Flags to ypserv (if enabled).
 nis_ypxfrd_enable="NO"		# Run rpc.ypxfrd at boot time (or NO).
 nis_ypxfrd_flags=""		# Flags to rpc.ypxfrd (if enabled).
 nis_yppasswdd_enable="NO"	# Run rpc.yppasswdd at boot time (or NO).
 nis_yppasswdd_flags=""		# Flags to rpc.yppasswdd (if enabled).
 
 ### SNMP daemon ###
 # Be sure to understand the security implications of running SNMP v1/v2
 # in your network.
 bsnmpd_enable="NO"		# Run the SNMP daemon (or NO).
 bsnmpd_flags=""			# Flags for bsnmpd.
 
 ### Network routing options: ###
 defaultrouter="NO"		# Set to default gateway (or NO).
 static_arp_pairs=""		# Set to static ARP list (or leave empty).
 static_routes=""		# Set to static route list (or leave empty).
 natm_static_routes=""		# Set to static route list for NATM (or leave empty).
 gateway_enable="NO"		# Set to YES if this host will be a gateway.
 routed_enable="NO"		# Set to YES to enable a routing daemon.
 routed_program="/sbin/routed"	# Name of routing daemon to use if enabled.
 routed_flags="-q"		# Flags for routing daemon.
 mrouted_enable="NO"		# Do IPv4 multicast routing.
 mrouted_program="/usr/local/sbin/mrouted"	# Name of IPv4 multicast
 						# routing daemon.  You need to
 						# install it from package or
 						# port.
 mrouted_flags=""		# Flags for multicast routing daemon.
 ipxgateway_enable="NO"		# Set to YES to enable IPX routing.
 ipxrouted_enable="NO"		# Set to YES to run the IPX routing daemon.
 ipxrouted_flags=""		# Flags for IPX routing daemon.
 arpproxy_all="NO"		# replaces obsolete kernel option ARP_PROXYALL.
 forward_sourceroute="NO"	# do source routing (only if gateway_enable is set to "YES")
 accept_sourceroute="NO"		# accept source routed packets to us
 
 ### ATM interface options: ###
 atm_enable="NO"			# Configure ATM interfaces (or NO).
 #atm_netif_hea0="atm 1"		# Network interfaces for physical interface.
 #atm_sigmgr_hea0="uni31"	# Signalling manager for physical interface.
 #atm_prefix_hea0="ILMI"		# NSAP prefix (UNI interfaces only) (or ILMI).
 #atm_macaddr_hea0="NO"		# Override physical MAC address (or NO).
 #atm_arpserver_atm0="0x47.0005.80.999999.9999.9999.9999.999999999999.00" # ATMARP server address (or local).
 #atm_scsparp_atm0="NO"		# Run SCSP/ATMARP on network interface (or NO).
 atm_pvcs=""			# Set to PVC list (or leave empty).
 atm_arps=""			# Set to permanent ARP list (or leave empty).
 
 ### Bluetooth ###
 hcsecd_enable="NO"		# Enable hcsecd(8) (or NO)
 hcsecd_config="/etc/bluetooth/hcsecd.conf" # hcsecd(8) configuration file
 
 sdpd_enable="NO"		# Enable sdpd(8) (or NO)
 sdpd_control="/var/run/sdp"	# sdpd(8) control socket
 sdpd_groupname="nobody"		# set sdpd(8) user/group to run as after
 sdpd_username="nobody"		# it initializes
 
 bthidd_enable="NO"		# Enable bthidd(8) (or NO)
 bthidd_config="/etc/bluetooth/bthidd.conf" # bthidd(8) configuration file
 bthidd_hids="/var/db/bthidd.hids" # bthidd(8) known HID devices file
 
 rfcomm_pppd_server_enable="NO"	# Enable rfcomm_pppd(8) in server mode (or NO)
 rfcomm_pppd_server_profile="one two"	# Profile to use from /etc/ppp/ppp.conf
 #
 #rfcomm_pppd_server_one_bdaddr=""	# Override local bdaddr for 'one'
 rfcomm_pppd_server_one_channel="1"	# Override local channel for 'one'
 #rfcomm_pppd_server_one_register_sp="NO"	# Override SP and DUN register
 #rfcomm_pppd_server_one_register_dun="NO"	# for 'one'
 #
 #rfcomm_pppd_server_two_bdaddr=""	# Override local bdaddr for 'two'
 rfcomm_pppd_server_two_channel="3"	# Override local channel for 'two'
 #rfcomm_pppd_server_two_register_sp="NO"	# Override SP and DUN register
 #rfcomm_pppd_server_two_register_dun="NO"	# for 'two'
 
 ubthidhci_enable="NO"		# Switch a USB BT controller present on
 #ubthidhci_busnum="3"		# bus 3 and addr 2 from HID mode to HCI mode.
 #ubthidhci_addr="2"		# Check usbconfig list to find the correct
 				# numbers for your system.
 
 ### Miscellaneous network options: ###
 icmp_bmcastecho="NO"	# respond to broadcast ping packets
 
 ### IPv6 options: ###
 ipv6_network_interfaces="auto"	# List of IPv6 network interfaces
 				# (or "auto" or "none").
 ipv6_activate_all_interfaces="NO"	# If NO, interfaces which have no
 					# corresponding $ifconfig_IF_ipv6 are
 					# marked as IFDISABLED for security
 					# reasons.
 ipv6_defaultrouter="NO"		# Set to IPv6 default gateway (or NO).
 #ipv6_defaultrouter="2002:c058:6301::"	# Use this for 6to4 (RFC 3068)
 ipv6_static_routes=""		# Set to static route list (or leave empty).
 #ipv6_static_routes="xxx"	# An example to set fec0:0000:0000:0006::/64
 				#  route toward loopback interface.
 #ipv6_route_xxx="fec0:0000:0000:0006:: -prefixlen 64 ::1"
 ipv6_gateway_enable="NO"	# Set to YES if this host will be a gateway.
 ipv6_privacy="NO"		# Use privacy address on RA-receiving IFs
 				# (RFC 4941)
 
 route6d_enable="NO"		# Set to YES to enable an IPv6 routing daemon.
 route6d_program="/usr/sbin/route6d"	# Name of IPv6 routing daemon.
 route6d_flags=""		# Flags to IPv6 routing daemon.
 #route6d_flags="-l"		# Example for route6d with only IPv6 site local
 				# addrs.
 #route6d_flags="-q"		# If you want to run a routing daemon on an end
 				# node, you should stop advertisement.
 #ipv6_network_interfaces="ed0 ep0"	# Examples for router
 					# or static configuration for end node.
 					# Choose correct prefix value.
 #ipv6_prefix_ed0="fec0:0000:0000:0001 fec0:0000:0000:0002"  # Examples for rtr.
 #ipv6_prefix_ep0="fec0:0000:0000:0003 fec0:0000:0000:0004"  # Examples for rtr.
 ipv6_default_interface="NO"	# Default output interface for scoped addrs.
 				# This works only with
 				# ipv6_gateway_enable="NO".
 rtsol_flags=""			# Flags to IPv6 router solicitation.
 rtsold_enable="NO"		# Set to YES to enable an IPv6 router
 				# solicitation daemon.
 rtsold_flags="-a"		# Flags to an IPv6 router solicitation
 				# daemon.
 rtadvd_enable="NO"		# Set to YES to enable an IPv6 router
 				# advertisement daemon. If set to YES,
 				# this router becomes a possible candidate
 				# IPv6 default router for local subnets.
 rtadvd_interfaces=""		# Interfaces rtadvd sends RA packets.
 mroute6d_enable="NO"		# Do IPv6 multicast routing.
 mroute6d_program="/usr/local/sbin/pim6dd"	# Name of IPv6 multicast
 						# routing daemon.  You need to
 						# install it from package or
 						# port.
 mroute6d_flags=""		# Flags to IPv6 multicast routing daemon.
 stf_interface_ipv4addr=""	# Local IPv4 addr for 6to4 IPv6 over IPv4
 				# tunneling interface. Specify this entry
 				# to enable 6to4 interface.
 stf_interface_ipv4plen="0"	# Prefix length for 6to4 IPv4 addr,
 				# to limit peer addr range. Effective value
 				# is 0-31.
 stf_interface_ipv6_ifid="0:0:0:1"	# IPv6 interface id for stf0.
 				# If you like, you can set "AUTO" for this.
 stf_interface_ipv6_slaid="0000"	# IPv6 Site Level Aggregator for stf0
 ipv6_faith_prefix="NO"		# Set faith prefix to enable a FAITH
 				# IPv6-to-IPv4 TCP translator.  You also need
 				# faithd(8) setup.
 ipv6_ipv4mapping="NO"		# Set to "YES" to enable IPv4 mapped IPv6 addr
 				# communication. (like ::ffff:a.b.c.d)
 ipv6_ipfilter_rules="/etc/ipf6.rules"	# rules definition file for ipfilter,
 					# see /usr/src/contrib/ipfilter/rules
 					# for examples
 ip6addrctl_enable="YES"	# Set to YES to enable default address selection
 ip6addrctl_verbose="NO"	# Set to YES to enable verbose configuration messages
 ip6addrctl_policy="AUTO"	# A pre-defined address selection policy
 				# (ipv4_prefer, ipv6_prefer, or AUTO)
 
 ##############################################################
 ###  System console options  #################################
 ##############################################################
 
 keyboard=""		# keyboard device to use (default /dev/kbd0).
 keymap="NO"		# keymap in /usr/share/syscons/keymaps/* (or NO).
 keyrate="NO"		# keyboard rate to: slow, normal, fast (or NO).
 keybell="NO" 		# See kbdcontrol(1) for options.  Use "off" to disable.
 keychange="NO"		# function keys default values (or NO).
 cursor="NO"		# cursor type {normal|blink|destructive} (or NO).
 scrnmap="NO"		# screen map in /usr/share/syscons/scrnmaps/* (or NO).
 font8x16="NO"		# font 8x16 from /usr/share/syscons/fonts/* (or NO).
 font8x14="NO"		# font 8x14 from /usr/share/syscons/fonts/* (or NO).
 font8x8="NO"		# font 8x8 from /usr/share/syscons/fonts/* (or NO).
 blanktime="300"		# blank time (in seconds) or "NO" to turn it off.
 saver="NO"		# screen saver: Uses /boot/kernel/${saver}_saver.ko
 moused_nondefault_enable="YES" # Treat non-default mice as enabled unless
 			       # specifically overridden in rc.conf(5).
 moused_enable="NO"	# Run the mouse daemon.
 moused_type="auto"	# See man page for rc.conf(5) for available settings.
 moused_port="/dev/psm0"	# Set to your mouse port.
 moused_flags=""		# Any additional flags to moused.
 mousechar_start="NO"	# if 0xd0-0xd3 default range is occupied in your
 			# language code table, specify alternative range
 			# start like mousechar_start=3, see vidcontrol(1)
 allscreens_flags=""	# Set this vidcontrol mode for all virtual screens
 allscreens_kbdflags=""	# Set this kbdcontrol mode for all virtual screens
 
 ##############################################################
 ###  Mail Transfer Agent (MTA) options  ######################
 ##############################################################
 
 mta_start_script="/etc/rc.sendmail"
 			# Script to start your chosen MTA, called by /etc/rc.
 # Settings for /etc/rc.sendmail and /etc/rc.d/sendmail:
 sendmail_enable="NO"	# Run the sendmail inbound daemon (YES/NO).
 sendmail_pidfile="/var/run/sendmail.pid"	# sendmail pid file
 sendmail_procname="/usr/sbin/sendmail"		# sendmail process name
 sendmail_flags="-L sm-mta -bd -q30m" # Flags to sendmail (as a server)
 sendmail_submit_enable="YES"	# Start a localhost-only MTA for mail submission
 sendmail_submit_flags="-L sm-mta -bd -q30m -ODaemonPortOptions=Addr=localhost"
 				# Flags for localhost-only MTA
 sendmail_outbound_enable="YES"	# Dequeue stuck mail (YES/NO).
 sendmail_outbound_flags="-L sm-queue -q30m" # Flags to sendmail (outbound only)
 sendmail_msp_queue_enable="YES"	# Dequeue stuck clientmqueue mail (YES/NO).
 sendmail_msp_queue_flags="-L sm-msp-queue -Ac -q30m"
 				# Flags for sendmail_msp_queue daemon.
 sendmail_rebuild_aliases="NO"	# Run newaliases if necessary (YES/NO).
 
 
 ##############################################################
 ###  Miscellaneous administrative options  ###################
 ##############################################################
 
 auditd_enable="NO"	# Run the audit daemon.
 auditd_program="/usr/sbin/auditd"	# Path to the audit daemon.
 auditd_flags=""		# Which options to pass to the audit daemon.
 cron_enable="YES"	# Run the periodic job daemon.
 cron_program="/usr/sbin/cron"	# Which cron executable to run (if enabled).
 cron_dst="YES"		# Handle DST transitions intelligently (YES/NO)
 cron_flags=""		# Which options to pass to the cron daemon.
 lpd_enable="NO"		# Run the line printer daemon.
 lpd_program="/usr/sbin/lpd"	# path to lpd, if you want a different one.
 lpd_flags=""		# Flags to lpd (if enabled).
 nscd_enable="NO"	# Run the nsswitch caching daemon.
 chkprintcap_enable="NO"	# Run chkprintcap(8) before running lpd.
 chkprintcap_flags="-d"	# Create missing directories by default.
 dumpdev="AUTO"		# Device to crashdump to (device name, AUTO, or NO).
 dumpdir="/var/crash"	# Directory where crash dumps are to be stored
 savecore_flags=""	# Used if dumpdev is enabled above, and present.
 crashinfo_enable="YES"	# Automatically generate crash dump summary.
 crashinfo_program="/usr/sbin/crashinfo"	# Script to generate crash dump summary.
 quota_enable="NO"	# turn on quotas on startup (or NO).
 check_quotas="YES"	# Check quotas on startup (or NO).
 quotaon_flags="-a"	# Turn quotas on for all file systems (if enabled)
 quotaoff_flags="-a"	# Turn quotas off for all file systems at shutdown
 quotacheck_flags="-a"	# Check all file system quotas (if enabled)
 accounting_enable="NO"	# Turn on process accounting (or NO).
 ibcs2_enable="NO"	# Ibcs2 (SCO) emulation loaded at startup (or NO).
 ibcs2_loaders="coff"	# List of additional Ibcs2 loaders (or NO).
 
 # Emulation/compatibility services provided by /etc/rc.d/abi
 sysvipc_enable="NO"	# Load System V IPC primitives at startup (or NO).
 linux_enable="NO"	# Linux binary compatibility loaded at startup (or NO).
 svr4_enable="NO"	# SysVR4 emulation loaded at startup (or NO).
 clear_tmp_enable="NO"	# Clear /tmp at startup.
 clear_tmp_X="YES" 	# Clear and recreate X11-related directories in /tmp
 ldconfig_insecure="NO"	# Set to YES to disable ldconfig security checks
 ldconfig_paths="/usr/lib/compat /usr/local/lib /usr/local/lib/compat/pkg"
 			# shared library search paths
 ldconfig32_paths="/usr/lib32" # 32-bit compatibility shared library search paths
 ldconfig_paths_aout="/usr/lib/compat/aout /usr/local/lib/aout"
 			# a.out shared library search paths
 ldconfig_local_dirs="/usr/local/libdata/ldconfig"
 			# Local directories with ldconfig configuration files.
 ldconfig_local32_dirs="/usr/local/libdata/ldconfig32"
 			# Local directories with 32-bit compatibility ldconfig
 			# configuration files.
 kern_securelevel_enable="NO"	# kernel security level (see security(7))
 kern_securelevel="-1"	# range: -1..3 ; `-1' is the most insecure
 			# Note that setting securelevel to 0 will result
 			# in the system booting with securelevel set to 1, as
 			# init(8) will raise the level when rc(8) completes.
 update_motd="YES"	# update version info in /etc/motd (or NO)
 entropy_file="/entropy"	# Set to NO to disable caching entropy through reboots.
 			# /var/db/entropy-file is preferred if / is not avail.
 entropy_dir="/var/db/entropy" # Set to NO to disable caching entropy via cron.
 entropy_save_sz="2048"	# Size of the entropy cache files.
 entropy_save_num="8"	# Number of entropy cache files to save.
 harvest_interrupt="YES"	# Entropy device harvests interrupt randomness
 harvest_ethernet="YES"	# Entropy device harvests ethernet randomness
 harvest_p_to_p="YES"	# Entropy device harvests point-to-point randomness
 dmesg_enable="YES"	# Save dmesg(8) to /var/run/dmesg.boot
 watchdogd_enable="NO"	# Start the software watchdog daemon
 watchdogd_flags=""	# Flags to watchdogd (if enabled)
 devfs_rulesets="/etc/defaults/devfs.rules /etc/devfs.rules" # Files containing
 							    # devfs(8) rules.
 devfs_system_ruleset=""	# The name (NOT number) of a ruleset to apply to /dev
 devfs_set_rulesets=""	# A list of /mount/dev=ruleset_name settings to
 			# apply (must be mounted already, i.e. fstab(5))
 performance_cx_lowest="HIGH"	# Online CPU idle state
 performance_cpu_freq="NONE"	# Online CPU frequency
 economy_cx_lowest="HIGH"	# Offline CPU idle state
 economy_cpu_freq="NONE"		# Offline CPU frequency
 virecover_enable="YES"	# Perform housekeeping for the vi(1) editor
 ugidfw_enable="NO"	# Load mac_bsdextended(4) rules on boot
 bsdextended_script="/etc/rc.bsdextended"	# Default mac_bsdextended(4)
 						# ruleset file.
 newsyslog_enable="YES"	# Run newsyslog at startup.
 newsyslog_flags="-CN"	# Newsyslog flags to create marked files
 mixer_enable="YES"	# Run the sound mixer.
 
 ##############################################################
 ### Jail Configuration #######################################
 ##############################################################
 jail_enable="NO"	# Set to NO to disable starting of any jails
 jail_parallel_start="NO"	# Start jails in the background
 jail_list=""		# Space separated list of names of jails
 jail_set_hostname_allow="YES" # Allow root user in a jail to change its hostname
 jail_socket_unixiproute_only="YES" # Route only TCP/IP within a jail
 jail_sysvipc_allow="NO"	# Allow SystemV IPC use from within a jail
 
 #
 # To use rc's built-in jail infrastructure create entries for
 # each jail, specified in jail_list, with the following variables.
 # NOTES:
 # - replace 'example' with the jail's name.
 # - except rootdir, hostname, ip and the _multi<n> addresses,
 #   all of the following variables may be made global jail variables
 #   if you don't specify a jail name (e.g. jail_interface, jail_devfs_ruleset).
 #
 #jail_example_rootdir="/usr/jail/default"	# Jail's root directory
 #jail_example_hostname="default.domain.com"	# Jail's hostname
 #jail_example_interface=""			# Jail's interface variable to create IP aliases on
 #jail_example_fib="0"				# Routing table for setfib(1)
 #jail_example_ip="192.0.2.10,2001:db8::17"	# Jail's primary IPv4 and IPv6 address
 #jail_example_ip_multi0="2001:db8::10"		#  and another IPv6 address
 #jail_example_exec_start="/bin/sh /etc/rc"		# command to execute in jail for starting
 #jail_example_exec_afterstart0="/bin/sh command"	# command to execute after the one for
 							# starting the jail. More than one can be
 							# specified using a trailing number
 #jail_example_exec_stop="/bin/sh /etc/rc.shutdown"	# command to execute in jail for stopping
 #jail_example_devfs_enable="NO"			# mount devfs in the jail
 #jail_example_devfs_ruleset="ruleset_name"	# devfs ruleset to apply to jail -
 						# usually you want "devfsrules_jail".
 #jail_example_fdescfs_enable="NO"		# mount fdescfs in the jail
 #jail_example_procfs_enable="NO"		# mount procfs in jail
 #jail_example_mount_enable="NO"			# mount/umount jail's fs
 #jail_example_fstab=""				# fstab(5) for mount/umount
 #jail_example_flags="-l -U root"		# flags for jail(8)
 
 ##############################################################
 ### Define source_rc_confs, the mechanism used by /etc/rc.* ##
 ### scripts to source rc_conf_files overrides safely.	    ##
 ##############################################################
 
 # Define source_rc_confs only while the guard variable is still empty, then
 # set the guard so a later evaluation of this code skips the definition.
 if [ -z "${source_rc_confs_defined}" ]; then
 	source_rc_confs_defined=yes
 	# Source every readable file named in ${rc_conf_files}, in list order.
 	# A name that appears more than once in the list is sourced only once
 	# per call.
 	source_rc_confs () {
 		local i sourced_files
 		for i in ${rc_conf_files}; do
 			# sourced_files accumulates ":name:" tokens for the names
 			# already handled during this call.
 			case ${sourced_files} in
 			*:$i:*)
 				# Already handled: skip the duplicate entry.
 				;;
 			*)
 				# Record the name first (whether or not the file
 				# turns out to be readable), then source it if
 				# it can be read.
 				sourced_files="${sourced_files}:$i:"
 				if [ -r $i ]; then
 					. $i
 				fi
 				;;
 			esac
 		done
 	}
 fi
Index: projects/binutils-2.17/etc/rc.d/gptboot
===================================================================
--- projects/binutils-2.17/etc/rc.d/gptboot	(revision 215829)
+++ projects/binutils-2.17/etc/rc.d/gptboot	(revision 215830)
@@ -1,76 +1,77 @@
 #!/bin/sh
 #
 # Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 # $FreeBSD$
 #
 
 # PROVIDE: gptboot
 # REQUIRE: mountcritremote
 # KEYWORD: nojail
 
 . /etc/rc.subr
 
 name="gptboot"
+rcvar=`set_rcvar`
 start_cmd="gptboot_report"
 
 # Report the outcome of GPT boot attempts.  Scans `gpart show` output for
 # freebsd-ufs partitions that carry the bootfailed or bootonce attribute
 # (and not bootme), logs a message for each through logger(1) under the
 # gptboot tag, and clears the attribute that was reported.
 gptboot_report()
 {
 	# Pipeline stages, in order:
 	#  1. keep the "=>" header lines and the freebsd-ufs lines whose
 	#     attribute list contains bootfailed or bootonce;
 	#  2. strip the leading "=>" so both kinds of line split into the
 	#     same fields for read;
 	#  3. drop every line whose attribute list contains bootme.
 	gpart show | \
 		egrep '(^=>| freebsd-ufs .*(\[|,)(bootfailed|bootonce)(,|\]))' | \
 		sed 's/^=>//' | \
 		egrep -v '(\[|,)bootme(,|\])' | \
 	while read start size pos type attrs rest; do
 		case "${pos}" in
 		[0-9]*)
 			# Third field starts with a digit: handled as a partition
 			# entry whose index is ${pos}.  It is only processed when
 			# the most recent header line named a GPT disk.
 			if [ -n "${disk}" ]; then
 				part="${disk}p${pos}"
 				# egrep -q exits 0 when the attribute is present, so
 				# a value of 0 below means "attribute set".
 				echo "${attrs}" | egrep -q '(\[|,)bootfailed(,|\])'
 				bootfailed=$?
 				echo "${attrs}" | egrep -q '(\[|,)bootonce(,|\])'
 				bootonce=$?
 				if [ ${bootfailed} -eq 0 ]; then
 					# Failures are logged immediately, by a direct
 					# logger(1) call.
 					logger -t gptboot -p local0.notice "Boot from ${part} failed."
 					gpart unset -a bootfailed -i ${pos} ${disk} >/dev/null
 				elif [ ${bootonce} -eq 0 ]; then
 					# We want to log success after all failures, so
 					# the message goes to the loop's stdout, which is
 					# piped to the logger(1) call after "done".
 					echo -n "Boot from ${part} succeeded."
 					gpart unset -a bootonce -i ${pos} ${disk} >/dev/null
 				fi
 			fi
 			;;
 		*)
 			# Any other line is handled as a header: remember the
 			# name in the third field when the fourth field is "GPT",
 			# otherwise clear it so the partition lines that follow
 			# are ignored.
 			if [ "${type}" = "GPT" ]; then
 				disk="${pos}"
 			else
 				disk=""
 			fi
 			;;
 		esac
 	done | logger -t gptboot -p local0.notice
 }
 
 load_rc_config $name
 run_rc_command "$1"
Index: projects/binutils-2.17/etc/rc.d/mountcritlocal
===================================================================
--- projects/binutils-2.17/etc/rc.d/mountcritlocal	(revision 215829)
+++ projects/binutils-2.17/etc/rc.d/mountcritlocal	(revision 215830)
@@ -1,54 +1,54 @@
 #!/bin/sh
 #
 # $FreeBSD$
 #
 
 # PROVIDE: mountcritlocal
 # REQUIRE: root hostid_save mdconfig
-# KEYWORD: nojail
+# KEYWORD: nojail shutdown
 
 . /etc/rc.subr
 
 name="mountcritlocal"
 start_cmd="mountcritlocal_start"
-stop_cmd=":"
+stop_cmd=sync
 
 # Start method: mount every fstab file system whose type is not listed in
 # ${netfs_types} (extended by ${extra_netfs_types}), and call stop_boot if
 # mount(8) returns a non-zero status.
 mountcritlocal_start()
 {
 	local err
 
 	# Set up the list of network filesystem types for which mounting
 	# should be delayed until after network initialization.
 	# Any value of extra_netfs_types other than "NO" (case-insensitive)
 	# is appended to netfs_types.
 	case ${extra_netfs_types} in
 	[Nn][Oo])
 		;;
 	*)
 		netfs_types="${netfs_types} ${extra_netfs_types}"
 		;;
 	esac
 
 	# Mount everything except the file system types in netfs_types.
 	check_startmsgs && echo -n 'Mounting local file systems:'
 	# Build the -t argument as "no" followed by a comma-separated list
 	# of types.
 	mount_excludes='no'
 	for i in ${netfs_types}; do
 		# Keep only the part before the last ':' of each entry.
 		fstype=${i%:*}
 		mount_excludes="${mount_excludes}${fstype},"
 	done
 	# Remove the trailing comma left by the loop.
 	mount_excludes=${mount_excludes%,}
 	mount -a -t ${mount_excludes}
 	# Save mount's status now; the next command would overwrite $?.
 	err=$?
 	check_startmsgs && echo '.'
 
 	case ${err} in
 	0)
 		;;
 	*)
 		echo 'Mounting /etc/fstab filesystems failed,' \
 		    ' startup aborted'
 		stop_boot true
 		;;
 	esac
 }
 
 load_rc_config $name
 run_rc_command "$1"
Index: projects/binutils-2.17/lib/libc/stdtime
===================================================================
--- projects/binutils-2.17/lib/libc/stdtime	(revision 215829)
+++ projects/binutils-2.17/lib/libc/stdtime	(revision 215830)

Property changes on: projects/binutils-2.17/lib/libc/stdtime
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libc/stdtime:r215709-215824
Index: projects/binutils-2.17/lib/libc
===================================================================
--- projects/binutils-2.17/lib/libc	(revision 215829)
+++ projects/binutils-2.17/lib/libc	(revision 215830)

Property changes on: projects/binutils-2.17/lib/libc
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libc:r215709-215824
Index: projects/binutils-2.17/lib/libutil
===================================================================
--- projects/binutils-2.17/lib/libutil	(revision 215829)
+++ projects/binutils-2.17/lib/libutil	(revision 215830)

Property changes on: projects/binutils-2.17/lib/libutil
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libutil:r215709-215824
Index: projects/binutils-2.17/lib/libz
===================================================================
--- projects/binutils-2.17/lib/libz	(revision 215829)
+++ projects/binutils-2.17/lib/libz	(revision 215830)

Property changes on: projects/binutils-2.17/lib/libz
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libz:r215709-215824
Index: projects/binutils-2.17/sbin/ipfw
===================================================================
--- projects/binutils-2.17/sbin/ipfw	(revision 215829)
+++ projects/binutils-2.17/sbin/ipfw	(revision 215830)

Property changes on: projects/binutils-2.17/sbin/ipfw
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sbin/ipfw:r215709-215824
Index: projects/binutils-2.17/sbin
===================================================================
--- projects/binutils-2.17/sbin	(revision 215829)
+++ projects/binutils-2.17/sbin	(revision 215830)

Property changes on: projects/binutils-2.17/sbin
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sbin:r215709-215824
Index: projects/binutils-2.17/share/man/man4/bge.4
===================================================================
--- projects/binutils-2.17/share/man/man4/bge.4	(revision 215829)
+++ projects/binutils-2.17/share/man/man4/bge.4	(revision 215830)
@@ -1,282 +1,261 @@
 .\" Copyright (c) 2001 Wind River Systems
 .\" Copyright (c) 1997, 1998, 1999, 2000, 2001
 .\"	Bill Paul <wpaul@windriver.com>. All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. All advertising materials mentioning features or use of this software
 .\"    must display the following acknowledgement:
 .\"	This product includes software developed by Bill Paul.
 .\" 4. Neither the name of the author nor the names of any co-contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"   without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" THE POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 7, 2010
+.Dd November 23, 2010
 .Dt BGE 4
 .Os
 .Sh NAME
 .Nm bge
 .Nd "Broadcom BCM57xx/BCM590x Gigabit/Fast Ethernet driver"
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device miibus"
 .Cd "device bge"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_bge_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 driver provides support for various NICs based on the Broadcom BCM570x,
 571x, 572x, 575x, 576x, 578x, 5776x and 5778x Gigabit Ethernet controller
 chips and the 590x and 5779x Fast Ethernet controller chips.
 .Pp
 All of these NICs are capable of 10, 100 and 1000Mbps speeds over CAT5
 copper cable, except for the SysKonnect SK-9D41 which supports only
 1000Mbps over multimode fiber.
 The BCM570x builds upon the technology of the Alteon Tigon II.
 It has two R4000 CPU cores and is PCI v2.2 and PCI-X v1.0 compliant.
 It supports IP, TCP
 and UDP checksum offload for both receive and transmit,
 multiple RX and TX DMA rings for QoS applications, rules-based
 receive filtering, and VLAN tag stripping/insertion as well as
 a 256-bit multicast hash filter.
 Additional features may be
 provided via value-add firmware updates.
 The BCM570x supports TBI (ten bit interface) and GMII
 transceivers, which means it can be used with either copper or 1000baseX
 fiber applications.
 Note however the device only supports a single
 speed in TBI mode.
 .Pp
 Most BCM5700-based cards also use the Broadcom BCM5401 or BCM5411 10/100/1000
 copper gigabit transceivers,
 which support autonegotiation of 10, 100 and 1000Mbps modes in
 full or half duplex.
 .Pp
 The BCM5700, BCM5701, BCM5702, BCM5703, BCM5704 and BCM5717 also support
 jumbo frames, which can be configured
 via the interface MTU setting.
 Selecting an MTU larger than 1500 bytes with the
 .Xr ifconfig 8
 utility configures the adapter to receive and transmit jumbo frames.
 Using jumbo frames can greatly improve performance for certain tasks,
 such as file transfers and data streaming.
 .Pp
 The
 .Nm
 driver supports the following media types:
 .Bl -tag -width ".Cm 10baseT/UTP"
 .It Cm autoselect
 Enable autoselection of the media type and options.
 The user can manually override
 the autoselected mode by adding media options to
 .Xr rc.conf 5 .
 .It Cm 10baseT/UTP
 Set 10Mbps operation.
 The
 .Xr ifconfig 8
 .Ic mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 100baseTX
 Set 100Mbps (Fast Ethernet) operation.
 The
 .Xr ifconfig 8
 .Ic mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 1000baseTX
 Set 1000baseTX operation over twisted pair.
 Only
 .Cm full-duplex
 mode is supported.
 .It Cm 1000baseSX
 Set 1000Mbps (Gigabit Ethernet) operation.
 Both
 .Cm full-duplex
 and
 .Cm half-duplex
 modes are supported.
 .El
 .Pp
 The
 .Nm
 driver supports the following media options:
 .Bl -tag -width ".Cm full-duplex"
 .It Cm full-duplex
 Force full duplex operation.
 .It Cm half-duplex
 Force half duplex operation.
-.El
-.Pp
-The
-.Nm
-driver also supports one special link option for 1000baseTX cards:
-.Bl -tag -width ".Cm link0"
-.It Cm link0
-With 1000baseTX cards, establishing a link between two ports requires
-that one port be configured as a master and the other a slave.
-With autonegotiation,
-the master/slave settings will be chosen automatically.
-However when manually selecting the link state, it is necessary to
-force one side of the link to be a master and the other a slave.
-The
-.Nm
-driver configures the ports as slaves by default.
-Setting the
-.Cm link0
-flag with
-.Xr ifconfig 8
-will set a port as a master instead.
 .El
 .Pp
 For more information on configuring this device, see
 .Xr ifconfig 8 .
 .Sh HARDWARE
 The
 .Nm
 driver provides support for various NICs based on the Broadcom BCM570x
 family of Gigabit Ethernet controller chips, including the
 following:
 .Pp
 .Bl -bullet -compact
 .It
 3Com 3c996-SX (1000baseSX)
 .It
 3Com 3c996-T (10/100/1000baseTX)
 .It
 Dell PowerEdge 1750 integrated BCM5704C NIC (10/100/1000baseTX)
 .It
 Dell PowerEdge 2550 integrated BCM5700 NIC (10/100/1000baseTX)
 .It
 Dell PowerEdge 2650 integrated BCM5703 NIC (10/100/1000baseTX)
 .It
 Dell PowerEdge R200 integrated BCM5750 NIC (10/100/1000baseTX)
 .It
 Dell PowerEdge R300 integrated BCM5722 NIC (10/100/1000baseTX)
 .It
 IBM x235 server integrated BCM5703x NIC (10/100/1000baseTX)
 .It
 HP Compaq dc7600 integrated BCM5752 NIC (10/100/1000baseTX)
 .It
 HP ProLiant NC7760 embedded Gigabit NIC (10/100/1000baseTX)
 .It
 HP ProLiant NC7770 PCI-X Gigabit NIC (10/100/1000baseTX)
 .It
 HP ProLiant NC7771 PCI-X Gigabit NIC (10/100/1000baseTX)
 .It
 HP ProLiant NC7781 embedded PCI-X Gigabit NIC (10/100/1000baseTX)
 .It
 Netgear GA302T (10/100/1000baseTX)
 .It
 SysKonnect SK-9D21 (10/100/1000baseTX)
 .It
 SysKonnect SK-9D41 (1000baseSX)
 .El
 .Sh LOADER TUNABLES
 The following tunable can be set at the
 .Xr loader 8
 prompt before booting the kernel, or stored in
 .Xr loader.conf 5 .
 .Bl -tag -width indent
 .It Va hw.bge.allow_asf
 Allow the ASF feature for cooperating with IPMI.
 Can cause system lockup problems on a small number of systems.
 Enabled by default.
 .El
 .Sh SYSCTL VARIABLES
 The following variables are available as both
 .Xr sysctl 8
 variables and
 .Xr loader 8
 tunables:
 .Bl -tag -width indent
 .It Va dev.bge.%d.forced_collapse
 Allow collapsing multiple transmit buffers into a single buffer
 to increase transmit performance with the cost of CPU cycles.
 The default value is 0 to disable transmit buffer collapsing.
 .It Va dev.bge.%d.forced_udpcsum
 Enable UDP transmit checksum offloading even if controller can generate
 UDP datagrams with checksum value 0.
 UDP datagrams with checksum value 0 can confuse receiver host as it means
 sender did not compute UDP checksum.
 The default value is 0 which disables UDP transmit checksum offloading.
 The interface needs to be brought down and up again before a change takes
 effect.
 .El
 .Sh DIAGNOSTICS
 .Bl -diag
 .It "bge%d: couldn't map memory"
 A fatal initialization error has occurred.
 .It "bge%d: couldn't map ports"
 A fatal initialization error has occurred.
 .It "bge%d: couldn't map interrupt"
 A fatal initialization error has occurred.
 .It "bge%d: no memory for softc struct!"
 The driver failed to allocate memory for per-device instance information
 during initialization.
 .It "bge%d: failed to enable memory mapping!"
 The driver failed to initialize PCI shared memory mapping.
 This might
 happen if the card is not in a bus-master slot.
 .It "bge%d: no memory for jumbo buffers!"
 The driver failed to allocate memory for jumbo frames during
 initialization.
 .It "bge%d: watchdog timeout"
 The device has stopped responding to the network, or there is a problem with
 the network connection (cable).
 .El
 .Sh SEE ALSO
 .Xr altq 4 ,
 .Xr arp 4 ,
 .Xr miibus 4 ,
 .Xr netintro 4 ,
 .Xr ng_ether 4 ,
 .Xr polling 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig 8
 .Sh HISTORY
 The
 .Nm
 device driver first appeared in
 .Fx 4.5 .
 .Sh AUTHORS
 The
 .Nm
 driver was written by
 .An Bill Paul Aq wpaul@windriver.com .
Index: projects/binutils-2.17/share/man/man4/msk.4
===================================================================
--- projects/binutils-2.17/share/man/man4/msk.4	(revision 215829)
+++ projects/binutils-2.17/share/man/man4/msk.4	(revision 215830)
@@ -1,275 +1,254 @@
 .\" Copyright (c) 2006 Pyun YongHyeon
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 30, 2010
+.Dd November 23, 2010
 .Dt MSK 4
 .Os
 .Sh NAME
 .Nm msk
 .Nd Marvell/SysKonnect Yukon II Gigabit Ethernet adapter driver
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device miibus"
 .Cd "device msk"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_msk_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 device driver provides support for various NICs based on the
 Marvell/SysKonnect Yukon II Gigabit Ethernet controller chip.
 .Pp
 All NICs supported by the
 .Nm
 driver have TCP/UDP/IP checksum offload for transmit, TCP
 segmentation offload (TSO), hardware VLAN tag stripping/insertion
 features and an interrupt moderation mechanism as well as a 64-bit
 multicast hash filter.
 The Yukon II supports TBI (ten bit interface) and GMII
 transceivers, which means it can be used with either copper or
 1000baseX fiber applications.
 .Pp
 The Yukon II also supports Jumbo Frames (up to 9022 bytes), which can be
 configured via the interface MTU setting.
 Selecting an MTU larger than 1500 bytes with the
 .Xr ifconfig 8
 utility configures the adapter to receive and transmit Jumbo Frames.
 .Pp
 The
 .Nm
 driver supports the following media types:
 .Bl -tag -width ".Cm 10baseT/UTP"
 .It Cm autoselect
 Enable autoselection of the media type and options.
 The user can manually override
 the autoselected mode by adding media options to
 .Xr rc.conf 5 .
 .It Cm 10baseT/UTP
 Set 10Mbps operation.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 100baseTX
 Set 100Mbps (Fast Ethernet) operation.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 1000baseTX
 Set 1000baseTX operation over twisted pair.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 1000baseSX
 Set 1000Mbps (Gigabit Ethernet) operation.
 Both
 .Cm full-duplex
 and
 .Cm half-duplex
 modes are supported.
 .El
 .Pp
 The
 .Nm
 driver supports the following media options:
 .Bl -tag -width ".Cm full-duplex"
 .It Cm full-duplex
 Force full duplex operation.
 .It Cm half-duplex
 Force half duplex operation.
-.El
-.Pp
-The
-.Nm
-driver also supports one special link option for 1000baseTX cards:
-.Bl -tag -width ".Cm link0"
-.It Cm link0
-With 1000baseTX cards, establishing a link between two ports requires
-that one port is configured as master and the other one as slave.
-With autonegotiation,
-the master/slave settings will be chosen automatically.
-However when manually selecting the link state, it is necessary to
-force one side of the link to be a master and the other a slave.
-The
-.Nm
-driver configures the ports as slaves by default.
-Setting the
-.Cm link0
-flag with
-.Xr ifconfig 8
-will set a port as a master instead.
 .El
 .Pp
 For more information on configuring this device, see
 .Xr ifconfig 8 .
 .Sh HARDWARE
 The
 .Nm
 driver provides support for various NICs based on the Marvell/SysKonnect
 Yukon II based Gigabit Ethernet controller chips, including:
 .Pp
 .Bl -bullet -compact
 .It
 D-Link 550SX Gigabit Ethernet
 .It
 D-Link 560SX Gigabit Ethernet
 .It
 D-Link 560T Gigabit Ethernet
 .It
 Marvell Yukon 88E8021CU Gigabit Ethernet
 .It
 Marvell Yukon 88E8021 SX/LX Gigabit Ethernet
 .It
 Marvell Yukon 88E8022CU Gigabit Ethernet
 .It
 Marvell Yukon 88E8022 SX/LX Gigabit Ethernet
 .It
 Marvell Yukon 88E8061CU Gigabit Ethernet
 .It
 Marvell Yukon 88E8061 SX/LX Gigabit Ethernet
 .It
 Marvell Yukon 88E8062CU Gigabit Ethernet
 .It
 Marvell Yukon 88E8062 SX/LX Gigabit Ethernet
 .It
 Marvell Yukon 88E8035 Fast Ethernet
 .It
 Marvell Yukon 88E8036 Fast Ethernet
 .It
 Marvell Yukon 88E8038 Fast Ethernet
 .It
 Marvell Yukon 88E8039 Fast Ethernet
 .It
 Marvell Yukon 88E8040 Fast Ethernet
 .It
 Marvell Yukon 88E8040T Fast Ethernet
 .It
 Marvell Yukon 88E8042 Fast Ethernet
 .It
 Marvell Yukon 88E8048 Fast Ethernet
 .It
 Marvell Yukon 88E8050 Gigabit Ethernet
 .It
 Marvell Yukon 88E8052 Gigabit Ethernet
 .It
 Marvell Yukon 88E8053 Gigabit Ethernet
 .It
 Marvell Yukon 88E8055 Gigabit Ethernet
 .It
 Marvell Yukon 88E8056 Gigabit Ethernet
 .It
 Marvell Yukon 88E8057 Gigabit Ethernet
 .It
 Marvell Yukon 88E8058 Gigabit Ethernet
 .It
 Marvell Yukon 88E8059 Gigabit Ethernet
 .It
 Marvell Yukon 88E8070 Gigabit Ethernet
 .It
 Marvell Yukon 88E8071 Gigabit Ethernet
 .It
 Marvell Yukon 88E8072 Gigabit Ethernet
 .It
 SysKonnect SK-9Sxx Gigabit Ethernet
 .It
 SysKonnect SK-9Exx Gigabit Ethernet
 .El
 .Sh LOADER TUNABLES
 Tunables can be set at the
 .Xr loader 8
 prompt before booting the kernel or stored in
 .Xr loader.conf 5 .
 .Bl -tag -width indent
 .It Va hw.msk.msi_disable
 This tunable disables MSI support on the Ethernet hardware.
 The default value is 0.
 .El
 .Sh SYSCTL VARIABLES
 The following variables are available as both
 .Xr sysctl 8
 variables and
 .Xr loader 8
 tunables:
 .Bl -tag -width indent
 .It Va dev.mskc.%d.int_holdoff
 Maximum amount of time to delay interrupts.
 The valid range is 0 to 34359738 for 125MHz clock in units of 1us,
 the default is 100 (100us).
 The interface needs to be brought down and up again before a change
 takes effect.
 .It Va dev.mskc.%d.process_limit
 Maximum number of Rx events to be processed in the event loop before
 rescheduling a taskqueue.
 The accepted range is 30 to 256, the default value is 128 events.
 The interface does not need to be brought down and up again before
 a change takes effect.
 .El
 .Sh SEE ALSO
 .Xr altq 4 ,
 .Xr arp 4 ,
 .Xr miibus 4 ,
 .Xr netintro 4 ,
 .Xr ng_ether 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig 8
 .Sh HISTORY
 The
 .Nm
 driver was written by
 .An Pyun YongHyeon
 .Aq yongari@FreeBSD.org
 and it is based on
 .Xr sk 4
 and Marvell's
 .Fx
 driver.
 It first appeared in
 .Fx 7.0
 and
 .Fx 6.3 .
Index: projects/binutils-2.17/share/man/man4/nge.4
===================================================================
--- projects/binutils-2.17/share/man/man4/nge.4	(revision 215829)
+++ projects/binutils-2.17/share/man/man4/nge.4	(revision 215830)
@@ -1,248 +1,227 @@
 .\" Copyright (c) 2001 Wind River Systems
 .\" Copyright (c) 1997, 1998, 1999, 2000, 2001
 .\"	Bill Paul <wpaul@bsdi.com>. All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. All advertising materials mentioning features or use of this software
 .\"    must display the following acknowledgement:
 .\"	This product includes software developed by Bill Paul.
 .\" 4. Neither the name of the author nor the names of any co-contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"   without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" THE POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 21, 2009
+.Dd November 23, 2010
 .Dt NGE 4
 .Os
 .Sh NAME
 .Nm nge
 .Nd "National Semiconductor PCI Gigabit Ethernet adapter driver"
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device miibus"
 .Cd "device nge"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_nge_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 driver provides support for various NICs based on the National Semiconductor
 DP83820 and DP83821 Gigabit Ethernet controller chips.
 .Pp
 The DP83820 supports TBI (ten bit interface) and GMII
 transceivers, which means it can be used with either copper or 1000baseX
 fiber applications.
 The DP83820 supports TCP/IP checksum offload and
 VLAN tagging/insertion as well as a 2048-bit multicast hash filter
 and up to 4 pattern match buffers.
 .Pp
 Most cards also use the DP83861 10/100/1000 copper gigabit transceiver
 chip, which supports autonegotiation of 10, 100 and 1000Mbps modes in
 full or half duplex.
 .Pp
 The DP83820 and DP83821 also support jumbo frames, which can be
 configured via the interface MTU setting.
 Selecting an MTU larger than 1500 bytes with the
 .Xr ifconfig 8
 utility configures the adapter to receive and transmit jumbo frames.
 Using jumbo frames can greatly improve performance for certain tasks,
 such as file transfers and data streaming.
 .Pp
 The
 .Nm
 driver supports the following media types:
 .Bl -tag -width 10baseTXUTP
 .It Cm autoselect
 Enable autoselection of the media type and options.
 The user can manually override
 the autoselected mode by adding media options to
 .Xr rc.conf 5 .
 .It Cm 10baseT/UTP
 Set 10Mbps operation.
 The
 .Xr ifconfig 8
 .Ic mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 100baseTX
 Set 100Mbps (Fast Ethernet) operation.
 The
 .Xr ifconfig 8
 .Ic mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 1000baseTX
 Set 1000baseTX operation over twisted pair.
 .Cm full-duplex
 and
 .Cm half-duplex
 modes are supported.
 .It Cm 1000baseSX
 Set 1000Mbps (Gigabit Ethernet) operation.
 Both
 .Cm full-duplex
 and
 .Cm half-duplex
 modes are supported.
 .El
 .Pp
 The
 .Nm
 driver supports the following media options:
 .Bl -tag -width full-duplex
 .It Cm full-duplex
 Force full duplex operation.
 .It Cm half-duplex
 Force half duplex operation.
-.El
-.Pp
-The
-.Nm
-driver also supports one special link option for 1000baseTX cards:
-.Bl -tag -width link0
-.It Cm link0
-With 1000baseTX cards, establishing a link between two ports requires
-that one port be configured as a master and the other a slave.
-With autonegotiation,
-the master/slave settings will be chosen automatically.
-However when manually selecting the link state, it is necessary to
-force one side of the link to be a master and the other a slave.
-The
-.Nm
-driver configures the ports as slaves by default.
-Setting the
-.Cm link0
-flag with
-.Xr ifconfig 8
-will set a port as a master instead.
 .El
 .Pp
 For more information on configuring this device, see
 .Xr ifconfig 8 .
 .Sh HARDWARE
 The
 .Nm
 driver supports National Semiconductor DP83820 and DP83821 based
 Gigabit Ethernet adapters including:
 .Pp
 .Bl -bullet -compact
 .It
 Addtron AEG320T
 .It
 Ark PC SOHO-GA2500T (32-bit PCI) and SOHO-GA2000T (64-bit PCI)
 .It
 Asante FriendlyNet GigaNIX 1000TA and 1000TPC
 .It
 D-Link DGE-500T
 .It
 Linksys EG1032, revision 1
 .It
 Netgear GA621
 .It
 Netgear GA622T
 .It
 SMC EZ Card 1000 (SMC9462TX)
 .It
 Surecom Technology EP-320G-TX
 .It
 Trendware TEG-PCITX (32-bit PCI) and TEG-PCITX2 (64-bit PCI)
 .El
 .Sh SYSCTL VARIABLES
 The following variables are available as both
 .Xr sysctl 8
 variables and
 .Xr loader 8
 tunables:
 .Bl -tag -width "xxxxxx"
 .It Va dev.nge.%d.int_holdoff
 Maximum amount of time to delay interrupt processing in units of
 100us.
 The accepted range is 0 to 255, the default is 1 (100us).
 Value 0 completely disables the interrupt moderation.
 The interface has to be brought down and up again before a change
 takes effect.
 .El
 .Sh DIAGNOSTICS
 .Bl -diag
 .It "nge%d: couldn't map memory"
 A fatal initialization error has occurred.
 .It "nge%d: couldn't map ports"
 A fatal initialization error has occurred.
 .It "nge%d: couldn't map interrupt"
 A fatal initialization error has occurred.
 .It "nge%d: no memory for softc struct!"
 The driver failed to allocate memory for per-device instance information
 during initialization.
 .It "nge%d: failed to enable memory mapping!"
 The driver failed to initialize PCI shared memory mapping.
 This might
 happen if the card is not in a bus-master slot.
 .It "nge%d: no memory for jumbo buffers!"
 The driver failed to allocate memory for jumbo frames during
 initialization.
 .It "nge%d: watchdog timeout"
 The device has stopped responding to the network, or there is a problem with
 the network connection (cable).
 .El
 .Sh SEE ALSO
 .Xr altq 4 ,
 .Xr arp 4 ,
 .Xr miibus 4 ,
 .Xr netintro 4 ,
 .Xr ng_ether 4 ,
 .Xr polling 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig 8
 .Rs
 .%T National Semiconductor DP83820 datasheet
 .%U http://www.national.com
 .Re
 .Rs
 .%T National Semiconductor DP83861 datasheet
 .%U http://www.national.com
 .Re
 .Sh HISTORY
 The
 .Nm
 device driver first appeared in
 .Fx 4.4 .
 .Sh AUTHORS
 The
 .Nm
 driver was written by
 .An Bill Paul Aq wpaul@bsdi.com .
Index: projects/binutils-2.17/share/man/man4/sk.4
===================================================================
--- projects/binutils-2.17/share/man/man4/sk.4	(revision 215829)
+++ projects/binutils-2.17/share/man/man4/sk.4	(revision 215830)
@@ -1,262 +1,241 @@
 .\" Copyright (c) 1997, 1998, 1999
 .\"	Bill Paul <wpaul@ctr.columbia.edu>. All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. All advertising materials mentioning features or use of this software
 .\"    must display the following acknowledgement:
 .\"	This product includes software developed by Bill Paul.
 .\" 4. Neither the name of the author nor the names of any co-contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"   without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" THE POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 23, 2009
+.Dd November 23, 2010
 .Dt SK 4
 .Os
 .Sh NAME
 .Nm sk
 .Nd "SysKonnect SK-984x and SK-982x PCI Gigabit Ethernet adapter driver"
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device miibus"
 .Cd "device sk"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_sk_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 driver provides support for the SysKonnect SK-984x and SK-982x series PCI
 Gigabit Ethernet adapters.
 .Pp
 The SysKonnect adapters consist of two main components: the XaQti Corp.
 XMAC II gigabit MAC and the SysKonnect GEnesis controller ASIC.
 The
 XMAC provides the gigabit MAC and PHY support while the GEnesis
 provides an interface to the PCI bus, DMA support, packet buffering
 and arbitration.
 The GEnesis can control up to two XMACs simultaneously,
 allowing dual-port NIC configurations.
 .Pp
 The SK-982x 1000baseT adapters also include a Broadcom BCM5400 1000baseTX
 PHY which is used in place of the XMAC's internal PHY.
 The Broadcom PHY is connected to the XMAC via its GMII port.
 .Pp
 The
 .Nm
 driver configures dual port SysKonnect adapters such that each XMAC
 is treated as a separate logical network interface.
 Both ports can
 operate independently of each other and can be connected to separate
 networks.
 The SysKonnect driver software currently only uses the
 second port on dual port adapters for failover purposes: if the link
 on the primary port fails, the SysKonnect driver will automatically
 switch traffic onto the second port.
 .Pp
 Also supported is the Marvell Semiconductor 88E100* gigabit PHY.
 .Pp
 The XaQti XMAC II supports full and half duplex operation with
 autonegotiation.
 The XMAC also supports unlimited frame sizes.
 Support for jumbo frames is provided via the interface MTU setting.
 Selecting an MTU larger than 1500 bytes with the
 .Xr ifconfig 8
 utility configures the adapter to receive and transmit jumbo frames.
 Using jumbo frames can greatly improve performance for certain tasks,
 such as file transfers and data streaming.
 .Pp
 The
 .Nm
 driver supports the following media types:
 .Bl -tag -width xxxxxxxxxxxxxxxxxxxx
 .It autoselect
 Enable autoselection of the media type and options.
 The user can manually override
 the autoselected mode by adding media options to the
 .Pa /etc/rc.conf
 file.
 .It 1000baseTX
 Set 1000baseTX operation over twisted pair.
 This is only available
 for SK-982x series adapters with 1000baseT ports.
 Both
 .Ar full-duplex
 and
 .Ar half-duplex
 modes are supported.
 .It 1000baseSX
 Set 1000Mbps (Gigabit Ethernet) operation.
 Both
 .Ar full-duplex
 and
 .Ar half-duplex
 modes are supported.
 .El
 .Pp
 The
 .Nm
 driver supports the following media options:
 .Bl -tag -width xxxxxxxxxxxxxxxxxxxx
 .It full-duplex
 Force full duplex operation.
 .It half-duplex
 Force half duplex operation.
-.El
-.Pp
-The
-.Nm
-driver also supports one special link option for 1000baseTX cards:
-.Bl -tag -width xxxxxxxxxxxxxxxxxxxx
-.It link0
-With 1000baseTX cards, establishing a link between two ports requires
-that one port is configured as master and the other one as slave.
-With autonegotiation,
-the master/slave settings will be chosen automatically.
-However when manually selecting the link state, it is necessary to
-force one side of the link to be a master and the other a slave.
-The
-.Nm
-driver configures the ports as slaves by default.
-Setting the
-.Ar link0
-flag with
-.Xr ifconfig 8
-will set a port as a master instead.
 .El
 .Pp
 For more information on configuring this device, see
 .Xr ifconfig 8 .
 .Sh HARDWARE
 Adapters supported by the
 .Nm
 driver include:
 .Pp
 .Bl -bullet -compact
 .It
 3Com 3C940 single port, 1000baseT adapter
 .It
 3Com 3C2000-T single port, 1000baseT adapter
 .It
 Belkin F5D5005 single port, 1000baseT adapter
 .It
 D-Link DGE-530T single port, 1000baseT adapter
 .It
 Linksys (revision 2) single port, 1000baseT adapter
 .It
 SK-9521 SK-NET GE-T single port, 1000baseT adapter
 .It
 SK-9821 SK-NET GE-T single port, 1000baseT adapter
 .It
 SK-9822 SK-NET GE-T dual port, 1000baseT adapter
 .It
 SK-9841 SK-NET GE-LX single port, single mode fiber adapter
 .It
 SK-9842 SK-NET GE-LX dual port, single mode fiber adapter
 .It
 SK-9843 SK-NET GE-SX single port, multimode fiber adapter
 .It
 SK-9844 SK-NET GE-SX dual port, multimode fiber adapter
 .It
 SMC 9452TX single port, 1000baseT adapter
 .El
 .Sh LOADER TUNABLES
 Tunables can be set at the
 .Xr loader 8
 prompt before booting the kernel or stored in
 .Xr loader.conf 5 .
 .Bl -tag -width xxxxxx
 .It Va hw.skc.jumbo_disable
 Disable jumbo frame support.
 Systems with less memory can set it to a non-zero value to save memory.
 The default value is 0.
 .El
 .Sh SYSCTL VARIABLES
 The following variable is available as both
 .Xr sysctl 8
 variable and
 .Xr loader 8
 tunable:
 .Bl -tag -width xxxxxx
 .It Va dev.skc.%d.int_mod
 This variable controls interrupt moderation.
 The accepted range is 10 to 10000.
 The default value is 100 microseconds.
 The interface has to be brought down and up again before a change takes effect.
 .El
 .Sh DIAGNOSTICS
 .Bl -diag
 .It "sk%d: couldn't map memory"
 A fatal initialization error has occurred.
 .It "sk%d: couldn't map ports"
 A fatal initialization error has occurred.
 .It "sk%d: couldn't map interrupt"
 A fatal initialization error has occurred.
 .It "sk%d: no memory for softc struct!"
 The driver failed to allocate memory for per-device instance information
 during initialization.
 .It "sk%d: failed to enable memory mapping!"
 The driver failed to initialize PCI shared memory mapping.
 This might
 happen if the card is not in a bus-master slot.
 .It "sk%d: no memory for jumbo buffers!"
 The driver failed to allocate memory for jumbo frames during
 initialization.
 .It "sk%d: watchdog timeout"
 The device has stopped responding to the network, or there is a problem with
 the network connection (cable).
 .El
 .Sh SEE ALSO
 .Xr altq 4 ,
 .Xr arp 4 ,
 .Xr miibus 4 ,
 .Xr netintro 4 ,
 .Xr ng_ether 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig 8
 .Rs
 .%T XaQti XMAC II datasheet
 .%U http://www.xaqti.com
 .Re
 .Rs
 .%T SysKonnect GEnesis programming manual
 .%U http://www.syskonnect.com
 .Re
 .Sh HISTORY
 The
 .Nm
 device driver first appeared in
 .Fx 3.0 .
 .Sh AUTHORS
 The
 .Nm
 driver was written by
 .An Bill Paul Aq wpaul@ctr.columbia.edu .
Index: projects/binutils-2.17/share/man/man4/stge.4
===================================================================
--- projects/binutils-2.17/share/man/man4/stge.4	(revision 215829)
+++ projects/binutils-2.17/share/man/man4/stge.4	(revision 215830)
@@ -1,223 +1,202 @@
 .\"	$NetBSD: stge.4,v 1.7 2003/02/14 15:20:20 grant Exp $
 .\"
 .\" Copyright (c) 2001 The NetBSD Foundation, Inc.
 .\" All rights reserved.
 .\"
 .\" This code is derived from software contributed to The NetBSD Foundation
 .\" by Jason R. Thorpe.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 .\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd July 25, 2006
+.Dd November 23, 2010
 .Dt STGE 4
 .Os
 .Sh NAME
 .Nm stge
 .Nd Sundance/Tamarack TC9021 Gigabit Ethernet adapter driver
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device miibus"
 .Cd "device stge"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_stge_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 device driver provides support for various NICs based on the
 Sundance/Tamarack TC9021 Gigabit Ethernet controller chip.
 .Pp
 The Sundance/Tamarack TC9021 is found on the D-Link DGE-550T
 and the Antares Microsystems Gigabit Ethernet board.
 It uses an external PHY or an external 10-bit interface.
 .Pp
 All NICs supported by the
 .Nm
 driver have TCP/UDP/IP checksum offload for both receive and
 transmit, hardware VLAN tag stripping/insertion features, and a
 receive interrupt moderation mechanism as well as a 64-bit
 multicast hash filter.
 The Sundance/Tamarack TC9021 supports TBI (ten bit interface)
 and GMII transceivers, which means it can be used with either
 copper or 1000baseX fiber applications.
 .Pp
 The Sundance/Tamarack TC9021 also supports jumbo frames, which can be
 configured via the interface MTU setting.
 Selecting an MTU larger than 1500 bytes with the
 .Xr ifconfig 8
 utility configures the adapter to receive and transmit jumbo frames.
 .Pp
 The
 .Nm
 driver supports the following media types:
 .Bl -tag -width ".Cm 10baseT/UTP"
 .It Cm autoselect
 Enable autoselection of the media type and options.
 The user can manually override
 the autoselected mode by adding media options to
 .Xr rc.conf 5 .
 .It Cm 10baseT/UTP
 Set 10Mbps operation.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 100baseTX
 Set 100Mbps (Fast Ethernet) operation.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 1000baseTX
 Set 1000baseTX operation over twisted pair.
 The Sundance/Tamarack supports 1000Mbps in
 .Cm autoselect
 mode only.
 .\" .It Cm 1000baseSX
 .\" Set 1000Mbps (Gigabit Ethernet) operation.
 .\" Both
 .\" .Cm full-duplex
 .\" and
 .\" .Cm half-duplex
 .\" modes are supported.
 .El
 .Pp
 The
 .Nm
 driver supports the following media options:
 .Bl -tag -width ".Cm full-duplex"
 .It Cm full-duplex
 Force full duplex operation.
 .It Cm half-duplex
 Force half duplex operation.
-.El
-.Pp
-The
-.Nm
-driver also supports one special link option for 1000baseTX cards:
-.Bl -tag -width ".Cm link0"
-.It Cm link0
-With 1000baseTX cards, establishing a link between two ports requires
-that one port is configured as master and the other one as slave.
-With autonegotiation,
-the master/slave settings will be chosen automatically.
-However when manually selecting the link state, it is necessary to
-force one side of the link to be a master and the other a slave.
-The
-.Nm
-driver configures the ports as slaves by default.
-Setting the
-.Cm link0
-flag with
-.Xr ifconfig 8
-will set a port as a master instead.
 .El
 .Pp
 For more information on configuring this device, see
 .Xr ifconfig 8 .
 .Sh HARDWARE
 The
 .Nm
 driver provides support for various NICs based on the Sundance/Tamarack
 TC9021 based Gigabit Ethernet controller chips, including:
 .Pp
 .Bl -bullet -compact
 .It
 Antares Microsystems Gigabit Ethernet
 .It
 ASUS NX1101 Gigabit Ethernet
 .It
 D-Link DL-4000 Gigabit Ethernet
 .It
 IC Plus IP1000A Gigabit Ethernet
 .It
 Sundance ST-2021 Gigabit Ethernet
 .It
 Sundance ST-2023 Gigabit Ethernet
 .It
 Sundance TC9021 Gigabit Ethernet
 .It
 Tamarack TC9021 Gigabit Ethernet
 .El
 .Sh SYSCTL VARIABLES
 The following variables are available as both
 .Xr sysctl 8
 variables and
 .Xr loader 8
 tunables:
 .Bl -tag -width indent
 .It Va dev.stge.%d.rxint_nframe
 Number of frames between RxDMAComplete interrupts.
 The accepted range is 1 to 255, default value is 8 frames.
 The interface has to be brought down and up again before a change takes effect.
 .It Va dev.stge.%d.rxint_dmawait
 Maximum amount of time to wait in 1us increments before issuing
 an Rx interrupt if the number of frames received is less than
 .Va rxint_nframe .
 The accepted range is 0 to 4194, default value is 30 microseconds.
 The interface has to be brought down and up again before a change takes effect.
 .El
 .Sh SEE ALSO
 .Xr altq 4 ,
 .Xr arp 4 ,
 .Xr miibus 4 ,
 .Xr netintro 4 ,
 .Xr ng_ether 4 ,
 .Xr polling 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig 8
 .Sh HISTORY
 The
 .Nm
 driver was ported from
 .Nx
 and first appeared in
 .Fx 6.2 .
 The
 .Nx
 version was written by
 .An Jason R. Thorpe
 .Aq thorpej@NetBSD.org .
 .Sh AUTHORS
 The
 .Nm
 driver was ported by
 .An Pyun YongHyeon
 .Aq yongari@FreeBSD.org .
Index: projects/binutils-2.17/share/man/man4/vge.4
===================================================================
--- projects/binutils-2.17/share/man/man4/vge.4	(revision 215829)
+++ projects/binutils-2.17/share/man/man4/vge.4	(revision 215830)
@@ -1,243 +1,222 @@
 .\" Copyright (c) 2004
 .\"	Bill Paul <wpaul@windriver.com>. All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. All advertising materials mentioning features or use of this software
 .\"    must display the following acknowledgement:
 .\"	This product includes software developed by Bill Paul.
 .\" 4. Neither the name of the author nor the names of any co-contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"   without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" THE POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd December 18, 2009
+.Dd November 23, 2010
 .Dt VGE 4
 .Os
 .Sh NAME
 .Nm vge
 .Nd "VIA Networking Technologies Velocity Gigabit Ethernet adapter driver"
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following lines in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device miibus"
 .Cd "device vge"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_vge_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 driver provides support for various NICs and embedded Ethernet interfaces
 based on the VIA Technologies VT6120, VT6122, VT6130 and VT6132 Velocity
 Family Gigabit Ethernet controller chips.
 .Pp
 The VT6120/VT6122 is a 33/66MHz 64-bit PCI device which combines a tri-speed
 MAC with an integrated 10/100/1000 copper PHY.
 (Some older cards use an external PHY.)
 The VT6130/VT6132 is the PCI Express version of the Velocity family.
 The MAC supports TCP/IP hardware
 checksums (IPv4 only), TCP large send, VLAN tag insertion and stripping,
 as well as VLAN filtering, a 64-entry CAM filter and a 64-entry VLAN filter,
 64-bit multicast hash filter, 4 separate transmit DMA queues, flow control
 and jumbo frames up to 16K in size.
 The Velocity family controllers have a 16K receive FIFO and 48K transmit FIFO.
 .Pp
 The
 .Nm
 driver takes advantage of the controller's checksum offload and VLAN
 tagging features, as well as the jumbo frame and CAM filter support.
 The CAM filter is used for multicast address filtering to provide
 64 perfect multicast address filter support.
 If it is necessary for the interface to join more than 64 multicast
 groups, the driver will switch over to using the hash filter.
 .Pp
 The jumbo frame support can be enabled by setting the interface MTU
 to any value larger than the default of 1500 bytes, up to a maximum
 of 9000 bytes.
 The receive and transmit checksum offload support
 can be toggled on and off using the
 .Xr ifconfig 8
 utility.
 .Pp
 The
 .Nm
 driver supports the following media types:
 .Bl -tag -width ".Cm 10baseT/UTP"
 .It Cm autoselect
 Enable autoselection of the media type and options.
 The user can manually override
 the autoselected mode by adding media options to
 .Xr rc.conf 5 .
 .It Cm 10baseT/UTP
 Set 10Mbps operation.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 100baseTX
 Set 100Mbps (Fast Ethernet) operation.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .It Cm 1000baseTX
 Set 1000baseTX operation over twisted pair.
 The
 .Xr ifconfig 8
 .Cm mediaopt
 option can also be used to select either
 .Cm full-duplex
 or
 .Cm half-duplex
 modes.
 .El
 .Pp
 The
 .Nm
 driver supports the following media options:
 .Bl -tag -width ".Cm full-duplex"
 .It Cm full-duplex
 Force full duplex operation.
 .It Cm half-duplex
 Force half duplex operation.
-.El
-.Pp
-The
-.Nm
-driver also supports one special link option for 1000baseTX cards:
-.Bl -tag -width ".Cm link0"
-.It Cm link0
-With 1000baseTX cards, establishing a link between two ports requires
-that one port be configured as a master and the other a slave.
-With autonegotiation,
-the master/slave settings will be chosen automatically.
-However when manually selecting the link state, it is necessary to
-force one side of the link to be a master and the other a slave.
-The
-.Nm
-driver configures the ports as slaves by default.
-Setting the
-.Cm link0
-flag with
-.Xr ifconfig 8
-will set a port as a master instead.
 .El
 .Pp
 For more information on configuring this device, see
 .Xr ifconfig 8 .
 .Sh HARDWARE
 The
 .Nm
 driver supports VIA Networking VT6120, VT6122, VT6130 and VT6132 based
 Gigabit Ethernet adapters including:
 .Pp
 .Bl -bullet -compact
 .It
 VIA Networking LAN-on-motherboard Gigabit Ethernet
 .It
 ZyXEL GN650-T 64-bit PCI Gigabit Ethernet NIC (ZX1701)
 .It
 ZyXEL GN670-T 32-bit PCI Gigabit Ethernet NIC (ZX1702)
 .El
 .Sh LOADER TUNABLES
 Tunables can be set at the
 .Xr loader 8
 prompt before booting the kernel or stored in
 .Xr loader.conf 5 .
 .Bl -tag -width "xxxxxx"
 .It Va hw.vge.msi_disable
 This tunable disables MSI support on the Ethernet hardware.
 The default value is 0.
 .El
 .Sh SYSCTL VARIABLES
 The following variables are available as both
 .Xr sysctl 8
 variables and
 .Xr loader 8
 tunables:
 .Bl -tag -width "xxxxxx"
 .It Va dev.vge.%d.int_holdoff
 Maximum amount of time to delay interrupts.
 The valid range is 0 to 5100 in units of 1us, the default is
 150 (150us).
 The resolution of the timer is about 20us so finer tuning than
 20us wouldn't be available.
 The interface should be brought down and up again before a change
 takes effect.
 .It Va dev.vge.%d.rx_coal_pkt
 Maximum number of packets to fire Rx completion interrupt.
 The valid range is 1 to 255, the default is 64.
 .It Va dev.vge.%d.tx_coal_pkt
 Maximum number of packets to fire Tx completion interrupt.
 The valid range is 1 to 255, the default is 128.
 .El
 .Sh DIAGNOSTICS
 .Bl -diag
 .It "vge%d: couldn't map memory"
 A fatal initialization error has occurred.
 .It "vge%d: couldn't map ports"
 A fatal initialization error has occurred.
 .It "vge%d: couldn't map interrupt"
 A fatal initialization error has occurred.
 .It "vge%d: failed to enable memory mapping!"
 The driver failed to initialize PCI shared memory mapping.
 This might
 happen if the card is not in a bus-master slot.
 .It "vge%d: watchdog timeout"
 The device has stopped responding to the network, or there is a problem with
 the network connection (cable).
 .El
 .Sh SEE ALSO
 .Xr altq 4 ,
 .Xr arp 4 ,
 .Xr miibus 4 ,
 .Xr netintro 4 ,
 .Xr ng_ether 4 ,
 .Xr polling 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig 8
 .Sh HISTORY
 The
 .Nm
 device driver first appeared in
 .Fx 5.3 .
 .Sh AUTHORS
 The
 .Nm
 driver was written by
 .An Bill Paul Aq wpaul@windriver.com .
Index: projects/binutils-2.17/share/man/man5/rc.conf.5
===================================================================
--- projects/binutils-2.17/share/man/man5/rc.conf.5	(revision 215829)
+++ projects/binutils-2.17/share/man/man5/rc.conf.5	(revision 215830)
@@ -1,4331 +1,4342 @@
 .\" Copyright (c) 1995
 .\"	Jordan K. Hubbard
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 13, 2010
+.Dd November 24, 2010
 .Dt RC.CONF 5
 .Os
 .Sh NAME
 .Nm rc.conf
 .Nd system configuration information
 .Sh DESCRIPTION
 The file
 .Nm
 contains descriptive information about the local host name, configuration
 details for any potential network interfaces and which services should be
 started up at system initial boot time.
 In new installations, the
 .Nm
 file is generally initialized by the system installation utility,
 .Xr sysinstall 8 .
 .Pp
 The purpose of
 .Nm
 is not to run commands or perform system startup actions
 directly.
 Instead, it is included by the
 various generic startup scripts in
 .Pa /etc
 which conditionalize their
 internal actions according to the settings found there.
 .Pp
 The
 .Pa /etc/rc.conf
 file is included from the file
 .Pa /etc/defaults/rc.conf ,
 which specifies the default settings for all the available options.
 Options need only be specified in
 .Pa /etc/rc.conf
 when the system administrator wishes to override these defaults.
 The file
 .Pa /etc/rc.conf.local
 is used to override settings in
 .Pa /etc/rc.conf
 for historical reasons.
 See the
 .Va rc_conf_files
 variable below.
 .Pp
 Options are set with
 .Dq Ar name Ns Li = Ns Ar value
 assignments that use
 .Xr sh 1
 syntax.
 The following list provides a name and short description for each
 variable that can be set in the
 .Nm
 file:
 .Bl -tag -width indent-two
 .It Va rc_debug
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable output of debug messages from rc scripts.
 This variable can be helpful in diagnosing mistakes when
 editing or integrating new scripts.
 Beware that this produces copious output to the terminal and
 .Xr syslog 3 .
 .It Va rc_info
 .Pq Vt bool
 If set to
 .Dq Li NO ,
 disable informational messages from the rc scripts.
 Informational messages are displayed when
 a condition that is not serious enough to warrant a warning or
 an error occurs.
 .It Va rc_startmsgs
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 show
 .Dq Starting foo:
 when faststart is used (e.g., at boot time).
 .It Va early_late_divider
 .Pq Vt str
 The name of the script that should be used as the
 delimiter between the
 .Dq early
 and
 .Dq late
 stages of the boot process.
 The early stage should contain all the services needed to
 get the disks (local or remote) mounted so that the late
 stage can include scripts contained in the directories
 listed in the
 .Va local_startup
 variable (see below).
 Thus, the two likely candidates for this value are
 .Pa mountcritlocal
 for the typical system, and
 .Pa mountcritremote
 if the system needs remote file
 systems mounted to get access to the
 .Va local_startup
 directories; for example when
 .Pa /usr/local
 is NFS mounted.
 For
 .Pa rc.conf
 within a
 .Xr jail 8
 .Pa NETWORKING
 is likely to be an appropriate value.
 Extreme care should be taken when changing this value,
 and before changing it one should ensure that there are
 adequate provisions to recover from a failed boot
 (such as physical contact with the machine,
 or reliable remote console access).
 .It Va swapfile
 .Pq Vt str
 If set to
 .Dq Li NO ,
 no swapfile is installed, otherwise the value is used as the full
 pathname to a file to use for additional swap space.
 .It Va apm_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable support for Automatic Power Management with
 the
 .Xr apm 8
 command.
 .It Va apmd_enable
 .Pq Vt bool
 Run
 .Xr apmd 8
 to handle APM events from userland.
 This also enables support for APM.
 .It Va apmd_flags
 .Pq Vt str
 If
 .Va apmd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr apmd 8
 daemon.
 .It Va devd_enable
 .Pq Vt bool
 Run
 .Xr devd 8
 to handle device added, removed or unknown events from the kernel.
 .It Va ddb_enable
 .Pq Vt bool
 Run
 .Xr ddb 8
 to install
 .Xr ddb 4
 scripts at boot time.
 .It Va ddb_config
 .Pq Vt str
 Configuration file for
 .Xr ddb 8 .
 Default
 .Pa /etc/ddb.conf .
 .It Va kldxref_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Set to
 .Dq Li YES
 to automatically rebuild
 .Pa linker.hints
 files with
 .Xr kldxref 8
 at boot time.
 .It Va kldxref_clobber
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 If
 .Va kldxref_enable
 is true,
 setting to
 .Dq Li YES
 will overwrite existing
 .Pa linker.hints
 files at boot time.
 Otherwise,
 only missing
 .Pa linker.hints
 files are generated.
 .It Va kldxref_module_path
 .Pq Vt str
 Empty by default.
 A semi-colon
 .Pq Ql \&;
 delimited list of paths containing
 .Xr kld 4
 modules.
 If empty,
 the contents of the
 .Va kern.module_path
 .Xr sysctl 8
 are used.
 .It Va powerd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable the system power control facility with the
 .Xr powerd 8
 daemon.
 .It Va powerd_flags
 .Pq Vt str
 If
 .Va powerd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr powerd 8
 daemon.
 .It Va tmpmfs
 Controls the creation of a
 .Pa /tmp
 memory file system.
 Always happens if set to
 .Dq Li YES
 and never happens if set to
 .Dq Li NO .
 If set to anything else, a memory file system is created if
 .Pa /tmp
 is not writable.
 .It Va tmpsize
 Controls the size of a created
 .Pa /tmp
 memory file system.
 .It Va tmpmfs_flags
 Extra options passed to the
 .Xr mdmfs 8
 utility when the memory file system for
 .Pa /tmp
 is created.
 The default is
 .Dq Li "-S" ,
 which inhibits the use of softupdates on
 .Pa /tmp
 so that file system space is freed without delay
 after file truncation or deletion.
 See
 .Xr mdmfs 8
 for other options you can use in
 .Va tmpmfs_flags .
 .It Va varmfs
 Controls the creation of a
 .Pa /var
 memory file system.
 Always happens if set to
 .Dq Li YES
 and never happens if set to
 .Dq Li NO .
 If set to anything else, a memory file system is created if
 .Pa /var
 is not writable.
 .It Va varsize
 Controls the size of a created
 .Pa /var
 memory file system.
 .It Va varmfs_flags
 Extra options passed to the
 .Xr mdmfs 8
 utility when the memory file system for
 .Pa /var
 is created.
 The default is
 .Dq Li "-S" ,
 which inhibits the use of softupdates on
 .Pa /var
 so that file system space is freed without delay
 after file truncation or deletion.
 See
 .Xr mdmfs 8
 for other options you can use in
 .Va varmfs_flags .
 .It Va populate_var
 Controls the automatic population of the
 .Pa /var
 file system.
 Always happens if set to
 .Dq Li YES
 and never happens if set to
 .Dq Li NO .
 If set to anything else, a memory file system is created if
 .Pa /var
 is not writable.
 Note that this process requires access to certain commands in
 .Pa /usr
 before
 .Pa /usr
 is mounted on normal systems.
 .It Va cleanvar_enable
 .Pq Vt bool
 Clean the
 .Pa /var
 directory.
 .It Va local_startup
 .Pq Vt str
 List of directories to search for startup script files.
 .It Va script_name_sep
 .Pq Vt str
 The field separator to use for breaking down the list of startup script files
 into individual filenames.
 The default is a space.
 It is not necessary to change this unless there are startup scripts with names
 containing spaces.
 .It Va hostapd_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start
 .Xr hostapd 8
 at system boot time.
 .It Va hostname
 .Pq Vt str
 The fully qualified domain name (FQDN) of this host on the network.
 This should almost certainly be set to something meaningful, even if
 there is no network connection.
 If
 .Xr dhclient 8
 is used to set the hostname via DHCP,
 this variable should be set to an empty string.
 If this value remains unset when the system is done booting
 your console login will display the default hostname of
 .Dq Amnesiac .
 .It Va nisdomainname
 .Pq Vt str
 The NIS domain name of this host, or
 .Dq Li NO
 if NIS is not used.
 .It Va dhclient_program
 .Pq Vt str
 Path to the DHCP client program
 .Pa ( /sbin/dhclient ,
 the
 .Ox
 DHCP client,
 is the default).
 .It Va dhclient_flags
 .Pq Vt str
 Additional flags to pass to the DHCP client program.
 For the
 .Ox
 DHCP client, see the
 .Xr dhclient 8
 manpage for a description of the command line options available.
 .It Va dhclient_flags_ Ns Aq Ar iface
 Additional flags to pass to the DHCP client program running on
 .Ar iface
 only.
 When specified, this variable overrides
 .Va dhclient_flags .
 .It Va background_dhclient
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start the DHCP client in background.
 This can cause trouble with applications depending on
 a working network, but it will provide a faster startup
 in many cases.
 .It Va background_dhclient_ Ns Aq Ar iface
 When specified, this variable overrides the
 .Va background_dhclient
 variable for interface
 .Ar iface
 only.
 .It Va synchronous_dhclient
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start
 .Xr dhclient 8
 synchronously at startup.
 This behavior can be overridden on a per-interface basis by replacing
 the
 .Dq Li DHCP
 keyword in the
 .Va ifconfig_ Ns Aq Ar interface
 variable with
 .Dq Li SYNCDHCP
 or
 .Dq Li NOSYNCDHCP .
 .It Va defaultroute_delay
 .Pq Vt int
 When set to a positive value, wait up to this long after configuring
 DHCP interfaces at startup to give the interfaces time to receive a lease.
 .It Va firewall_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to load firewall rules at startup.
 If the kernel was not built with
 .Cd "options IPFIREWALL" ,
 the
 .Pa ipfw.ko
 kernel module will be loaded.
 See also
 .Va ipfilter_enable .
 .It Va firewall_script
 .Pq Vt str
 This variable specifies the full path to the firewall script to run.
 The default is
 .Pa /etc/rc.firewall .
 .It Va firewall_type
 .Pq Vt str
 Names the firewall type from the selection in
 .Pa /etc/rc.firewall ,
 or the file which contains the local firewall ruleset.
 Valid selections from
 .Pa /etc/rc.firewall
 are:
 .Pp
 .Bl -tag -width ".Li simple" -compact
 .It Li open
 unrestricted IP access
 .It Li closed
 all IP services disabled, except via
 .Dq Li lo0
 .It Li client
 basic protection for a workstation
 .It Li simple
 basic protection for a LAN.
 .El
 .Pp
 If a filename is specified, the full path
 must be given.
 .It Va firewall_quiet
 .Pq Vt bool
 Set to
 .Dq Li YES
 to disable the display of firewall rules on the console during boot.
 .It Va firewall_logging
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable firewall event logging.
 This is equivalent to the
 .Dv IPFIREWALL_VERBOSE
 kernel option.
 .It Va firewall_flags
 .Pq Vt str
 Flags passed to
 .Xr ipfw 8
 if
 .Va firewall_type
 specifies a filename.
 .It Va firewall_coscripts
 .Pq Vt str
 List of executables and/or rc scripts to run after firewall starts/stops.
 Default is empty.
 .\" ----- firewall_nat_enable setting --------------------------------
 .It Va firewall_nat_enable
 .Pq Vt bool
 The
 .Xr ipfw 8
 equivalent of
 .Va natd_enable .
 Setting this to
 .Dq Li YES
 enables kernel NAT.
 .Va firewall_enable
 must also be set to
 .Dq Li YES .
 .It Va firewall_nat_interface
 .Pq Vt str
 The
 .Xr ipfw 8
 equivalent of
 .Va natd_interface .
 This is the name of the public interface or IP address on which
 kernel NAT should run.
 .It Va firewall_nat_flags
 .Pq Vt str
 Additional configuration parameters for kernel NAT should be placed here.
 .It Va dummynet_enable
 .Pq Vt bool
 Setting this to
 .Dq Li YES
 will automatically load the
 .Xr dummynet 4
 module if
 .Va firewall_enable
 is also set to
 .Dq Li YES .
 .\" -------------------------------------------------------------------
 .It Va natd_program
 .Pq Vt str
 Path to
 .Xr natd 8 .
 .It Va natd_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable
 .Xr natd 8 .
 .Va firewall_enable
 must also be set to
 .Dq Li YES ,
 and
 .Xr divert 4
 sockets must be enabled in the kernel.
 If the kernel was not built with
 .Cd "options IPDIVERT" ,
 the
 .Pa ipdivert.ko
 kernel module will be loaded.
 .It Va natd_interface
 .Pq Vt str
 This is the name of the public interface on which
 .Xr natd 8
 should run.
 The interface may be given as an interface name or as an IP address.
 .It Va natd_flags
 .Pq Vt str
 Additional
 .Xr natd 8
 flags should be placed here.
 The
 .Fl n
 or
 .Fl a
 flag is automatically added with the above
 .Va natd_interface
 as an argument.
 .\" ----- ipfilter_enable setting --------------------------------
 .It Va ipfilter_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting this to
 .Dq Li YES
 enables
 .Xr ipf 8
 packet filtering.
 .Pp
 Typical usage will require putting
 .Bd -literal
 ipfilter_enable="YES"
 ipnat_enable="YES"
 ipmon_enable="YES"
 ipfs_enable="YES"
 .Ed
 .Pp
 into
 .Pa /etc/rc.conf
 and editing
 .Pa /etc/ipf.rules
 and
 .Pa /etc/ipnat.rules
 appropriately.
 .Pp
 Note that
 .Va ipfilter_enable
 and
 .Va ipnat_enable
 can be enabled independently.
 .Va ipmon_enable
 and
 .Va ipfs_enable
 both require at least one of
 .Va ipfilter_enable
 and
 .Va ipnat_enable
 to be enabled.
 .Pp
 Having
 .Bd -literal
 options IPFILTER
 options IPFILTER_LOG
 options IPFILTER_DEFAULT_BLOCK
 .Ed
 .Pp
 in the kernel configuration file is a good idea, too.
 .\" ----- ipfilter_program setting ------------------------------
 .It Va ipfilter_program
 .Pq Vt str
 Path to
 .Xr ipf 8
 (default
 .Pa /sbin/ipf ) .
 .\" ----- ipfilter_rules setting --------------------------------
 .It Va ipfilter_rules
 .Pq Vt str
 Set to
 .Pa /etc/ipf.rules
 by default.
 This variable contains the name of the filter rule definition file.
 The file is expected to be readable for the
 .Xr ipf 8
 command to execute.
 .\" ----- ipv6_ipfilter_rules setting ---------------------------
 .It Va ipv6_ipfilter_rules
 .Pq Vt str
 Set to
 .Pa /etc/ipf6.rules
 by default.
 This variable contains the IPv6 filter rule definition file.
 The file is expected to be readable for the
 .Xr ipf 8
 command to execute.
 .\" ----- ipfilter_flags setting --------------------------------
 .It Va ipfilter_flags
 .Pq Vt str
 Empty by default.
 This variable contains flags passed to the
 .Xr ipf 8
 program.
 .\" ----- ipnat_enable setting ----------------------------------
 .It Va ipnat_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Set it to
 .Dq Li YES
 to enable
 .Xr ipnat 8
 network address translation.
 See
 .Va ipfilter_enable
 for a detailed discussion.
 .\" ----- ipnat_program setting ---------------------------------
 .It Va ipnat_program
 .Pq Vt str
 Path to
 .Xr ipnat 8
 (default
 .Pa /sbin/ipnat ) .
 .\" ----- ipnat_rules setting -----------------------------------
 .It Va ipnat_rules
 .Pq Vt str
 Set to
 .Pa /etc/ipnat.rules
 by default.
 This variable contains the name of the file
 holding the network address translation definition.
 This file is expected to be readable for the
 .Xr ipnat 8
 command to execute.
 .\" ----- ipnat_flags setting -----------------------------------
 .It Va ipnat_flags
 .Pq Vt str
 Empty by default.
 This variable contains flags passed to the
 .Xr ipnat 8
 program.
 .\" ----- ipmon_enable setting ----------------------------------
 .It Va ipmon_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Set it to
 .Dq Li YES
 to enable
 .Xr ipmon 8
 monitoring (logging
 .Xr ipf 8
 and
 .Xr ipnat 8
 events).
 Setting this variable needs setting
 .Va ipfilter_enable
 or
 .Va ipnat_enable
 too.
 See
 .Va ipfilter_enable
 for a detailed discussion.
 .\" ----- ipmon_program setting ---------------------------------
 .It Va ipmon_program
 .Pq Vt str
 Path to
 .Xr ipmon 8
 (default
 .Pa /sbin/ipmon ) .
 .\" ----- ipmon_flags setting -----------------------------------
 .It Va ipmon_flags
 .Pq Vt str
 Set to
 .Dq Li -Ds
 by default.
 This variable contains flags passed to the
 .Xr ipmon 8
 program.
 Another typical example would be
 .Dq Fl D Pa /var/log/ipflog
 to have
 .Xr ipmon 8
 log directly to a file bypassing
 .Xr syslogd 8 .
 Make sure to adjust
 .Pa /etc/newsyslog.conf
 in such case like this:
 .Bd -literal
 /var/log/ipflog  640  10  100  *  Z  /var/run/ipmon.pid
 .Ed
 .\" ----- ipfs_enable setting -----------------------------------
 .It Va ipfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Set it to
 .Dq Li YES
 to enable
 .Xr ipfs 8
 saving the filter and NAT state tables during shutdown
 and reloading them during startup again.
 Setting this variable needs setting
 .Va ipfilter_enable
 or
 .Va ipnat_enable
 to
 .Dq Li YES
 too.
 See
 .Va ipfilter_enable
 for a detailed discussion.
 Note that if
 .Va kern_securelevel
 is set to 3,
 .Va ipfs_enable
 cannot be used
 because the raised securelevel will prevent
 .Xr ipfs 8
 from saving the state tables at shutdown time.
 .\" ----- ipfs_program setting ----------------------------------
 .It Va ipfs_program
 .Pq Vt str
 Path to
 .Xr ipfs 8
 (default
 .Pa /sbin/ipfs ) .
 .\" ----- ipfs_flags setting ------------------------------------
 .It Va ipfs_flags
 .Pq Vt str
 Empty by default.
 This variable contains flags passed to the
 .Xr ipfs 8
 program.
 .\" ----- end of added ipf hook ---------------------------------
 .It Va pf_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting this to
 .Dq Li YES
 enables
 .Xr pf 4
 packet filtering.
 .Pp
 Typical usage will require putting
 .Pp
 .Dl pf_enable="YES"
 .Pp
 into
 .Pa /etc/rc.conf
 and editing
 .Pa /etc/pf.conf
 appropriately.
 Adding
 .Pp
 .Dl "device pf"
 .Pp
 builds support for
 .Xr pf 4
 into the kernel, otherwise the
 kernel module will be loaded.
 .It Va pf_rules
 .Pq Vt str
 Path to
 .Xr pf 4
 ruleset configuration file
 (default
 .Pa /etc/pf.conf ) .
 .It Va pf_program
 .Pq Vt str
 Path to
 .Xr pfctl 8
 (default
 .Pa /sbin/pfctl ) .
 .It Va pf_flags
 .Pq Vt str
 If
 .Va pf_enable
 is set to
 .Dq Li YES ,
 these flags are passed to the
 .Xr pfctl 8
 program when loading the ruleset.
 .It Va pflog_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting this to
 .Dq Li YES
 enables
 .Xr pflogd 8
 which logs packets from the
 .Xr pf 4
 packet filter.
 .It Va pflog_logfile
 .Pq Vt str
 If
 .Va pflog_enable
 is set to
 .Dq Li YES
 this controls where
 .Xr pflogd 8
 stores the logfile
 (default
 .Pa /var/log/pflog ) .
 Check
 .Pa /etc/newsyslog.conf
 to adjust logfile rotation for this.
 .It Va pflog_program
 .Pq Vt str
 Path to
 .Xr pflogd 8
 (default
 .Pa /sbin/pflogd ) .
 .It Va pflog_flags
 .Pq Vt str
 Empty by default.
 This variable contains additional flags passed to the
 .Xr pflogd 8
 program.
 .It Va ftpproxy_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting this to
 .Dq Li YES
 enables
 .Xr ftp-proxy 8
 which supports the
 .Xr pf 4
 packet filter in translating ftp connections.
 .It Va ftpproxy_flags
 .Pq Vt str
 Empty by default.
 This variable contains additional flags passed to the
 .Xr ftp-proxy 8
 program.
 .It Va pfsync_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting this to
 .Dq Li YES
 enables exposing
 .Xr pf 4
 state changes to other hosts over the network by means of
 .Xr pfsync 4 .
 The
 .Va pfsync_syncdev
 variable
 must also be set then.
 .It Va pfsync_syncdev
 .Pq Vt str
 Empty by default.
 This variable specifies the name of the network interface
 .Xr pfsync 4
 should operate through.
 It must be set accordingly if
 .Va pfsync_enable
 is set to
 .Dq Li YES .
 .It Va pfsync_syncpeer
 .Pq Vt str
 Empty by default.
 This variable is optional.
 By default, state change messages are sent out on the synchronisation
 interface using IP multicast packets.
 The protocol is IP protocol 240, PFSYNC, and the multicast group used is
 224.0.0.240.
 When a peer address is specified using the
 .Va pfsync_syncpeer
 option, the peer address is used as a destination for the pfsync
 traffic, and the traffic can then be protected using
 .Xr ipsec 4 .
 See the
 .Xr pfsync 4
 manpage for more details about using
 .Xr ipsec 4
 with
 .Xr pfsync 4
 interfaces.
 .It Va pfsync_ifconfig
 .Pq Vt str
 Empty by default.
 This variable can contain additional options to be passed to the
 .Xr ifconfig 8
 command used to set up
 .Xr pfsync 4 .
 .It Va tcp_extensions
 .Pq Vt bool
 Set to
 .Dq Li YES
 by default.
 Setting this to
 .Dq Li NO
 disables certain TCP options as described by
 .Rs
 .%T "RFC 1323"
 .Re
 Setting this to
 .Dq Li NO
 might help remedy such problems with connections as randomly hanging
 or other weird behavior.
 Some network devices are known
 to be broken with respect to these options.
 .It Va log_in_vain
 .Pq Vt int
 Set to 0 by default.
 The
 .Xr sysctl 8
 variables,
 .Va net.inet.tcp.log_in_vain
 and
 .Va net.inet.udp.log_in_vain ,
 as described in
 .Xr tcp 4
 and
 .Xr udp 4 ,
 are set to the given value.
 .It Va tcp_keepalive
 .Pq Vt bool
 Set to
 .Dq Li YES
 by default.
 Setting to
 .Dq Li NO
 will disable probing idle TCP connections to verify that the
 peer is still up and reachable.
 .It Va tcp_drop_synfin
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting to
 .Dq Li YES
 will cause the kernel to ignore TCP frames that have both
 the SYN and FIN flags set.
 This prevents OS fingerprinting, but may
 break some legitimate applications.
 .It Va icmp_drop_redirect
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting to
 .Dq Li YES
 will cause the kernel to ignore ICMP REDIRECT packets.
 Refer to
 .Xr icmp 4
 for more information.
 .It Va icmp_log_redirect
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 Setting to
 .Dq Li YES
 will cause the kernel to log ICMP REDIRECT packets.
 Note that
 the log messages are not rate-limited, so this option should only be used
 for troubleshooting networks.
 Refer to
 .Xr icmp 4
 for more information.
 .It Va icmp_bmcastecho
 .Pq Vt bool
 Set to
 .Dq Li YES
 to respond to broadcast or multicast ICMP ping packets.
 Refer to
 .Xr icmp 4
 for more information.
 .It Va ip_portrange_first
 .Pq Vt int
 If not set to
 .Dq Li NO ,
 this is the first port in the default portrange.
 Refer to
 .Xr ip 4
 for more information.
 .It Va ip_portrange_last
 .Pq Vt int
 If not set to
 .Dq Li NO ,
 this is the last port in the default portrange.
 Refer to
 .Xr ip 4
 for more information.
 .It Va network_interfaces
 .Pq Vt str
 Set to the list of network interfaces to configure on this host or
 .Dq Li AUTO
 (the default) for all current interfaces.
 Setting the
 .Va network_interfaces
 variable to anything other than the default is deprecated.
 Interfaces that the administrator wishes to store configuration for,
 but not start at boot should be configured with the
 .Dq Li NOAUTO
 keyword in their
 .Va ifconfig_ Ns Aq Ar interface
 variables as described below.
 .Pp
 An
 .Va ifconfig_ Ns Aq Ar interface
 variable is also assumed to exist for each value of
 .Ar interface .
 When an interface name contains any of the characters
 .Dq Li .-/+
 they are translated to
 .Dq Li _
 before lookup.
 The variable can contain arguments to
 .Xr ifconfig 8 ,
 as well as special case-insensitive keywords described below.
 Such keywords are removed before passing the value to
 .Xr ifconfig 8
 while the order of the other arguments is preserved.
 .Pp
 One can configure more than one IPv4 address with the
 .Va ipv4_addrs_ Ns Aq Ar interface
 variable.
 One or more IP addresses must be provided in Classless Inter-Domain
 Routing (CIDR) address notation, whose last byte can be a range like
 192.0.2.5-23/24.
 In this case the address 192.0.2.5 will be configured with the
 netmask /24 and the addresses 192.0.2.6 to 192.0.2.23 with
 the non-conflicting netmask /32 as explained in the
 .Xr ifconfig 8
 alias section.
 With the interface in question being
 .Li ed0 ,
 an example could look like:
 .Bd -literal
 ipv4_addrs_ed0="192.0.2.129/27 192.0.2.1-5/28"
 .Ed
 .Pp
 It is also possible to add IP alias entries using
 .Xr ifconfig 8
 syntax.
 Assuming that the interface in question was
 .Li ed0 ,
 it might look
 something like this:
 .Bd -literal
 ifconfig_ed0_alias0="inet 127.0.0.253 netmask 0xffffffff"
 ifconfig_ed0_alias1="inet 127.0.0.254 netmask 0xffffffff"
 .Ed
 .Pp
 And so on.
 For each
 .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n
 entry that is found,
 its contents are passed to
 .Xr ifconfig 8 .
 Execution stops at the first unsuccessful access, so if
 something like this is present:
 .Bd -literal
 ifconfig_ed0_alias0="inet 127.0.0.251 netmask 0xffffffff"
 ifconfig_ed0_alias1="inet 127.0.0.252 netmask 0xffffffff"
 ifconfig_ed0_alias2="inet 127.0.0.253 netmask 0xffffffff"
 ifconfig_ed0_alias4="inet 127.0.0.254 netmask 0xffffffff"
 .Ed
 .Pp
 Then note that alias4 would
 .Em not
 be added since the search would
 stop with the missing
 .Dq Li alias3
 entry.
 Due to this difficult to manage behavior, the
 .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n
 form is deprecated.
 .Pp
 If the
 .Pa /etc/start_if. Ns Aq Ar interface
 file is present, it is read and executed by the
 .Xr sh 1
 interpreter
 before configuring the interface as specified in the
 .Va ifconfig_ Ns Aq Ar interface
 and
 .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n
 variables.
 .Pp
 If a
 .Va vlans_ Ns Aq Ar interface
 variable is set,
 a
 .Xr vlan 4
 interface will be created for each item in the list with the
 .Ar vlandev
 argument set to
 .Ar interface .
 If a vlan interface's name is a number,
 then that number is used as the vlan tag and the new vlan interface is
 named
 .Ar interface . Ns Ar tag .
 Otherwise,
 the vlan tag must be specified via a
 .Va vlan
 parameter in the
 .Va create_args_ Ns Aq Ar interface
 variable.
 .Pp
 To create a vlan device named
 .Li em0.101
 on
 .Li em0
 with the vlan tag 101 and the optional IPv4 address 192.0.2.1/24:
 .Bd -literal
 vlans_em0="101"
 ifconfig_em0_101="inet 192.0.2.1/24"
 .Ed
 .Pp
 To create a vlan device named
 .Li myvlan
 on
 .Li em0
 with the vlan tag 102:
 .Bd -literal
 vlans_em0="myvlan"
 create_args_myvlan="vlan 102"
 .Ed
 .Pp
 If a
 .Va wlans_ Ns Aq Ar interface
 variable is set,
 an
 .Xr wlan 4
 interface will be created for each item in the list with the
 .Ar wlandev
 argument set to
 .Ar interface .
 Further wlan cloning arguments may be passed to the
 .Xr ifconfig 8
 .Cm create
 command by setting the
 .Va create_args_ Ns Aq Ar interface
 variable.
 One or more
 .Xr wlan 4
 devices must be created for each wireless device as of
 .Fx 8.0 .
 Debugging flags for
 .Xr wlan 4
 devices as set by
 .Xr wlandebug 8
 may be specified with an
 .Va wlandebug_ Ns Aq Ar interface
 variable.
 The contents of this variable will be passed directly to
 .Xr wlandebug 8 .
 .Pp
 If the
 .Va ifconfig_ Ns Aq Ar interface
 contains the keyword
 .Dq Li NOAUTO
 then the interface will not be configured
 at boot or by
 .Pa /etc/pccard_ether
 when
 .Va network_interfaces
 is set to
 .Dq Li AUTO .
 .Pp
 It is possible to bring up an interface with DHCP by adding
 .Dq Li DHCP
 to the
 .Va ifconfig_ Ns Aq Ar interface
 variable.
 For instance, to initialize the
 .Li ed0
 device via DHCP,
 it is possible to use something like:
 .Bd -literal
 ifconfig_ed0="DHCP"
 .Ed
 .Pp
 Also, if you want to configure your wireless interface with
 .Xr wpa_supplicant 8
 for use with WPA, EAP/LEAP or WEP, you need to add
 .Dq Li WPA
 to the
 .Va ifconfig_ Ns Aq Ar interface
 variable.
 .Pp
 Finally, you can add
 .Xr ifconfig 8
 options in this variable, in addition to the
 .Pa /etc/start_if. Ns Aq Ar interface
 file.
 For instance, to configure an
 .Xr ath 4
 wireless device in station mode with an address obtained
 via DHCP, using WPA authentication and 802.11b mode, it is
 possible to use something like:
 .Bd -literal
 wlans_ath0="wlan0"
 ifconfig_wlan0="DHCP WPA mode 11b"
 .Ed
 .Pp
 In addition to the
 .Va ifconfig_ Ns Aq Ar interface
 form, a fallback variable
 .Va ifconfig_DEFAULT
 may be configured.
 It will be used for all interfaces with no
 .Va ifconfig_ Ns Aq Ar interface
 variable.
 This is intended to replace the no longer supported
 .Va pccard_ifconfig
 variable.
 .Pp
 It is also possible to rename an interface by doing:
 .Bd -literal
 ifconfig_ed0_name="net0"
 ifconfig_net0="inet 192.0.2.1 netmask 0xffffff00"
 .Ed
 .It Va ipv6_enable
 .Pq Vt bool
 If the variable is
 .Dq Li YES ,
 .Dq Li inet6 accept_rtadv
 is added to all of
 .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6
 and the
 .Va ipv6_activate_all_interfaces
 is defined as
 .Dq Li YES .
 .Pp
 This variable is deprecated.  Use
 .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6
 and
 .Va ipv6_activate_all_interfaces
 if necessary.
 .It Va ipv6_prefer
 .Pq Vt bool
 If the variable is
 .Dq Li YES ,
 the default address selection policy table set by
 .Xr ip6addrctl 8
 will be IPv6-preferred.
 .Pp
 If the variable is
 .Dq Li NO ,
 the default address selection policy table set by
 .Xr ip6addrctl 8
 will be IPv4-preferred.
 .Pp
 This variable is deprecated.  Use
 .Va ip6addrctl_policy
 instead.
 .It Va ipv6_activate_all_interfaces
 If the variable is
 .Dq Li NO ,
 all interfaces which do not have the corresponding
 .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6
 variable will be marked as
 .Dq Li IFDISABLED
 for security reasons.  This means only IPv6 functionality on that interface
 is completely disabled.  For more details of
 .Dq Li IFDISABLED
 flag and keywords
 .Dq Li inet6 ifdisabled ,
 see
 .Xr ifconfig 8 .
 .Pp
 Default is
 .Dq Li NO .
 .It Va ipv6_privacy
 .Pq Vt bool
 If the variable is
 .Dq Li YES
 privacy addresses will be generated for each IPv6
 interface as described in RFC 4941.
 .It Va ipv6_network_interfaces
 .Pq Vt str
 This is the IPv6 equivalent of
 .Va network_interfaces .
 Normally manual configuration of this variable is not needed.
 .Pp
 .It Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6
 .Pq Vt str
 IPv6 functionality on an interface should be configured by
 .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 ,
 instead of setting ifconfig parameters in
 .Va ifconfig_ Ns Aq Ar interface .
 Aliases should be set by
 .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n
 with
 .Dq Li inet6
 keyword.  For example:
 .Bd -literal
 ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64"
 ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64"
 .Ed
 .Pp
 Interfaces that have an
 .Dq Li inet6 accept_rtadv
 keyword in
 .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6
 setting will be automatically configured by
 .Xr rtsol 8 .
 Note that this automatic configuration is disabled if the
 .Va ipv6_gateway_enable
 is set to
 .Dq Li YES .
 .It Va ipv6_prefix_ Ns Aq Ar interface
 .Pq Vt str
 If one or more prefixes are defined in
 .Va ipv6_prefix_ Ns Aq Ar interface
 addresses based on each prefix and the EUI-64 interface index will be
 configured on that interface.
 .It Va ipv6_default_interface
 .Pq Vt str
 If not set to
 .Dq Li NO ,
 this is the default output interface for scoped addresses.
 This works only with ipv6_gateway_enable="NO".
 .It Va ip6addrctl_enable
 .Pq Vt bool
 This variable is to enable configuring default address selection policy table
 .Pq RFC 3484 .
 The table can be specified in another variable
 .Va ip6addrctl_policy .
 For
 .Va ip6addrctl_policy
 the following keywords can be specified:
 .Dq Li ipv4_prefer ,
 .Dq Li ipv6_prefer ,
 or
 .Dq Li AUTO .
 .Pp
 If
 .Dq Li ipv4_prefer
 or
 .Dq Li ipv6_prefer
 is specified,
 .Xr ip6addrctl 8
 installs a pre-defined policy table described in Section 2.1
 .Pq IPv6-preferred
 or 10.3
 .Pq IPv4-preferred
 of RFC 3484.
 .Pp
 If
 .Dq Li AUTO
 is specified, it attempts to read a file
 .Pa /etc/ip6addrctl.conf
 first.  If this file is found,
 .Xr ip6addrctl 8
 reads and installs it.  If not found, a policy is automatically set
 according to
 .Va ipv6_activate_all_interfaces
 variable; if the variable is set to
 .Dq Li YES
 the IPv6-preferred one is used.  Otherwise IPv4-preferred.
 .Pp
 The default values of
 .Va ip6addrctl_enable
 and
 .Va ip6addrctl_policy
 are
 .Dq Li YES
 and
 .Dq Li AUTO ,
 respectively.
 .It Va cloned_interfaces
 .Pq Vt str
 Set to the list of clonable network interfaces to create on this host.
 Further cloning arguments may be passed to the
 .Xr ifconfig 8
 .Cm create
 command for each interface by setting the
 .Va create_args_ Ns Aq Ar interface
 variable.
 Entries in
 .Va cloned_interfaces
 are automatically appended to
 .Va network_interfaces
 for configuration.
 .It Va fec_interfaces
 .Pq Vt str
 Set to the list of
 .Xr ng_fec 4
 Fast EtherChannel interfaces to configure on this host.
 A
 .Va fecconfig_ Ns Aq Ar interface
 variable is assumed to exist for each value of
 .Ar interface .
 The value of this variable is used to configure link aggregated interfaces
 according to the syntax of the
 .Cm NGM_FEC_ADD_IFACE
 to
 .Xr ngctl 8
 msg.
 Additionally, this option ensures that each listed interface is created
 via the
 .Cm mkpeer
 command to
 .Xr ngctl 8
 before attempting to configure it.
 For example:
 .Bd -literal
 fec_interfaces="fec0"
 fecconfig_fec0="em0 em1"
 ifconfig_fec0="DHCP"
 .Ed
 .It Va gif_interfaces
 .Pq Vt str
 Set to the list of
 .Xr gif 4
 tunnel interfaces to configure on this host.
 A
 .Va gifconfig_ Ns Aq Ar interface
 variable is assumed to exist for each value of
 .Ar interface .
 The value of this variable is used to configure the link layer of the
 tunnel according to the syntax of the
 .Cm tunnel
 option to
 .Xr ifconfig 8 .
 Additionally, this option ensures that each listed interface is created
 via the
 .Cm create
 option to
 .Xr ifconfig 8
 before attempting to configure it.
 .It Va sppp_interfaces
 .Pq Vt str
 Set to the list of
 .Xr sppp 4
 interfaces to configure on this host.
 A
 .Va spppconfig_ Ns Aq Ar interface
 variable is assumed to exist for each value of
 .Ar interface .
 Each interface should also be configured by a general
 .Va ifconfig_ Ns Aq Ar interface
 setting.
 Refer to
 .Xr spppcontrol 8
 for more information about available options.
 .It Va ppp_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr ppp 8
 daemon.
 .It Va ppp_profile
 .Pq Vt str
 The name of the profile to use from
 .Pa /etc/ppp/ppp.conf .
 Also used for per-profile overrides of
 .Va ppp_mode
 and
 .Va ppp_nat ,
 and
 .Va ppp_ Ns Ao Ar profile Ac Ns _unit .
 When the profile name contains any of the characters
 .Dq Li .-/+
 they are translated to
 .Dq Li _
 for the purposes of the override variable names.
 .It Va ppp_mode
 .Pq Vt str
 Mode in which to run the
 .Xr ppp 8
 daemon.
 .It Va ppp_ Ns Ao Ar profile Ac Ns _mode
 .Pq Vt str
 Overrides the global
 .Va ppp_mode
 for
 .Ar profile .
 Accepted modes are
 .Dq Li auto ,
 .Dq Li ddial ,
 .Dq Li direct
 and
 .Dq Li dedicated .
 See the manual for a full description.
 .It Va ppp_nat
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enables network address translation.
 Used in conjunction with
 .Va gateway_enable
 allows hosts on private network addresses access to the Internet using
 this host as a network address translating router.
 .It Va ppp_ Ns Ao Ar profile Ac Ns _nat
 .Pq Vt str
 Overrides the global
 .Va ppp_nat
 for
 .Ar profile .
 .It Va ppp_ Ns Ao Ar profile Ac Ns _unit
 .Pq Vt int
 Set the unit number to be used for this profile.
 See the manual description of
 .Fl unit Ns Ar N
 for details.
 .It Va ppp_user
 .Pq Vt str
 The name of the user under which
 .Xr ppp 8
 should be started.
 By
 default,
 .Xr ppp 8
 is started as
 .Dq Li root .
 .It Va rc_conf_files
 .Pq Vt str
 This option is used to specify a list of files that will override
 the settings in
 .Pa /etc/defaults/rc.conf .
 The files will be read in the order in which they are specified and should
 include the full path to the file.
 By default, the files specified are
 .Pa /etc/rc.conf
 and
 .Pa /etc/rc.conf.local .
 .It Va zfs_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 .Pa /etc/rc.d/zfs
 will attempt to automatically mount ZFS file systems and initialize ZFS volumes
 (ZVOLs).
+.It Va gptboot_enable
+.Pq Vt bool
+If set to
+.Dq Li YES ,
+.Pa /etc/rc.d/gptboot
+will log if the system successfully (or not) booted from a GPT partition,
+which had the
+.Ar bootonce
+attribute set using the
+.Xr gpart 8
+utility.
 .It Va gbde_autoattach_all
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 .Pa /etc/rc.d/gbde
 will attempt to automatically initialize your .bde devices in
 .Pa /etc/fstab .
 .It Va gbde_devices
 .Pq Vt str
 List the devices that the script should try to attach,
 or
 .Dq Li AUTO .
 .It Va gbde_lockdir
 .Pq Vt str
 The directory where the
 .Xr gbde 4
 lockfiles are located.
 The default lockfile directory is
 .Pa /etc .
 .Pp
 The lockfile for each individual
 .Xr gbde 4
 device can be overridden by setting the variable
 .Va gbde_lock_ Ns Aq Ar device ,
 where
 .Ar device
 is the encrypted device without the
 .Dq Pa /dev/
 and
 .Dq Pa .bde
 parts.
 .It Va gbde_attach_attempts
 .Pq Vt int
 Number of times to attempt attaching to a
 .Xr gbde 4
 device, i.e., how many times the user is asked for the pass-phrase.
 Default is 3.
 .It Va geli_devices
 .Pq Vt str
 List of devices to automatically attach on boot.
 Note that .eli devices from
 .Pa /etc/fstab
 are automatically appended to this list.
 .It Va geli_tries
 .Pq Vt int
 Number of times user is asked for the pass-phrase.
 If empty, it will be taken from
 .Va kern.geom.eli.tries
 sysctl variable.
 .It Va geli_default_flags
 .Pq Vt str
 Default flags to use by
 .Xr geli 8
 when configuring disk encryption.
 Flags can be configured for every device separately by defining
 .Va geli_ Ns Ao Ar device Ac Ns Va _flags
 variable.
 .It Va geli_autodetach
 .Pq Vt str
 Specifies if GELI devices should be marked for detach on last close after
 file systems are mounted.
 Default is
 .Dq Li YES .
 This can be changed for every device separately by defining
 .Va geli_ Ns Ao Ar device Ac Ns Va _autodetach
 variable.
 .It Va geli_swap_flags
 Options passed to the
 .Xr geli 8
 utility when encrypted GEOM providers for swap partitions are created.
 The default is
 .Dq Li "-e aes -l 256 -s 4096 -d" .
 .It Va root_rw_mount
 .Pq Vt bool
 Set to
 .Dq Li YES
 by default.
 After the file systems are checked at boot time, the root file system
 is remounted as read-write if this is set to
 .Dq Li YES .
 Diskless systems that mount their root file system from a read-only remote
 NFS share should set this to
 .Dq Li NO
 in their
 .Pa rc.conf .
 .It Va fsck_y_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 .Xr fsck 8
 will be run with the
 .Fl y
 flag if the initial preen
 of the file systems fails.
 .It Va background_fsck
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 the system will attempt to run
 .Xr fsck 8
 in the background where possible.
 .It Va background_fsck_delay
 .Pq Vt int
 The amount of time in seconds to sleep before starting a background
 .Xr fsck 8 .
 It defaults to sixty seconds to allow large applications such as
 the X server to start before disk I/O bandwidth is monopolized by
 .Xr fsck 8 .
 If set to a negative number, the background file system check will be
 delayed indefinitely to allow the administrator to run it at a more
 convenient time.
 For example it may be run from
 .Xr cron 8
 by adding a line like
 .Pp
 .Dl "0 4 * * * root /etc/rc.d/bgfsck forcestart"
 .Pp
 to
 .Pa /etc/crontab .
 .It Va netfs_types
 .Pq Vt str
 List of file system types that are network-based.
 This list should generally not be modified by end users.
 Use
 .Va extra_netfs_types
 instead.
 .It Va extra_netfs_types
 .Pq Vt str
 If set to something other than
 .Dq Li NO
 (the default),
 this variable extends the list of file system types
 for which automatic mounting at startup by
 .Xr rc 8
 should be delayed until the network is initialized.
 It should contain
 a whitespace-separated list of network file system descriptor pairs,
 each consisting of a file system type as passed to
 .Xr mount 8
 and a human-readable, one-word description,
 joined with a colon
 .Pq Ql \&: .
 Extending the default list in this way is only necessary
 when third party file system types are used.
 .It Va syslogd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr syslogd 8
 daemon.
 .It Va syslogd_program
 .Pq Vt str
 Path to
 .Xr syslogd 8
 (default
 .Pa /usr/sbin/syslogd ) .
 .It Va syslogd_flags
 .Pq Vt str
 If
 .Va syslogd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to
 .Xr syslogd 8 .
 .It Va inetd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr inetd 8
 daemon.
 .It Va inetd_program
 .Pq Vt str
 Path to
 .Xr inetd 8
 (default
 .Pa /usr/sbin/inetd ) .
 .It Va inetd_flags
 .Pq Vt str
 If
 .Va inetd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to
 .Xr inetd 8 .
 .It Va hastd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr hastd 8
 daemon.
 .It Va hastd_program
 .Pq Vt str
 Path to
 .Xr hastd 8
 (default
 .Pa /sbin/hastd ) .
 .It Va hastd_flags
 .Pq Vt str
 If
 .Va hastd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to
 .Xr hastd 8 .
 .It Va named_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr named 8
 daemon.
 .It Va named_program
 .Pq Vt str
 Path to
 .Xr named 8
 (default
 .Pa /usr/sbin/named ) .
 .It Va named_conf
 .Pq Vt str
 Path to
 .Xr named 8
 configuration file (default
 .Pa /etc/namedb/named.conf ) .
 .It Va named_flags
 .Pq Vt str
 If
 .Va named_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to
 .Xr named 8 .
 .It Va named_pidfile
 .Pq Vt str
 This is the default path to the
 .Xr named 8
 daemon's PID file.
 This must match the location in
 .Xr named.conf 5 .
 .It Va named_uid
 .Pq Vt str
 The user that the
 .Xr named 8
 process should be run as.
 .It Va named_chrootdir
 .Pq Vt str
 The root directory for a name server run in a
 .Xr chroot 8
 environment (default
 .Pa /var/named ) .
 If left empty
 .Xr named 8
 will not be run in a
 .Xr chroot 8
 environment.
 .It Va named_chroot_autoupdate
 .Pq Vt bool
 Set to
 .Dq Li NO
 to disable automatic update of the
 .Xr chroot 8
 environment.
 .It Va named_symlink_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 to disable symlinking of
 the daemon's PID file
 into the
 .Xr chroot 8
 environment.
 .It Va named_wait
 .Pq Vt bool
 Set to have
 .Pa /etc/rc.d/named
 loop until working name service is established.
 .It Va named_wait_host
 .Pq Vt str
 Name of host to look up for the named_wait option.
 (Default localhost)
 .It Va named_auto_forward
 .Pq Vt bool
 Set to enable automatic creation of a forwarder
 configuration file derived from
 .Pa /etc/resolv.conf .
 .It Va named_auto_forward_only
 .Pq Vt bool
 Set to change the default forwarder configuration from
 .Dq forward first
 to
 .Dq forward only .
 .It Va kerberos5_server_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start a Kerberos 5 authentication server
 at boot time.
 .It Va kerberos5_server
 .Pq Vt str
 If
 .Va kerberos5_server_enable
 is set to
 .Dq Li YES
 this is the path to Kerberos 5 Authentication Server.
 .It Va kerberos5_server_flags
 .Pq Vt str
 Empty by default.
 This variable contains additional flags to be passed to the Kerberos 5
 authentication server.
 .It Va kadmind5_server_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start
 .Xr kadmind 8 ,
 the Kerberos 5 Administration Daemon; set to
 .Dq Li NO
 on a slave server.
 .It Va kadmind5_server
 .Pq Vt str
 If
 .Va kadmind5_server_enable
 is set to
 .Dq Li YES
 this is the path to Kerberos 5 Administration Daemon.
 .It Va kpasswdd_server_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start
 .Xr kpasswdd 8 ,
 the Kerberos 5 Password-Changing Daemon; set to
 .Dq Li NO
 on a slave server.
 .It Va kpasswdd_server
 .Pq Vt str
 If
 .Va kpasswdd_server_enable
 is set to
 .Dq Li YES
 this is the path to Kerberos 5 Password-Changing Daemon.
 .It Va rwhod_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr rwhod 8
 daemon at boot time.
 .It Va rwhod_flags
 .Pq Vt str
 If
 .Va rwhod_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to it.
 .It Va amd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr amd 8
 daemon at boot time.
 .It Va amd_flags
 .Pq Vt str
 If
 .Va amd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to it.
 See the
 .Xr amd 8
 manpage for more information.
 .It Va amd_map_program
 .Pq Vt str
 If set,
 the specified program is run to get the list of
 .Xr amd 8
 maps.
 For example, if the
 .Xr amd 8
 maps are stored in NIS, one can set this to
 run
 .Xr ypcat 1
 to get a list of
 .Xr amd 8
 maps from the
 .Pa amd.master
 NIS map.
 .It Va update_motd
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 .Pa /etc/motd
 will be updated at boot time to reflect the kernel release
 being run.
 If set to
 .Dq Li NO ,
 .Pa /etc/motd
 will not be updated.
 .It Va nfs_client_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the NFS client daemons at boot time.
 .It Va nfs_access_cache
 .Pq Vt int
 If
 .Va nfs_client_enable
 is set to
 .Dq Li YES ,
 this can be set to
 .Dq Li 0
 to disable NFS ACCESS RPC caching, or to the number of seconds for which
 NFS ACCESS
 results should be cached.
 A value of 2-10 seconds will substantially reduce network
 traffic for many NFS operations.
 .It Va nfs_server_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the NFS server daemons at boot time.
 .It Va nfs_server_flags
 .Pq Vt str
 If
 .Va nfs_server_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr nfsd 8
 daemon.
 .It Va idmapd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the ID mapping daemon for NFS version 4.
 .It Va idmapd_flags
 .Pq Vt str
 If
 .Va idmapd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr idmapd 8
 daemon.
 .It Va mountd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 and no
 .Va nfs_server_enable
 is set, start
 .Xr mountd 8 ,
 but not
 .Xr nfsd 8
 daemon.
 It is commonly needed to run CFS without real NFS used.
 .It Va mountd_flags
 .Pq Vt str
 If
 .Va mountd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr mountd 8
 daemon.
 .It Va weak_mountd_authentication
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 allow services like PCNFSD to make non-privileged mount
 requests.
 .It Va nfs_reserved_port_only
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 provide NFS services only on a secure port.
 .It Va nfs_bufpackets
 .Pq Vt int
 If set to a number, indicates the number of packets worth of
 socket buffer space to reserve on an NFS client.
 The kernel default is typically 4.
 Using a higher number may be
 useful on gigabit networks to improve performance.
 The minimum value is
 2 and the maximum is 64.
 .It Va rpc_lockd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES
 and also an NFS server or client, run
 .Xr rpc.lockd 8
 at boot time.
 .It Va rpc_lockd_flags
 .Pq Vt str
 If
 .Va rpc_lockd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr rpc.lockd 8
 daemon.
 .It Va rpc_statd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES
 and also an NFS server or client, run
 .Xr rpc.statd 8
 at boot time.
 .It Va rpc_statd_flags
 .Pq Vt str
 If
 .Va rpc_statd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr rpc.statd 8
 daemon.
 .It Va rpcbind_program
 .Pq Vt str
 Path to
 .Xr rpcbind 8
 (default
 .Pa /usr/sbin/rpcbind ) .
 .It Va rpcbind_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr rpcbind 8
 service at boot time.
 .It Va rpcbind_flags
 .Pq Vt str
 If
 .Va rpcbind_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr rpcbind 8
 daemon.
 .It Va keyserv_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr keyserv 8
 daemon on boot for running Secure RPC.
 .It Va keyserv_flags
 .Pq Vt str
 If
 .Va keyserv_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr keyserv 8
 daemon.
 .It Va pppoed_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr pppoed 8
 daemon at boot time to provide PPP over Ethernet services.
 .It Va pppoed_ Ns Aq Ar provider
 .Pq Vt str
 .Xr pppoed 8
 listens to requests to this
 .Ar provider
 and ultimately runs
 .Xr ppp 8
 with a
 .Ar system
 argument of the same name.
 .It Va pppoed_flags
 .Pq Vt str
 Additional flags to pass to
 .Xr pppoed 8 .
 .It Va pppoed_interface
 .Pq Vt str
 The network interface to run
 .Xr pppoed 8
 on.
 This is mandatory when
 .Va pppoed_enable
 is set to
 .Dq Li YES .
 .It Va timed_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr timed 8
 service at boot time.
 This command is intended for networks of
 machines where a consistent
 .Dq "network time"
 for all hosts must be established.
 This is often useful in large NFS
 environments where time stamps on files are expected to be consistent
 network-wide.
 .It Va timed_flags
 .Pq Vt str
 If
 .Va timed_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr timed 8
 service.
 .It Va ntpdate_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run
 .Xr ntpdate 8
 at system startup.
 This command is intended to
 synchronize the system clock only
 .Em once
 from some standard reference.
 An option to set this up initially
 (from a list of known servers) is also provided by the
 .Xr sysinstall 8
 program when the system is first installed.
 .It Va ntpdate_config
 .Pq Vt str
 Configuration file for
 .Xr ntpdate 8 .
 Default
 .Pa /etc/ntp.conf .
 .It Va ntpdate_hosts
 .Pq Vt str
 A whitespace-separated list of NTP servers to synchronize with at startup.
 The default is to use the servers listed in
 .Va ntpdate_config ,
 if that file exists.
 .It Va ntpdate_program
 .Pq Vt str
 Path to
 .Xr ntpdate 8
 (default
 .Pa /usr/sbin/ntpdate ) .
 .It Va ntpdate_flags
 .Pq Vt str
 If
 .Va ntpdate_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr ntpdate 8
 command (typically a hostname).
 .It Va ntpd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr ntpd 8
 command at boot time.
 .It Va ntpd_program
 .Pq Vt str
 Path to
 .Xr ntpd 8
 (default
 .Pa /usr/sbin/ntpd ) .
 .It Va ntpd_config
 .Pq Vt str
 Path to
 .Xr ntpd 8
 configuration file.
 Default
 .Pa /etc/ntp.conf .
 .It Va ntpd_flags
 .Pq Vt str
 If
 .Va ntpd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr ntpd 8
 daemon.
 .It Va ntpd_sync_on_start
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 .Xr ntpd 8
 is run with the
 .Fl g
 flag, which syncs the system's clock on startup.
 See
 .Xr ntpd 8
 for more information regarding the
 .Fl g
 option.
 This is a preferred alternative to using
 .Xr ntpdate 8
 or specifying the
 .Va ntpdate_enable
 variable.
 .It Va nis_client_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr ypbind 8
 service at system boot time.
 .It Va nis_client_flags
 .Pq Vt str
 If
 .Va nis_client_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr ypbind 8
 service.
 .It Va nis_ypset_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr ypset 8
 daemon at system boot time.
 .It Va nis_ypset_flags
 .Pq Vt str
 If
 .Va nis_ypset_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr ypset 8
 daemon.
 .It Va nis_server_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr ypserv 8
 daemon at system boot time.
 .It Va nis_server_flags
 .Pq Vt str
 If
 .Va nis_server_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr ypserv 8
 daemon.
 .It Va nis_ypxfrd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr rpc.ypxfrd 8
 daemon at system boot time.
 .It Va nis_ypxfrd_flags
 .Pq Vt str
 If
 .Va nis_ypxfrd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr rpc.ypxfrd 8
 daemon.
 .It Va nis_yppasswdd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr rpc.yppasswdd 8
 daemon at system boot time.
 .It Va nis_yppasswdd_flags
 .Pq Vt str
 If
 .Va nis_yppasswdd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr rpc.yppasswdd 8
 daemon.
 .It Va rpc_ypupdated_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Nm rpc.ypupdated
 daemon at system boot time.
 .It Va bsnmpd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr bsnmpd 1
 daemon at system boot time.
 Be sure to understand the security implications of running an SNMP daemon
 on your host.
 .It Va bsnmpd_flags
 .Pq Vt str
 If
 .Va bsnmpd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr bsnmpd 1
 daemon.
 .It Va defaultrouter
 .Pq Vt str
 If not set to
 .Dq Li NO ,
 create a default route to this host name or IP address
 (use an IP address if this router is also required to get to the
 name server!).
 .It Va ipv6_defaultrouter
 .Pq Vt str
 The IPv6 equivalent of
 .Va defaultrouter .
 .It Va static_arp_pairs
 .Pq Vt str
 Set to the list of static ARP pairs that are to be added at system
 boot time.
 For each whitespace separated
 .Ar element
 in the value, a
 .Va static_arp_ Ns Aq Ar element
 variable is assumed to exist whose contents will later be passed to a
 .Dq Nm arp Cm -S
 operation.
 For example:
 .Bd -literal
 static_arp_pairs="gw"
 static_arp_gw="192.168.1.1 00:01:02:03:04:05"
 .Ed
 .It Va static_routes
 .Pq Vt str
 Set to the list of static routes that are to be added at system
 boot time.
 If not set to
 .Dq Li NO
 then for each whitespace separated
 .Ar element
 in the value, a
 .Va route_ Ns Aq Ar element
 variable is assumed to exist
 whose contents will later be passed to a
 .Dq Nm route Cm add
 operation.
 For example:
 .Bd -literal
 static_routes="mcast gif0local"
 route_mcast="-net 224.0.0.0/4 -iface gif0"
 route_gif0local="-host 169.254.1.1 -iface lo0"
 .Ed
 .It Va ipv6_static_routes
 .Pq Vt str
 The IPv6 equivalent of
 .Va static_routes .
 If not set to
 .Dq Li NO
 then for each whitespace separated
 .Ar element
 in the value, a
 .Va ipv6_route_ Ns Aq Ar element
 variable is assumed to exist
 whose contents will later be passed to a
 .Dq Nm route Cm add Fl inet6
 operation.
 .It Va natm_static_routes
 .Pq Vt str
 The
 .Xr natmip 4
 equivalent of
 .Va static_routes .
 If not empty then for each whitespace separated
 .Ar element
 in the value, a
 .Va route_ Ns Aq Ar element
 variable is assumed to exist whose contents will later be passed to a
 .Dq Nm atmconfig Cm natm Cm add
 operation.
 .It Va gateway_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 configure host to act as an IP router, e.g.\& to forward packets
 between interfaces.
 .It Va ipv6_gateway_enable
 .Pq Vt bool
 The IPv6 equivalent of
 .Va gateway_enable .
 .It Va routed_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run a routing daemon of some sort, based on the
 settings of
 .Va routed_program
 and
 .Va routed_flags .
 .It Va route6d_enable
 .Pq Vt bool
 The IPv6 equivalent of
 .Va routed_enable .
 If set to
 .Dq Li YES ,
 run a routing daemon of some sort, based on the
 settings of
 .Va route6d_program
 and
 .Va route6d_flags .
 .It Va routed_program
 .Pq Vt str
 If
 .Va routed_enable
 is set to
 .Dq Li YES ,
 this is the name of the routing daemon to use.
 .It Va route6d_program
 .Pq Vt str
 The IPv6 equivalent of
 .Va routed_program .
 .It Va routed_flags
 .Pq Vt str
 If
 .Va routed_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the routing daemon.
 .It Va route6d_flags
 .Pq Vt str
 The IPv6 equivalent of
 .Va routed_flags .
 .It Va mrouted_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the multicast routing daemon,
 .Xr mrouted 8 .
 .It Va mroute6d_enable
 .Pq Vt bool
 The IPv6 equivalent of
 .Va mrouted_enable .
 If set to
 .Dq Li YES ,
 run the IPv6 multicast routing daemon.
 .Pp
 Note that multicast routing daemons are no longer included in the
 .Fx
 base system; however, both
 .Xr mrouted 8
 and
 .Xr pim6dd 8
 may be installed from the
 .Fx
 Ports Collection.
 .It Va mrouted_flags
 .Pq Vt str
 If
 .Va mrouted_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr mrouted 8
 daemon.
 .It Va mroute6d_flags
 .Pq Vt str
 The IPv6 equivalent of
 .Va mrouted_flags .
 If
 .Va mroute6d_enable
 is set to
 .Dq Li YES ,
 these are the flags passed to the IPv6 multicast routing daemon.
 .It Va mroute6d_program
 .Pq Vt str
 If
 .Va mroute6d_enable
 is set to
 .Dq Li YES ,
 this is the path to the IPv6 multicast routing daemon.
 .It Va rtadvd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr rtadvd 8
 daemon at boot time.
 .Xr rtadvd 8
 will only run if
 .Va ipv6_gateway_enable
 is also set to
 .Dq Li YES .
 The
 .Xr rtadvd 8
 utility sends router advertisement packets to the interfaces specified in
 .Va rtadvd_interfaces
 and should only be enabled with great care.
 You may want to fine-tune
 .Xr rtadvd.conf 5 .
 .It Va rtadvd_interfaces
 .Pq Vt str
 If
 .Va rtadvd_enable
 is set to
 .Dq Li YES ,
 this is the list of interfaces to use.
 .It Va ipxgateway_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable the routing of IPX traffic.
 .It Va ipxrouted_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr IPXrouted 8
 daemon at system boot time.
 .It Va ipxrouted_flags
 .Pq Vt str
 If
 .Va ipxrouted_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr IPXrouted 8
 daemon.
 .It Va arpproxy_all
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable global proxy ARP.
 .It Va forward_sourceroute
 .Pq Vt bool
 If set to
 .Dq Li YES
 and
 .Va gateway_enable
 is also set to
 .Dq Li YES ,
 source-routed packets are forwarded.
 .It Va accept_sourceroute
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 the system will accept source-routed packets directed at it.
 .It Va rarpd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr rarpd 8
 daemon at system boot time.
 .It Va rarpd_flags
 .Pq Vt str
 If
 .Va rarpd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr rarpd 8
 daemon.
 .It Va bootparamd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr bootparamd 8
 daemon at system boot time.
 .It Va bootparamd_flags
 .Pq Vt str
 If
 .Va bootparamd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr bootparamd 8
 daemon.
 .It Va stf_interface_ipv4addr
 .Pq Vt str
 If not set to
 .Dq Li NO ,
 this is the local IPv4 address for 6to4 (IPv6 over IPv4 tunneling
 interface).
 Specify this entry to enable the 6to4 interface.
 .It Va stf_interface_ipv4plen
 .Pq Vt int
 Prefix length for 6to4 IPv4 addresses, to limit peer address range.
 An effective value is 0-31.
 .It Va stf_interface_ipv6_ifid
 .Pq Vt str
 IPv6 interface ID for
 .Xr stf 4 .
 This can be set to
 .Dq Li AUTO .
 .It Va stf_interface_ipv6_slaid
 .Pq Vt str
 IPv6 Site Level Aggregator for
 .Xr stf 4 .
 .It Va ipv6_faith_prefix
 .Pq Vt str
 If not set to
 .Dq Li NO ,
 this is the faith prefix to enable a FAITH IPv6-to-IPv4 TCP
 translator.
 You also need to set up
 .Xr faithd 8 .
 .It Va ipv6_ipv4mapping
 .Pq Vt bool
 If set to
 .Dq Li YES
 this enables IPv4 mapped IPv6 address communication (like
 .Li ::ffff:a.b.c.d ) .
 .It Va atm_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable the configuration of ATM interfaces at system boot time.
 For all of the ATM variables described below, please refer to the
 .Xr atm 8
 manual page for further details on the available command parameters.
 Also refer to the files in
 .Pa /usr/share/examples/atm
 for more detailed configuration information.
 .It Va atm_load
 .Pq Vt str
 This is a list of physical ATM interface drivers to load.
 Typical values are
 .Dq Li hfa_pci
 and/or
 .Dq Li hea_pci .
 .It Va atm_netif_ Ns Aq Ar intf
 .Pq Vt str
 For the ATM physical interface
 .Ar intf ,
 this variable defines the name prefix and count for the ATM network
 interfaces to be created.
 The value will be passed as the parameters of an
 .Dq Nm atm Cm "set netif" Ar intf
 command.
 .It Va atm_sigmgr_ Ns Aq Ar intf
 .Pq Vt str
 For the ATM physical interface
 .Ar intf ,
 this variable defines the ATM signalling manager to be used.
 The value will be passed as the parameters of an
 .Dq Nm atm Cm attach Ar intf
 command.
 .It Va atm_prefix_ Ns Aq Ar intf
 .Pq Vt str
 For the ATM physical interface
 .Ar intf ,
 this variable defines the NSAP prefix for interfaces using a UNI signalling
 manager.
 If set to
 .Dq Li ILMI ,
 the prefix will automatically be set via the
 .Xr ilmid 8
 daemon.
 Otherwise, the value will be passed as the parameters of an
 .Dq Nm atm Cm "set prefix" Ar intf
 command.
 .It Va atm_macaddr_ Ns Aq Ar intf
 .Pq Vt str
 For the ATM physical interface
 .Ar intf ,
 this variable defines the MAC address for interfaces using a UNI signalling
 manager.
 If set to
 .Dq Li NO ,
 the hardware MAC address contained in the ATM interface card will be used.
 Otherwise, the value will be passed as the parameters of an
 .Dq Nm atm Cm "set mac" Ar intf
 command.
 .It Va atm_arpserver_ Ns Aq Ar netif
 .Pq Vt str
 For the ATM network interface
 .Ar netif ,
 this variable defines the ATM address for a host which is to provide ATMARP
 service.
 This variable is only applicable to interfaces using a UNI signalling
 manager.
 If set to
 .Dq Li local ,
 this host will become an ATMARP server.
 The value will be passed as the parameters of an
 .Dq Nm atm Cm "set arpserver" Ar netif
 command.
 .It Va atm_scsparp_ Ns Aq Ar netif
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 SCSP/ATMARP service for the network interface
 .Ar netif
 will be initiated using the
 .Xr scspd 8
 and
 .Xr atmarpd 8
 daemons.
 This variable is only applicable if
 .Va atm_arpserver_ Ns Aq Ar netif
 is set to
 .Dq Li local .
 .It Va atm_pvcs
 .Pq Vt str
 Set to the list of ATM PVCs to be added at system
 boot time.
 For each whitespace separated
 .Ar element
 in the value, an
 .Va atm_pvc_ Ns Aq Ar element
 variable is assumed to exist.
 The value of each of these variables
 will be passed as the parameters of an
 .Dq Nm atm Cm "add pvc"
 command.
 .It Va atm_arps
 .Pq Vt str
 Set to the list of permanent ATM ARP entries to be added
 at system boot time.
 For each whitespace separated
 .Ar element
 in the value, an
 .Va atm_arp_ Ns Aq Ar element
 variable is assumed to exist.
 The value of each of these variables
 will be passed as the parameters of an
 .Dq Nm atm Cm "add arp"
 command.
 .It Va natm_interfaces
 .Pq Vt str
 Set to the list of
 .Xr natm 4
 interfaces that will also be used for HARP through
 .Xr harp 4 .
 If this list is not empty all interfaces in the list will be brought up
 with
 .Xr ifconfig 8
 and
 .Xr harp 4
 will be loaded.
 For this to work the interface drivers must be either compiled into the
 kernel or must reside on the root partition.
 .It Va keybell
 .Pq Vt str
 The keyboard bell sound.
 Set to
 .Dq Li normal ,
 .Dq Li visual ,
 .Dq Li off ,
 or
 .Dq Li NO
 if the default behavior is desired.
 For details, refer to the
 .Xr kbdcontrol 1
 manual page.
 .It Va keyboard
 .Pq Vt str
 If set to a non-null string, the virtual console's keyboard input is
 set to this device.
 .It Va keymap
 .Pq Vt str
 If set to
 .Dq Li NO ,
 no keymap is installed, otherwise the value is used to install
 the keymap file in
 .Pa /usr/share/syscons/keymaps/ Ns Ao Ar value Ac Ns Pa .kbd .
 .It Va keyrate
 .Pq Vt str
 The keyboard repeat speed.
 Set to
 .Dq Li slow ,
 .Dq Li normal ,
 .Dq Li fast ,
 or
 .Dq Li NO
 if the default behavior is desired.
 .It Va keychange
 .Pq Vt str
 If not set to
 .Dq Li NO ,
 attempt to program the function keys with the value.
 The value should
 be a single string of the form:
 .Dq Ar funkey_number new_value Op Ar funkey_number new_value ... .
 .It Va cursor
 .Pq Vt str
 Can be set to the value of
 .Dq Li normal ,
 .Dq Li blink ,
 .Dq Li destructive ,
 or
 .Dq Li NO
 to set the cursor behavior explicitly or choose the default behavior.
 .It Va scrnmap
 .Pq Vt str
 If set to
 .Dq Li NO ,
 no screen map is installed, otherwise the value is used to install
 the screen map file in
 .Pa /usr/share/syscons/scrnmaps/ Ns Aq Ar value .
 .It Va font8x16
 .Pq Vt str
 If set to
 .Dq Li NO ,
 the default 8x16 font value is used for screen size requests, otherwise
 the value in
 .Pa /usr/share/syscons/fonts/ Ns Aq Ar value
 is used.
 .It Va font8x14
 .Pq Vt str
 If set to
 .Dq Li NO ,
 the default 8x14 font value is used for screen size requests, otherwise
 the value in
 .Pa /usr/share/syscons/fonts/ Ns Aq Ar value
 is used.
 .It Va font8x8
 .Pq Vt str
 If set to
 .Dq Li NO ,
 the default 8x8 font value is used for screen size requests, otherwise
 the value in
 .Pa /usr/share/syscons/fonts/ Ns Aq Ar value
 is used.
 .It Va blanktime
 .Pq Vt int
 If set to
 .Dq Li NO ,
 the default screen blanking interval is used, otherwise it is set
 to
 .Ar value
 seconds.
 .It Va saver
 .Pq Vt str
 If not set to
 .Dq Li NO ,
 this is the actual screen saver to use
 .Li ( blank , snake , daemon ,
 etc).
 .It Va moused_nondefault_enable
 .Pq Vt bool
 If set to
 .Dq Li NO ,
 the mouse device specified on
 the command line is not automatically treated as enabled by the
 .Pa /etc/rc.d/moused
 script.
 Having this variable set to
 .Dq Li YES
 allows a
 .Xr usb 4
 mouse,
 for example,
 to be enabled as soon as it is plugged in.
 .It Va moused_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 the
 .Xr moused 8
 daemon is started for doing cut/paste selection on the console.
 .It Va moused_type
 .Pq Vt str
 This is the protocol type of the mouse connected to this host.
 This variable must be set if
 .Va moused_enable
 is set to
 .Dq Li YES .
 The
 .Xr moused 8
 daemon
 is able to detect the appropriate mouse type automatically in many cases.
 Set this variable to
 .Dq Li auto
 to let the daemon detect it, or
 select one from the following list if the automatic detection fails.
 .Pp
 If the mouse is attached to the PS/2 mouse port, choose
 .Dq Li auto
 or
 .Dq Li ps/2 ,
 regardless of the brand and model of the mouse.
 Likewise, if the
 mouse is attached to the bus mouse port, choose
 .Dq Li auto
 or
 .Dq Li busmouse .
 All other protocols are for serial mice and will not work with
 the PS/2 and bus mice.
 If this is a USB mouse,
 .Dq Li auto
 is the only protocol type which will work.
 .Pp
 .Bl -tag -width ".Li x10mouseremote" -compact
 .It Li microsoft
 Microsoft mouse (serial)
 .It Li intellimouse
 Microsoft IntelliMouse (serial)
 .It Li mousesystems
 Mouse Systems Corp.\& mouse (serial)
 .It Li mmseries
 MM Series mouse (serial)
 .It Li logitech
 Logitech mouse (serial)
 .It Li busmouse
 A bus mouse
 .It Li mouseman
 Logitech MouseMan and TrackMan (serial)
 .It Li glidepoint
 ALPS GlidePoint (serial)
 .It Li thinkingmouse
 Kensington ThinkingMouse (serial)
 .It Li ps/2
 PS/2 mouse
 .It Li mmhittab
 MM HitTablet (serial)
 .It Li x10mouseremote
 X10 MouseRemote (serial)
 .It Li versapad
 Interlink VersaPad (serial)
 .El
 .Pp
 Even if the mouse is not in the above list, it may be compatible
 with one in the list.
 Refer to the manual page for
 .Xr moused 8
 for compatibility information.
 .Pp
 It should also be noted that while this is enabled, any
 other client of the mouse (such as an X server) should access
 the mouse through the virtual mouse device,
 .Pa /dev/sysmouse ,
 and configure it as a
 .Dq Li sysmouse
 type mouse, since all
 mouse data is converted to this single canonical format when
 using
 .Xr moused 8 .
 If the client program does not support the
 .Dq Li sysmouse
 type,
 specify the
 .Dq Li mousesystems
 type.
 It is the second preferred type.
 .It Va moused_port
 .Pq Vt str
 If
 .Va moused_enable
 is set to
 .Dq Li YES ,
 this is the actual port the mouse is on.
 It might be
 .Pa /dev/cuad0
 for a COM1 serial mouse,
 .Pa /dev/psm0
 for a PS/2 mouse or
 .Pa /dev/mse0
 for a bus mouse, for example.
 .It Va moused_flags
 .Pq Vt str
 If
 .Va moused_flags
 is set, its value is used as an additional set of flags to pass to the
 .Xr moused 8
 daemon.
 .It Va "moused_" Ns Ar XXX Ns Va "_flags"
 When
 .Va moused_nondefault_enable
 is enabled, and a
 .Xr moused 8
 daemon is started for a non-default port, the
 .Va "moused_" Ns Ar XXX Ns Va "_flags"
 set of options has precedence over and replaces the default
 .Va moused_flags
 (where
 .Ar XXX
 is the name of the non-default port, i.e.\&
 .Ar ums0 ) .
 By setting
 .Va "moused_" Ns Ar XXX Ns Va "_flags"
 it is possible to set up a different set of default flags for each
 .Xr moused 8
 instance.
 For example, you can use
 .Dq Li "-3"
 for the default
 .Va moused_flags
 to make your laptop's touchpad more comfortable to use,
 but an empty set of options for
 .Va moused_ums0_flags
 when your
 .Xr usb 4
 mouse has three or more buttons.
 .It Va mousechar_start
 .Pq Vt int
 If set to
 .Dq Li NO ,
 the default mouse cursor character range
 .Li 0xd0 Ns - Ns Li 0xd3
 is used,
 otherwise the range start is set
 to
 .Ar value
 character, see
 .Xr vidcontrol 1 .
 Use if the default range is occupied in the language code table.
 .It Va allscreens_flags
 .Pq Vt str
 If set,
 .Xr vidcontrol 1
 is run with these options for each of the virtual terminals
 .Pq Pa /dev/ttyv* .
 For example,
 .Dq Fl m Cm on
 will enable the mouse pointer on all virtual terminals
 if
 .Va moused_enable
 is set to
 .Dq Li YES .
 .It Va allscreens_kbdflags
 .Pq Vt str
 If set,
 .Xr kbdcontrol 1
 is run with these options for each of the virtual terminals
 .Pq Pa /dev/ttyv* .
 For example,
 .Dq Fl h Li 200
 will set the
 .Xr syscons 4
 scrollback (history) buffer to 200 lines.
 .It Va cron_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr cron 8
 daemon at system boot time.
 .It Va cron_program
 .Pq Vt str
 Path to
 .Xr cron 8
 (default
 .Pa /usr/sbin/cron ) .
 .It Va cron_flags
 .Pq Vt str
 If
 .Va cron_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to
 .Xr cron 8 .
 .It Va cron_dst
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable the special handling of transitions to and from the
 Daylight Saving Time in
 .Xr cron 8
 (equivalent to using the flag
 .Fl s ) .
 .It Va lpd_program
 .Pq Vt str
 Path to
 .Xr lpd 8
 (default
 .Pa /usr/sbin/lpd ) .
 .It Va lpd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr lpd 8
 daemon at system boot time.
 .It Va lpd_flags
 .Pq Vt str
 If
 .Va lpd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr lpd 8
 daemon.
 .It Va chkprintcap_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run the
 .Xr chkprintcap 8
 command before starting the
 .Xr lpd 8
 daemon.
 .It Va chkprintcap_flags
 .Pq Vt str
 If
 .Va lpd_enable
 and
 .Va chkprintcap_enable
 are set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr chkprintcap 8
 program.
 The default is
 .Dq Li -d ,
 which causes missing directories to be created.
 .It Va mta_start_script
 .Pq Vt str
 This variable specifies the full path to the script to run to start
 a mail transfer agent.
 The default is
 .Pa /etc/rc.sendmail .
 The
 .Va sendmail_*
 variables which
 .Pa /etc/rc.sendmail
 uses are documented in the
 .Xr rc.sendmail 8
 manual page.
 .It Va dumpdev
 .Pq Vt str
 Indicates the device (usually a swap partition) to which a crash dump
 should be written in the event of a system crash.
 If the value of this variable is
 .Dq Li AUTO ,
 the first suitable swap device listed in
 .Pa /etc/fstab
 will be used as dump device.
 Otherwise, the value of this variable is passed as the argument to
 .Xr dumpon 8 .
 To disable crash dumps, set this variable to
 .Dq Li NO .
 .It Va dumpdir
 .Pq Vt str
 When the system reboots after a crash and a crash dump is found on the
 device specified by the
 .Va dumpdev
 variable,
 .Xr savecore 8
 will save that crash dump and a copy of the kernel to the directory
 specified by the
 .Va dumpdir
 variable.
 The default value is
 .Pa /var/crash .
 Set to
 .Dq Li NO
 to not run
 .Xr savecore 8
 at boot time when
 .Va dumpdir
 is set.
 .It Va savecore_flags
 .Pq Vt str
 If crash dumps are enabled, these are the flags to pass to the
 .Xr savecore 8
 utility.
 .It Va quota_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to turn on user and group disk quotas on system startup via the
 .Xr quotaon 8
 command for all file systems marked as having quotas enabled in
 .Pa /etc/fstab .
 The kernel must be built with
 .Cd "options QUOTA"
 for disk quotas to function.
 .It Va check_quotas
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable user and group disk quota checking via the
 .Xr quotacheck 8
 command.
 .It Va quotacheck_flags
 .Pq Vt str
 If
 .Va quota_enable
 is set to
 .Dq Li YES ,
 and
 .Va check_quotas
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr quotacheck 8
 utility.
 The default is
 .Dq Li "-a" ,
 which checks quotas for all file systems with quotas enabled in
 .Pa /etc/fstab .
 .It Va quotaon_flags
 .Pq Vt str
 If
 .Va quota_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr quotaon 8
 utility.
 The default is
 .Dq Li "-a" ,
 which enables quotas for all file systems with quotas enabled in
 .Pa /etc/fstab .
 .It Va quotaoff_flags
 .Pq Vt str
 If
 .Va quota_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr quotaoff 8
 utility when shutting down the quota system.
 The default is
 .Dq Li "-a" ,
 which disables quotas for all file systems with quotas enabled in
 .Pa /etc/fstab .
 .It Va accounting_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable system accounting through the
 .Xr accton 8
 facility.
 .It Va ibcs2_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable iBCS2 (SCO) binary emulation at system initial boot
 time.
 .It Va ibcs2_loaders
 .Pq Vt str
 If not set to
 .Dq Li NO
 and if
 .Va ibcs2_enable
 is set to
 .Dq Li YES ,
 this specifies a list of additional iBCS2 loaders to enable.
 .It Va linux_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to enable Linux/ELF binary emulation at system initial
 boot time.
 .It Va svr4_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable SysVR4 emulation at boot time.
 .It Va sysvipc_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 load System V IPC primitives at boot time.
 .It Va clear_tmp_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to have
 .Pa /tmp
 cleaned at startup.
 .It Va clear_tmp_X
 .Pq Vt bool
 Set to
 .Dq Li NO
 to disable removing of X11 lock files,
 and the removal and (secure) recreation
 of the various socket directories for X11
 related programs.
 .It Va ldconfig_paths
 .Pq Vt str
 Set to the list of shared library paths to use with
 .Xr ldconfig 8 .
 NOTE:
 .Pa /usr/lib
 will always be added first, so it need not appear in this list.
 .It Va ldconfig32_paths
 .Pq Vt str
 Set to the list of 32-bit compatibility shared library paths to
 use with
 .Xr ldconfig 8 .
 .It Va ldconfig_paths_aout
 .Pq Vt str
 Set to the list of shared library paths to use with
 .Xr ldconfig 8
 legacy
 .Xr a.out 5
 support.
 .It Va ldconfig_insecure
 .Pq Vt bool
 The
 .Xr ldconfig 8
 utility normally refuses to use directories
 which are writable by anyone except root.
 Set this variable to
 .Dq Li YES
 to disable that security check during system startup.
 .It Va ldconfig_local_dirs
 .Pq Vt str
 Set to the list of local
 .Xr ldconfig 8
 directories.
 The names of all files in the directories listed will be
 passed as arguments to
 .Xr ldconfig 8 .
 .It Va ldconfig_local32_dirs
 .Pq Vt str
 Set to the list of local 32-bit compatibility
 .Xr ldconfig 8
 directories.
 The names of all files in the directories listed will be
 passed as arguments to
 .Dq Nm ldconfig Fl 32 .
 .It Va kern_securelevel_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to set the kernel security level at system startup.
 .It Va kern_securelevel
 .Pq Vt int
 The kernel security level to set at startup.
 The allowed
 .Ar value
 ranges from \-1 (the compile time default) to 3 (the
 most secure).
 See
 .Xr security 7
 for the list of possible security levels and their effect
 on system operation.
 .It Va sshd_program
 .Pq Vt str
 Path to the SSH server program
 .Pa ( /usr/sbin/sshd
 is the default).
 .It Va sshd_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start
 .Xr sshd 8
 at system boot time.
 .It Va sshd_flags
 .Pq Vt str
 If
 .Va sshd_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr sshd 8
 daemon.
 .It Va ftpd_program
 .Pq Vt str
 Path to the FTP server program
 .Pa ( /usr/libexec/ftpd
 is the default).
 .It Va ftpd_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to start
 .Xr ftpd 8
 as a stand-alone daemon at system boot time.
 .It Va ftpd_flags
 .Pq Vt str
 If
 .Va ftpd_enable
 is set to
 .Dq Li YES ,
 these are the additional flags to pass to the
 .Xr ftpd 8
 daemon.
 .It Va watchdogd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 start the
 .Xr watchdogd 8
 daemon at boot time.
 This requires that the kernel have been compiled with a
 .Xr watchdog 4
 compatible device.
 .It Va watchdogd_flags
 .Pq Vt str
 If
 .Va watchdogd_enable
 is set to
 .Dq Li YES ,
 these are the flags passed to the
 .Xr watchdogd 8
 daemon.
 .It Va performance_cx_lowest
 .Pq Vt str
 CPU idle state to use while on AC power.
 The string
 .Dq Li LOW
 indicates that
 .Xr acpi 4
 should use the lowest power state available while
 .Dq Li HIGH
 indicates that the lowest latency state (less power savings) should be used.
 .It Va performance_cpu_freq
 .Pq Vt str
 CPU clock frequency to use while on AC power.
 The string
 .Dq Li LOW
 indicates that
 .Xr cpufreq 4
 should use the lowest frequency available while
 .Dq Li HIGH
 indicates that the highest frequency (less power savings) should be used.
 .It Va economy_cx_lowest
 .Pq Vt str
 CPU idle state to use when off AC power.
 The string
 .Dq Li LOW
 indicates that
 .Xr acpi 4
 should use the lowest power state available while
 .Dq Li HIGH
 indicates that the lowest latency state (less power savings) should be used.
 .It Va economy_cpu_freq
 .Pq Vt str
 CPU clock frequency to use when off AC power.
 The string
 .Dq Li LOW
 indicates that
 .Xr cpufreq 4
 should use the lowest frequency available while
 .Dq Li HIGH
 indicates that the highest frequency (less power savings) should be used.
 .It Va jail_enable
 .Pq Vt bool
 If set to
 .Dq Li NO ,
 any configured jails will not be started.
 .It Va jail_parallel_start
 .Pq Vt bool
 If set to
 .Dq Li YES
 all configured jails will be started in the background (= in parallel).
 .It Va jail_list
 .Pq Vt str
 A space separated list of names for jails.
 This is purely a configuration aid to help identify and
 configure multiple jails.
 The names specified in this list will be used to
 identify settings common to an instance of a jail,
 and should contain alphanumeric characters only.
 Assuming that the jail in question was named
 .Li vjail ,
 you would have the following dependent variables:
 .Bd -literal
 jail_vjail_hostname="jail.example.com"
 jail_vjail_ip="192.0.2.100"
 jail_vjail_rootdir="/var/jails/vjail/root"
 .Ed
 .Pp
 .It Va jail_flags
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _flags
 for every jail in
 .Va jail_list .
 .It Va jail_interface
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _interface
 for every jail in
 .Va jail_list .
 .It Va jail_fstab
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _fstab
 for every jail in
 .Va jail_list .
 .It Va jail_mount_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 sets
 .Va jail_ Ns Ao Ar jname Ac Ns Va _mount_enable
 to
 .Dq Li YES
 by default for every jail in
 .Va jail_list .
 .It Va jail_devfs_ruleset
 .Pq Vt str
 Unset by default.
 When set, sets
 .Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_ruleset
 to given value for every jail in
 .Va jail_list .
 .It Va jail_devfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 sets
 .Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_enable
 to
 .Dq Li YES
 by default for every jail in
 .Va jail_list .
 .It Va jail_fdescfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 sets
 .Va jail_ Ns Ao Ar jname Ac Ns Va _fdescfs_enable
 to
 .Dq Li YES
 by default for every jail in
 .Va jail_list .
 .It Va jail_procfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 sets
 .Va jail_ Ns Ao Ar jname Ac Ns Va _procfs_enable
 to
 .Dq Li YES
 by default for every jail in
 .Va jail_list .
 .It Va jail_exec_prestart Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestart Ns Aq Ar N
 for every jail in
 .Va jail_list .
 .It Va jail_exec_start
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_start
 for every jail in
 .Va jail_list .
 .It Va jail_exec_afterstart Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_afterstart Ns Aq Ar N
 for every jail in
 .Va jail_list .
 .It Va jail_exec_poststart Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststart Ns Aq Ar N
 for every jail in
 .Va jail_list .
 .It Va jail_exec_prestop Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestop Ns Aq Ar N
 for every jail in
 .Va jail_list .
 .It Va jail_exec_stop
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_stop
 for every jail in
 .Va jail_list .
 .It Va jail_exec_poststop Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 When set, use as default value for
 .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststop Ns Aq Ar N
 for every jail in
 .Va jail_list .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _rootdir
 .Pq Vt str
 Unset by default.
 Set to the root directory used by jail
 .Ar jname .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _hostname
 .Pq Vt str
 Unset by default.
 Set to the fully qualified domain name (FQDN) assigned to jail
 .Ar jname .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _ip
 .Pq Vt str
 Unset by default.
 Set to the (primary) IPv4 and/or IPv6 address(es) assigned to the jail.
 The argument can be a sole address or a comma separated list of addresses.
 Additionally each address can be prefixed by the name of an interface
 followed by a pipe to overwrite
 .Va jail_ Ns Ao Ar jname Ac Ns Va _interface
 or
 .Va jail_interface
 and/or suffixed by a netmask, prefixlen or prefix.
 In case no netmask, prefixlen or prefix is given,
 .Sq /32
 will be used for IPv4 and
 .Sq /128
 will be used for an IPv6 address.
 If no address is given for the jail then the jail will be started with
 no networking support.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _ip_multi Ns Aq Ar n
 .Pq Vt str
 Unset by default.
 Set additional IPv4 and/or IPv6 address(es) assigned to the jail.
 The sequence starts with
 .Dq Li _multi0
 and the numbers have to be strictly ascending.
 These entries follow the same syntax as their primary
 .Va jail_ Ns Ao Ar jname Ac Ns Va _ip
 entry.
 The order of the entries can be important as the first address for
 each address family found will be the primary address of the jail.
 See
 .Va ip-addresses
 option in
 .Xr jail 8
 for more details.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _flags
 .Pq Vt str
 Set to
 .Dq Li -l -U root
 by default.
 These are flags to pass to
 .Xr jail 8 .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _interface
 .Pq Vt str
 Unset by default.
 When set, sets the interface to use when setting IP address alias.
 Note that the alias is created at jail startup and removed at jail shutdown.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _fib
 .Pq Vt str
 Unset by default.
 When set, the jail is started with the specified forwarding table (sometimes
 referred to as a routing table) via
 .Xr setfib 1 .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _fstab
 .Pq Vt str
 Set to
 .Pa /etc/fstab. Ns Aq Ar jname
 by default.
 This is the file system information file to use for jail
 .Ar jname .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _mount_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 mount all file systems from
 .Va jail_ Ns Ao Ar jname Ac Ns Va _fstab
 at jail startup.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_ruleset
 .Pq Vt str
 Unset by default.
 When set, defines the device file system ruleset file to use for jail
 .Ar jname .
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 mount the device file system inside jail
 .Ar jname
 at jail startup.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _fdescfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 mount the file-descriptor file system inside jail
 .Ar jname
 at jail startup.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _procfs_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 by default.
 When set to
 .Dq Li YES ,
 mount the process file system inside jail
 .Ar jname
 at jail startup.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestart Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 This is the command run as
 .Ar N Ns
 th command
 before jail startup, where
 .Ar N
 is 0, 1, and so on.
 It is run outside the jail.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_start
 .Pq Vt str
 Set to
 .Dq Li /bin/sh /etc/rc
 by default.
 This is the command executed in a jail at jail startup.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_afterstart Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 This is the command run as
 .Ar N Ns
 th command
 in a jail
 after jail startup, where
 .Ar N
 is 1, 2, and so on.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststart Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 This is the command run as
 .Ar N Ns
 th command
 after jail startup, where
 .Ar N
 is 0, 1, and so on.
 It is run outside the jail.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestop Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 This is the command run as
 .Ar N Ns
 th command
 before jail shutdown, where
 .Ar N
 is 0, 1, and so on.
 It is run outside the jail.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_stop
 .Pq Vt str
 Set to
 .Dq Li /bin/sh /etc/rc.shutdown
 by default.
 This is the command executed in a jail at jail shutdown.
 .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststop Ns Aq Ar N
 .Pq Vt str
 Unset by default.
 This is the command run as
 .Ar N Ns
 th command
 after jail shutdown, where
 .Ar N
 is 0, 1, and so on.
 It is run outside the jail.
 .It Va jail_set_hostname_allow
 .Pq Vt bool
 If set to
 .Dq Li NO ,
 do not allow the root user in a jail to set its hostname.
 .It Va jail_socket_unixiproute_only
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 do not allow any sockets,
 besides UNIX/IP/route sockets,
 to be used within a jail.
 .It Va jail_sysvipc_allow
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 allow applications within a jail to use System V IPC.
 .\" -----------------------------------------------------
 .It Va harvest_interrupt
 .Pq Vt bool
 Set to
 .Dq Li YES
 to use hardware interrupts as an entropy source.
 Refer to
 .Xr random 4
 for more information.
 .It Va harvest_ethernet
 .Pq Vt bool
 Set to
 .Dq Li YES
 to use LAN traffic as an entropy source.
 Refer to
 .Xr random 4
 for more information.
 .It Va harvest_p_to_p
 .Pq Vt bool
 Set to
 .Dq Li YES
 to use serial line traffic as an entropy source.
 Refer to
 .Xr random 4
 for more information.
 .It Va entropy_dir
 .Pq Vt str
 Set to
 .Dq Li NO
 to disable caching entropy via
 .Xr cron 8 .
 Otherwise set to the directory used to store entropy files.
 .It Va entropy_file
 .Pq Vt str
 Set to
 .Dq Li NO
 to disable caching entropy through reboots.
 Otherwise set to the filename used to store cached entropy through
 reboots.
 This file should be located on the root file system to seed the
 .Xr random 4
 device as early as possible in the boot process.
 .It Va entropy_save_sz
 .Pq Vt int
 Size of the entropy cache files saved by
 .Nm save-entropy
 periodically.
 .It Va entropy_save_num
 .Pq Vt int
 Number of entropy cache files to save by
 .Nm save-entropy
 periodically.
 .It Va ipsec_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to run
 .Xr setkey 8
 on
 .Va ipsec_file
 at boot time.
 .It Va ipsec_file
 .Pq Vt str
 Configuration file for
 .Xr setkey 8 .
 .It Va dmesg_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to save
 .Xr dmesg 8
 to
 .Pa /var/run/dmesg.boot
 on boot.
 .It Va rcshutdown_timeout
 .Pq Vt int
 If set, start a watchdog timer in the background which will terminate
 .Pa rc.shutdown
 if
 .Xr shutdown 8
 has not completed within the specified time (in seconds).
 Notice that in addition to this soft timeout,
 .Xr init 8
 also applies a hard timeout for the execution of
 .Pa rc.shutdown .
 This is configured via
 .Xr sysctl 8
 variable
 .Va kern.init_shutdown_timeout
 and defaults to 120 seconds.
 Setting the value of
 .Va rcshutdown_timeout
 to more than 120 seconds will have no effect until the
 .Xr sysctl 8
 variable
 .Va kern.init_shutdown_timeout
 is also increased.
 .It Va virecover_enable
 .Pq Vt bool
 Set to
 .Dq Li NO
 to prevent the system from trying to
 recover prematurely terminated
 .Xr vi 1
 sessions.
 .It Va ugidfw_enable
 .Pq Vt bool
 Set to
 .Dq Li YES
 to load the
 .Xr mac_bsdextended 4
 module upon system initialization and load a default
 ruleset file.
 .It Va bsdextended_script
 .Pq Vt str
 The default
 .Xr mac_bsdextended 4
 ruleset file to load.
 The default value of this variable is
 .Pa /etc/rc.bsdextended .
 .It Va newsyslog_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 run
 .Xr newsyslog 8
 command at startup.
 .It Va newsyslog_flags
 .Pq Vt str
 If
 .Va newsyslog_enable
 is set to
 .Dq Li YES ,
 these are the flags to pass to the
 .Xr newsyslog 8
 program.
 The default is
 .Dq Li -CN ,
 which causes log files flagged with a
 .Cm C
 to be created.
 .It Va mdconfig_md Ns Aq Ar X
 .Pq Vt str
 Arguments to
 .Xr mdconfig 8
 for
 .Xr md 4
 device
 .Ar X .
 At minimum a
 .Fl t Ar type
 must be specified and either a
 .Fl s Ar size
 for malloc or swap backed
 .Xr md 4
 devices or a
 .Fl f Ar file
 for vnode backed
 .Xr md 4
 devices.
 Note that
 .Va mdconfig_md Ns Aq Ar X
 variables are evaluated until one variable is unset or null.
 .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _newfs
 .Pq Vt str
 Optional arguments passed to
 .Xr newfs 8
 to initialize
 .Xr md 4
 device
 .Ar X .
 .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _owner
 .Pq Vt str
 An ownership specification passed to
 .Xr chown 8
 after the specified
 .Xr md 4
 device
 .Ar X
 has been mounted.
 Both the
 .Xr md 4
 device and the mount point will be changed.
 .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _perms
 .Pq Vt str
 A mode string passed to
 .Xr chmod 1
 after the specified
 .Xr md 4
 device
 .Ar X
 has been mounted.
 Both the
 .Xr md 4
 device and the mount point will be changed.
 .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _files
 .Pq Vt str
 Files to be copied to the mount point of the
 .Xr md 4
 device
 .Ar X
 after it has been mounted.
 .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _cmd
 .Pq Vt str
 Command to execute after the specified
 .Xr md 4
 device
 .Ar X
 has been mounted.
 Note that the command is passed to
 .Ic eval
 and that both
 .Va _dev
 and
 .Va _mp
 variables can be used to reference respectively the
 .Xr md 4
 device and the mount point.
 Assuming that the
 .Xr md 4
 device is
 .Li md0 ,
 one could set the following:
 .Bd -literal
 mdconfig_md0_cmd="tar xfzC /var/file.tgz \e${_mp}"
 .Ed
 .It Va autobridge_interfaces
 .Pq Vt str
 Set to the list of bridge interfaces that newly arriving interfaces will be
 checked against and automatically added to.
 If not set to
 .Dq Li NO
 then for each whitespace separated
 .Ar element
 in the value, a
 .Va autobridge_ Ns Aq Ar element
 variable is assumed to exist which has a whitespace separated list of interface
 names to match; these names can use wildcards.
 For example:
 .Bd -literal
 autobridge_interfaces="bridge0"
 autobridge_bridge0="tap* dc0 vlan[345]"
 .Ed
 .It Va mixer_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable support for sound mixer.
 .It Va hcsecd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable Bluetooth security daemon.
 .It Va hcsecd_config
 .Pq Vt str
 Configuration file for
 .Xr hcsecd 8 .
 Default
 .Pa /etc/bluetooth/hcsecd.conf .
 .It Va sdpd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable Bluetooth Service Discovery Protocol daemon.
 .It Va sdpd_control
 .Pq Vt str
 Path to
 .Xr sdpd 8
 control socket.
 Default
 .Pa /var/run/sdp .
 .It Va sdpd_groupname
 .Pq Vt str
 Sets
 .Xr sdpd 8
 group to run as after it initializes.
 Default
 .Dq Li nobody .
 .It Va sdpd_username
 .Pq Vt str
 Sets
 .Xr sdpd 8
 user to run as after it initializes.
 Default
 .Dq Li nobody .
 .It Va bthidd_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable Bluetooth Human Interface Device daemon.
 .It Va bthidd_config
 .Pq Vt str
 Configuration file for
 .Xr bthidd 8 .
 Default
 .Pa /etc/bluetooth/bthidd.conf .
 .It Va bthidd_hids
 .Pq Vt str
 Path to a file, where
 .Xr bthidd 8
 will store information about known HID devices.
 Default
 .Pa /var/db/bthidd.hids .
 .It Va rfcomm_pppd_server_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 enable Bluetooth RFCOMM PPP wrapper daemon.
 .It Va rfcomm_pppd_server_profile
 .Pq Vt str
 The name of the profile to use from
 .Pa /etc/ppp/ppp.conf .
 Multiple profiles can be specified here.
 Also used to specify per-profile overrides.
 When the profile name contains any of the characters
 .Dq Li .-/+
 they are translated to
 .Dq Li _
 for the purposes of the override variable names.
 .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _bdaddr
 .Pq Vt str
 Overrides local address to listen on.
 By default
 .Xr rfcomm_pppd 8
 will listen on
 .Dq Li ANY
 address.
 The address can be specified as BD_ADDR or name.
 .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _channel
 .Pq Vt str
 Overrides local RFCOMM channel to listen on.
 By default
 .Xr rfcomm_pppd 8
 will listen on RFCOMM channel 1.
 Must be set properly if multiple profiles are used at the same time.
 .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _register_sp
 .Pq Vt bool
 Tells
 .Xr rfcomm_pppd 8
 if it should register Serial Port service on the specified RFCOMM channel.
 Default
 .Dq Li NO .
 .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _register_dun
 .Pq Vt bool
 Tells
 .Xr rfcomm_pppd 8
 if it should register Dial-Up Networking service on the specified
 RFCOMM channel.
 Default
 .Dq Li NO .
 .It Va ubthidhci_enable
 .Pq Vt bool
 If set to
 .Dq Li YES ,
 change the USB Bluetooth controller from HID mode to HCI mode.
 You also need to specify the location of USB Bluetooth controller with the
 .Va ubthidhci_busnum
 and
 .Va ubthidhci_addr
 variables.
 .It Va ubthidhci_busnum
 Bus number where the USB Bluetooth controller is located.
 Check the output of
 .Xr usbconfig 8
 on your system to find this information.
 .It Va ubthidhci_addr
 Bus address of the USB Bluetooth controller.
 Check the output of
 .Xr usbconfig 8
 on your system to find this information.
 .El
 .Sh FILES
 .Bl -tag -width ".Pa /etc/defaults/rc.conf" -compact
 .It Pa /etc/defaults/rc.conf
 .It Pa /etc/rc.conf
 .It Pa /etc/rc.conf.local
 .El
 .Sh SEE ALSO
 .Xr catman 1 ,
 .Xr chmod 1 ,
 .Xr gdb 1 ,
 .Xr info 1 ,
 .Xr kbdcontrol 1 ,
 .Xr makewhatis 1 ,
 .Xr sh 1 ,
 .Xr vi 1 ,
 .Xr vidcontrol 1 ,
 .Xr bridge 4 ,
 .Xr dummynet 4 ,
 .Xr ip 4 ,
 .Xr ipf 4 ,
 .Xr ipfw 4 ,
 .Xr ipnat 4 ,
 .Xr kld 4 ,
 .Xr pf 4 ,
 .Xr pflog 4 ,
 .Xr pfsync 4 ,
 .Xr tcp 4 ,
 .Xr udp 4 ,
 .Xr exports 5 ,
 .Xr fstab 5 ,
 .Xr ipf 5 ,
 .Xr ipnat 5 ,
 .Xr motd 5 ,
 .Xr newsyslog.conf 5 ,
 .Xr pf.conf 5 ,
 .Xr security 7 ,
 .Xr accton 8 ,
 .Xr amd 8 ,
 .Xr apm 8 ,
 .Xr atm 8 ,
 .Xr bthidd 8 ,
 .Xr chkprintcap 8 ,
 .Xr chown 8 ,
 .Xr cron 8 ,
 .Xr dhclient 8 ,
 .Xr ftpd 8 ,
 .Xr geli 8 ,
 .Xr hcsecd 8 ,
 .Xr ifconfig 8 ,
 .Xr inetd 8 ,
 .Xr ipf 8 ,
 .Xr ipfw 8 ,
 .Xr ipnat 8 ,
 .Xr jail 8 ,
 .Xr kldxref 8 ,
 .Xr lpd 8 ,
 .Xr mdconfig 8 ,
 .Xr mdmfs 8 ,
 .Xr mixer 8 ,
 .Xr mountd 8 ,
 .Xr moused 8 ,
 .Xr mrouted 8 ,
 .Xr named 8 ,
 .Xr newfs 8 ,
 .Xr newsyslog 8 ,
 .Xr nfsd 8 ,
 .Xr ntpd 8 ,
 .Xr ntpdate 8 ,
 .Xr pfctl 8 ,
 .Xr pflogd 8 ,
 .Xr powerd 8 ,
 .Xr quotacheck 8 ,
 .Xr quotaon 8 ,
 .Xr rc 8 ,
 .Xr rc.sendmail 8 ,
 .Xr rfcomm_pppd 8 ,
 .Xr route 8 ,
 .Xr routed 8 ,
 .Xr rpcbind 8 ,
 .Xr rpc.lockd 8 ,
 .Xr rpc.statd 8 ,
 .Xr rwhod 8 ,
 .Xr savecore 8 ,
 .Xr sdpd 8 ,
 .Xr sshd 8 ,
 .Xr swapon 8 ,
 .Xr sysctl 8 ,
 .Xr syslogd 8 ,
 .Xr timed 8 ,
 .Xr usbconfig 8 ,
 .Xr wlandebug 8 ,
 .Xr yp 8 ,
 .Xr ypbind 8 ,
 .Xr ypserv 8 ,
 .Xr ypset 8
 .Sh HISTORY
 The
 .Nm
 file appeared in
 .Fx 2.2.2 .
 .Sh AUTHORS
 .An Jordan K. Hubbard .
Index: projects/binutils-2.17/share/man/man9/vrele.9
===================================================================
--- projects/binutils-2.17/share/man/man9/vrele.9	(revision 215829)
+++ projects/binutils-2.17/share/man/man9/vrele.9	(revision 215830)
@@ -1,105 +1,101 @@
 .\" -*- nroff -*-
 .\"
 .\" Copyright (c) 1996 Doug Rabson
 .\" Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 .\"
 .\" All rights reserved.
 .\"
 .\" This program is free software.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR
 .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
 .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 17, 2010
+.Dd November 20, 2010
 .Dt VRELE 9
 .Os
 .Sh NAME
 .Nm vput ,
 .Nm vrele ,
 .Nm vunref
 .Nd decrement the use count for a vnode
 .Sh SYNOPSIS
 .In sys/param.h
 .In sys/vnode.h
 .Ft void
 .Fn vput "struct vnode *vp"
 .Ft void
 .Fn vrele "struct vnode *vp"
 .Ft void
 .Fn vunref "struct vnode *vp"
 .Sh DESCRIPTION
 Decrement the
 .Va v_usecount
 field of a vnode.
 .Bl -tag -width 2n
 .It Fa vp
 the vnode to decrement
 .El
 .Pp
 The
 .Fn vrele
 function takes an unlocked vnode and returns with the vnode unlocked.
 .Pp
 The
 .Fn vput
 function should be given a locked vnode as argument; the vnode is unlocked
 after the function returns.
 The
 .Fn vput
 is operationally equivalent to calling
 .Xr VOP_UNLOCK 9
 followed by
 .Xr vrele 9 ,
 with less overhead.
 .Pp
 The
 .Fn vunref
 function takes a locked vnode as argument, and returns with the vnode locked.
-Nonetheless, the
-.Fn vunref
-might drop the vnode lock during the operation, so caller should not expect
-that non-doomed vnode is still non-doomed after the function returned.
 .Pp
 Any code in the system which signified its use of a vnode by usecount
 should call one of the listed functions to decrement the use counter.
 If the
 .Va v_usecount
 field of the non-doomed vnode reaches zero, then it will be inactivated
 and placed on the free list.
 Since the functions might need to call VOPs for the vnode, the
 .Va Giant
 mutex should be conditionally locked around the call.
 .Pp
 The hold count for the vnode is always greater than or equal to the usecount.
 Non-forced unmount fails when mount point owns a vnode that has non-zero
 usecount, see
 .Xr vflush 9 .
 .Sh SEE ALSO
 .Xr vget 9 ,
 .Xr vnode 9 ,
 .Xr vref 9 ,
 .Xr vrefcnt 9
 .Sh AUTHORS
 This manual page was written by
 .An Doug Rabson
 and
 .An Konstantin Belousov .
Index: projects/binutils-2.17/share/misc/bsd-family-tree
===================================================================
--- projects/binutils-2.17/share/misc/bsd-family-tree	(revision 215829)
+++ projects/binutils-2.17/share/misc/bsd-family-tree	(revision 215830)
@@ -1,586 +1,587 @@
 The UNIX system family tree: Research and BSD
 ---------------------------------------------
 
 First Edition (V1)
      |
 Second Edition (V2)
      |
 Third Edition (V3)
      |
 Fourth Edition (V4)
      |
 Fifth Edition (V5)
      |
 Sixth Edition (V6) -----*
        \                |
         \               |
          \              |
 Seventh Edition (V7)    |
             \           |
              \        1BSD
              32V        |
                \      2BSD---------------*
                 \    /                   |
                  \  /                    |
                   \/                     |
                  3BSD                    |
                   |                      |
                4.0BSD                2.79BSD
                   |                      |
                4.1BSD --------------> 2.8BSD
                   |                      |
               4.1aBSD -----------\       |
                   |                \     |
               4.1bBSD                \   |
                   |                    \ |
       *------ 4.1cBSD --------------> 2.9BSD
      /            |                      |
 Eighth Edition    |                   2.9BSD-Seismo
      |            |                      |
      +----<--- 4.2BSD               2.9.1BSD
      |            |                      |
      +----<--- 4.3BSD -------------> 2.10BSD
      |            |               /      |
 Ninth Edition     |              / 2.10.1BSD
      |         4.3BSD Tahoe-----+        |
      |            |              \       |
      |            |                \     |
      v            |                  2.11BSD
 Tenth Edition     |                      |
                   |                  2.11BSD rev #430
                4.3BSD NET/1              |
                   |                      v
                4.3BSD Reno
                   |
    *---------- 4.3BSD NET/2 -------------------+-------------*
    |                    |                      |             |
 386BSD 0.0              |                      |           BSD/386 ALPHA
    |                    |                      |             |
 386BSD 0.1 ------------>+                      |           BSD/386 0.3.[13]
    |     \              |                  4.4BSD Alpha      |
    |     386BSD 1.0     |                      |           BSD/386 0.9.[34]
    |                    |                  4.4BSD            |
    |                    |                    / |             |
    |                    |   4.4BSD-Encumbered  |             |
    |                 NetBSD 0.8                |           BSD/386 1.0
    |                    |                      |             |
 FreeBSD 1.0          NetBSD 0.9                |           BSD/386 1.1
    |                    |           .----- 4.4BSD Lite       |
 FreeBSD 1.1             |          /   /       |     \       |
    |                    |         /   /        |      \      |
 FreeBSD 1.1.5       .---|--------'   /         |       \     |
    |               /    |           /          |        \    |
 FreeBSD 1.1.5.1   /     |          /           |         \   |
    |             /   NetBSD 1.0 <-'            |          \  |
    |            /       |                      |           \ |
 FreeBSD 2.0 <--'        |                      |           BSD/OS 2.0
    |                     \                     |             |
 FreeBSD 2.0.5             \                    |           BSD/OS 2.0.1
    |                 .-----\------------- 4.4BSD Lite2       |
    |                 |      \             |  |   |   |       |
    |                 | .-----|------Rhapsody |   |   |       |
    |                 | |     |        NetBSD 1.3 |   |       |
    |                 | |     |           OpenBSD 2.3 |       |
    |                 | |     |                BSD/OS 3.0     |
 FreeBSD 2.1          | |     |                               |
  |   |               | |  NetBSD 1.1 ------.               BSD/OS 2.1
  | FreeBSD 2.1.5     | |     |              \                |
  |     |             | |  NetBSD 1.2         \             BSD/OS 3.0
  | FreeBSD 2.1.6     | |     |  \          OpenBSD 2.0       |
  |     |             | |     |   \             |             |
  | FreeBSD 2.1.6.1   | |     |    \            |             |
  |     |             | |     |     \           |             |
  | FreeBSD 2.1.7     | |     |      |          |             |
  |     |             | |     |  NetBSD 1.2.1   |             |
  | FreeBSD 2.1.7.1   | |     |                 |             |
  |                   | |     |                 |             |
  |                   | |     |                 |             |
  *-FreeBSD 2.2       | |     |                 |             |
  |        \          | |     |                 |             |
  |     FreeBSD 2.2.1 | |     |                 |             |
  |         |         | |     |                 |             |
  |     FreeBSD 2.2.2 | |     |             OpenBSD 2.1       |
  |         |         | |     |                 |             |
  |     FreeBSD 2.2.5 | |     |                 |             |
  |         |         | |     |             OpenBSD 2.2       |
  |         |         | |  NetBSD 1.3           |             |
  |     FreeBSD 2.2.6 | |     |    |            |             |
  |         |         | |     | NetBSD 1.3.1    |           BSD/OS 3.1
  |         |         | |     |    |        OpenBSD 2.3       |
  |         |         | |     | NetBSD 1.3.2    |             |
  |     FreeBSD 2.2.7 | |     |    |            |             |
  |         |         | |     |    |            |           BSD/OS 4.0
  |         v         | |     |    |            |             |
  |     FreeBSD 2.2.8 | |     |    |            |             |
  |                   | |     |    |        OpenBSD 2.4       |
 FreeBSD 3.0 <--------* |     |    v            |             |
  |                     |     | NetBSD 1.3.3    |             |
  *---FreeBSD 3.1       |     |                 |             |
  |       |             |     |                 |           BSD/OS 4.0.1
  |   FreeBSD 3.2----*  |  NetBSD 1.4       OpenBSD 2.5       |
  |       |          |  |  |  |    |            |             |
  |       |          |  |  |  |    |            |             |
  |       |          |  |  |  |    |            |             |
  |   FreeBSD 3.3    |  |  |  | NetBSD 1.4.1    |             |
  |       |          |  |  |  |    |        OpenBSD 2.6       |
  |   FreeBSD 3.4    |  |  |  |    |            |             |
  |           |      |  |  |  |    |            |           BSD/OS 4.1
 FreeBSD 4.0  |      |  |  |  | NetBSD 1.4.2    |             |
  |           |      |  |  |  |    |            |             |
  |           |      |  |  |  |    |            |             |
  |   FreeBSD 3.5    |  |  |  |    |        OpenBSD 2.7       |
  |           |      |  |  |  |    |            |             |
  |   FreeBSD 3.5.1  |  |  |  |    |            |             |
  |                  |  |  |  |    |            |             |
  *---FreeBSD 4.1    |  |  |  |    |            |             |
  |      |           |  | (?) |    |            |             |
  |   FreeBSD 4.1.1  |  |  /  |    |            |             |
  |      |           |  | /   |    |            |             |
  |   FreeBSD 4.2   Darwin/   | NetBSD 1.4.3    |             |
  |      |         Mac OS X   |             OpenBSD 2.8     BSD/OS 4.2
  |      |             |      |                 |             |
  |      |             |      |                 |             |
  |      |           10.0  NetBSD 1.5           |             |
  |   FreeBSD 4.3      |      |    |            |             |
  |      |             |      |    |        OpenBSD 2.9       |
  |      |             |      | NetBSD 1.5.1    |             |
  |      |             |      |    |            |             |
  |   FreeBSD 4.4-.    |      | NetBSD 1.5.2    |             |
  |      |        | Mac OS X  |    |            |             |
  |      |        |   10.1    |    |        OpenBSD 3.0       |
  |   FreeBSD 4.5 |    |      |    |            |             |
  |      |         \   |      |    |            |           BSD/OS 4.3
  |   FreeBSD 4.6   \  |      |    |        OpenBSD 3.1       |
  |      |           \ |      | NetBSD 1.5.3    |             |
  |   FreeBSD 4.6.2 Mac OS X  |                 |             |
  |      |            10.2    |                 |             |
  |   FreeBSD 4.7      |      |                 |             |
  |      |             |   NetBSD 1.6       OpenBSD 3.2       |
  |   FreeBSD 4.8      |      |    |            |             |
  |      |             |      | NetBSD 1.6.1    |             |
  |      |--------.    |      |    |        OpenBSD 3.3     BSD/OS 5.0
  |      |         \   |      |    |            |             |
  |   FreeBSD 4.9   |  |      |    |        OpenBSD 3.4     BSD/OS 5.1 ISE
  |      |          |  |      |    |            |
  |      |          |  |      | NetBSD 1.6.2    |
  |      |          |  |      |    |            |
  |      |          |  |      |    |        OpenBSD 3.5
  |      |          |  |      |    v            |
  |   FreeBSD 4.10  |  |      |                 |
  |      |          |  |      |                 |
  |   FreeBSD 4.11  |  |      |                 |
  |                 |  |      |                 |
  |                  `-|------|-----------------|---------------------.
  |                    |      |                 |                      \
 FreeBSD 5.0           |      |                 |                       |
  |                    |      |                 |                       |
 FreeBSD 5.1           |      |                 |                 DragonFly 1.0
  |          \         |      |                 |                       |
  |           ----- Mac OS X  |                 |                       |
  |                   10.3    |                 |                       |
 FreeBSD 5.2           |      |                 |                       |
  |      |             |      |                 |                       |
  |   FreeBSD 5.2.1    |      |                 |                       |
  |                    |      |                 |                       |
  *-------FreeBSD 5.3  |      |                 |                       |
  |           |        |      |             OpenBSD 3.6                 |
  |           |        |   NetBSD 2.0           |                       |
  |           |        |      | |  |            |                DragonFly 1.2.0
  |           |     Mac OS X  | | NetBSD 2.0.2  |                       |
  |           |       10.4    | |  |            |                       |
  |       FreeBSD 5.4  |      | |  |            |                       |
  |           |        |      | |  |        OpenBSD 3.7                 |
  |           |        |      | | NetBSD 2.0.3  |                       |
  |           |        |      | |  |            |                       |
  *--FreeBSD  |        |      | |  v        OpenBSD 3.8                 |
  |    6.0    |        |      | |               |                       |
  |     |     |        |      |  \              |                       |
  |     |     |        |      | NetBSD 2.1      |                       |
  |     |     |        |      |                 |                       |
  |     |     |        |   NetBSD 3.0           |                       |
  |     |     |        |      | |  |            |                DragonFly 1.4.0
  |     |     |        |      | |  |        OpenBSD 3.9                 |
  |  FreeBSD  |        |      | |  |            |                       |
  |    6.1    |        |      | |  |            |                       |
  |     |  FreeBSD 5.5 |      | |  |            |                       |
  |     |              |      | | NetBSD 3.0.1  |                DragonFly 1.6.0
  |     |              |      | |  |            |                       |
  |     |              |      | |  |        OpenBSD 4.0                 |
  |     |              |      | | NetBSD 3.0.2  |                       |
  |     |              |      | NetBSD 3.1      |                       |
  | FreeBSD 6.2        |      |                 |                       |
  |     |              |      |                 |                DragonFly 1.8.0
  |     |              |      |             OpenBSD 4.1                 |
  |     |              |      |                 |                DragonFly 1.10.0
  |     |           Mac OS X  |                 |                       |
  |     |             10.5    |                 |                       |
  |     |              |      |             OpenBSD 4.2                 |
  |     |              |   NetBSD 4.0           |                       |
  | FreeBSD 6.3        |      |                 |                       |
  |            \       |      |                 |                       |
  *--FreeBSD    |      |      |                 |                DragonFly 1.12.0
  |    7.0      |      |      |                 |                       |
  |     |       |      |      |             OpenBSD 4.3                 |
  |     |       |      |      |                 |                DragonFly 2.0.0
  |     |    FreeBSD   |      |             OpenBSD 4.4                 |
  |     |      6.4     |      |                 |                       |
  |     |              |      |                 |                       |
  |  FreeBSD 7.1       |      |                 |                       |
  |     |              |      |                 |                DragonFly 2.2.0
  |  FreeBSD 7.2       |   NetBSD 5.0       OpenBSD 4.5                 |
- |             \      |      |                 |                       |
- |              |     |      |                 |                DragonFly 2.4.0
- |              |     |      |             OpenBSD 4.6                 |
- |              |     |      |                 |                       |
- *--FreeBSD     |     |      |                 |                       |
- |    8.0       |     |      |                 |                       |
- |     |    FreeBSD   |      |                 |                       |
- |     |       7.3    |      |                 |                DragonFly 2.6.0
- |     |              |      |             OpenBSD 4.7                 |
- |  FreeBSD           |      |                 |                       |
- |    8.1             |      |                 |                       |
- |     |              |      |                 |                DragonFly 2.8.0
- |     |              |      |             OpenBSD 4.8                 |
- |     V              |      |                 |                       |
+ |             \      |      |    |            |                       |
+ |              |     |      |    |            |                DragonFly 2.4.0
+ |              |     |      |    |        OpenBSD 4.6                 |
+ |              |     |      |    |            |                       |
+ *--FreeBSD     |     |      |    |            |                       |
+ |    8.0       |     |      |    |            |                       |
+ |     |    FreeBSD   |      |    |            |                       |
+ |     |       7.3    |      |    |            |                DragonFly 2.6.0
+ |     |              |      |    |        OpenBSD 4.7                 |
+ |  FreeBSD           |      |    |            |                       |
+ |    8.1             |      |    |            |                       |
+ |     |              |      |    |            |                DragonFly 2.8.0
+ |     |              |      |    |        OpenBSD 4.8                 |
+ |     V              |      | NetBSD 5.1      |                       |
  |                    |      |                 |                       |
 FreeBSD 9 -current    |  NetBSD -current  OpenBSD -current             |
  |                    |      |                 |                       |
  v                    v      v                 v                       v
 
 Time
 ----------------
 
 Time tolerance +/- 6 months, depending on which book/article you read; if it
 was the announcement in Usenet or if it was available as tape.
 
 [44B] McKusick, Marshall Kirk, Keith Bostic, Michael J Karels,
 	and John Quarterman. The Design and Implementation of
 	the 4.4BSD Operating System.
 [APL] Apple website [http://www.apple.com/macosx/]
 [BSDI] Berkeley Software Design, Inc.
 [DFB] DragonFlyBSD Project, The.
 [DOC] README, COPYRIGHT on tape.
 [FBD] FreeBSD Project, The.
 [KB]  Keith Bostic. BSD2.10 available from Usenix. comp.unix.sources,
 	Volume 11, Info 4, April, 1987.
 [KKK] Mike Karels, Kirk McKusick, and Keith Bostic. tahoe announcement.
 	comp.bugs.4bsd.ucb-fixes, June 15, 1988.
 [KSJ] Michael J. Karels, Carl F. Smith, and William F. Jolitz.
 	Changes in the Kernel in 2.9BSD. Second Berkeley Software
 	Distribution UNIX Version 2.9, July, 1983.
 [NBD] NetBSD Project, The.
 [OBD] OpenBSD Project, The.
 [QCU] Salus, Peter H. A quarter century of UNIX.
 [SMS] Steven M. Schultz. 2.11BSD, UNIX for the PDP-11.
 [TUHS] The Unix Historical Society.  http://minnie.tuhs.org/Unix_History/.
 [USE] Usenet announcement.
 [WRS] Wind River Systems, Inc.
 [dmr] Dennis Ritchie, via E-Mail
 
 Multics                 1965
 UNIX                    Summer 1969
 				DEC PDP-7
 First   Edition         1971-11-03 [QCU]
 				DEC PDP-11/20, Assembler
 Second  Edition         1972-06-12 [QCU]
 				10 UNIX installations
 Third   Edition         1973-02-xx [QCU]
 				Pipes, 16 installations
 Fourth  Edition         1973-11-xx [QCU]
 				rewriting in C effected,
                                 above 30 installations
 Fifth   Edition         1974-06-xx [QCU]
 				above 50 installations
 Sixth   Edition         1975-05-xx [QCU]
 				port to DEC Vax
 Seventh Edition         1979-01-xx [QCU]
 				first portable UNIX
 Eighth  Edition         1985-02-xx [QCU]
 				VAX 11/750, VAX 11/780 [dmr]
 				descended from 4.1c BSD [dmr]
 				descended from 4.1 BSD [44B]
 			scooping-out and replacement of the character-device
 			and networking part by the streams mechanism
 
 Ninth   Edition         1986-09-xx [QCU]
 Tenth   Edition         1989-10-xx [QCU]
 
 1BSD                    late 1977
 			1978-03-09 [QCU]
 				PDP-11, Pascal, ex(1)
 				30 free copies of 1BSD sent out
 				35 tapes sold for 50 USD [QCU]
 2BSD                    mid 1978 [QCU] 1979-05-10 [TUHS]
 				75 2BSD tapes shipped
 2.79BSD			1980-04-xx [TUHS]
 2.8BSD			1981-07-xx [KSJ]
 
 2.8.1BSD		1982-01-xx [QCU]
 				set of performance improvements
 2.9BSD			1983-07-xx [KSJ]
 2.9.1BSD                1983-11-xx [TUHS]
 2.9BSD-Seismo		1985-08-xx [SMS]
 2.10BSD			1987-04-xx [KKK]
 2.10.1BSD		1989-01-xx [SMS]
 2.11BSD			1992-02-xx [SMS]
 2.11BSD rev #430	1999-12-13 [SMS]
 
 32V			1978-1[01]-xx [QCU]
 3BSD                    late 1979 [QCU] March 1980 [TUHS]
 				virtual memory, page replacement,
                         	demand paging
 4.0BSD                  1980-10-xx
 4.1BSD                  1981-07-08 [DOC]
 4.1aBSD                 1982-04-xx
 				alpha release, 100 sites, networking [44B]
 4.1bBSD				internal release, fast filesystem [44B]
 4.1cBSD                 late 1982
 				beta release, IPC [44B]
 4.2BSD                  1983-09-xx [QCU]
                         1983-08-03 [DOC]
 4.3BSD                  1986-06-xx [QCU]
 			1986-04-05 [KB], [DOC]
 4.3BSD Tahoe            1988-06-15 [QCU], [DOC]
 4.3BSD NET/1            1988-11-xx [QCU]
                         1989-01-01 [DOC]
 4.3BSD Reno             1990-06-29 [QCU], [DOC]
 4.3BSD NET/2            1991-06-28 [QCU], [DOC]
 
 BSD/386 ALPHA		1991-12-xx [BSDI]
 				first code released to people outside BSDI
 386BSD 0.0              1992-02-xx [DOC]
 BSD/386 0.3.1	        1992-04-xx [BSDI] first ext. beta; B customers
 BSD/386 0.3.3		1992-06-xx [BSDI] first CDROM version
 386BSD 0.1              1992-07-28 [DOC]
 4.4BSD Alpha            1992-07-07
 BSD/386 0.9.3		1992-10-xx [BSDI]
 				first external gamma; G customers
 BSD/386 0.9.4		1992-12-xx [BSDI]
 				would have been 1.0 except for request
 				for preliminary injunction
 BSD/386 1.0 		1993-03-xx [BSDI]
 				injunction denied; first official release
 NetBSD 0.8              1993-04-20 [NBD]
 4.4BSD                  1993-06-01 [USE]
 NetBSD 0.9              1993-08-23 [NBD]
 FreeBSD 1.0		1993-11-01 [FBD]
 FreeBSD 1.0.2		1993-11-14 [FBD]
 				supersedes 1.0 13 days after release.
 BSD/386 1.1		1994-02-xx [BSDI]
 4.4BSD Lite             1994-03-01 [USE]
 FreeBSD 1.1		1994-05-07 [FBD]
 FreeBSD 1.1.5		1994-06-30 [FBD]
 FreeBSD 1.1.5.1		1994-07-05 [FBD]
 				supersedes 1.1.5 5 days after release.
 NetBSD 1.0              1994-10-26 [NBD]
 386BSD 1.0              1994-11-12 [USE]
 FreeBSD 2.0		1994-11-23 [FBD]
 BSD/OS 2.0		1995-01-xx [BSDI] 4.4 lite based
 FreeBSD 2.0.5		1995-06-10 [FBD]
 BSD/OS 2.0.1		1995-06-xx [BSDI]
 4.4BSD Lite Release 2	1995-06-xx [44B]
 				the true final distribution from the CSRG
 FreeBSD 2.1.0		1995-11-19 [FBD]
 NetBSD 1.1              1995-11-26 [NBD]
 BSD/OS 2.1		1996-01-xx [BSDI]
 FreeBSD 2.1.5		1996-07-14 [FBD]
 NetBSD 1.2              1996-10-04 [NBD]
 OpenBSD 2.0             1996-10-18 [OBD]
 FreeBSD 2.1.6		1996-11-16 [FBD]
 FreeBSD 2.1.6.1		1996-11-25 [FBD] (sendmail security release)
 Rhapsody		1997-xx-xx
 FreeBSD 2.1.7		1997-02-20 [FBD]
 BSD/OS 3.0		1997-02-xx [BSDI] 4.4 lite2 based
 FreeBSD 2.2.0		1997-03-16 [FBD]
 FreeBSD 2.2.1		1997-03-25 [FBD]
 FreeBSD 2.2.2		1997-05-16 [FBD]
 NetBSD 1.2.1            1997-05-20 [NBD] (patch release)
 OpenBSD 2.1             1997-06-01 [OBD]
 FreeBSD 2.2.5		1997-10-22 [FBD]
 OpenBSD 2.2             1997-12-01 [OBD]
 NetBSD 1.3              1998-01-04 [NBD]
 FreeBSD 2.2.6		1998-03-25 [FBD]
 NetBSD 1.3.1            1998-03-09 [NBD] (patch release)
 BSD/OS 3.1		1998-03-xx [BSDI]
 OpenBSD 2.3             1998-05-19 [OBD]
 NetBSD 1.3.2            1998-05-29 [NBD] (patch release)
 FreeBSD 2.2.7		1998-07-22 [FBD]
 BSD/OS 4.0		1998-08-xx [BSDI]
 				2-lock MP support, ELF executables
 FreeBSD 3.0		1998-10-16 [FBD]
 				FreeBSD-3.0 is a snapshot from -current,
 				while 3.1 and 3.2 are from 3.x-stable which
 				was branched quite some time after 3.0-release
 FreeBSD 2.2.8           1998-11-29 [FBD]
 OpenBSD 2.4             1998-12-01 [OBD]
 NetBSD 1.3.3            1998-12-23 [NBD] (patch release)
 FreeBSD 3.1		1999-02-15 [FBD]
 BSD/OS 4.0.1		1999-03-xx [BSDI]
 NetBSD 1.4              1999-05-12 [NBD]
 FreeBSD 3.2             1999-05-17 [FBD]
 OpenBSD 2.5             1999-05-19 [OBD]
 NetBSD 1.4.1            1999-08-26 [NBD] (patch release)
 FreeBSD 3.3             1999-09-17 [FBD]
 OpenBSD 2.6             1999-12-01 [OBD]
 FreeBSD 3.4             1999-12-20 [FBD]
 BSD/OS 4.1		1999-12-xx [BSDI]
 FreeBSD 4.0             2000-03-13 [FBD]
 NetBSD 1.4.2            2000-03-19 [NBD] (patch release)
 OpenBSD 2.7             2000-06-15 [OBD]
 FreeBSD 3.5		2000-06-24 [FBD]
 FreeBSD 4.1		2000-07-27 [FBD]
 FreeBSD 3.5.1		2000-07-28 [FBD]
 FreeBSD 4.1.1		2000-09-25 [FBD] (a network-only patch release)
 FreeBSD 4.2  		2000-11-21 [FBD]
 NetBSD 1.4.3		2000-11-25 [NBD] (patch release)
 BSD/OS 4.2		2000-11-29 [BSDI]
 OpenBSD 2.8		2000-12-01 [OBD]
 NetBSD 1.5              2000-12-06 [NBD]
 Mac OS X 10.0           2001-03-24 [APL]
 FreeBSD 4.3  		2001-04-20 [FBD]
 OpenBSD 2.9  		2001-06-01 [OBD]
 NetBSD 1.5.1		2001-07-11 [NBD] (patch release)
 NetBSD 1.5.2		2001-09-13 [NBD] (patch release)
 FreeBSD 4.4  		2001-09-18 [FBD]
 Mac OS X 10.1		2001-09-29 [APL]
 OpenBSD 3.0		2001-12-01 [OBD]
 FreeBSD 4.5  		2002-01-29 [FBD]
 BSD/OS 4.3		2002-03-14 [WRS]
 OpenBSD 3.1		2002-05-19 [OBD]
 FreeBSD 4.6  		2002-06-15 [FBD]
 NetBSD 1.5.3		2002-07-22 [NBD] (patch release)
 FreeBSD 4.6.2		2002-08-15 [FBD] (patch release)
 Mac OS X 10.2		2002-08-23 [APL]
 NetBSD 1.6		2002-09-14 [NBD]
 FreeBSD 4.7		2002-10-08 [FBD]
 OpenBSD 3.2		2002-11-01 [OBD]
 FreeBSD 5.0		2003-01-17 [FBD]
 				FreeBSD 5.0 is a separate branch off of
 				-current, similar to 3.0.
 FreeBSD 4.8		2003-04-03 [FBD]
 NetBSD 1.6.1		2003-04-21 [NBD] (patch release)
 OpenBSD 3.3		2003-05-01 [OBD]
 BSD/OS 5.0		2003-05-?? [WRS]
 FreeBSD 5.1		2003-06-09 [FBD]
 Mac OS X 10.3		2003-10-24 [APL]
 FreeBSD 4.9		2003-10-28 [FBD]
 BSD/OS 5.1 ISE		2003-10-?? [WRS] (final version)
 OpenBSD 3.4		2003-11-01 [OBD]
 FreeBSD 5.2		2004-01-12 [FBD]
 FreeBSD 5.2.1		2004-02-22 [FBD] (patch release)
 NetBSD 1.6.2		2004-03-01 [NBD] (patch release)
 OpenBSD 3.5		2004-04-01 [OBD]
 FreeBSD 4.10		2004-05-27 [FBD]
 DragonFly 1.0		2004-07-12 [DFB]
 OpenBSD 3.6		2004-10-29 [OBD]
 FreeBSD 5.3		2004-11-06 [FBD]
 NetBSD 2.0		2004-12-09 [NBD]
 FreeBSD 4.11		2005-01-25 [FBD]
 DragonFly 1.2.0		2005-04-08 [DFB]
 NetBSD 2.0.2		2005-04-14 [NBD] (security/critical release)
 Mac OS X 10.4		2005-04-29 [APL]
 FreeBSD 5.4		2005-05-09 [FBD]
 OpenBSD 3.7		2005-05-19 [OBD]
 NetBSD 2.0.3		2005-10-31 [NBD] (security/critical release)
 OpenBSD 3.8		2005-11-01 [OBD]
 FreeBSD 6.0		2005-11-01 [FBD]
 NetBSD 2.1		2005-11-02 [NBD]
 NetBSD 3.0		2005-12-23 [NBD]
 DragonFly 1.4.0		2006-01-08 [DFB]
 OpenBSD 3.9		2006-05-01 [OBD]
 FreeBSD 6.1		2006-05-08 [FBD]
 FreeBSD 5.5		2006-05-25 [FBD]
 NetBSD 3.0.1		2006-07-24 [NBD] (security/critical release)
 DragonFly 1.6.0		2006-07-24 [DFB]
 OpenBSD 4.0		2006-11-01 [OBD]
 NetBSD 3.0.2		2006-11-04 [NBD] (security/critical release)
 NetBSD 3.1		2006-11-04 [NBD]
 FreeBSD 6.2		2007-01-15 [FBD]
 DragonFly 1.8.0		2007-01-30 [DFB]
 OpenBSD 4.1		2007-05-01 [OBD]
 DragonFly 1.10.0	2007-08-06 [DFB]
 Mac OS X 10.5		2007-10-26 [APL]
 OpenBSD 4.2		2007-11-01 [OBD]
 NetBSD 4.0		2007-12-19 [NBD]
 FreeBSD 6.3		2008-01-18 [FBD]
 DragonFly 1.12.0	2008-02-26 [DFB]
 FreeBSD 7.0		2008-02-27 [FBD]
 OpenBSD 4.3		2008-05-01 [OBD]
 DragonFly 2.0.0		2008-07-21 [DFB]
 OpenBSD 4.4		2008-11-01 [OBD]
 FreeBSD 6.4		2008-11-28 [FBD]
 FreeBSD 7.1		2009-01-04 [FBD]
 DragonFly 2.2.0		2009-02-17 [DFB]
 NetBSD 5.0		2009-04-29 [NBD]
 OpenBSD 4.5		2009-05-01 [OBD]
 FreeBSD 7.2		2009-05-04 [FBD]
 DragonFly 2.4.0		2009-09-16 [DFB]
 OpenBSD 4.6		2009-10-18 [OBD]
 FreeBSD 8.0		2009-11-26 [FBD]
 FreeBSD 7.3		2010-03-23 [FBD]
 DragonFly 2.6.0		2010-03-28 [DFB]
 OpenBSD 4.7		2010-05-19 [OBD]
 FreeBSD 8.1		2010-07-24 [FBD]
 DragonFly 2.8.0		2010-10-30 [DFB]
 OpenBSD 4.8		2010-11-01 [OBD]
+NetBSD 5.1		2010-11-19 [NBD]
 
 Bibliography
 ------------------------
 
 Leffler, Samuel J., Marshall Kirk McKusick, Michael J Karels and John
 Quarterman. The Design and Implementation of the 4.3BSD UNIX Operating
 System. Reading, Mass. Addison-Wesley, 1989. ISBN 0-201-06196-1
 
 Salus, Peter H. A quarter century of UNIX. Addison-Wesley Publishing
 Company, Inc., 1994. ISBN 0-201-54777-5
 
 McKusick, Marshall Kirk, Keith Bostic, Michael J Karels, and John
 Quarterman. The Design and Implementation of the 4.4BSD Operating
 System. Reading, Mass. Addison-Wesley, 1996. ISBN 0-201-54979-4
 
 McKusick, Marshall Kirk, George Neville-Neil. The Design and
 Implementation of the FreeBSD Operating System.
 Addison-Wesley Professional, Published: Aug 2, 2004. ISBN 0-201-70245-2
 
 Doug McIlroy. Research Unix Reader.
 
 Michael G. Brown. The Role of BSD in the Development of Unix.
 Presented to the Tasmanian Unix Special Interest Group of the
 Australian Computer Society, Hobart, August 1993.
 
 Peter H. Salus. Unix at 25. Byte Magazine, October 1994.
 URL: http://www.byte.com/art/9410/sec8/art3.htm
 
 Andreas Klemm, Lars Köller. If you're going to San Francisco ...
 Die freien BSD-Varianten von Unix. c't April 1997, page 368ff.
 
 BSD Release Announcements collection.
 URL: http://www.FreeBSD.org/releases/
 
 BSD Hypertext Man Pages
 URL: http://www.FreeBSD.org/cgi/man.cgi
 
 UNIX history graphing project
 URL: http://minnie.tuhs.org/Unix_History/index.html
 
 UNIX history
 URL: http://www.levenez.com/unix/
 
 James Howard: The BSD Family Tree
 URL: http://ezine.daemonnews.org/200104/bsd_family.html
 ("what are the differences between FreeBSD, NetBSD, and OpenBSD?")
 
 
 Acknowledgments
 ---------------
 
 Josh Gilliam for suggestions, bug fixes, and finding very old
 original BSD announcements from Usenet or tapes.
 
 Steven M. Schultz for providing 2.8BSD, 2.10BSD, 2.11BSD manual pages.
 
 --
 Copyright (c) 1997-2007 Wolfram Schneider <wosch@FreeBSD.ORG>
 URL: http://cvsweb.freebsd.org/src/share/misc/bsd-family-tree
 
 $FreeBSD$
Index: projects/binutils-2.17/share/mk/bsd.arch.inc.mk
===================================================================
--- projects/binutils-2.17/share/mk/bsd.arch.inc.mk	(revision 215829)
+++ projects/binutils-2.17/share/mk/bsd.arch.inc.mk	(revision 215830)

Property changes on: projects/binutils-2.17/share/mk/bsd.arch.inc.mk
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/share/mk/bsd.arch.inc.mk:r215709-215824
Index: projects/binutils-2.17/share/zoneinfo
===================================================================
--- projects/binutils-2.17/share/zoneinfo	(revision 215829)
+++ projects/binutils-2.17/share/zoneinfo	(revision 215830)

Property changes on: projects/binutils-2.17/share/zoneinfo
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/share/zoneinfo:r215709-215824
Index: projects/binutils-2.17/sys/amd64/acpica/acpi_switch.S
===================================================================
--- projects/binutils-2.17/sys/amd64/acpica/acpi_switch.S	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/acpica/acpi_switch.S	(revision 215830)
@@ -1,169 +1,163 @@
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
  * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2008-2010 Jung-uk Kim <jkim@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 
 #include "acpi_wakedata.h"
 #include "assym.s"
 
 #define	WAKEUP_CTX(member)	wakeup_ ## member - wakeup_ctx(%rsi)
 
 /*
  * acpi_restorecpu: reload the CPU state saved in a PCB after an ACPI
  * wakeup and resume execution at the saved %rip.
  *
  * Register usage on entry, as read by the code below:
  *	%rdi	value loaded into %cr3 (the "KPML4phys" page table root)
  *	%rsi	base address used by the WAKEUP_CTX() accessor
  * NOTE(review): these are presumably set up by wakeup_64 in
  * acpi_wakecode.S before it jumps here -- confirm against the caller.
  *
  * %rdi is then overwritten with the PCB pointer fetched from
  * WAKEUP_CTX(pcb); every PCB_* access afterwards is relative to it.
  * The routine finishes with %eax = 0 and a ret to the saved PCB_RIP.
  */
 ENTRY(acpi_restorecpu)
 	/* Switch to KPML4phys. */
 	movq	%rdi, %cr3
 
 	/* Restore GDT. */
 	lgdt	WAKEUP_CTX(gdt)
 	jmp	1f
 1:
 
 	/* Fetch PCB. */
 	movq	WAKEUP_CTX(pcb), %rdi
 
 	/* Force kernel segment registers. */
 	movl	$KDSEL, %eax
 	movw	%ax, %ds
 	movw	%ax, %es
 	movw	%ax, %ss
 	movl	$KUF32SEL, %eax
 	movw	%ax, %fs
 	movl	$KUG32SEL, %eax
 	movw	%ax, %gs
 
 	/* Restore the FS, GS and kernel GS base MSRs (%edx:%eax) from the PCB. */
 	movl	$MSR_FSBASE, %ecx
 	movl	PCB_FSBASE(%rdi), %eax
 	movl	4 + PCB_FSBASE(%rdi), %edx
 	wrmsr
 	movl	$MSR_GSBASE, %ecx
 	movl	PCB_GSBASE(%rdi), %eax
 	movl	4 + PCB_GSBASE(%rdi), %edx
 	wrmsr
 	movl	$MSR_KGSBASE, %ecx
 	movl	PCB_KGSBASE(%rdi), %eax
 	movl	4 + PCB_KGSBASE(%rdi), %edx
 	wrmsr
 
 	/*
 	 * Restore EFER.
 	 * NOTE(review): only %eax is loaded here, so wrmsr writes whatever
 	 * the KGSBASE sequence above left in %edx as the upper 32 bits --
 	 * confirm this is intended.
 	 */
 	movl	$MSR_EFER, %ecx
 	movl	WAKEUP_CTX(efer), %eax
 	wrmsr
 
-	/* Restore PAT. */
-	movl	$MSR_PAT, %ecx
-	movl	WAKEUP_CTX(pat), %eax
-	movl	4 + WAKEUP_CTX(pat), %edx
-	wrmsr
-
 	/* Restore fast syscall stuff. */
 	movl	$MSR_STAR, %ecx
 	movl	WAKEUP_CTX(star), %eax
 	movl	4 + WAKEUP_CTX(star), %edx
 	wrmsr
 	movl	$MSR_LSTAR, %ecx
 	movl	WAKEUP_CTX(lstar), %eax
 	movl	4 + WAKEUP_CTX(lstar), %edx
 	wrmsr
 	movl	$MSR_CSTAR, %ecx
 	movl	WAKEUP_CTX(cstar), %eax
 	movl	4 + WAKEUP_CTX(cstar), %edx
 	wrmsr
 	/* NOTE(review): as with EFER, %edx is not reloaded for SF_MASK. */
 	movl	$MSR_SF_MASK, %ecx
 	movl	WAKEUP_CTX(sfmask), %eax
 	wrmsr
 
 	/*
 	 * Restore CR0 except for FPU mode.  The unmodified value is kept in
 	 * %rcx and written back at "Reload CR0" below, once the FPU state
 	 * has been restored with CR0_EM and CR0_TS clear.
 	 */
 	movq	PCB_CR0(%rdi), %rax
 	movq	%rax, %rcx
 	andq	$~(CR0_EM | CR0_TS), %rax
 	movq	%rax, %cr0
 
 	/* Restore CR2 and CR4. */
 	movq	PCB_CR2(%rdi), %rax
 	movq	%rax, %cr2
 	movq	PCB_CR4(%rdi), %rax
 	movq	%rax, %cr4
 
 	/* Restore descriptor tables. */
 	lidt	PCB_IDT(%rdi)
 	lldt	PCB_LDT(%rdi)
 
 #define	SDT_SYSTSS	9
 #define	SDT_SYSBSY	11
 
 	/*
 	 * Clear "task busy" bit and reload TR.  The mask (~11 | 9) is ~0x2,
 	 * i.e. it clears the single bit that distinguishes type 11
 	 * (SDT_SYSBSY) from type 9 (SDT_SYSTSS) in byte 5 of the descriptor
 	 * that PCPU(TSS) points to.
 	 */
 	movq	PCPU(TSS), %rax
 	andb	$(~SDT_SYSBSY | SDT_SYSTSS), 5(%rax)
 	movw	PCB_TR(%rdi), %ax
 	ltr	%ax
 
 #undef	SDT_SYSTSS
 #undef	SDT_SYSBSY
 
 	/* Restore other callee saved registers. */
 	movq	PCB_R15(%rdi), %r15
 	movq	PCB_R14(%rdi), %r14
 	movq	PCB_R13(%rdi), %r13
 	movq	PCB_R12(%rdi), %r12
 	movq	PCB_RBP(%rdi), %rbp
 	movq	PCB_RSP(%rdi), %rsp
 	movq	PCB_RBX(%rdi), %rbx
 
 	/* Restore debug registers. */
 	movq	PCB_DR0(%rdi), %rax
 	movq	%rax, %dr0
 	movq	PCB_DR1(%rdi), %rax
 	movq	%rax, %dr1
 	movq	PCB_DR2(%rdi), %rax
 	movq	%rax, %dr2
 	movq	PCB_DR3(%rdi), %rax
 	movq	%rax, %dr3
 	movq	PCB_DR6(%rdi), %rax
 	movq	%rax, %dr6
 	movq	PCB_DR7(%rdi), %rax
 	movq	%rax, %dr7
 
 	/* Restore FPU state. */
 	fninit
 	fxrstor	PCB_USERFPU(%rdi)
 
 	/* Reload CR0 with the full saved value (kept in %rcx since above). */
 	movq	%rcx, %cr0
 
 	/*
 	 * Restore return address: overwrite the top of the restored stack
 	 * with the saved %rip so that the final ret resumes there.
 	 */
 	movq	PCB_RIP(%rdi), %rax
 	movq	%rax, (%rsp)
 
 	/*
 	 * Indicate the CPU is resumed by zeroing the wakeup_cpu slot; this
 	 * also leaves %eax = 0 for the ret below.
 	 */
 	xorl	%eax, %eax
 	movl	%eax, WAKEUP_CTX(cpu)
 
 	ret
 END(acpi_restorecpu)
Index: projects/binutils-2.17/sys/amd64/acpica/acpi_wakecode.S
===================================================================
--- projects/binutils-2.17/sys/amd64/acpica/acpi_wakecode.S	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/acpica/acpi_wakecode.S	(revision 215830)
@@ -1,289 +1,287 @@
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
  * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2003 Peter Wemm
  * Copyright (c) 2008-2010 Jung-uk Kim <jkim@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 
 #include "assym.s"
 
 /*
  * Resume entry point for real mode.
  *
  * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero
  * in FACS, the BIOS enters here in real mode after POST with CS set to
  * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf).
  * Depending on the previous sleep state, we may need to initialize more
  * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk).
  *
  * Note: If XFirmwareWakingVector is non-zero, it should disable address
  * translation/paging and interrupts, load all segment registers with
  * a flat 4 GB address space, and set EFLAGS.IF to zero.  Currently
  * this mode is not supported by this code.
  */
 
 	.data				/* So we can modify it */
 
 	ALIGN_TEXT
 	.code16
 wakeup_start:
 	/*
 	 * Set up segment registers for real mode, a small stack for
 	 * any calls we make, and clear any flags.
 	 */
 	cli				/* make sure no interrupts */
 	mov	%cs, %ax		/* copy %cs to %ds.  Remember these */
 	mov	%ax, %ds		/* are offsets rather than selectors */
 	mov	%ax, %ss
 	movw	$PAGE_SIZE, %sp
 	xorw	%ax, %ax
 	pushw	%ax
 	popfw
 
 	/*
 	 * To debug resume hangs, beep the speaker if the user requested.
 	 * The flag is cleared before use so that a later restart through
 	 * "jmp wakeup_start" does not repeat the action.
 	 */
 	testb	$~0, resume_beep - wakeup_start
 	jz	1f
 	movb	$0, resume_beep - wakeup_start
 	movb	$0xc0, %al
 	outb	%al, $0x42
 	movb	$0x04, %al
 	outb	%al, $0x42
 	inb	$0x61, %al
 	orb	$0x3, %al
 	outb	%al, $0x61
 1:
 
 	/*
 	 * Re-initialize video BIOS if the reset_video tunable is set.
 	 * The flag is cleared first; otherwise the jump back to
 	 * wakeup_start below would take this path again.
 	 */
 	testb	$~0, reset_video - wakeup_start
 	jz	1f
 	movb	$0, reset_video - wakeup_start
 	lcall	$0xc000, $3
 
 	/* When we reach here, int 0x10 should be ready.  Hide cursor. */
 	movb	$0x01, %ah
 	movb	$0x20, %ch
 	int	$0x10
 
 	/* Re-start in case the previous BIOS call clobbers them. */
 	jmp	wakeup_start
 1:
 
 	/*
 	 * Find relocation base and patch the gdt descriptor and ljmp targets
 	 */
 	xorl	%ebx, %ebx
 	mov	%cs, %bx
 	sall	$4, %ebx		/* %ebx is now our relocation base */
 
 	/*
 	 * Load the descriptor table pointer.  We'll need it when running
 	 * in 16-bit protected mode.
 	 */
 	lgdtl	bootgdtdesc - wakeup_start
 
 	/* Enable protected mode */
 	movl	$CR0_PE, %eax
 	mov	%eax, %cr0
 
 	/*
 	 * Now execute a far jump to turn on protected mode.  This
 	 * causes the segment registers to turn into selectors and causes
 	 * %cs to be loaded from the gdt.
 	 *
 	 * The following instruction is:
 	 * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start
 	 * but gas cannot assemble that.  And besides, we patch the targets
 	 * in early startup and it's a little clearer what we are patching.
 	 */
 wakeup_sw32:
 	.byte	0x66			/* size override to 32 bits */
 	.byte	0xea			/* opcode for far jump */
 	.long	wakeup_32 - wakeup_start /* offset in segment */
 	.word	bootcode32 - bootgdt	/* index in gdt for 32 bit code */
 
 	/*
 	 * At this point, we are running in 32 bit legacy protected mode.
 	 */
 	ALIGN_TEXT
 	.code32
 wakeup_32:
 
 	/* Load %ds with the 32 bit data selector from the boot gdt. */
 	mov	$bootdata32 - bootgdt, %eax
 	mov	%ax, %ds
 
 	/* Turn on the PAE and PSE bits for when paging is enabled */
 	mov	%cr4, %eax
 	orl	$(CR4_PAE | CR4_PSE), %eax
 	mov	%eax, %cr4
 
 	/*
 	 * Enable EFER.LME so that we get long mode when all the prereqs are
 	 * in place.  In this case, it turns on when CR0_PG is finally enabled.
 	 * Pick up a few other EFER bits that we'll need while we're here.
 	 */
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	orl	$EFER_LME | EFER_SCE, %eax
 	wrmsr
 
 	/*
 	 * Point to the embedded page tables for startup.  Note that this
 	 * only gets accessed after we're actually in 64 bit mode, however
 	 * we can only set the bottom 32 bits of %cr3 in this state.  This
 	 * means we are required to use a temporary page table that is below
 	 * the 4GB limit.  %ebx is still our relocation base.  We could just
 	 * subtract 3 * PAGE_SIZE, but that would be too easy.
 	 *
 	 * The value loaded into %cr3 is the 32 bit word stored at
 	 * wakeup_pagetables, not the address of that label.
 	 */
 	leal	wakeup_pagetables - wakeup_start(%ebx), %eax
 	movl	(%eax), %eax
 	mov	%eax, %cr3
 
 	/*
 	 * Finally, switch to long mode by enabling paging.  We have
 	 * to be very careful here because all the segmentation disappears
 	 * out from underneath us.  The spec says we can depend on the
 	 * subsequent pipelined branch to execute, but *only if* everything
 	 * is still identity mapped.  If any mappings change, the pipeline
 	 * will flush.
 	 */
 	mov	%cr0, %eax
 	orl	$CR0_PG, %eax
 	mov	%eax, %cr0
 
 	/*
 	 * At this point paging is enabled, and we are in "compatibility" mode.
 	 * We do another far jump to reload %cs with the 64 bit selector.
 	 * %cr3 points to a 4-level page table page.
 	 * We cannot yet jump all the way to the kernel because we can only
 	 * specify a 32 bit linear address.  So, yet another trampoline.
 	 *
 	 * The following instruction is:
 	 * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start
 	 * but gas cannot assemble that.  And besides, we patch the targets
 	 * in early startup and it's a little clearer what we are patching.
 	 */
 wakeup_sw64:
 	.byte	0xea			/* opcode for far jump */
 	.long	wakeup_64 - wakeup_start /* offset in segment */
 	.word	bootcode64 - bootgdt	/* index in gdt for 64 bit code */
 
 	/*
 	 * Yeehar!  We're running in 64-bit mode!  We can mostly ignore our
 	 * segment registers, and get on with it.
 	 * Note that we are running at the correct virtual address, but with
 	 * a 1:1 1GB mirrored mapping over entire address space.  We had better
 	 * switch to a real %cr3 promptly so that we can get to the direct map
 	 * space. Remember that jmp is relative and that we've been relocated,
 	 * so use an indirect jump.
 	 */
 	ALIGN_TEXT
 	.code64
 wakeup_64:
 	mov	$bootdata64 - bootgdt, %eax
 	mov	%ax, %ds
 
 	/*
 	 * Restore arguments and return: %rdi and %rsi are loaded from the
 	 * wakeup_kpml4 and wakeup_ctx slots, then control transfers to the
 	 * address stored in wakeup_retaddr.
 	 */
 	movq	wakeup_kpml4 - wakeup_start(%rbx), %rdi
 	movq	wakeup_ctx - wakeup_start(%rbx), %rsi
 	movq	wakeup_retaddr - wakeup_start(%rbx), %rax
 	jmp	*%rax
 
 	.data
 
 /*
  * One-byte flags tested (and cleared) by wakeup_start.  acpi_wakeup.c
  * stores them through WAKECODE_FIXUP() before sleeping.
  */
 resume_beep:
 	.byte	0
 reset_video:
 	.byte	0
 
 	ALIGN_DATA
 /*
  * Temporary GDT used while switching modes.  The first 32 bytes are
  * zero, so the selectors computed as "label - bootgdt" start at 0x20.
  */
 bootgdt:
 	.long	0x00000000
 	.long	0x00000000
 	.long	0x00000000
 	.long	0x00000000
 	.long	0x00000000
 	.long	0x00000000
 	.long	0x00000000
 	.long	0x00000000
 
 bootcode64:
 	.long	0x0000ffff
 	.long	0x00af9b00
 
 bootdata64:
 	.long	0x0000ffff
 	.long	0x00af9300
 
 bootcode32:
 	.long	0x0000ffff
 	.long	0x00cf9b00
 
 bootdata32:
 	.long	0x0000ffff
 	.long	0x00cf9300
 bootgdtend:
 
 /* 32 bit value that wakeup_32 loads into %cr3. */
 wakeup_pagetables:
 	.long	0
 
 /* Operand of the lgdtl in wakeup_start. */
 bootgdtdesc:
 	.word	bootgdtend - bootgdt	/* Length */
 	.long	bootgdt - wakeup_start	/* Offset plus %ds << 4 */
 
 	ALIGN_DATA
 /* Slots read by wakeup_64: jump target and the value placed in %rdi. */
 wakeup_retaddr:
 	.quad	0
 wakeup_kpml4:
 	.quad	0
 
 /*
  * wakeup_ctx is loaded into %rsi by wakeup_64 and is the base label of
  * the WAKEUP_CTX() accessor in acpi_switch.S; the wakeup_* slots below
  * are the ones that accessor addresses.
  */
 wakeup_ctx:
 	.quad	0
 wakeup_pcb:
 	.quad	0
 /* lgdt operand: 16 bit limit followed by 64 bit base. */
 wakeup_gdt:
 	.word	0
 	.quad	0
 
 	ALIGN_DATA
 wakeup_efer:
 	.quad	0
-wakeup_pat:
-	.quad	0
 wakeup_star:
 	.quad	0
 wakeup_lstar:
 	.quad	0
 wakeup_cstar:
 	.quad	0
 wakeup_sfmask:
 	.quad	0
 /* Set by acpi_wakeup.c, zeroed by acpi_restorecpu once a CPU is resumed. */
 wakeup_cpu:
 	.long	0
 dummy:
Index: projects/binutils-2.17/sys/amd64/acpica/acpi_wakeup.c
===================================================================
--- projects/binutils-2.17/sys/amd64/acpica/acpi_wakeup.c	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/acpica/acpi_wakeup.c	(revision 215830)
@@ -1,410 +1,410 @@
 /*-
  * Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
  * Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
  * Copyright (c) 2003 Peter Wemm
  * Copyright (c) 2008-2010 Jung-uk Kim <jkim@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <machine/intr_machdep.h>
 #include <x86/mca.h>
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/specialreg.h>
 
 #ifdef SMP
 #include <x86/apicreg.h>
 #include <machine/smp.h>
 #include <machine/vmparam.h>
 #endif
 
 #include <contrib/dev/acpica/include/acpi.h>
 
 #include <dev/acpica/acpivar.h>
 
 #include "acpi_wakecode.h"
 #include "acpi_wakedata.h"
 
 /* Make sure the code is less than a page and leave room for the stack. */
 CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
 
 extern int		acpi_resume_beep;
 extern int		acpi_reset_video;
 
 #ifdef SMP
 extern struct pcb	**susppcbs;
 #else
 static struct pcb	**susppcbs;
 #endif
 
 int			acpi_restorecpu(vm_offset_t, struct pcb *);
 
 static void		*acpi_alloc_wakeup_handler(void);
 static void		acpi_stop_beep(void *);
 
 #ifdef SMP
 static int		acpi_wakeup_ap(struct acpi_softc *, int);
 static void		acpi_wakeup_cpus(struct acpi_softc *, cpumask_t);
 #endif
 
 /*
  * Virtual and physical address of the wakeup code, which sits
  * 3 * PAGE_SIZE into the region recorded in the softc.
  */
 #define	WAKECODE_VADDR(sc)	((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
 #define	WAKECODE_PADDR(sc)	((sc)->acpi_wakephys + (3 * PAGE_SIZE))
 /*
  * Store `val' as an object of type `type' at byte offset `offset' inside
  * the wakeup code.  Note that the macro does not take the softc as an
  * argument: it uses whatever variable named `sc' is in scope at the
  * point of expansion.
  */
 #define	WAKECODE_FIXUP(offset, type, val) do	{	\
 	type	*addr;					\
 	addr = (type *)(WAKECODE_VADDR(sc) + offset);	\
 	*addr = val;					\
 } while (0)
 
 /*
  * Stop the resume beep by clearing the low two bits (mask 0x3) of I/O
  * port 0x61 with a read-modify-write.  The argument is unused; the
  * function has the callback signature expected by timeout().
  */
 static void
 acpi_stop_beep(void *arg)
 {
 	outb(0x61, inb(0x61) & ~0x3);
 }
 
 #ifdef SMP
 /*
  * Restart one application processor through the wakeup code.
  *
  * Patches the per-CPU slots of the wakeup code (PCB pointer, GDT limit
  * and base, CPU number), sends an INIT IPI followed by two STARTUP IPIs
  * to the CPU's local APIC, and then polls the wakeup_cpu slot.
  *
  * Returns 1 as soon as wakeup_cpu reads back as 0, or 0 if that has not
  * happened after roughly 5 seconds (5000 polls, 1000us apart).
  */
 static int
 acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
 {
 	/* STARTUP IPI vector: physical page number of the wakeup code. */
 	int		vector = (WAKECODE_PADDR(sc) >> 12) & 0xff;
 	int		apic_id = cpu_apic_ids[cpu];
 	int		ms;
 
 	/* The GDT slot is a 16 bit limit immediately followed by the base. */
 	WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[cpu]);
 	WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[cpu]->pcb_gdt.rd_limit);
 	WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
 	    susppcbs[cpu]->pcb_gdt.rd_base);
 	/* Non-zero marker; the success check below waits for it to become 0. */
 	WAKECODE_FIXUP(wakeup_cpu, int, cpu);
 
 	/* do an INIT IPI: assert RESET */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
 
 	/* wait for pending status end */
 	lapic_ipi_wait(-1);
 
 	/* do an INIT IPI: deassert RESET */
 	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
 
 	/* wait for pending status end */
 	DELAY(10000);		/* wait ~10mS */
 	lapic_ipi_wait(-1);
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
 	lapic_ipi_wait(-1);
 	DELAY(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
 	lapic_ipi_wait(-1);
 	DELAY(200);		/* wait ~200uS */
 
 	/* Wait up to 5 seconds for it to start. */
 	for (ms = 0; ms < 5000; ms++) {
 		if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0)
 			return (1);	/* return SUCCESS */
 		DELAY(1000);
 	}
 	return (0);		/* return FAILURE */
 }
 
 #define	WARMBOOT_TARGET		0
 #define	WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define	WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #define	CMOS_REG		(0x70)
 #define	CMOS_DATA		(0x71)
 #define	BIOS_RESET		(0x0f)
 #define	BIOS_WARM		(0x0a)
 
 /*
  * Restart every application processor whose bit is set in wakeup_cpus.
  *
  * The BIOS warm-start vector (WARMBOOT_OFF/WARMBOOT_SEG) and the CMOS
  * shutdown-reason byte are pointed at the wakeup code for the duration
  * of the loop and put back afterwards.  CPU 0 is never touched; the loop
  * starts at CPU 1.
  *
  * Panics if any selected AP fails to come back (acpi_wakeup_ap()
  * returning 0); the warm-start vector is restored before the panic.
  */
 static void
 acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
 {
 	uint32_t	mpbioswarmvec;
 	int		cpu;
 	u_char		mpbiosreason;
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 
 	/* setup a vector to our boot code */
 	*((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
 	*((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4;
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 
 	/* Wake up each AP. */
 	for (cpu = 1; cpu < mp_ncpus; cpu++) {
 		/*
 		 * Build the bit in the mask's own type: a plain "1 << cpu"
 		 * is a signed int shift, which is undefined for bit 31 and
 		 * cannot reach bits above it if cpumask_t is wider than int.
 		 */
 		if ((wakeup_cpus & ((cpumask_t)1 << cpu)) == 0)
 			continue;
 		if (acpi_wakeup_ap(sc, cpu) == 0) {
 			/* restore the warmstart vector */
 			*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
 			panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
 			    cpu, cpu_apic_ids[cpu]);
 		}
 	}
 
 	/* restore the warmstart vector */
 	*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
 
 	/* restore the CMOS shutdown-reason byte saved on entry */
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 }
 #endif
 
 /*
  * Machine-dependent part of entering an ACPI sleep state and coming
  * back from it.
  *
  * sc	 ACPI softc; must have a wakeup region (acpi_wakeaddr != 0).
  * state	 ACPI sleep state to enter.
  *
  * Returns 0 after a successful resume.  Returns -1 if no wakeup region
  * is set up, if the APs could not be suspended, or if ACPICA refused to
  * enter the sleep state.
  *
  * The function is built around savectx(): the branch taken when it
  * returns non-zero prepares the wakeup code and goes to sleep, the
  * branch taken when it returns zero is the resume path.
  * NOTE(review): the zero return presumably comes from acpi_restorecpu
  * returning to the saved context with %eax = 0 -- confirm.
  */
 int
 acpi_sleep_machdep(struct acpi_softc *sc, int state)
 {
 #ifdef SMP
 	cpumask_t	wakeup_cpus;
 #endif
 	register_t	cr3, rf;
 	ACPI_STATUS	status;
 	int		ret;
 
 	ret = -1;
 
 	if (sc->acpi_wakeaddr == 0ul)
 		return (ret);
 
 #ifdef SMP
 	wakeup_cpus = PCPU_GET(other_cpus);
 #endif
 
 	AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc));
 
 	rf = intr_disable();
 	intr_suspend();
 
 	/*
 	 * Temporarily switch to the kernel pmap because it provides
 	 * an identity mapping (setup at boot) for the low physical
 	 * memory region containing the wakeup code.
 	 */
 	cr3 = rcr3();
 	load_cr3(KPML4phys);
 
 	if (savectx(susppcbs[0])) {
 #ifdef SMP
 		if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) {
 			device_printf(sc->acpi_dev,
 			    "Failed to suspend APs: CPU mask = 0x%jx\n",
 			    (uintmax_t)(wakeup_cpus & ~stopped_cpus));
 			goto out;
 		}
 #endif
 
 		/* Pass the tunables to the wakeup code as 0/1 byte flags. */
 		WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0));
 		WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0));
 
 		/* Point the wakeup code at this CPU's saved PCB and GDT. */
 		WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[0]);
 		WAKECODE_FIXUP(wakeup_gdt, uint16_t,
 		    susppcbs[0]->pcb_gdt.rd_limit);
 		WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
 		    susppcbs[0]->pcb_gdt.rd_base);
 		WAKECODE_FIXUP(wakeup_cpu, int, 0);
 
 		/* Call ACPICA to enter the desired sleep state */
 		if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
 			status = AcpiEnterSleepStateS4bios();
 		else
 			status = AcpiEnterSleepState(state);
 
 		if (status != AE_OK) {
 			device_printf(sc->acpi_dev,
 			    "AcpiEnterSleepState failed - %s\n",
 			    AcpiFormatException(status));
 			goto out;
 		}
 
 		/* The request was accepted; spin here until the sleep begins. */
 		for (;;)
 			ia32_pause();
 	} else {
 		/*
 		 * Resume path.
 		 * NOTE(review): pmap_init_pat() presumably reprograms the
 		 * PAT MSR on this CPU -- confirm against its definition.
 		 */
+		pmap_init_pat();
 		PCPU_SET(switchtime, 0);
 		PCPU_SET(switchticks, ticks);
 #ifdef SMP
 		if (wakeup_cpus != 0)
 			acpi_wakeup_cpus(sc, wakeup_cpus);
 #endif
 		acpi_resync_clock(sc);
 		ret = 0;
 	}
 
 	/* Common exit for the resume path and for both failure paths. */
 out:
 #ifdef SMP
 	if (wakeup_cpus != 0)
 		restart_cpus(wakeup_cpus);
 #endif
 
 	/* Undo the temporary switch to KPML4phys made before savectx(). */
 	load_cr3(cr3);
 	mca_resume();
 	intr_resume();
 	intr_restore(rf);
 
 	AcpiSetFirmwareWakingVector(0);
 
 	/* Only after a real resume: let the memory range driver reinit. */
 	if (ret == 0 && mem_range_softc.mr_op != NULL &&
 	    mem_range_softc.mr_op->reinit != NULL)
 		mem_range_softc.mr_op->reinit(&mem_range_softc);
 
 	/* If we beeped, turn it off after a delay. */
 	if (acpi_resume_beep)
 		timeout(acpi_stop_beep, NULL, 3 * hz);
 
 	return (ret);
 }
 
 /*
  * Allocate the memory used for suspend/resume: four physically contiguous,
  * page-aligned pages between 0x500 and 0xa0000 for the wakeup code, plus the
  * susppcbs array with one pcb per CPU (mp_ncpus entries).
  *
  * Returns the address of the low-memory region, or NULL if contigmalloc()
  * failed (in which case susppcbs is not allocated).
  */
 static void *
 acpi_alloc_wakeup_handler(void)
 {
 	void		*wakeaddr;
 	int		i;
 
 	/*
 	 * Specify the region for our wakeup code.  We want it in the low 1 MB
 	 * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA
 	 * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT),
 	 * and ROM area (0xa0000 and above).  The temporary page tables must be
 	 * page-aligned.
 	 */
 	wakeaddr = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0x500,
 	    0xa0000, PAGE_SIZE, 0ul);
 	if (wakeaddr == NULL) {
 		printf("%s: can't alloc wake memory\n", __func__);
 		return (NULL);
 	}
 	/* The pcb allocations use M_WAITOK, so no NULL checks follow. */
 	susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK);
 	for (i = 0; i < mp_ncpus; i++)
 		susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK);
 
 	return (wakeaddr);
 }
 
 /*
  * Install the wakeup code for `sc': allocate the low-memory region, copy
  * wakecode into it, patch addresses and saved MSR values into the copy, and
  * build the temporary page tables in the first three pages of the region.
  *
  * The static wakeaddr makes this a one-time operation; later calls return
  * immediately.  If the allocation fails, sc is left untouched.
  */
 void
 acpi_install_wakeup_handler(struct acpi_softc *sc)
 {
 	static void	*wakeaddr = NULL;
 	uint64_t	*pt4, *pt3, *pt2;
 	int		i;
 
 	if (wakeaddr != NULL)
 		return;
 
 	wakeaddr = acpi_alloc_wakeup_handler();
 	if (wakeaddr == NULL)
 		return;
 
 	sc->acpi_wakeaddr = (vm_offset_t)wakeaddr;
 	sc->acpi_wakephys = vtophys(wakeaddr);
 
 	bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode));
 
 	/* Patch GDT base address, ljmp targets and page table base address. */
 	WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
 	    WAKECODE_PADDR(sc) + bootgdt);
 	WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
 	    WAKECODE_PADDR(sc) + wakeup_32);
 	WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
 	    WAKECODE_PADDR(sc) + wakeup_64);
 	WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys);
 
 	/* Save pointers to some global data. */
 	WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu);
 	WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys);
 	WAKECODE_FIXUP(wakeup_ctx, vm_offset_t,
 	    WAKECODE_VADDR(sc) + wakeup_ctx);
 	WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER));
-	WAKECODE_FIXUP(wakeup_pat, uint64_t, rdmsr(MSR_PAT));
 	WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR));
 	WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR));
 	WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR));
 	WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK));
 
 	/* Build temporary page tables below realmode code. */
 	/* One page each, in order: level 4, level 3, level 2. */
 	pt4 = wakeaddr;
 	pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t);
 	pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t);
 
 	/* Create the initial 1GB replicated page tables */
 	for (i = 0; i < 512; i++) {
 		/*
 		 * Each slot of the level 4 pages points
 		 * to the same level 3 page
 		 */
 		pt4[i] = (uint64_t)(sc->acpi_wakephys + PAGE_SIZE);
 		pt4[i] |= PG_V | PG_RW | PG_U;
 
 		/*
 		 * Each slot of the level 3 pages points
 		 * to the same level 2 page
 		 */
 		pt3[i] = (uint64_t)(sc->acpi_wakephys + (2 * PAGE_SIZE));
 		pt3[i] |= PG_V | PG_RW | PG_U;
 
 		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
 		pt2[i] = i * (2 * 1024 * 1024);
 		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
 	}
 
 	if (bootverbose)
 		device_printf(sc->acpi_dev, "wakeup code va %p pa %p\n",
 		    (void *)sc->acpi_wakeaddr, (void *)sc->acpi_wakephys);
 }
Index: projects/binutils-2.17/sys/amd64/amd64/cpu_switch.S
===================================================================
--- projects/binutils-2.17/sys/amd64/amd64/cpu_switch.S	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/amd64/cpu_switch.S	(revision 215830)
@@ -1,363 +1,363 @@
 /*-
  * Copyright (c) 2003 Peter Wemm.
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 
 #include "assym.s"
 #include "opt_sched.h"
 
 /*****************************************************************************/
 /* Scheduling                                                                */
 /*****************************************************************************/
 
 	.text
 
 /*
  * LK puts a `lock' prefix in front of the pm_active bit operations on SMP
  * kernels and expands to nothing otherwise.
  */
 #ifdef SMP
 #define LK	lock ;
 #else
 #define LK
 #endif
 
 /*
  * SETLK is the instruction used to store into the old thread's td_lock:
  * xchgq when both SCHED_ULE and SMP are defined, a plain movq otherwise.
  */
 #if defined(SCHED_ULE) && defined(SMP)
 #define	SETLK	xchgq
 #else
 #define	SETLK	movq
 #endif
 
 /*
  * cpu_throw()
  *
  * This is the second half of cpu_switch(). It is used when the current
  * thread is either a dummy or slated to die, and we no longer care
  * about its state.  This is only a slight optimization and is probably
  * not worth it anymore.  Note that we need to clear the pm_active bits so
  * we do need the old proc if it still exists.
  * %rdi = oldtd
  * %rsi = newtd
  *
  * Ends by jumping to swact inside cpu_switch() with %eax = cpuid,
  * %rsi = newtd and %r8 = the new thread's pcb.
  */
 ENTRY(cpu_throw)
 	movl	PCPU(CPUID),%eax
 	testq	%rdi,%rdi
 	jz	1f				/* no old thread */
 	/* release bit from old pm_active */
 	movq	PCPU(CURPMAP),%rdx
 	LK btrl	%eax,PM_ACTIVE(%rdx)		/* clear old */
 1:
 	movq	TD_PCB(%rsi),%r8		/* newtd->td_pcb */
 	movq	PCB_CR3(%r8),%rdx
 	movq	%rdx,%cr3			/* new address space */
 	jmp	swact
 END(cpu_throw)
 
 /*
  * cpu_switch(old, new, mtx)
  *
  * Save the current thread state, then select the next thread to run
  * and load its state.
  * %rdi = oldtd
  * %rsi = newtd
  * %rdx = mtx
  *
  * The label swact is also the entry point used by cpu_throw().
  */
 ENTRY(cpu_switch)
 	/* Switch to new thread.  First, save context. */
 	movq	TD_PCB(%rdi),%r8
 	movb	$1,PCB_FULL_IRET(%r8)
 
 	movq	(%rsp),%rax			/* Hardware registers */
 	movq	%r15,PCB_R15(%r8)
 	movq	%r14,PCB_R14(%r8)
 	movq	%r13,PCB_R13(%r8)
 	movq	%r12,PCB_R12(%r8)
 	movq	%rbp,PCB_RBP(%r8)
 	movq	%rsp,PCB_RSP(%r8)
 	movq	%rbx,PCB_RBX(%r8)
 	movq	%rax,PCB_RIP(%r8)
 
 	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
 	jnz	store_dr			/* static predict not taken */
 done_store_dr:
 
 	/* have we used fp, and need a save? */
 	cmpq	%rdi,PCPU(FPCURTHREAD)
 	jne	1f
 	movq	PCB_SAVEFPU(%r8),%r8
 	clts
 	fxsave	(%r8)
 	/* Set CR0_TS again and clear fpcurthread now that the state is saved. */
 	smsw	%ax
 	orb	$CR0_TS,%al
 	lmsw	%ax
 	xorl	%eax,%eax
 	movq	%rax,PCPU(FPCURTHREAD)
 1:
 
 	/* Save is done.  Now fire up new thread. Leave old vmspace. */
 	movq	TD_PCB(%rsi),%r8
 
 	/* switch address space */
 	movq	PCB_CR3(%r8),%rcx
 	movq	%cr3,%rax
 	cmpq	%rcx,%rax			/* Same address space? */
 	jne	swinact
 	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
 	jmp	sw1
 swinact:
 	movq	%rcx,%cr3			/* new address space */
 	movl	PCPU(CPUID), %eax
 	/* Release bit from old pmap->pm_active */
 	movq	PCPU(CURPMAP),%rcx
 	LK btrl	%eax,PM_ACTIVE(%rcx)		/* clear old */
 	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
 swact:
 	/* Set bit in new pmap->pm_active */
 	movq	TD_PROC(%rsi),%rdx		/* newproc */
 	movq	P_VMSPACE(%rdx), %rdx
 	addq	$VM_PMAP,%rdx
 	LK btsl	%eax,PM_ACTIVE(%rdx)		/* set new */
 	movq	%rdx,PCPU(CURPMAP)
 
 sw1:
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Wait for the new thread to become unblocked */
 	movq	$blocked_lock, %rdx
 1:
 	movq	TD_LOCK(%rsi),%rcx
 	cmpq	%rcx, %rdx
 	pause
 	je	1b
 #endif
 	/*
 	 * At this point, we've switched address spaces and are ready
 	 * to load up the rest of the next context.
 	 */
 
 	/* Skip loading user fsbase/gsbase for kthreads */
 	testl	$TDP_KTHREAD,TD_PFLAGS(%rsi)
 	jnz	do_kthread
 
 	/*
 	 * Load ldt register
 	 */
 	movq	TD_PROC(%rsi),%rcx
 	cmpq	$0, P_MD+MD_LDT(%rcx)
 	jne	do_ldt
 	xorl	%eax,%eax
 ld_ldt:	lldt	%ax
 
 	/* Restore fs base in GDT */
 	/* Low 32 bits only: bytes 2-3, 4 and 7 of the descriptor. */
 	movl	PCB_FSBASE(%r8),%eax
 	movq	PCPU(FS32P),%rdx
 	movw	%ax,2(%rdx)
 	shrl	$16,%eax
 	movb	%al,4(%rdx)
 	shrl	$8,%eax
 	movb	%al,7(%rdx)
 
 	/* Restore gs base in GDT */
 	movl	PCB_GSBASE(%r8),%eax
 	movq	PCPU(GS32P),%rdx
 	movw	%ax,2(%rdx)
 	shrl	$16,%eax
 	movb	%al,4(%rdx)
 	shrl	$8,%eax
 	movb	%al,7(%rdx)
 
 do_kthread:
 	/* Do we need to reload tss ? */
 	movq	PCPU(TSSP),%rax
 	movq	PCB_TSSP(%r8),%rdx
 	testq	%rdx,%rdx
 	cmovzq	PCPU(COMMONTSSP),%rdx		/* no private tss: use common */
 	cmpq	%rax,%rdx
 	jne	do_tss
 done_tss:
 	movq	%r8,PCPU(RSP0)
 	movq	%r8,PCPU(CURPCB)
 	/* Update the TSS_RSP0 pointer for the next interrupt */
 	movq	%r8,COMMON_TSS_RSP0(%rdx)
 	movq	%rsi,PCPU(CURTHREAD)		/* into next thread */
 
 	/* Test if debug registers should be restored. */
 	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
 	jnz	load_dr				/* static predict not taken */
 done_load_dr:
 
 	/* Restore context. */
 	movq	PCB_R15(%r8),%r15
 	movq	PCB_R14(%r8),%r14
 	movq	PCB_R13(%r8),%r13
 	movq	PCB_R12(%r8),%r12
 	movq	PCB_RBP(%r8),%rbp
 	movq	PCB_RSP(%r8),%rsp
 	movq	PCB_RBX(%r8),%rbx
 	/* Place the saved %rip on top of the new stack so ret jumps to it. */
 	movq	PCB_RIP(%r8),%rax
 	movq	%rax,(%rsp)
 	ret
 
 	/*
 	 * We order these strangely for several reasons.
 	 * 1: I wanted to use static branch prediction hints
 	 * 2: Most athlon64/opteron cpus don't have them.  They define
 	 *    a forward branch as 'predict not taken'.  Intel cores have
 	 *    the 'rep' prefix to invert this.
 	 * So, to make it work on both forms of cpu we do the detour.
 	 * We use jumps rather than call in order to avoid the stack.
 	 */
 
 store_dr:
 	movq	%dr7,%rax			/* yes, do the save */
 	movq	%dr0,%r15
 	movq	%dr1,%r14
 	movq	%dr2,%r13
 	movq	%dr3,%r12
 	movq	%dr6,%r11
 	movq	%r15,PCB_DR0(%r8)
 	movq	%r14,PCB_DR1(%r8)
 	movq	%r13,PCB_DR2(%r8)
 	movq	%r12,PCB_DR3(%r8)
 	movq	%r11,PCB_DR6(%r8)
 	movq	%rax,PCB_DR7(%r8)
 	andq	$0x0000fc00, %rax		/* disable all watchpoints */
 	movq	%rax,%dr7
 	jmp	done_store_dr
 
 load_dr:
 	movq	%dr7,%rax
 	movq	PCB_DR0(%r8),%r15
 	movq	PCB_DR1(%r8),%r14
 	movq	PCB_DR2(%r8),%r13
 	movq	PCB_DR3(%r8),%r12
 	movq	PCB_DR6(%r8),%r11
 	movq	PCB_DR7(%r8),%rcx
 	movq	%r15,%dr0
 	movq	%r14,%dr1
 	/* Preserve reserved bits in %dr7 */
 	andq	$0x0000fc00,%rax
 	andq	$~0x0000fc00,%rcx
 	movq	%r13,%dr2
 	movq	%r12,%dr3
 	orq	%rcx,%rax
 	movq	%r11,%dr6
 	movq	%rax,%dr7
 	jmp	done_load_dr
 
 	/*
 	 * Record the new tss pointer and write its address into the per-CPU
 	 * descriptor at bytes 2-3, 4, 7 and 8-11, then reload the task
 	 * register.
 	 */
 do_tss:	movq	%rdx,PCPU(TSSP)
 	movq	%rdx,%rcx
 	movq	PCPU(TSS),%rax
-	movw	%rcx,2(%rax)
+	movw	%cx,2(%rax)
 	shrq	$16,%rcx
 	movb	%cl,4(%rax)
 	shrq	$8,%rcx
 	movb	%cl,7(%rax)
 	shrq	$8,%rcx
 	movl	%ecx,8(%rax)
 	movb	$0x89,5(%rax)	/* unset busy */
 	movl	$TSSSEL,%eax
 	ltr	%ax
 	jmp	done_tss
 
 	/* Copy the process's 16-byte ldt descriptor into place and load it. */
 do_ldt:	movq	PCPU(LDT),%rax
 	movq	P_MD+MD_LDT_SD(%rcx),%rdx
 	movq	%rdx,(%rax)
 	movq	P_MD+MD_LDT_SD+8(%rcx),%rdx
 	movq	%rdx,8(%rax)
 	movl	$LDTSEL,%eax
 	jmp	ld_ldt
 END(cpu_switch)
 
 /*
  * savectx(pcb)
  * Update pcb, saving current processor state.
  * %rdi = pcb
  * Returns 1 in %eax.
  */
 ENTRY(savectx)
 	/* Save caller's return address. */
 	movq	(%rsp),%rax
 	movq	%rax,PCB_RIP(%rdi)
 
 	movq	%rbx,PCB_RBX(%rdi)
 	movq	%rsp,PCB_RSP(%rdi)
 	movq	%rbp,PCB_RBP(%rdi)
 	movq	%r12,PCB_R12(%rdi)
 	movq	%r13,PCB_R13(%rdi)
 	movq	%r14,PCB_R14(%rdi)
 	movq	%r15,PCB_R15(%rdi)
 
 	/* Control registers; %cr0 stays in %rsi for the restore below. */
 	movq	%cr0,%rsi
 	movq	%rsi,PCB_CR0(%rdi)
 	movq	%cr2,%rax
 	movq	%rax,PCB_CR2(%rdi)
 	movq	%cr3,%rax
 	movq	%rax,PCB_CR3(%rdi)
 	movq	%cr4,%rax
 	movq	%rax,PCB_CR4(%rdi)
 
 	/* Debug registers. */
 	movq	%dr0,%rax
 	movq	%rax,PCB_DR0(%rdi)
 	movq	%dr1,%rax
 	movq	%rax,PCB_DR1(%rdi)
 	movq	%dr2,%rax
 	movq	%rax,PCB_DR2(%rdi)
 	movq	%dr3,%rax
 	movq	%rax,PCB_DR3(%rdi)
 	movq	%dr6,%rax
 	movq	%rax,PCB_DR6(%rdi)
 	movq	%dr7,%rax
 	movq	%rax,PCB_DR7(%rdi)
 
 	/* fs, gs and kernel gs base MSRs, stored as low/high 32-bit halves. */
 	movl	$MSR_FSBASE,%ecx
 	rdmsr
 	movl	%eax,PCB_FSBASE(%rdi)
 	movl	%edx,PCB_FSBASE+4(%rdi)
 	movl	$MSR_GSBASE,%ecx
 	rdmsr
 	movl	%eax,PCB_GSBASE(%rdi)
 	movl	%edx,PCB_GSBASE+4(%rdi)
 	movl	$MSR_KGSBASE,%ecx
 	rdmsr
 	movl	%eax,PCB_KGSBASE(%rdi)
 	movl	%edx,PCB_KGSBASE+4(%rdi)
 
 	/* Descriptor table registers and task register. */
 	sgdt	PCB_GDT(%rdi)
 	sidt	PCB_IDT(%rdi)
 	sldt	PCB_LDT(%rdi)
 	str	PCB_TR(%rdi)
 
 	clts
 	fxsave	PCB_USERFPU(%rdi)
 	movq	%rsi,%cr0	/* The previous %cr0 is saved in %rsi. */
 
 	movl	$1,%eax
 	ret
 END(savectx)
Index: projects/binutils-2.17/sys/amd64/amd64/mp_machdep.c
===================================================================
--- projects/binutils-2.17/sys/amd64/amd64/mp_machdep.c	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/amd64/mp_machdep.c	(revision 215830)
@@ -1,1630 +1,1631 @@
 /*-
  * Copyright (c) 1996, by Steve Passe
  * Copyright (c) 2003, by Peter Wemm
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 #include "opt_kstack_pages.h"
 #include "opt_mp_watchdog.h"
 #include "opt_sched.h"
 #include "opt_smp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 
 #include <x86/apicreg.h>
 #include <machine/clock.h>
 #include <machine/cputypes.h>
 #include <machine/cpufunc.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/mp_watchdog.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <machine/tss.h>
 
 /*
  * NOTE(review): judging by the names these are the BIOS warm-boot vector
  * offset/segment locations (relative to KERNBASE) and the CMOS index/data
  * ports with the reset-status register index and its warm-start value.
  * Their uses are not visible in this part of the file -- confirm.
  */
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 int	mp_naps;		/* # of Applications processors */
 /* -1 until cpu_add() or cpu_mp_start() records the BSP's APIC ID. */
 int	boot_cpu_id = -1;	/* designated BSP */
 
 extern  struct pcpu __pcpu[];
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Free these after use */
 void *bootstacks[MAXCPU];
 
 /* Temporary variables for init_secondary()  */
 char *doublefault_stack;
 char *nmi_stack;
 void *dpcpu;
 
 struct pcb stoppcbs[MAXCPU];
 /* Array of per-CPU pcb pointers; starts out NULL and is allocated elsewhere. */
 struct pcb **susppcbs = NULL;
 
 /* Variables needed for SMP tlb shootdown. */
 vm_offset_t smp_tlb_addr1;
 vm_offset_t smp_tlb_addr2;
 volatile int smp_tlb_wait;
 
 #ifdef COUNT_IPIS
 /* Interrupt counts. */
 static u_long *ipi_preempt_counts[MAXCPU];
 static u_long *ipi_ast_counts[MAXCPU];
 u_long *ipi_invltlb_counts[MAXCPU];
 u_long *ipi_invlrng_counts[MAXCPU];
 u_long *ipi_invlpg_counts[MAXCPU];
 u_long *ipi_invlcache_counts[MAXCPU];
 u_long *ipi_rendezvous_counts[MAXCPU];
 u_long *ipi_lazypmap_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
 #endif
 
 /* Entry points installed as the fast system call targets. */
 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
 
 /*
  * Local data and functions.
  */
 
 static volatile cpumask_t ipi_nmi_pending;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 /*
  * Store data from cpu_add() until later in the boot when we actually setup
  * the APs.
  *
  * The table is indexed by APIC ID.  cpu_present is set by cpu_add();
  * cpu_bsp marks the boot processor; cpu_disabled and cpu_hyperthread are
  * consulted by the topology probes and by cpu_mp_announce().
  */
 struct cpu_info {
 	int	cpu_present:1;
 	int	cpu_bsp:1;
 	int	cpu_disabled:1;
 	int	cpu_hyperthread:1;
 } static cpu_info[MAX_APIC_ID + 1];
 /* Logical CPU ID to APIC ID table; cpu_mp_start() resets entries to -1. */
 int cpu_apic_ids[MAXCPU];
 int apic_cpuids[MAX_APIC_ID + 1];
 
 /* Holds pending bitmap based IPIs per CPU */
 static volatile u_int cpu_ipi_pending[MAXCPU];
 
 /* Page-aligned trampoline address computed by mp_bootaddress(). */
 static u_int boot_address;
 static int cpu_logical;			/* logical cpus per core */
 static int cpu_cores;			/* cores per package */
 
 static void	assign_cpu_ids(void);
 static void	set_interrupt_apic_ids(void);
 static int	start_all_aps(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
 
 static int	hlt_logical_cpus;
 static u_int	hyperthreading_cpus;	/* logical cpus sharing L1 cache */
 static cpumask_t	hyperthreading_cpus_mask;
 static int	hyperthreading_allowed = 1;
 static struct	sysctl_ctx_list logical_cpu_clist;
 /* Size of the AP trampoline (mptramp_end - mptramp_start). */
 static u_int	bootMP_size;
 
 /*
  * Call the memory-range driver's initAP hook on mem_range_softc, if an
  * operations vector providing one is registered.
  */
 static void
 mem_range_AP_init(void)
 {
 	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
 		mem_range_softc.mr_op->initAP(&mem_range_softc);
 }
 
 /*
  * AMD topology probe.  cpu_cores is taken from the AMDID_CMP_CORES field of
  * cpu_procinfo2 (plus one) when AMDID2_CMP is set in amd_feature2, and is 1
  * otherwise; cpu_logical is always set to 1.
  */
 static void
 topo_probe_amd(void)
 {
 
 	/* AMD processors do not support HTT. */
 	cpu_cores = (amd_feature2 & AMDID2_CMP) != 0 ?
 	    (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
 	cpu_logical = 1;
 }
 
 /*
  * Round up to the next power of two, if necessary, and then
  * take log2.
  * Returns -1 if argument is zero.
  *
  * The shift doubles x only when it is not already a power of two, so that
  * fls() - 1 yields the rounded-up logarithm.  This assumes powerof2()
  * evaluates to exactly 1 or 0 (and to 1 for zero) -- TODO confirm against
  * its definition.
  */
 static __inline int
 mask_width(u_int x)
 {
 
 	return (fls(x << (1 - powerof2(x))) - 1);
 }
 
 /*
  * Topology probe based on CPUID leaf 4.  On success it sets cpu_cores,
  * cpu_logical and hyperthreading_cpus.  The early returns leave those
  * variables untouched, so the caller's fallback applies.
  */
 static void
 topo_probe_0x4(void)
 {
 	u_int p[4];
 	int pkg_id_bits;
 	int core_id_bits;
 	int max_cores;
 	int max_logical;
 	int id;
 
 	/* Both zero and one here mean one logical processor per package. */
 	max_logical = (cpu_feature & CPUID_HTT) != 0 ?
 	    (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
 	if (max_logical <= 1)
 		return;
 
 	/*
 	 * Because of uniformity assumption we examine only
 	 * those logical processors that belong to the same
 	 * package as BSP.  Further, we count number of
 	 * logical processors that belong to the same core
 	 * as BSP thus deducing number of threads per core.
 	 */
 	cpuid_count(0x04, 0, p);
 	max_cores = ((p[0] >> 26) & 0x3f) + 1;
 	/* A zero quotient makes mask_width() return -1 and ends the probe. */
 	core_id_bits = mask_width(max_logical/max_cores);
 	if (core_id_bits < 0)
 		return;
 	pkg_id_bits = core_id_bits + mask_width(max_cores);
 
 	for (id = 0; id <= MAX_APIC_ID; id++) {
 		/* Check logical CPU availability. */
 		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
 			continue;
 		/* Check if logical CPU has the same package ID. */
 		if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
 			continue;
 		/* At this point cpu_cores counts logical CPUs in the package. */
 		cpu_cores++;
 		/* Check if logical CPU has the same package and core IDs. */
 		if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
 			cpu_logical++;
 	}
 
 	KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
 	    ("topo_probe_0x4 couldn't find BSP"));
 
 	/* Convert the per-package logical CPU count into a core count. */
 	cpu_cores /= cpu_logical;
 	hyperthreading_cpus = cpu_logical;
 }
 
 /*
  * Topology probe based on CPUID leaf 0xb.  Walks up to three levels and
  * sets cpu_logical from the SMT level and cpu_cores from the core level.
  * Falls back to topo_probe_0x4() when sub-leaf 0 reports nothing in EBX.
  */
 static void
 topo_probe_0xb(void)
 {
 	u_int p[4];
 	int bits;
 	int cnt;
 	int i;
 	int logical;
 	int type;
 	int x;
 
 	/* We only support three levels for now. */
 	for (i = 0; i < 3; i++) {
 		cpuid_count(0x0b, i, p);
 
 		/* Fall back if CPU leaf 11 doesn't really exist. */
 		if (i == 0 && p[1] == 0) {
 			topo_probe_0x4();
 			return;
 		}
 
 		/* Shift width for this level, processor count and level type. */
 		bits = p[0] & 0x1f;
 		logical = p[1] &= 0xffff;	/* also masks p[1] in place */
 		type = (p[2] >> 8) & 0xff;
 		if (type == 0 || logical == 0)
 			break;
 		/*
 		 * Because of uniformity assumption we examine only
 		 * those logical processors that belong to the same
 		 * package as BSP.
 		 */
 		for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
 			if (!cpu_info[x].cpu_present ||
 			    cpu_info[x].cpu_disabled)
 				continue;
 			if (x >> bits == boot_cpu_id >> bits)
 				cnt++;
 		}
 		if (type == CPUID_TYPE_SMT)
 			cpu_logical = cnt;
 		else if (type == CPUID_TYPE_CORE)
 			cpu_cores = cnt;
 	}
 	if (cpu_logical == 0)
 		cpu_logical = 1;
 	/* The core-level count includes threads; divide to get cores. */
 	cpu_cores /= cpu_logical;
 }
 
 /*
  * Both topology discovery code and code that consumes topology
  * information assume top-down uniformity of the topology.
  * That is, all physical packages must be identical and each
  * core in a package must have the same number of threads.
  * Topology information is queried only on BSP, on which this
  * code runs and for which it can query CPUID information.
  * Then topology is extrapolated on all packages using the
  * uniformity assumption.
  *
  * The probe runs only once; later calls return immediately.  On return
  * cpu_cores and cpu_logical are both non-zero.
  */
 static void
 topo_probe(void)
 {
 	static int cpu_topo_probed = 0;
 
 	if (cpu_topo_probed)
 		return;
 
 	logical_cpus_mask = 0;
 	if (mp_ncpus <= 1)
 		cpu_cores = cpu_logical = 1;
 	else if (cpu_vendor_id == CPU_VENDOR_AMD)
 		topo_probe_amd();
 	else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
 		/*
 		 * See Intel(R) 64 Architecture Processor
 		 * Topology Enumeration article for details.
 		 *
 		 * Note that 0x1 <= cpu_high < 4 case should be
 		 * compatible with topo_probe_0x4() logic when
 		 * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
 		 * or it should trigger the fallback otherwise.
 		 */
 		if (cpu_high >= 0xb)
 			topo_probe_0xb();
 		else if (cpu_high >= 0x1)
 			topo_probe_0x4();
 	}
 
 	/*
 	 * Fallback: assume each logical CPU is in separate
 	 * physical package.  That is, no multi-core, no SMT.
 	 */
 	if (cpu_cores == 0 || cpu_logical == 0)
 		cpu_cores = cpu_logical = 1;
 	cpu_topo_probed = 1;
 }
 
 /*
  * Build the scheduler's CPU group topology from the probed cpu_cores and
  * cpu_logical values.  Returns the result of smp_topo_none(),
  * smp_topo_1level() or smp_topo_2level() depending on whether the system
  * has multiple cores per package, multiple threads per core, both or
  * neither.
  */
 struct cpu_group *
 cpu_topo(void)
 {
 	int cg_flags;
 
 	/*
 	 * Determine whether any threading flags are
 	 * necessary.
 	 */
 	topo_probe();
 	if (cpu_logical > 1 && hyperthreading_cpus)
 		cg_flags = CG_FLAG_HTT;
 	else if (cpu_logical > 1)
 		cg_flags = CG_FLAG_SMT;
 	else
 		cg_flags = 0;
 	/* The CPU count must be a whole number of packages. */
 	if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
 		printf("WARNING: Non-uniform processors.\n");
 		printf("WARNING: Using suboptimal topology.\n");
 		return (smp_topo_none());
 	}
 	/*
 	 * No multi-core or hyper-threaded.
 	 */
 	if (cpu_logical * cpu_cores == 1)
 		return (smp_topo_none());
 	/*
 	 * Only HTT no multi-core.
 	 */
 	if (cpu_logical > 1 && cpu_cores == 1)
 		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
 	/*
 	 * Only multi-core no HTT.
 	 */
 	if (cpu_cores > 1 && cpu_logical == 1)
 		return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
 	/*
 	 * Both HTT and multi-core.
 	 */
 	return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
 	    CG_SHARE_L1, cpu_logical, cg_flags));
 }
 
 /*
  * Calculate usable address in base memory for AP trampoline code.
  *
  * basemem is given in kilobytes.  Sets bootMP_size, boot_address and
  * mptramp_pagetables, and returns mptramp_pagetables, which lies three
  * pages below boot_address.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 
 	bootMP_size = mptramp_end - mptramp_start;
 	boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
 	/* The trampoline must fit between boot_address and the top of basemem. */
 	if (((basemem * 1024) - boot_address) < bootMP_size)
 		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */
 	/* 3 levels of page table pages */
 	mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
 
 	return mptramp_pagetables;
 }
 
 /*
  * Record a CPU with the given APIC ID in cpu_info[].  A non-zero boot_cpu
  * marks it as the BSP and stores its ID in boot_cpu_id.  mp_ncpus and
  * mp_maxid are advanced as long as mp_ncpus is below MAXCPU.
  */
 void
 cpu_add(u_int apic_id, char boot_cpu)
 {
 
 	if (apic_id > MAX_APIC_ID) {
 		panic("SMP: APIC ID %d too high", apic_id);
 		/* NOTE(review): presumably unreachable if panic() never returns. */
 		return;
 	}
 	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
 	    apic_id));
 	cpu_info[apic_id].cpu_present = 1;
 	if (boot_cpu) {
 		KASSERT(boot_cpu_id == -1,
 		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
 		    boot_cpu_id));
 		boot_cpu_id = apic_id;
 		cpu_info[apic_id].cpu_bsp = 1;
 	}
 	/* CPUs beyond MAXCPU stay marked present but are not counted. */
 	if (mp_ncpus < MAXCPU) {
 		mp_ncpus++;
 		mp_maxid = mp_ncpus - 1;
 	}
 	if (bootverbose)
 		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
 		    "AP");
 }
 
 /*
  * Sanity-check mp_maxid against mp_ncpus.  The only assignment made here is
  * forcing mp_maxid to 0 when exactly one CPU was found.
  */
 void
 cpu_mp_setmaxid(void)
 {
 
 	/*
 	 * mp_maxid should be already set by calls to cpu_add().
 	 * Just sanity check its value here.
 	 */
 	if (mp_ncpus == 0)
 		KASSERT(mp_maxid == 0,
 		    ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
 	else if (mp_ncpus == 1)
 		mp_maxid = 0;
 	else
 		KASSERT(mp_maxid >= mp_ncpus - 1,
 		    ("%s: counters out of sync: max %d, count %d", __func__,
 			mp_maxid, mp_ncpus));
 }
 
 /*
  * Report whether this is a multiprocessor system.  Returns 1 when at least
  * two CPUs were found and 0 otherwise.  Always sets all_cpus to 1, and sets
  * mp_ncpus to 1 when no CPU was found.
  */
 int
 cpu_mp_probe(void)
 {
 
 	/*
 	 * Always record BSP in CPU map so that the mbuf init code works
 	 * correctly.
 	 */
 	all_cpus = 1;
 	if (mp_ncpus == 0) {
 		/*
 		 * No CPUs were found, so this must be a UP system.  Setup
 		 * the variables to represent a system with a single CPU
 		 * with an id of 0.
 		 */
 		mp_ncpus = 1;
 		return (0);
 	}
 
 	/* At least one CPU was found. */
 	if (mp_ncpus == 1) {
 		/*
 		 * One CPU was found, so this must be a UP system with
 		 * an I/O APIC.
 		 */
 		mp_maxid = 0;
 		return (0);
 	}
 
 	/* At least two CPUs were found. */
 	return (1);
 }
 
 /*
  * Initialize the IPI handlers and start up the AP's.
  *
  * Order of work: reset the per-CPU tables, install the IPI vectors in the
  * IDT, settle boot_cpu_id, probe the topology, assign CPU IDs, start the
  * APs and finally set the interrupt APIC IDs.
  */
 void
 cpu_mp_start(void)
 {
 	int i;
 
 	/* Initialize the logical ID to APIC ID table. */
 	for (i = 0; i < MAXCPU; i++) {
 		cpu_apic_ids[i] = -1;
 		cpu_ipi_pending[i] = 0;
 	}
 
 	/* Install an inter-CPU IPI for TLB invalidation */
 	setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
 	setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
 	setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Install an inter-CPU IPI for cache invalidation. */
 	setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Install generic inter-CPU IPI handler */
 	setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
 	       SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Install an inter-CPU IPI for CPU stop/restart */
 	setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Install an inter-CPU IPI for CPU suspend/resume */
 	setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Set boot_cpu_id if needed. */
 	if (boot_cpu_id == -1) {
 		/* No cpu_add() call named a BSP; use the running CPU. */
 		boot_cpu_id = PCPU_GET(apic_id);
 		cpu_info[boot_cpu_id].cpu_bsp = 1;
 	} else
 		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
 		    ("BSP's APIC ID doesn't match boot_cpu_id"));
 
 	/* Probe logical/physical core configuration. */
 	topo_probe();
 
 	assign_cpu_ids();
 
 	/* Start each Application Processor */
 	start_all_aps();
 
 	set_interrupt_apic_ids();
 }
 
 
 /*
  * Print various information about the SMP system hardware and setup.
  *
  * Output is a one-line package/core/thread summary, then one line per
  * active CPU (BSP first), then one line per present but disabled CPU.
  */
 void
 cpu_mp_announce(void)
 {
 	const char *hyperthread;
 	int i;
 
 	printf("FreeBSD/SMP: %d package(s) x %d core(s)",
 	    mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
 	if (hyperthreading_cpus > 1)
 	    printf(" x %d HTT threads", cpu_logical);
 	else if (cpu_logical > 1)
 	    printf(" x %d SMT threads", cpu_logical);
 	printf("\n");
 
 	/* List active CPUs first. */
 	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
 	/* Logical IDs 1..mp_ncpus-1 are looked up through cpu_apic_ids[]. */
 	for (i = 1; i < mp_ncpus; i++) {
 		if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
 			hyperthread = "/HT";
 		else
 			hyperthread = "";
 		printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
 		    cpu_apic_ids[i]);
 	}
 
 	/* List disabled CPUs last. */
 	/* This loop walks cpu_info[] directly, so i is an APIC ID here. */
 	for (i = 0; i <= MAX_APIC_ID; i++) {
 		if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
 			continue;
 		if (cpu_info[i].cpu_hyperthread)
 			hyperthread = "/HT";
 		else
 			hyperthread = "";
 		printf("  cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
 		    i);
 	}
 }
 
 /*
  * AP CPU's call this to initialize themselves.
  */
 void
 init_secondary(void)
 {
 	struct pcpu *pc;
 	struct nmi_pcpu *np;
 	u_int64_t msr, cr0;
 	int cpu, gsel_tss, x;
 	struct region_descriptor ap_gdt;
 
 	/* Set by the startup code for us to use */
 	cpu = bootAP;
 
 	/* Init tss */
 	common_tss[cpu] = common_tss[0];
 	common_tss[cpu].tss_rsp0 = 0;   /* not used until after switch */
 	common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
 	    IOPAGES * PAGE_SIZE;
 	common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
 
 	/* The NMI stack runs on IST2. */
 	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
 	common_tss[cpu].tss_ist2 = (long) np;
 
 	/* Prepare private GDT */
 	gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
 	for (x = 0; x < NGDT; x++) {
 		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
 		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
 			ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
 	}
 	ssdtosyssd(&gdt_segs[GPROC0_SEL],
 	    (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
 	ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	ap_gdt.rd_base =  (long) &gdt[NGDT * cpu];
 	lgdt(&ap_gdt);			/* does magic intra-segment return */
 
 	/* Get per-cpu data */
 	pc = &__pcpu[cpu];
 
 	/* prime data page for it to use */
 	pcpu_init(pc, cpu, sizeof(struct pcpu));
 	dpcpu_init(dpcpu, cpu);
 	pc->pc_apic_id = cpu_apic_ids[cpu];
 	pc->pc_prvspace = pc;
 	pc->pc_curthread = 0;
 	pc->pc_tssp = &common_tss[cpu];
 	pc->pc_commontssp = &common_tss[cpu];
 	pc->pc_rsp0 = 0;
 	pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
 	    GPROC0_SEL];
 	pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
 	pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
 	pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
 	    GUSERLDT_SEL];
 
 	/* Save the per-cpu pointer for use by the NMI handler. */
 	np->np_pcpu = (register_t) pc;
 
 	wrmsr(MSR_FSBASE, 0);		/* User value */
 	wrmsr(MSR_GSBASE, (u_int64_t)pc);
 	wrmsr(MSR_KGSBASE, (u_int64_t)pc);	/* XXX User value while we're in the kernel */
 
 	lidt(&r_idt);
 
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	ltr(gsel_tss);
 
 	/*
 	 * Set to a known state:
 	 * Set by mpboot.s: CR0_PG, CR0_PE
 	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
 	 */
 	cr0 = rcr0();
 	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
 	load_cr0(cr0);
 
 	/* Set up the fast syscall stuff */
 	msr = rdmsr(MSR_EFER) | EFER_SCE;
 	wrmsr(MSR_EFER, msr);
 	wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
 	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
 	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
 	      ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
 	wrmsr(MSR_STAR, msr);
 	wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
 
 	/* Disable local APIC just to be sure. */
 	lapic_disable();
 
 	/* signal our startup to the BSP. */
 	mp_naps++;
 
 	/* Spin until the BSP releases the AP's. */
 	while (!aps_ready)
 		ia32_pause();
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up SSE/NX registers */
 	initializecpu();
 
 	/* set up FPU state on the AP */
 	fpuinit();
 
 	/* A quick check from sanity claus */
 	if (PCPU_GET(apic_id) != lapic_id()) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: actual apic_id = %d\n", lapic_id());
 		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Initialize curthread. */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	PCPU_SET(curthread, PCPU_GET(idlethread));
 
 	mca_init();
 
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* Init local apic for irq's */
 	lapic_setup(1);
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	smp_cpus++;
 
 	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* Determine if we are a logical CPU. */
 	/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
 	if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
 		logical_cpus_mask |= PCPU_GET(cpumask);
 	
 	/* Determine if we are a hyperthread. */
 	if (hyperthreading_cpus > 1 &&
 	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
 		hyperthreading_cpus_mask |= PCPU_GET(cpumask);
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
 
 	if (bootverbose)
 		lapic_dump("AP");
 
 	if (smp_cpus == mp_ncpus) {
 		/* enable IPI's, tlb shootdown, freezes etc */
 		atomic_store_rel_int(&smp_started, 1);
 		smp_active = 1;	 /* historic */
 	}
 
 	/*
 	 * Enable global pages TLB extension
 	 * This also implicitly flushes the TLB 
 	 */
 
 	load_cr4(rcr4() | CR4_PGE);
 	load_ds(_udatasel);
 	load_es(_udatasel);
 	load_fs(_ufssel);
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* Wait until all the AP's are up. */
 	while (smp_started == 0)
 		ia32_pause();
 
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 	sched_throw(NULL);
 
 	panic("scheduler returned us to %s", __func__);
 	/* NOTREACHED */
 }
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * Register with the interrupt code every CPU that is allowed to service
  * IRQs.  A CPU is excluded simply by not calling intr_add_cpu() for it;
  * here that covers unused slots, the BSP, disabled CPUs and hyperthreads.
  * The BSP is left out on purpose: the I/O APIC code tells itself about
  * the BSP internally so that UP kernels and UP machines keep working.
  */
 static void
 set_interrupt_apic_ids(void)
 {
 	u_int cpu, id;
 
 	for (cpu = 0; cpu < MAXCPU; cpu++) {
 		id = cpu_apic_ids[cpu];
 
 		/*
 		 * The -1 test has to come first: it marks a slot with no
 		 * APIC ID, which must not be used to index cpu_info[].
 		 */
 		if (id == -1 || cpu_info[id].cpu_bsp ||
 		    cpu_info[id].cpu_disabled)
 			continue;
 
 		/* Don't let hyperthreads service interrupts. */
 		if (hyperthreading_cpus > 1 && id % hyperthreading_cpus != 0)
 			continue;
 
 		intr_add_cpu(cpu);
 	}
 }
 
 /*
  * Assign logical CPU IDs to local APICs.
  *
  * The first pass flags hyperthreads and marks present, non-BSP CPUs that
  * a tunable has disabled.  The second pass hands out consecutive CPU IDs,
  * filling cpu_apic_ids[] and apic_cpuids[] and leaving the final count in
  * mp_ncpus.
  */
 static void
 assign_cpu_ids(void)
 {
 	u_int i;
 
 	TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
 	    &hyperthreading_allowed);
 
 	/* Check for explicitly disabled CPUs. */
 	for (i = 0; i <= MAX_APIC_ID; i++) {
 		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
 			continue;
 
 		if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
 			cpu_info[i].cpu_hyperthread = 1;
 #if defined(SCHED_ULE)
 			/*
 			 * Don't use HT CPU if it has been disabled by a
 			 * tunable.
 			 */
 			if (hyperthreading_allowed == 0) {
 				cpu_info[i].cpu_disabled = 1;
 				continue;
 			}
 #endif
 		}
 
 		/* Don't use this CPU if it has been disabled by a tunable. */
 		if (resource_disabled("lapic", i)) {
 			cpu_info[i].cpu_disabled = 1;
 			continue;
 		}
 	}
 
 	/*
 	 * Assign CPU IDs to local APIC IDs and disable any CPUs
 	 * beyond MAXCPU.  CPU 0 is always assigned to the BSP.
 	 *
 	 * To minimize confusion for userland, we attempt to number
 	 * CPUs such that all threads and cores in a package are
 	 * grouped together.  For now we assume that the BSP is always
 	 * the first thread in a package and just start adding APs
 	 * starting with the BSP's APIC ID.
 	 *
 	 * The walk is circular over [0, MAX_APIC_ID].  The starting index
 	 * must wrap as well: if the BSP owns MAX_APIC_ID, beginning at
 	 * boot_cpu_id + 1 would index past the range scanned above and the
 	 * wrap test in the step expression would never match.
 	 */
 	mp_ncpus = 1;
 	cpu_apic_ids[0] = boot_cpu_id;
 	apic_cpuids[boot_cpu_id] = 0;
 	for (i = (boot_cpu_id == MAX_APIC_ID) ? 0 : boot_cpu_id + 1;
 	    i != boot_cpu_id; i = (i == MAX_APIC_ID) ? 0 : i + 1) {
 		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
 		    cpu_info[i].cpu_disabled)
 			continue;
 
 		if (mp_ncpus < MAXCPU) {
 			cpu_apic_ids[mp_ncpus] = i;
 			apic_cpuids[i] = mp_ncpus;
 			mp_ncpus++;
 		} else
 			cpu_info[i].cpu_disabled = 1;
 	}
 	KASSERT(mp_maxid >= mp_ncpus - 1,
 	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
 	    mp_ncpus));
 }
 
 /*
  * Start each AP in our list.
  *
  * Copies the trampoline to boot_address, builds the page tables placed
  * below it, points the BIOS warm-start vector at the trampoline, and then
  * starts the CPUs 1 .. mp_ncpus - 1 one at a time through start_ap().
  * Panics if an AP fails to start.  The warm-start vector and the CMOS
  * reset code are put back before returning.
  *
  * Returns mp_naps, the number of APs actually started.
  */
 static int
 start_all_aps(void)
 {
 	vm_offset_t va = boot_address + KERNBASE;
 	u_int64_t *pt4, *pt3, *pt2;
 	u_int32_t mpbioswarmvec;
 	int apic_id, cpu, i;
 	u_char mpbiosreason;
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	/* install the AP 1st level boot code */
 	pmap_kenter(va, boot_address);
 	pmap_invalidate_page(kernel_pmap, va);
 	bcopy(mptramp_start, (void *)va, bootMP_size);
 
 	/* Locate the page tables, they'll be below the trampoline */
 	pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
 	pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
 	pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
 
 	/* Create the initial 1GB replicated page tables */
 	for (i = 0; i < 512; i++) {
 		/* Each slot of the level 4 pages points to the same level 3 page */
 		pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
 		pt4[i] |= PG_V | PG_RW | PG_U;
 
 		/* Each slot of the level 3 pages points to the same level 2 page */
 		pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
 		pt3[i] |= PG_V | PG_RW | PG_U;
 
 		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
 		pt2[i] = i * (2 * 1024 * 1024);
 		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
 	}
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 
 	/* setup a vector to our boot code */
 	*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 	*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 
 	/* start each AP */
 	for (cpu = 1; cpu < mp_ncpus; cpu++) {
 		apic_id = cpu_apic_ids[cpu];
 
 		/* allocate and set up an idle stack data page */
 		bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
 		doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
 		nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
 		dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE);
 
 		/* hand the AP the top of its boot stack and its CPU ID */
 		bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
 		bootAP = cpu;
 
 		/* attempt to start the Application Processor */
 		if (!start_ap(apic_id)) {
 			/* restore the warmstart vector */
 			*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
 			panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
 		}
 
 		/*
 		 * Record the AP in the CPU map.  The shift is done on an
 		 * unsigned constant: shifting a signed 1 into bit 31 is
 		 * undefined behavior.
 		 */
 		all_cpus |= (1U << cpu);
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
 
 	/* restore the warmstart vector */
 	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
 
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 
 	/* number of APs actually started */
 	return mp_naps;
 }
 
 
 /*
  * Start the AP (application processor) whose local APIC ID is 'apic_id'
  * using an INIT / STARTUP / STARTUP IPI sequence.  The sequence is long
  * because different hardware reacts differently to each step; it isn't
  * pretty, but it seems to work.
  *
  * Returns 1 once mp_naps has grown past its value on entry, or 0 if that
  * has not happened after roughly 5 seconds of polling.
  */
 static int
 start_ap(int apic_id)
 {
 	int naps_before, vector, waited_ms;
 
 	/* The STARTUP vector is the page number of the boot code. */
 	vector = (boot_address >> 12) & 0xff;
 
 	/* Baseline used below to detect that the AP has checked in. */
 	naps_before = mp_naps;
 
 	/*
 	 * Step 1: INIT/RESET IPI.  It may reset and run the target CPU
 	 * right away, it may be latched (P5 bug) leaving the CPU waiting
 	 * for a STARTUP IPI, or it may be ignored.
 	 */
 
 	/* INIT IPI: assert RESET */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
 
 	/* wait for pending status end */
 	lapic_ipi_wait(-1);
 
 	/* INIT IPI: deassert RESET */
 	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
 
 	/* wait ~10mS, then for pending status end */
 	DELAY(10000);
 	lapic_ipi_wait(-1);
 
 	/*
 	 * Step 2: first STARTUP IPI.  If the INIT IPI is still latched
 	 * (P5 bug) this STARTUP terminates immediately and the INIT
 	 * continues; if the INIT already ran, or was ignored, this
 	 * STARTUP runs.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
 	lapic_ipi_wait(-1);
 	DELAY(200);		/* wait ~200uS */
 
 	/*
 	 * Step 3: second STARTUP IPI.  It runs if the first one was
 	 * cancelled by a latched INIT IPI; otherwise it is ignored, since
 	 * only ONE STARTUP IPI is recognized after a hardware RESET or an
 	 * INIT IPI.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
 	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
 	lapic_ipi_wait(-1);
 	DELAY(200);		/* wait ~200uS */
 
 	/* Poll once per millisecond, for up to 5 seconds. */
 	waited_ms = 0;
 	while (waited_ms < 5000) {
 		if (mp_naps > naps_before)
 			return (1);	/* SUCCESS */
 		DELAY(1000);
 		waited_ms++;
 	}
 	return (0);		/* FAILURE */
 }
 
 #ifdef COUNT_XINVLTLB_HITS
 /*
  * Optional TLB shootdown statistics, exported under the debug.xhits
  * sysctl node.
  */
 
 /*
  * Per-CPU arrays (MAXCPU entries each).
  * NOTE(review): nothing in this part of the file writes them; presumably
  * the receiving side of the invalidation IPIs does -- confirm.
  */
 u_int xhits_gbl[MAXCPU];
 u_int xhits_pg[MAXCPU];
 u_int xhits_rng[MAXCPU];
 SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
     sizeof(xhits_gbl), "IU", "");
 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
     sizeof(xhits_pg), "IU", "");
 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
     sizeof(xhits_rng), "IU", "");
 
 /*
  * Sender-side counters bumped by smp_invltlb(), smp_invlpg() and
  * smp_invlpg_range(); ipi_range_size accumulates the number of pages
  * covered by range requests.
  */
 u_int ipi_global;
 u_int ipi_page;
 u_int ipi_range;
 u_int ipi_range_size;
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
     0, "");
 
 /*
  * The same counters for the smp_masked_*() variants, which target a
  * caller-supplied CPU mask.
  */
 u_int ipi_masked_global;
 u_int ipi_masked_page;
 u_int ipi_masked_range;
 u_int ipi_masked_range_size;
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
     &ipi_masked_global, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
     &ipi_masked_page, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
     &ipi_masked_range, 0, "");
 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
     &ipi_masked_range_size, 0, "");
 #endif /* COUNT_XINVLTLB_HITS */
 
 /*
  * Send 'vector' to every other CPU and spin until smp_tlb_wait has
  * reached the number of other CPUs.  addr1/addr2 are published in
  * smp_tlb_addr1/smp_tlb_addr2 before the IPI goes out.  Does nothing on
  * a single-CPU system; panics if called with interrupts disabled.
  */
 static void
 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 {
 	u_int nothers;
 
 	/* We never shoot down ourselves. */
 	nothers = mp_ncpus - 1;
 	if (nothers == 0)
 		return;		/* no other cpus */
 
 	if ((read_rflags() & PSL_I) == 0)
 		panic("%s: interrupts disabled", __func__);
 
 	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	/* Reset the acknowledgement counter before sending the IPI. */
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	ipi_all_but_self(vector);
 	while (smp_tlb_wait < nothers)
 		ia32_pause();
 	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 /*
  * Like smp_tlb_shootdown(), but limited to the CPUs in 'mask'.  A mask of
  * all ones means "every other CPU" and is sent as a broadcast; otherwise
  * the current CPU is removed from the mask and the IPI goes to whatever
  * remains.  Returns without doing anything when no target is left;
  * panics if called with interrupts disabled.
  */
 static void
 smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 {
 	int broadcast, nothers, ntargets;
 
 	nothers = mp_ncpus - 1;
 	broadcast = (mask == (cpumask_t)-1);
 	if (broadcast) {
 		ntargets = nothers;
 	} else {
 		mask &= ~PCPU_GET(cpumask);
 		if (mask == 0)
 			return;
 		ntargets = bitcount32(mask);
 		if (ntargets > nothers) {
 			/* XXX this should be a panic offence */
 			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
 			    ntargets, nothers);
 			ntargets = nothers;
 		}
 	}
 	/*
 	 * XXX for the masked case this should be a panic, it is implied
 	 * by the mask == 0 test above.
 	 */
 	if (ntargets < 1)
 		return;
 
 	if ((read_rflags() & PSL_I) == 0)
 		panic("%s: interrupts disabled", __func__);
 
 	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	if (broadcast)
 		ipi_all_but_self(vector);
 	else
 		ipi_selected(mask, vector);
 	/* Spin until every targeted CPU has been counted. */
 	while (smp_tlb_wait < ntargets)
 		ia32_pause();
 	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 /*
  * Deliver 'ipi' to the CPU with logical ID 'cpu'.
  *
  * Bitmapped IPIs share the single IPI_BITMAP_VECTOR: the request is
  * recorded as a bit in cpu_ipi_pending[cpu], and the interrupt itself is
  * only sent when no bit was pending before ours was added.
  */
 static void
 ipi_send_cpu(int cpu, u_int ipi)
 {
 	u_int bit, prev;
 
 	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
 
 	if (IPI_IS_BITMAPED(ipi)) {
 		bit = 1 << ipi;
 		ipi = IPI_BITMAP_VECTOR;
 
 		/* Atomically OR our bit in, retrying if we lose a race. */
 		for (;;) {
 			prev = cpu_ipi_pending[cpu];
 			if (atomic_cmpset_int(&cpu_ipi_pending[cpu], prev,
 			    prev | bit))
 				break;
 		}
 
 		/* Something was already pending: no new interrupt needed. */
 		if (prev != 0)
 			return;
 	}
 	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 }
 
 /*
  * Send IPI_INVLCACHE to all other CPUs.  A no-op until smp_started is set.
  */
 void
 smp_cache_flush(void)
 {
 
 	if (!smp_started)
 		return;
 	smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
 }
 
 /*
  * Send IPI_INVLTLB to all other CPUs.  A no-op until smp_started is set.
  */
 void
 smp_invltlb(void)
 {
 
 	if (!smp_started)
 		return;
 	smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_global++;
 #endif
 }
 
 /*
  * Send IPI_INVLPG for 'addr' to all other CPUs.  A no-op until
  * smp_started is set.
  */
 void
 smp_invlpg(vm_offset_t addr)
 {
 
 	if (!smp_started)
 		return;
 	smp_tlb_shootdown(IPI_INVLPG, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_page++;
 #endif
 }
 
 /*
  * Send IPI_INVLRNG for the range addr1 .. addr2 to all other CPUs.  A
  * no-op until smp_started is set.
  */
 void
 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (!smp_started)
 		return;
 	smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_range++;
 	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 #endif
 }
 
 /*
  * Send IPI_INVLTLB to the CPUs in 'mask'.  A no-op until smp_started is
  * set.
  */
 void
 smp_masked_invltlb(cpumask_t mask)
 {
 
 	if (!smp_started)
 		return;
 	smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_masked_global++;
 #endif
 }
 
 /*
  * Send IPI_INVLPG for 'addr' to the CPUs in 'mask'.  A no-op until
  * smp_started is set.
  */
 void
 smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
 {
 
 	if (!smp_started)
 		return;
 	smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_masked_page++;
 #endif
 }
 
 /*
  * Send IPI_INVLRNG for the range addr1 .. addr2 to the CPUs in 'mask'.
  * A no-op until smp_started is set.
  */
 void
 smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (!smp_started)
 		return;
 	smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_masked_range++;
 	ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
 #endif
 }
 
 /*
  * Handler for the shared bitmap IPI vector.  Atomically fetches and
  * clears this CPU's pending bitmap, then services each request found in
  * it: IPI_PREEMPT, IPI_AST (nothing to do beyond optional counting) and
  * IPI_HARDCLOCK.  Runs inside a critical section with 'frame' installed
  * as the thread's interrupt frame; the previous frame is restored on exit.
  */
 void
 ipi_bitmap_handler(struct trapframe frame)
 {
 	struct trapframe *saved_frame;
 	struct thread *td;
 	u_int pending;
 	int cpu;
 
 	cpu = PCPU_GET(cpuid);
 
 	critical_enter();
 	td = curthread;
 	td->td_intr_nesting_level++;
 	saved_frame = td->td_intr_frame;
 	td->td_intr_frame = &frame;
 
 	pending = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
 
 	if ((pending & (1 << IPI_PREEMPT)) != 0) {
 #ifdef COUNT_IPIS
 		(*ipi_preempt_counts[cpu])++;
 #endif
 		sched_preempt(td);
 	}
 
 #ifdef COUNT_IPIS
 	/* Nothing to do for AST other than counting it. */
 	if ((pending & (1 << IPI_AST)) != 0)
 		(*ipi_ast_counts[cpu])++;
 #endif
 
 	if ((pending & (1 << IPI_HARDCLOCK)) != 0) {
 #ifdef COUNT_IPIS
 		(*ipi_hardclock_counts[cpu])++;
 #endif
 		hardclockintr();
 	}
 
 	td->td_intr_frame = saved_frame;
 	td->td_intr_nesting_level--;
 	critical_exit();
 }
 
 /*
  * Send an IPI to a set of cpus.
  *
  * 'cpus' is a bitmask of logical CPU IDs; the IPI is sent to each set
  * bit in turn, lowest first, through ipi_send_cpu().
  */
 void
 ipi_selected(cpumask_t cpus, u_int ipi)
 {
 	int cpu;
 
 	/*
 	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
 	 * of help in order to understand what is the source.
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
 		atomic_set_int(&ipi_nmi_pending, cpus);
 
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	while ((cpu = ffs(cpus)) != 0) {
 		cpu--;
 		/*
 		 * Clear the bit with a shift done in the mask's own type;
 		 * shifting a signed 1 into bit 31 is undefined behavior.
 		 */
 		cpus &= ~((cpumask_t)1 << cpu);
 		ipi_send_cpu(cpu, ipi);
 	}
 }
 
 /*
  * Send an IPI to a specific CPU, identified by its logical CPU ID.
  */
 void
 ipi_cpu(int cpu, u_int ipi)
 {
 
 	/*
 	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
 	 * of help in order to understand what is the source.
 	 * Set the mask of receiving CPUs for this purpose.
 	 *
 	 * The mask bit is built from an unsigned constant: shifting a
 	 * signed 1 into bit 31 is undefined behavior.
 	 */
 	if (ipi == IPI_STOP_HARD)
 		atomic_set_int(&ipi_nmi_pending, 1U << cpu);
 
 	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 	ipi_send_cpu(cpu, ipi);
 }
 
 /*
  * Send an IPI to every CPU except the caller.
  *
  * Bitmapped IPIs need per-CPU bookkeeping, so they are handed to
  * ipi_selected() with this CPU's other_cpus mask.  Everything else goes
  * out as a single request with the APIC_IPI_DEST_OTHERS destination.
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	if (IPI_IS_BITMAPED(ipi)) {
 		ipi_selected(PCPU_GET(other_cpus), ipi);
 	} else {
 		/*
 		 * IPI_STOP_HARD is delivered as an NMI, which carries no
 		 * indication of its source.  Record the receiving CPUs so
 		 * that the trap handler can recognize it.
 		 */
 		if (ipi == IPI_STOP_HARD)
 			atomic_set_int(&ipi_nmi_pending,
 			    PCPU_GET(other_cpus));
 
 		CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 		lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 	}
 }
 
 /*
  * Decide whether an NMI taken on this CPU is an IPI_STOP_HARD request
  * and, if it is, handle it.
  *
  * Returns 1 when this CPU's bit is not set in ipi_nmi_pending (the NMI
  * is not ours); otherwise clears the bit, runs cpustop_handler() and
  * returns 0.
  */
 int
 ipi_nmi_handler(void)
 {
 	cpumask_t cpumask;
 
 	/*
 	 * As long as there is not a simple way to know about a NMI's
 	 * source, if the bitmask for the current CPU is present in
 	 * the global pending bitword an IPI_STOP_HARD has been issued
 	 * and should be handled.
 	 */
 	cpumask = PCPU_GET(cpumask);
 	if ((ipi_nmi_pending & cpumask) == 0)
 		return (1);
 
 	atomic_clear_int(&ipi_nmi_pending, cpumask);
 	cpustop_handler();
 	return (0);
 }
      
 /*
  * Handle an IPI_STOP by saving our current context and spinning until we
  * are resumed.
  */
 void
 cpustop_handler(void)
 {
 	cpumask_t cpumask;
 	u_int cpu;
 
 	cpu = PCPU_GET(cpuid);
 	cpumask = PCPU_GET(cpumask);
 
 	savectx(&stoppcbs[cpu]);
 
 	/* Indicate that we are stopped */
 	atomic_set_int(&stopped_cpus, cpumask);
 
 	/* Wait for restart */
 	while (!(started_cpus & cpumask))
 	    ia32_pause();
 
 	atomic_clear_int(&started_cpus, cpumask);
 	atomic_clear_int(&stopped_cpus, cpumask);
 
 	if (cpu == 0 && cpustop_restartfunc != NULL) {
 		cpustop_restartfunc();
 		cpustop_restartfunc = NULL;
 	}
 }
 
 /*
  * Handle an IPI_SUSPEND by saving our current context and spinning until we
  * are resumed.
  *
  * Interrupts are disabled and CR3 is captured on entry; both are restored
  * at the end, after mca_resume() and lapic_setup(0) have run.
  */
 void
 cpususpend_handler(void)
 {
 	cpumask_t cpumask;
 	register_t cr3, rf;
 	u_int cpu;
 
 	cpu = PCPU_GET(cpuid);
 	cpumask = PCPU_GET(cpumask);
 
 	rf = intr_disable();
 	cr3 = rcr3();
 
 	/*
 	 * NOTE(review): savectx() appears to return non-zero when the context
 	 * is first saved and zero when execution later resumes from that
 	 * saved context -- confirm against its implementation.
 	 */
 	if (savectx(susppcbs[cpu])) {
 		/* Write back and invalidate caches, then report as stopped. */
 		wbinvd();
 		atomic_set_int(&stopped_cpus, cpumask);
 	} else {
 		/* Reprogram the PAT and reset the switch time accounting. */
+		pmap_init_pat();
 		PCPU_SET(switchtime, 0);
 		PCPU_SET(switchticks, ticks);
 	}
 
 	/* Wait for resume */
 	while (!(started_cpus & cpumask))
 		ia32_pause();
 
 	/* Acknowledge the restart and drop out of the stopped set. */
 	atomic_clear_int(&started_cpus, cpumask);
 	atomic_clear_int(&stopped_cpus, cpumask);
 
 	/* Restore CR3 and enable interrupts */
 	load_cr3(cr3);
 	mca_resume();
 	lapic_setup(0);
 	intr_restore(rf);
 }
 
 /*
  * SYSINIT hook run once the rest of the system is up: let the APs out of
  * the pen by setting aps_ready, then wait until smp_started is set.
  * Nothing to do on a single-CPU system.
  */
 static void
 release_aps(void *dummy __unused)
 {
 
 	if (mp_ncpus != 1) {
 		atomic_store_rel_int(&aps_ready, 1);
 		while (!smp_started)
 			ia32_pause();
 	}
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 static int
 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 {
 	cpumask_t mask;
 	int error;
 
 	mask = hlt_cpus_mask;
 	error = sysctl_handle_int(oidp, &mask, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	if (logical_cpus_mask != 0 &&
 	    (mask & logical_cpus_mask) == logical_cpus_mask)
 		hlt_logical_cpus = 1;
 	else
 		hlt_logical_cpus = 0;
 
 	if (! hyperthreading_allowed)
 		mask |= hyperthreading_cpus_mask;
 
 	if ((mask & all_cpus) == all_cpus)
 		mask &= ~(1<<0);
 	hlt_cpus_mask = mask;
 	return (error);
 }
 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
     0, 0, sysctl_hlt_cpus, "IU",
     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 
 static int
 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 {
 	int disable, error;
 
 	disable = hlt_logical_cpus;
 	error = sysctl_handle_int(oidp, &disable, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	if (disable)
 		hlt_cpus_mask |= logical_cpus_mask;
 	else
 		hlt_cpus_mask &= ~logical_cpus_mask;
 
 	if (! hyperthreading_allowed)
 		hlt_cpus_mask |= hyperthreading_cpus_mask;
 
 	if ((hlt_cpus_mask & all_cpus) == all_cpus)
 		hlt_cpus_mask &= ~(1<<0);
 
 	hlt_logical_cpus = disable;
 	return (error);
 }
 
 /*
  * Handler for machdep.hyperthreading_allowed.
  *
  * With SCHED_ULE the setting cannot be changed at run-time: writing a
  * different value fails with ENOTSUP and writing the current value is
  * accepted as a no-op.  Otherwise a write adds the hyperthreads to, or
  * removes them from, hlt_cpus_mask, recomputes hlt_logical_cpus, keeps
  * CPU 0 running if every CPU would be halted, and records the new
  * setting.
  */
 static int
 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 {
 	int allowed, error;
 
 	allowed = hyperthreading_allowed;
 	error = sysctl_handle_int(oidp, &allowed, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 #ifdef SCHED_ULE
 	/*
 	 * SCHED_ULE doesn't allow enabling/disabling HT cores at
 	 * run-time.
 	 */
 	if (allowed != hyperthreading_allowed)
 		return (ENOTSUP);
 	return (error);
 #else
 	/*
 	 * This part is compiled only without SCHED_ULE; with it, the
 	 * return above would leave everything below unreachable.
 	 */
 	if (allowed)
 		hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 	else
 		hlt_cpus_mask |= hyperthreading_cpus_mask;
 
 	if (logical_cpus_mask != 0 &&
 	    (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 		hlt_logical_cpus = 1;
 	else
 		hlt_logical_cpus = 0;
 
 	/* Never halt everything: keep CPU 0 running. */
 	if ((hlt_cpus_mask & all_cpus) == all_cpus)
 		hlt_cpus_mask &= ~(1<<0);
 
 	hyperthreading_allowed = allowed;
 	return (error);
 #endif
 }
 
 /*
  * SYSINIT hook: when logical CPUs were detected, fetch the
  * machdep.hlt_logical_cpus tunable, create the related machdep sysctls
  * and seed hlt_cpus_mask from the current settings.  Does nothing when
  * logical_cpus_mask is empty.
  */
 static void
 cpu_hlt_setup(void *dummy __unused)
 {
 
 	if (logical_cpus_mask == 0)
 		return;
 
 	TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 	    &hlt_logical_cpus);
 	sysctl_ctx_init(&logical_cpu_clist);
 	SYSCTL_ADD_PROC(&logical_cpu_clist,
 	    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 	    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 	    sysctl_hlt_logical_cpus, "IU", "");
 	SYSCTL_ADD_UINT(&logical_cpu_clist,
 	    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 	    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 	    &logical_cpus_mask, 0, "");
 
 	if (hlt_logical_cpus)
 		hlt_cpus_mask |= logical_cpus_mask;
 
 	/*
 	 * If necessary for security purposes, force hyperthreading off,
 	 * regardless of the value of hlt_logical_cpus.
 	 */
 	if (hyperthreading_cpus_mask == 0)
 		return;
 
 	SYSCTL_ADD_PROC(&logical_cpu_clist,
 	    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 	    "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 	    0, 0, sysctl_hyperthreading_allowed, "IU", "");
 	if (hyperthreading_allowed == 0)
 		hlt_cpus_mask |= hyperthreading_cpus_mask;
 }
 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 
 int
 mp_grab_cpu_hlt(void)
 {
 	cpumask_t mask;
 #ifdef MP_WATCHDOG
 	u_int cpuid;
 #endif
 	int retval;
 
 	mask = PCPU_GET(cpumask);
 #ifdef MP_WATCHDOG
 	cpuid = PCPU_GET(cpuid);
 	ap_watchdog(cpuid);
 #endif
 
 	retval = 0;
 	while (mask & hlt_cpus_mask) {
 		retval = 1;
 		__asm __volatile("sti; hlt" : : : "memory");
 	}
 	return (retval);
 }
 
 #ifdef COUNT_IPIS
 /*
  * Register one interrupt counter per IPI type for 'cpu', named
  * "cpu<N>:<type>".  Relies on the locals 'buf' and 'cpu' of
  * mp_ipi_intrcnt().
  */
 #define	IPI_INTRCNT_ADD(type, counts) do {				\
 	snprintf(buf, sizeof(buf), "cpu%d:" type, cpu);			\
 	intrcnt_add(buf, &(counts)[cpu]);				\
 } while (0)
 
 /*
  * Setup interrupt counters for IPI handlers.
  */
 static void
 mp_ipi_intrcnt(void *dummy)
 {
 	char buf[64];
 	int cpu;
 
 	CPU_FOREACH(cpu) {
 		IPI_INTRCNT_ADD("invltlb", ipi_invltlb_counts);
 		IPI_INTRCNT_ADD("invlrng", ipi_invlrng_counts);
 		IPI_INTRCNT_ADD("invlpg", ipi_invlpg_counts);
 		IPI_INTRCNT_ADD("preempt", ipi_preempt_counts);
 		IPI_INTRCNT_ADD("ast", ipi_ast_counts);
 		IPI_INTRCNT_ADD("rendezvous", ipi_rendezvous_counts);
 		IPI_INTRCNT_ADD("lazypmap", ipi_lazypmap_counts);
 		IPI_INTRCNT_ADD("hardclock", ipi_hardclock_counts);
 	}
 }
 #undef IPI_INTRCNT_ADD
 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
 #endif
 
Index: projects/binutils-2.17/sys/amd64/amd64/pmap.c
===================================================================
--- projects/binutils-2.17/sys/amd64/amd64/pmap.c	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/amd64/pmap.c	(revision 215830)
@@ -1,5092 +1,5091 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_pmap.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 
 /*
  * Default value for shpgperproc; pmap_init() uses it to size pv_entry_max.
  * May be overridden at build time.
  */
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 /*
  * PMAP_INLINE expands to an inline qualifier unless DIAGNOSTIC is defined,
  * in which case it expands to nothing.
  */
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
 #else
 #define PMAP_INLINE	extern inline
 #endif
 #else
 #define PMAP_INLINE
 #endif
 
 /*
  * PV_STAT(x) evaluates the statement x only when PV_STATS is defined
  * (it is defined unconditionally here); otherwise it is a no-op.
  */
 #define PV_STATS
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 /*
  * pa_index() converts a physical address to an index in units of
  * (1 << PDRSHIFT) bytes; pa_to_pvh() returns the matching pv_table entry.
  */
 #define	pa_index(pa)	((pa) >> PDRSHIFT)
 #define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
 
 /* Statically allocated storage for a struct pmap. */
 struct pmap kernel_pmap_store;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 
 /* Number of PDP entries spanned by the direct map; set in create_pagetables(). */
 static int ndmpdp;
 /* Size of the direct map in bytes (ndmpdp << PDPSHIFT). */
 static vm_paddr_t dmaplimit;
 vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
 /* NOTE(review): presumably the no-execute PTE bit when supported -- not set in this part of the file; confirm. */
 pt_entry_t pg_nx;
 
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 
 static int pat_works = 1;
-TUNABLE_INT("vm.pmap.pat_works", &pat_works);
-SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RDTUN, &pat_works, 1,
+SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
     "Is page attribute table fully functional?");
 
 /*
  * Non-zero when large page mappings are enabled.  pmap_init() fetches the
  * "vm.pmap.pg_ps_enabled" tunable into this variable.
  */
 static int pg_ps_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
     "Are large page mappings enabled?");
 
 #define	PAT_INDEX_SIZE	8
 static int pat_index[PAT_INDEX_SIZE];	/* cache mode to PAT index conversion */
 
 /* Physical addresses of the boot-time page table pages (create_pagetables()). */
 static u_int64_t	KPTphys;	/* phys addr of kernel level 1 */
 static u_int64_t	KPDphys;	/* phys addr of kernel level 2 */
 u_int64_t		KPDPphys;	/* phys addr of kernel level 3 */
 u_int64_t		KPML4phys;	/* phys addr of kernel level 4 */
 
 static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
 
 /*
  * Data for the pv entry allocation mechanism
  *
  * pv_entry_max and pv_entry_high_water (90% of the maximum) are computed
  * in pmap_init() and recomputed by the sysctl handlers.
  */
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table;	/* indexed by pa_index(); see pa_to_pvh() */
 static int shpgperproc = PMAP_SHPGPERPROC;
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  */
 pt_entry_t *CMAP1 = 0;
 caddr_t CADDR1 = 0;
 
 /*
  * Crashdump maps.
  */
 static caddr_t crashdumpmap;
 
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
 static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 static int	pmap_pvh_wired_mappings(struct md_page *pvh, int count);
 
 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
     vm_offset_t va);
 static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
 static void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
 static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
 		vm_page_t *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
 		vm_offset_t sva, pd_entry_t ptepde, vm_page_t *free);
 static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
 static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
     vm_page_t *free);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
 		vm_offset_t va);
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
 static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
     pd_entry_t newpde);
 static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
 static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
                 vm_page_t* free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
 /*
  * Round the kernel virtual free pointer up to the next 2MB (NBPDR)
  * boundary; an address that is already aligned is returned unchanged.
  * This helps performance by letting a large (2MB) page cover much of
  * the kernel (.text, .data, .bss).
  */
 static vm_offset_t
 pmap_kmem_choose(vm_offset_t addr)
 {
 	const vm_offset_t superpage_mask = NBPDR - 1;
 
 	return ((addr + superpage_mask) & ~superpage_mask);
 }
 
 /********************/
 /* Inline functions */
 /********************/
 
 /*
  * Return the page directory index of a VA without masking it to the
  * size of a single page directory page.
  */
 static __inline vm_pindex_t
 pmap_pde_pindex(vm_offset_t va)
 {
 	vm_pindex_t pindex;
 
 	pindex = va >> PDRSHIFT;
 	return (pindex);
 }
 
 
 /* Return various clipped indexes for a given VA */
 /* Index of the VA's entry within its page table page. */
 static __inline vm_pindex_t
 pmap_pte_index(vm_offset_t va)
 {
 	const vm_pindex_t index_mask = (1ul << NPTEPGSHIFT) - 1;
 
 	return ((va >> PAGE_SHIFT) & index_mask);
 }
 
 /* Index of the VA's entry within its page directory page. */
 static __inline vm_pindex_t
 pmap_pde_index(vm_offset_t va)
 {
 	const vm_pindex_t index_mask = (1ul << NPDEPGSHIFT) - 1;
 
 	return ((va >> PDRSHIFT) & index_mask);
 }
 
 /* Index of the VA's entry within its page directory pointer page. */
 static __inline vm_pindex_t
 pmap_pdpe_index(vm_offset_t va)
 {
 	const vm_pindex_t index_mask = (1ul << NPDPEPGSHIFT) - 1;
 
 	return ((va >> PDPSHIFT) & index_mask);
 }
 
 /* Index of the VA's entry within the PML4 page. */
 static __inline vm_pindex_t
 pmap_pml4e_index(vm_offset_t va)
 {
 	const vm_pindex_t index_mask = (1ul << NPML4EPGSHIFT) - 1;
 
 	return ((va >> PML4SHIFT) & index_mask);
 }
 
 /*
  * Locate the PML4 entry for a VA in the given pmap.  The entry is not
  * inspected; the caller must check PG_V itself.
  */
 static __inline pml4_entry_t *
 pmap_pml4e(pmap_t pmap, vm_offset_t va)
 {
 	pml4_entry_t *table;
 
 	table = pmap->pm_pml4;
 	return (table + pmap_pml4e_index(va));
 }
 
 /*
  * Given a PML4 entry, return the PDP slot for a VA.  The PDP page is
  * reached through the direct map using the frame stored in *pml4e;
  * the validity of *pml4e is not checked here.
  */
 static __inline pdp_entry_t *
 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va)
 {
 	pdp_entry_t *pdp_page;
 
 	pdp_page = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
 	return (pdp_page + pmap_pdpe_index(va));
 }
 
 /*
  * Return the PDP slot for a VA in the given pmap, or NULL when the
  * covering PML4 entry is not valid (PG_V clear).
  */
 static __inline pdp_entry_t *
 pmap_pdpe(pmap_t pmap, vm_offset_t va)
 {
 	pml4_entry_t *slot = pmap_pml4e(pmap, va);
 
 	if ((*slot & PG_V) != 0)
 		return (pmap_pml4e_to_pdpe(slot, va));
 	return (NULL);
 }
 
 /*
  * Given a PDP entry, return the PD slot for a VA.  The PD page is
  * reached through the direct map using the frame stored in *pdpe;
  * the validity of *pdpe is not checked here.
  */
 static __inline pd_entry_t *
 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va)
 {
 	pd_entry_t *pd_page;
 
 	pd_page = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
 	return (pd_page + pmap_pde_index(va));
 }
 
 /*
  * Return the PD slot for a VA in the given pmap, or NULL when no valid
  * PDP entry covers the address.
  */
 static __inline pd_entry_t *
 pmap_pde(pmap_t pmap, vm_offset_t va)
 {
 	pdp_entry_t *slot = pmap_pdpe(pmap, va);
 
 	if (slot != NULL && (*slot & PG_V) != 0)
 		return (pmap_pdpe_to_pde(slot, va));
 	return (NULL);
 }
 
 /*
  * Given a PD entry, return the PT slot for a VA.  The page table page
  * is reached through the direct map using the frame stored in *pde;
  * the validity of *pde is not checked here.
  */
 static __inline pt_entry_t *
 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
 {
 	pt_entry_t *pt_page;
 
 	pt_page = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
 	return (pt_page + pmap_pte_index(va));
 }
 
 /*
  * Return the PT slot for a VA in the given pmap.
  *
  * Returns NULL when no valid PD entry covers the address.  When the PD
  * entry is a large-page mapping (PG_PS), the PD entry itself is
  * returned, cast to a PTE pointer, for compatibility with the i386
  * pmap_pte().
  */
 static __inline pt_entry_t *
 pmap_pte(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *slot = pmap_pde(pmap, va);
 
 	if (slot != NULL && (*slot & PG_V) != 0) {
 		if ((*slot & PG_PS) == 0)
 			return (pmap_pde_to_pte(slot, va));
 		return ((pt_entry_t *)slot);
 	}
 	return (NULL);
 }
 
 /*
  * Add "count" to the pmap's resident page count.  The pmap lock must
  * be held by the caller (asserted).
  */
 static __inline void
 pmap_resident_count_inc(pmap_t pmap, int count)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pmap->pm_stats.resident_count = pmap->pm_stats.resident_count + count;
 }
 
 /*
  * Subtract "count" from the pmap's resident page count.  The pmap lock
  * must be held by the caller (asserted).
  */
 static __inline void
 pmap_resident_count_dec(pmap_t pmap, int count)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pmap->pm_stats.resident_count = pmap->pm_stats.resident_count - count;
 }
 
 /*
  * Return the address of the PTE for a VA within the PTmap array.
  * The page number of the VA is masked to the combined width of the
  * four page-table index fields before it is used as the array index.
  */
 PMAP_INLINE pt_entry_t *
 vtopte(vm_offset_t va)
 {
 	const int index_bits = NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT +
 	    NPML4EPGSHIFT;
 	u_int64_t pte_index;
 
 	pte_index = (va >> PAGE_SHIFT) & ((1ul << index_bits) - 1);
 	return (&PTmap[pte_index]);
 }
 
 /*
  * Return the address of the PDE for a VA within the PDmap array.
  * The 2MB-page number of the VA is masked to the combined width of the
  * three upper page-table index fields before it is used as the index.
  */
 static __inline pd_entry_t *
 vtopde(vm_offset_t va)
 {
 	const int index_bits = NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT;
 	u_int64_t pde_index;
 
 	pde_index = (va >> PDRSHIFT) & ((1ul << index_bits) - 1);
 	return (&PDmap[pde_index]);
 }
 
 /*
  * Carve "n" zeroed pages off the front of the region starting at
  * *firstaddr.
  *
  * The pages are cleared through the address value itself, *firstaddr is
  * advanced past them, and the original starting address is returned.
  */
 static u_int64_t
 allocpages(vm_paddr_t *firstaddr, int n)
 {
 	u_int64_t base = *firstaddr;
 
 	bzero((void *)base, n * PAGE_SIZE);
 	*firstaddr += n * PAGE_SIZE;
 	return (base);
 }
 
 /*
  * Build the initial kernel page tables.
  *
  * Pages for the kernel PT, PD, PDP and PML4 levels and for the direct map
  * are taken from the region starting at *firstaddr (which is advanced by
  * allocpages()).  The tables are then filled in so that:
  *   - physical memory from zero up to *firstaddr is mapped with 2MB pages,
  *   - the direct map covers ndmpdp PDP entries using 2MB pages, or 1GB
  *     pages when the CPU advertises AMDID_PAGE1GB,
  *   - the PML4 maps itself recursively at slot PML4PML4I.
  *
  * Every shift of the loop counter is performed in vm_paddr_t so that the
  * computed physical addresses cannot overflow a signed int.
  */
 static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
 	int i;
 
 	/* Allocate pages */
 	KPTphys = allocpages(firstaddr, NKPT);
 	KPML4phys = allocpages(firstaddr, 1);
 	KPDPphys = allocpages(firstaddr, NKPML4E);
 	KPDphys = allocpages(firstaddr, NKPDPE);
 
 	ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
 	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
 		ndmpdp = 4;
 	DMPDPphys = allocpages(firstaddr, NDMPML4E);
 	/* Level 2 direct map pages are only needed without 1GB pages. */
 	if ((amd_feature & AMDID_PAGE1GB) == 0)
 		DMPDphys = allocpages(firstaddr, ndmpdp);
 	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
 
 	/* Fill in the underlying page table pages */
 	/* Mapped read/write (PG_RW) from zero to *firstaddr */
 	/* XXX not fully used, underneath 2M pages */
 	for (i = 0; ((vm_paddr_t)i << PAGE_SHIFT) < *firstaddr; i++) {
 		((pt_entry_t *)KPTphys)[i] = (vm_paddr_t)i << PAGE_SHIFT;
 		((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G;
 	}
 
 	/* Now map the page tables at their location within PTmap */
 	for (i = 0; i < NKPT; i++) {
 		((pd_entry_t *)KPDphys)[i] = KPTphys +
 		    ((vm_paddr_t)i << PAGE_SHIFT);
 		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
 	}
 
 	/* Map from zero to end of allocations under 2M pages */
 	/* This replaces some of the KPTphys entries above */
 	for (i = 0; ((vm_paddr_t)i << PDRSHIFT) < *firstaddr; i++) {
 		((pd_entry_t *)KPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
 		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
 	}
 
 	/* And connect up the PD to the PDP */
 	for (i = 0; i < NKPDPE; i++) {
 		((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys +
 		    ((vm_paddr_t)i << PAGE_SHIFT);
 		((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
 	}
 
 	/*
 	 * Now, set up the direct map region using either 2MB or 1GB pages.
 	 * Later, if pmap_mapdev{_attr}() uses the direct map for non-write-
 	 * back memory, pmap_change_attr() will demote any 2MB or 1GB page
 	 * mappings that are partially used.
 	 */
 	if ((amd_feature & AMDID_PAGE1GB) == 0) {
 		for (i = 0; i < NPDEPG * ndmpdp; i++) {
 			((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
 			/* Preset PG_M and PG_A because demotion expects it. */
 			((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS |
 			    PG_G | PG_M | PG_A;
 		}
 		/* And the direct map space's PDP */
 		for (i = 0; i < ndmpdp; i++) {
 			((pdp_entry_t *)DMPDPphys)[i] = DMPDphys +
 			    ((vm_paddr_t)i << PAGE_SHIFT);
 			((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
 		}
 	} else {
 		for (i = 0; i < ndmpdp; i++) {
 			((pdp_entry_t *)DMPDPphys)[i] =
 			    (vm_paddr_t)i << PDPSHIFT;
 			/* Preset PG_M and PG_A because demotion expects it. */
 			((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_PS |
 			    PG_G | PG_M | PG_A;
 		}
 	}
 
 	/* And recursively map PML4 to itself in order to get PTmap */
 	((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
 	((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
 
 	/* Connect the Direct Map slot up to the PML4 */
 	((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
 	((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;
 
 	/* Connect the KVA slot up to the PML4 */
 	((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
 	((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *
  *	On amd64 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  *
  *	firstaddr points to the first free physical address; it is advanced
  *	by create_pagetables() as page table pages are allocated.
  */
 void
 pmap_bootstrap(vm_paddr_t *firstaddr)
 {
 	vm_offset_t va;
 	pt_entry_t *pte, *unused;
 
 	/*
 	 * Create an initial set of page tables to run the kernel in.
 	 */
 	create_pagetables(firstaddr);
 
 	/* Kernel VA space begins past the allocations, rounded up to 2MB. */
 	virtual_avail = (vm_offset_t) KERNBASE + *firstaddr;
 	virtual_avail = pmap_kmem_choose(virtual_avail);
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 
 	/* XXX do %cr0 as well */
 	/* Set CR4.PGE and CR4.PSE, then switch to the new PML4. */
 	load_cr4(rcr4() | CR4_PGE | CR4_PSE);
 	load_cr3(KPML4phys);
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	kernel_pmap->pm_root = NULL;
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 *
 	 * SYSMAP(c, p, v, n) assigns the next n pages of VA to v (cast to
 	 * type c) and the address of their first PTE to p, then advances
 	 * both the va and pte cursors by n pages/entries.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
 
 	va = virtual_avail;
 	pte = vtopte(va);
 
 	/*
 	 * CMAP1 is only used for the memory test.
 	 */
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
 
 	/* The VA consumed by the reservations above is no longer available. */
 	virtual_avail = va;
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
 }
 
 /*
  * Setup the PAT MSR.
  *
  * Programs the page attribute table on the calling CPU and records, in
  * pat_index[], which PAT index to use for each caching mode.  The layout
  * depends on pat_works: when it is set, indices 5 and 6 carry WP and WC;
  * otherwise only index 2 is changed (to WC).  Panics if the CPU does not
  * advertise PAT support.
  */
 void
 pmap_init_pat(void)
 {
 	int pat_table[PAT_INDEX_SIZE];
 	uint64_t pat_msr;
 	u_long cr0, cr4;
 	int i;
 
 	/* PAT support is required: panic if this CPU doesn't implement it. */
 	if ((cpu_feature & CPUID_PAT) == 0)
 		panic("no PAT??");
 
 	/*
 	 * Set default PAT index table.  A value of -1 marks a caching mode
 	 * with no PAT index; pmap_cache_bits() panics on such modes.
 	 */
 	for (i = 0; i < PAT_INDEX_SIZE; i++)
 		pat_table[i] = -1;
 	pat_table[PAT_WRITE_BACK] = 0;
 	pat_table[PAT_WRITE_THROUGH] = 1;
 	pat_table[PAT_UNCACHEABLE] = 3;
 	pat_table[PAT_WRITE_COMBINING] = 3;
 	pat_table[PAT_WRITE_PROTECTED] = 3;
 	pat_table[PAT_UNCACHED] = 3;
 
 	/* Initialize default PAT entries. */
 	pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
 	    PAT_VALUE(1, PAT_WRITE_THROUGH) |
 	    PAT_VALUE(2, PAT_UNCACHED) |
 	    PAT_VALUE(3, PAT_UNCACHEABLE) |
 	    PAT_VALUE(4, PAT_WRITE_BACK) |
 	    PAT_VALUE(5, PAT_WRITE_THROUGH) |
 	    PAT_VALUE(6, PAT_UNCACHED) |
 	    PAT_VALUE(7, PAT_UNCACHEABLE);
 
 	if (pat_works) {
 		/*
 		 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
 		 * Program 5 and 6 as WP and WC.
 		 * Leave 4 and 7 as WB and UC.
 		 */
 		pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
 		pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
 		    PAT_VALUE(6, PAT_WRITE_COMBINING);
 		pat_table[PAT_UNCACHED] = 2;
 		pat_table[PAT_WRITE_PROTECTED] = 5;
 		pat_table[PAT_WRITE_COMBINING] = 6;
 	} else {
 		/*
 		 * Just replace PAT Index 2 with WC instead of UC-.
 		 * PAT_UNCACHED and PAT_WRITE_PROTECTED keep the default
 		 * mapping to index 3 (UC) set above.
 		 */
 		pat_msr &= ~PAT_MASK(2);
 		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
 		pat_table[PAT_WRITE_COMBINING] = 2;
 	}
 
 	/* Disable PGE. */
 	cr4 = rcr4();
 	load_cr4(cr4 & ~CR4_PGE);
 
 	/* Disable caches (CD = 1, NW = 0). */
 	cr0 = rcr0();
 	load_cr0((cr0 & ~CR0_NW) | CR0_CD);
 
 	/* Flush caches and TLBs. */
 	wbinvd();
 	invltlb();
 
 	/* Update PAT and index table. */
 	wrmsr(MSR_PAT, pat_msr);
 	for (i = 0; i < PAT_INDEX_SIZE; i++)
 		pat_index[i] = pat_table[i];
 
 	/* Flush caches and TLBs again. */
 	wbinvd();
 	invltlb();
 
 	/* Restore caches and PGE (the CR0/CR4 values saved above). */
 	load_cr0(cr0);
 	load_cr4(cr4);
 }
 
 /*
  *	Set up the machine-dependent part of a vm_page: the page starts
  *	with the write-back caching mode and an empty pv list.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 	struct md_page *md = &m->md;
 
 	md->pat_mode = PAT_WRITE_BACK;
 	TAILQ_INIT(&md->pv_list);
 }
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  *
  *	Sets up the vm_page entries for the kernel page table pages, sizes
  *	the pv entry limits from tunables, enables the erratum 383
  *	workaround where required, optionally advertises 2MB pages in
  *	pagesizes[], and allocates the pv head table for superpages.
  */
 void
 pmap_init(void)
 {
 	vm_page_t mpte;
 	vm_size_t s;
 	int i, pv_npg;
 
 	/*
 	 * Initialize the vm page array entries for the kernel pmap's
 	 * page table pages.
 	 */
 	for (i = 0; i < NKPT; i++) {
 		mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
 		KASSERT(mpte >= vm_page_array &&
 		    mpte < &vm_page_array[vm_page_array_size],
 		    ("pmap_init: page table page is out of range"));
 		mpte->pindex = pmap_pde_pindex(KERNBASE) + i;
 		mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
 	}
 
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
 	 *
 	 * The computed pv_entry_max is only a default: the
 	 * "vm.pmap.pv_entries" tunable, when set, overrides it.
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
 	 * If the kernel is running in a virtual machine on an AMD Family 10h
 	 * processor, then it must assume that MCA is enabled by the virtual
 	 * machine monitor.
 	 */
 	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10)
 		workaround_erratum383 = 1;
 
 	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
 	if (pg_ps_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("pmap_init: can't assign to pagesizes[1]"));
 		pagesizes[1] = NBPDR;
 	}
 
 	/*
 	 * Calculate the size of the pv head table for superpages.
 	 *
 	 * The empty loop walks the (start, end) pairs in phys_avail[] until
 	 * it finds a zero end address; the end of the last pair, rounded
 	 * up to 2MB, determines the number of entries.
 	 */
 	for (i = 0; phys_avail[i + 1]; i += 2);
 	pv_npg = round_2mpage(phys_avail[(i - 2) + 1]) / NBPDR;
 
 	/*
 	 * Allocate memory for the pv head table for superpages.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 }
 
 /*
  * Sysctl handler for vm.pmap.pv_entry_max.
  *
  * After a successful write of a new maximum, shpgperproc and the high
  * water mark are recomputed from it.  Returns the error from
  * sysctl_handle_int().
  */
 static int
 pmap_pventry_proc(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (error != 0 || !req->newptr)
 		return (error);
 
 	/* A new value was stored: rederive the dependent settings. */
 	shpgperproc = (pv_entry_max - cnt.v_page_count) / maxproc;
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 	return (error);
 }
 SYSCTL_PROC(_vm_pmap, OID_AUTO, pv_entry_max, CTLTYPE_INT|CTLFLAG_RW, 
     &pv_entry_max, 0, pmap_pventry_proc, "IU", "Max number of PV entries");
 
 /*
  * Sysctl handler for vm.pmap.shpgperproc.
  *
  * After a successful write of a new share factor, pv_entry_max and the
  * high water mark are recomputed from it.  Returns the error from
  * sysctl_handle_int().
  */
 static int
 pmap_shpgperproc_proc(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (error != 0 || !req->newptr)
 		return (error);
 
 	/* A new value was stored: rederive the dependent settings. */
 	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 	return (error);
 }
 SYSCTL_PROC(_vm_pmap, OID_AUTO, shpgperproc, CTLTYPE_INT|CTLFLAG_RW, 
     &shpgperproc, 0, pmap_shpgperproc_proc, "IU", "Page share factor per proc");
 
 /*
  * Read-only event counters for superpage mappings, exported under
  * vm.pmap.pde (2MB pages) and vm.pmap.pdpe (1GB pages).
  */
 SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
     "2MB page mapping counters");
 
 static u_long pmap_pde_demotions;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_pde_demotions, 0, "2MB page demotions");
 
 static u_long pmap_pde_mappings;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_pde_mappings, 0, "2MB page mappings");
 
 static u_long pmap_pde_p_failures;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
     &pmap_pde_p_failures, 0, "2MB page promotion failures");
 
 static u_long pmap_pde_promotions;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
     &pmap_pde_promotions, 0, "2MB page promotions");
 
 SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD, 0,
     "1GB page mapping counters");
 
 static u_long pmap_pdpe_demotions;
 SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_pdpe_demotions, 0, "1GB page demotions");
 
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
 
 /*
  * Translate a caching mode into the page table entry bits that select it.
  *
  * mode is looked up in pat_index[] to obtain a 3-bit PAT index, whose
  * bits are then spread over the PWT, PCD and PAT flags.  is_pde selects
  * which PAT flag is used, because its position differs between PTEs and
  * PDEs.  Panics when mode is out of range or has no PAT index assigned.
  */
 static int
 pmap_cache_bits(int mode, boolean_t is_pde)
 {
 	int bits, idx;
 
 	if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
 		panic("Unknown caching mode %d\n", mode);
 
 	idx = pat_index[mode];
 
 	/* Bit 0 -> PWT, bit 1 -> PCD, bit 2 -> PAT. */
 	bits = (idx & 0x1) ? PG_NC_PWT : 0;
 	if (idx & 0x2)
 		bits |= PG_NC_PCD;
 	if (idx & 0x4)
 		bits |= is_pde ? PG_PDE_PAT : PG_PTE_PAT;
 	return (bits);
 }
 
 /*
  * After changing the page size for the specified virtual address in the page
  * table, flush the corresponding entries from the processor's TLB.  Only the
  * calling processor's TLB is affected.
  *
  * The calling thread must be pinned to a processor.
  *
  * va is the virtual address whose mapping changed; newpde is the new PDE
  * value, whose PG_PS and PG_G bits select the kind of flush performed.
  */
 static void
 pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 {
 	u_long cr4;
 
 	if ((newpde & PG_PS) == 0)
 		/* Demotion: flush a specific 2MB page mapping. */
 		invlpg(va);
 	else if ((newpde & PG_G) == 0)
 		/*
 		 * Promotion: flush every 4KB page mapping from the TLB
 		 * because there are too many to flush individually.
 		 */
 		invltlb();
 	else {
 		/*
 		 * Promotion: flush every 4KB page mapping from the TLB,
 		 * including any global (PG_G) mappings.
 		 */
 		cr4 = rcr4();
 		load_cr4(cr4 & ~CR4_PGE);
 		/*
 		 * Although preemption at this point could be detrimental to
 		 * performance, it would not lead to an error.  PG_G is simply
 		 * ignored if CR4.PGE is clear.  Moreover, in case this block
 		 * is re-entered, the load_cr4() either above or below will
 		 * modify CR4.PGE flushing the TLB.
 		 */
 		load_cr4(cr4 | CR4_PGE);
 	}
 }
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
  * N.B.: Before calling any of the following TLB invalidation functions,
  * the calling processor must ensure that all stores updating a non-
  * kernel page table are globally performed.  Otherwise, another
  * processor could cache an old, pre-update entry without being
  * invalidated.  This can happen one of two ways: (1) The pmap becomes
  * active on another processor after its pm_active field is checked by
  * one of the following functions but before a store updating the page
  * table is globally performed. (2) The pmap becomes active on another
  * processor before its pm_active field is checked but due to
  * speculative loads one of the following functions still reads the
  * pmap as inactive on the other processor.
  * 
  * The kernel page table is exempt because its pm_active field is
  * immutable.  The kernel page table is always active on every
  * processor.
  */
 /*
  * Invalidate the TLB entry for one virtual address on every processor
  * on which the pmap is active.
  *
  * For the kernel pmap, or a pmap active on all CPUs, the entry is
  * flushed locally and smp_invlpg() is called.  Otherwise the local flush
  * is done only if the pmap is active on this CPU, and only the other
  * CPUs in pm_active are passed to smp_masked_invlpg().  The thread is
  * pinned for the duration of the operation.
  */
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	cpumask_t cpumask, other_cpus;
 
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		invlpg(va);
 		smp_invlpg(va);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
 		if (pmap->pm_active & cpumask)
 			invlpg(va);
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
 	}
 	sched_unpin();
 }
 
 /*
  * Invalidate the TLB entries for the virtual address range [sva, eva)
  * on every processor on which the pmap is active.
  *
  * The local flush issues one invlpg() per PAGE_SIZE step.  For the kernel
  * pmap, or a pmap active on all CPUs, smp_invlpg_range() is called;
  * otherwise only the other CPUs in pm_active are passed to
  * smp_masked_invlpg_range().  The thread is pinned for the duration of
  * the operation.
  */
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	cpumask_t cpumask, other_cpus;
 	vm_offset_t addr;
 
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 		smp_invlpg_range(sva, eva);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
 		if (pmap->pm_active & cpumask)
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
 			    sva, eva);
 	}
 	sched_unpin();
 }
 
 /*
  * Invalidate the TLB (invltlb()) on every processor on which the pmap
  * is active.
  *
  * For the kernel pmap, or a pmap active on all CPUs, the local TLB is
  * flushed and smp_invltlb() is called.  Otherwise the local flush is done
  * only if the pmap is active on this CPU, and only the other CPUs in
  * pm_active are passed to smp_masked_invltlb().  The thread is pinned
  * for the duration of the operation.
  */
 void
 pmap_invalidate_all(pmap_t pmap)
 {
 	cpumask_t cpumask, other_cpus;
 
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		invltlb();
 		smp_invltlb();
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
 		if (pmap->pm_active & cpumask)
 			invltlb();
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invltlb(pmap->pm_active & other_cpus);
 	}
 	sched_unpin();
 }
 
 /*
  * Write back and invalidate the caches: wbinvd() on the calling CPU,
  * followed by smp_cache_flush().  The thread is pinned for the duration
  * of the operation.
  */
 void
 pmap_invalidate_cache(void)
 {
 
 	sched_pin();
 	wbinvd();
 	smp_cache_flush();
 	sched_unpin();
 }
 
 /*
  * Argument block shared by the rendezvous callbacks that pmap_update_pde()
  * passes to smp_rendezvous_cpus().
  */
 struct pde_action {
 	cpumask_t store;	/* processor that updates the PDE */
 	cpumask_t invalidate;	/* processors that invalidate their TLB */
 	vm_offset_t va;		/* virtual address passed to the TLB flush */
 	pd_entry_t *pde;	/* PDE that is overwritten */
 	pd_entry_t newpde;	/* value stored into *pde */
 };
 
 /*
  * Rendezvous action callback: only the processor whose mask equals
  * act->store writes the new PDE; every other processor does nothing.
  */
 static void
 pmap_update_pde_action(void *arg)
 {
 	struct pde_action *action = arg;
 
 	if (action->store != PCPU_GET(cpumask))
 		return;
 	pde_store(action->pde, action->newpde);
 }
 
 /*
  * Rendezvous teardown callback: processors listed in act->invalidate
  * flush their own TLB for the updated PDE; the others do nothing.
  */
 static void
 pmap_update_pde_teardown(void *arg)
 {
 	struct pde_action *action = arg;
 
 	if ((action->invalidate & PCPU_GET(cpumask)) == 0)
 		return;
 	pmap_update_pde_invalidate(action->va, action->newpde);
 }
 
 /*
  * Change the page size for the specified virtual address in a way that
  * prevents any possibility of the TLB ever having two entries that map the
  * same virtual address using different page sizes.  This is the recommended
  * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
  * machine check exception for a TLB state that is improperly diagnosed as a
  * hardware error.
  */
 static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 	struct pde_action request;
 	cpumask_t self, targets;
 
 	sched_pin();
 	self = PCPU_GET(cpumask);
 	/* The kernel pmap is treated as active on every CPU. */
 	targets = (pmap == kernel_pmap) ? all_cpus : pmap->pm_active;
 	if ((targets & PCPU_GET(other_cpus)) == 0) {
 		/*
 		 * No other CPU has the pmap active: store the PDE here and
 		 * invalidate locally only if this CPU is a target.
 		 */
 		pde_store(pde, newpde);
 		if ((targets & self) != 0)
 			pmap_update_pde_invalidate(va, newpde);
 	} else {
 		/*
 		 * Rendezvous with the target CPUs: this CPU performs the
 		 * store (action callback) and the targets invalidate
 		 * (teardown callback).
 		 */
 		request.va = va;
 		request.pde = pde;
 		request.newpde = newpde;
 		request.store = self;
 		request.invalidate = targets;
 		smp_rendezvous_cpus(self | targets,
 		    smp_no_rendevous_barrier, pmap_update_pde_action,
 		    pmap_update_pde_teardown, &request);
 	}
 	sched_unpin();
 }
 #else /* !SMP */
 /*
  * Normal, non-SMP, invalidation functions.
  * We inline these within pmap.c for speed.
  */
 /*
  * Uniprocessor variant: invlpg() the address if the pmap is the kernel
  * pmap or has a non-zero pm_active.
  */
 PMAP_INLINE void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	invlpg(va);
 }
 
 /*
  * Uniprocessor variant: invlpg() every PAGE_SIZE step in [sva, eva) if the
  * pmap is the kernel pmap or has a non-zero pm_active.
  */
 PMAP_INLINE void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t page;
 
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	page = sva;
 	while (page < eva) {
 		invlpg(page);
 		page += PAGE_SIZE;
 	}
 }
 
 /*
  * Uniprocessor variant: invltlb() if the pmap is the kernel pmap or has a
  * non-zero pm_active.
  */
 PMAP_INLINE void
 pmap_invalidate_all(pmap_t pmap)
 {
 
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	invltlb();
 }
 
 /*
  * Uniprocessor variant: a single wbinvd() on the current CPU.
  */
 PMAP_INLINE void
 pmap_invalidate_cache(void)
 {
 
 	wbinvd();
 }
 
 /*
  * Uniprocessor variant: store the new PDE, then invalidate through
  * pmap_update_pde_invalidate() if the pmap is the kernel pmap or has a
  * non-zero pm_active.
  */
 static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 
 	pde_store(pde, newpde);
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	pmap_update_pde_invalidate(va, newpde);
 }
 #endif /* !SMP */
 
 /*
  * Flush the cache for the page-aligned virtual range [sva, eva), choosing
  * among: nothing (self snoop), per-line clflush, or a global flush.
  */
 static void
 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t line;
 
 	KASSERT((sva & PAGE_MASK) == 0,
 	    ("pmap_invalidate_cache_range: sva not page-aligned"));
 	KASSERT((eva & PAGE_MASK) == 0,
 	    ("pmap_invalidate_cache_range: eva not page-aligned"));
 
 	/* If "Self Snoop" is supported, do nothing. */
 	if ((cpu_feature & CPUID_SS) != 0)
 		return;
 
 	if ((cpu_feature & CPUID_CLFSH) == 0 ||
 	    eva - sva >= 2 * 1024 * 1024) {
 		/*
 		 * No targeted cache flush methods are supported by CPU,
 		 * or the supplied range is 2MB or bigger.
 		 * Globally invalidate cache.
 		 */
 		pmap_invalidate_cache();
 		return;
 	}
 
 	/*
 	 * Otherwise, do per-cache line flush.  Use the mfence
 	 * instruction to ensure that previous stores are
 	 * included in the write-back.  The processor
 	 * propagates flush to other processors in the cache
 	 * coherence domain.
 	 */
 	mfence();
 	for (line = sva; line < eva; line += cpu_clflush_line_size)
 		clflush(line);
 	mfence();
 }
 
 /*
  * Are we current address space or kernel?
  */
 static __inline int
 pmap_is_current(pmap_t pmap)
 {
 
 	/* The kernel pmap always counts as current. */
 	if (pmap == kernel_pmap)
 		return (1);
 	/*
 	 * Otherwise compare the frame in the pmap's self-referential PML4
 	 * slot with the frame in PML4pml4e[0].
 	 */
 	return ((pmap->pm_pml4[PML4PML4I] & PG_FRAME) ==
 	    (PML4pml4e[0] & PG_FRAME));
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
  *		with the given map/virtual_address pair.
  */
 vm_paddr_t
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	pdp_entry_t *pdpep;
 	pd_entry_t *pdep;
 	pt_entry_t *ptep;
 	vm_paddr_t result;
 
 	/* 0 is returned whenever the walk stops at an invalid entry. */
 	result = 0;
 	PMAP_LOCK(pmap);
 	pdpep = pmap_pdpe(pmap, va);
 	if (pdpep == NULL || (*pdpep & PG_V) == 0)
 		goto done;
 	if ((*pdpep & PG_PS) != 0) {
 		/* PG_PS set in the PDP entry: offset is va & PDPMASK. */
 		result = (*pdpep & PG_PS_FRAME) | (va & PDPMASK);
 		goto done;
 	}
 	pdep = pmap_pdpe_to_pde(pdpep, va);
 	if ((*pdep & PG_V) == 0)
 		goto done;
 	if ((*pdep & PG_PS) != 0) {
 		/* PG_PS set in the PD entry: offset is va & PDRMASK. */
 		result = (*pdep & PG_PS_FRAME) | (va & PDRMASK);
 	} else {
 		/* Ordinary page: take the frame from the PTE. */
 		ptep = pmap_pde_to_pte(pdep, va);
 		result = (*ptep & PG_FRAME) | (va & PAGE_MASK);
 	}
 done:
 	PMAP_UNLOCK(pmap);
 	return (result);
 }
 
 /*
  *	Routine:	pmap_extract_and_hold
  *	Function:
  *		Atomically extract and hold the physical page
  *		with the given pmap and virtual address pair
  *		if that mapping permits the given protection.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pd_entry_t pde, *pdep;
 	pt_entry_t pte;
 	vm_paddr_t pa;
 	vm_page_t m;
 
 	/*
 	 * "pa" is state shared with vm_page_pa_tryrelock() and
 	 * PA_UNLOCK_COND(); presumably it records which physical address
 	 * is currently locked -- confirm against those definitions.
 	 */
 	pa = 0;
 	m = NULL;
 	PMAP_LOCK(pmap);
 retry:
 	pdep = pmap_pde(pmap, va);
 	if (pdep != NULL && (pde = *pdep)) {
 		if (pde & PG_PS) {
 			/*
 			 * PDE with PG_PS set: hold the page only if the
 			 * mapping is writable or write access was not
 			 * requested.
 			 */
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
 				/* A non-zero result restarts the lookup. */
 				if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
 				       (va & PDRMASK), &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 				    (va & PDRMASK));
 				vm_page_hold(m);
 			}
 		} else {
 			/*
 			 * Ordinary page: the PTE must be valid and satisfy
 			 * the same write-permission rule.
 			 */
 			pte = *pmap_pde_to_pte(pdep, va);
 			if ((pte & PG_V) &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
 				/* A non-zero result restarts the lookup. */
 				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 				vm_page_hold(m);
 			}
 		}
 	}
 	PA_UNLOCK_COND(pa);
 	PMAP_UNLOCK(pmap);
 	/* NULL when no suitable mapping was found. */
 	return (m);
 }
 
 /*
  * Translate a kernel virtual address to a physical address without taking
  * any lock: direct-map addresses are converted arithmetically, everything
  * else is looked up through the page tables.
  */
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
 	pd_entry_t pde;
 	vm_paddr_t entry;
 
 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
 		return (DMAP_TO_PHYS(va));
 
 	pde = *vtopde(va);
 	if (pde & PG_PS)
 		return ((pde & PG_PS_FRAME) | (va & PDRMASK));
 
 	/*
 	 * Beware of a concurrent promotion that changes the
 	 * PDE at this point!  For example, vtopte() must not
 	 * be used to access the PTE because it would use the
 	 * new PDE.  It is, however, safe to use the old PDE
 	 * because the page table page is preserved by the
 	 * promotion.
 	 */
 	entry = *pmap_pde_to_pte(&pde, va);
 	return ((entry & PG_FRAME) | (va & PAGE_MASK));
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 /*
  * Add a wired page to the kva.
  * Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
 
 	/* Store a PTE with PG_RW, PG_V and PG_G set; no TLB invalidation. */
 	pte_store(vtopte(va), pa | PG_RW | PG_V | PG_G);
 }
 
 /*
  * Like pmap_kenter(), but additionally ORs in the bits returned by
  * pmap_cache_bits(mode, 0).
  */
 static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
 	pt_entry_t *slot;
 
 	slot = vtopte(va);
 	pte_store(slot, pa | PG_RW | PG_V | PG_G | pmap_cache_bits(mode, 0));
 }
 
 /*
  * Remove a page from the kernel pagetables.
  * Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 
 	/* Clear the PTE that maps va; no TLB invalidation is done here. */
 	pte_clear(vtopte(va));
 }
 
 /*
  *	Used to map a range of physical addresses into kernel
  *	virtual address space.
  *
  *	The value passed in '*virt' is a suggested virtual address for
  *	the mapping. Architectures which can support a direct-mapped
  *	physical to virtual region can return the appropriate address
  *	within that region, leaving '*virt' unchanged. Other
  *	architectures should map the pages starting at '*virt' and
  *	update '*virt' with the first usable address after the mapped
  *	region.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	vm_offset_t dmap_va;
 
 	/*
 	 * Only "start" is used: the result is its direct-map address.
 	 * "*virt" is not modified; "end" and "prot" are ignored.
 	 */
 	dmap_va = PHYS_TO_DMAP(start);
 	return (dmap_va);
 }
 
 
 /*
  * Add a list of wired pages to the kva
  * this routine is only used for temporary
  * kernel mappings that do not need to have
  * page modification or references recorded.
  * Note that old mappings are simply written
  * over.  The page *must* be wired.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	pt_entry_t *base, *slot, replaced, wanted;
 	vm_page_t m;
 	int i;
 
 	/* "replaced" accumulates the old contents of every PTE rewritten. */
 	replaced = 0;
 	base = vtopte(sva);
 	for (i = 0; i < count; i++) {
 		slot = base + i;
 		m = ma[i];
 		wanted = VM_PAGE_TO_PHYS(m) |
 		    pmap_cache_bits(m->md.pat_mode, 0);
 		/* Leave a PTE alone if frame and cache bits already match. */
 		if ((*slot & (PG_FRAME | PG_PTE_CACHE)) == wanted)
 			continue;
 		replaced |= *slot;
 		pte_store(slot, wanted | PG_G | PG_RW | PG_V);
 	}
 	/* Invalidate only if at least one overwritten PTE was valid. */
 	if (__predict_false((replaced & PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
 		    PAGE_SIZE);
 }
 
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	vm_offset_t end;
 
 	/* Clear "count" consecutive page mappings starting at sva. */
 	for (end = sva; count > 0; count--, end += PAGE_SIZE)
 		pmap_kremove(end);
 	/* One ranged invalidation covers everything that was removed. */
 	pmap_invalidate_range(kernel_pmap, sva, end);
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 /*
  * Free every page on a list that is chained through the pages' "right"
  * pointers.
  */
 static __inline void
 pmap_free_zero_pages(vm_page_t free)
 {
 	vm_page_t next;
 
 	for (; free != NULL; free = next) {
 		/* Fetch the link before the page is handed back. */
 		next = free->right;
 		/* Preserve the page's PG_ZERO setting. */
 		vm_page_free_toq(free);
 	}
 }
 
 /*
  * Schedule the specified unused page table page to be freed.  Specifically,
  * add the page to the specified list of pages that will be released to the
  * physical memory manager after the TLB has been updated.
  */
 static __inline void
 pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
 {
 
 	/* Force PG_ZERO on or off as requested by the caller. */
 	if (!set_PG_ZERO)
 		m->flags &= ~PG_ZERO;
 	else
 		m->flags |= PG_ZERO;
 
 	/* Push the page on the front of the list linked through "right". */
 	m->right = *free;
 	*free = m;
 }
 	
 /*
  * Inserts the specified page table page into the specified pmap's collection
  * of idle page table pages.  Each of a pmap's page table pages is responsible
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  */
 static void
 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
 {
 	vm_page_t top;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	top = pmap->pm_root;
 	if (top != NULL) {
 		/*
 		 * Splay on the new page's index, then hang the returned
 		 * root and its remaining subtree off the new page.
 		 */
 		top = vm_page_splay(mpte->pindex, top);
 		if (mpte->pindex == top->pindex)
 			panic("pmap_insert_pt_page: pindex already inserted");
 		if (mpte->pindex < top->pindex) {
 			mpte->left = top->left;
 			mpte->right = top;
 			top->left = NULL;
 		} else {
 			mpte->right = top->right;
 			mpte->left = top;
 			top->right = NULL;
 		}
 	} else {
 		/* Empty collection: the new page has no children. */
 		mpte->left = NULL;
 		mpte->right = NULL;
 	}
 	/* The inserted page always becomes the new root. */
 	pmap->pm_root = mpte;
 }
 
 /*
  * Looks for a page table page mapping the specified virtual address in the
  * specified pmap's collection of idle page table pages.  Returns NULL if there
  * is no page table page corresponding to the specified virtual address.
  */
 static vm_page_t
 pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
 {
 	vm_pindex_t wanted;
 	vm_page_t top;
 
 	wanted = pmap_pde_pindex(va);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	top = pmap->pm_root;
 	/* Empty collection, or the root already is the page sought. */
 	if (top == NULL || top->pindex == wanted)
 		return (top);
 	/* Splay on the index; the result becomes the root either way. */
 	top = vm_page_splay(wanted, top);
 	pmap->pm_root = top;
 	return (top->pindex == wanted ? top : NULL);
 }
 
 /*
  * Removes the specified page table page from the specified pmap's collection
  * of idle page table pages.  The specified page table page must be a member of
  * the pmap's collection.
  */
 static void
 pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
 {
 	vm_page_t top;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Make the page the root first, unless it already is. */
 	if (pmap->pm_root != mpte) {
 		top = vm_page_splay(mpte->pindex, pmap->pm_root);
 		KASSERT(mpte == top,
 		    ("pmap_remove_pt_page: mpte %p is missing from pmap %p",
 		    mpte, pmap));
 	}
 	if (mpte->left != NULL) {
 		/*
 		 * Splay the left subtree on the removed index and attach
 		 * the right subtree to the root that comes back.
 		 */
 		top = vm_page_splay(mpte->pindex, mpte->left);
 		top->right = mpte->right;
 	} else {
 		/* No left subtree: the right subtree takes over. */
 		top = mpte->right;
 	}
 	pmap->pm_root = top;
 }
 
 /*
  * Drop one reference (wire_count) on a page table page.  When the count
  * reaches zero, hand the page to _pmap_unwire_pte_hold() and return its
  * result; otherwise return 0.
  */
 static __inline int
 pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free)
 {
 
 	if (--m->wire_count != 0)
 		return (0);
 	return (_pmap_unwire_pte_hold(pmap, va, m, free));
 }
 
 /*
  * Unmap a page table page whose wire_count has reached zero (see
  * pmap_unwire_pte_hold()): clear the entry that points to it, drop the
  * reference it held on its parent page table page, and queue it on "*free".
  * Always returns 1.
  */
 static int 
 _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, 
     vm_page_t *free)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/*
 	 * unmap the page table page
 	 *
 	 * The page's pindex selects which level it belongs to and thus
 	 * which kind of entry has to be cleared.
 	 */
 	if (m->pindex >= (NUPDE + NUPDPE)) {
 		/* PDP page */
 		pml4_entry_t *pml4;
 		pml4 = pmap_pml4e(pmap, va);
 		*pml4 = 0;
 	} else if (m->pindex >= NUPDE) {
 		/* PD page */
 		pdp_entry_t *pdp;
 		pdp = pmap_pdpe(pmap, va);
 		*pdp = 0;
 	} else {
 		/* PTE page */
 		pd_entry_t *pd;
 		pd = pmap_pde(pmap, va);
 		*pd = 0;
 	}
 	pmap_resident_count_dec(pmap, 1);
 	if (m->pindex < NUPDE) {
 		/* We just released a PT, unhold the matching PD */
 		vm_page_t pdpg;
 
 		pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
 		pmap_unwire_pte_hold(pmap, va, pdpg, free);
 	}
 	if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
 		/* We just released a PD, unhold the matching PDP */
 		vm_page_t pdppg;
 
 		pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
 		pmap_unwire_pte_hold(pmap, va, pdppg, free);
 	}
 
 	/*
 	 * This is a release store so that the ordinary store unmapping
 	 * the page table page is globally performed before TLB shoot-
 	 * down is begun.
 	 */
 	atomic_subtract_rel_int(&cnt.v_wire_count, 1);
 
 	/* 
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
 	 */
 	pmap_add_delayed_free_list(m, free, TRUE);
 	
 	return (1);
 }
 
 /*
  * After removing a page table entry, this routine is used to
  * conditionally free the page, and manage the hold/wire counts.
  */
 static int
 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, vm_page_t *free)
 {
 	vm_page_t mpte;
 
 	/* Nothing is released for addresses at or above the user limit. */
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (0);
 	/*
 	 * The message is what gets printed when the assertion FAILS, so it
 	 * names the failure (a zero PDE) and the address being processed.
 	 */
 	KASSERT(ptepde != 0,
 	    ("pmap_unuse_pt: ptepde is 0 for va %#lx", va));
 	/* "ptepde" is the PDE whose frame is the page table page. */
 	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
 	/* Returns whatever pmap_unwire_pte_hold() reports. */
 	return (pmap_unwire_pte_hold(pmap, va, mpte, free));
 }
 
 /*
  * Initialize a pmap that uses the existing page table rooted at KPML4phys
  * (no PML4 page is allocated here, unlike pmap_pinit()) and record it as
  * this CPU's curpmap.
  */
 void
 pmap_pinit0(pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
 	/* Reach the PML4 at KPML4phys through the direct map. */
 	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	pmap->pm_root = NULL;
 	pmap->pm_active = 0;
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  */
 int
 pmap_pinit(pmap_t pmap)
 {
 	static vm_pindex_t color;
 	vm_page_t pml4pg;
 	vm_paddr_t pml4phys;
 
 	PMAP_LOCK_INIT(pmap);
 
 	/*
 	 * Allocate the wired page that will hold the PML4, retrying after
 	 * VM_WAIT for as long as the allocation fails.
 	 */
 	for (;;) {
 		pml4pg = vm_page_alloc(NULL, color++, VM_ALLOC_NOOBJ |
 		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 		if (pml4pg != NULL)
 			break;
 		VM_WAIT;
 	}
 	pml4phys = VM_PAGE_TO_PHYS(pml4pg);
 	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
 
 	/* Zero the page here unless it is already flagged PG_ZERO. */
 	if ((pml4pg->flags & PG_ZERO) == 0)
 		pagezero(pmap->pm_pml4);
 
 	/* Wire in kernel global address entries. */
 	pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
 	pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U;
 
 	/* Install the self-referential (recursive) mapping entry. */
 	pmap->pm_pml4[PML4PML4I] = pml4phys | PG_V | PG_RW | PG_A | PG_M;
 
 	pmap->pm_root = NULL;
 	pmap->pm_active = 0;
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
 	/* Always reports success. */
 	return (1);
 }
 
 /*
  * this routine is called if the page table page is not
  * mapped correctly.
  *
  * Note: If a page allocation fails at page table level two or three,
  * one or two pages may be held during the wait, only to be released
  * afterwards.  This conservative approach is easily argued to avoid
  * race conditions.
  */
 static vm_page_t
 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags)
 {
 	vm_page_t m, pdppg, pdpg;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 		if (flags & M_WAITOK) {
 			/*
 			 * Both the pmap lock and the page queues lock are
 			 * dropped around VM_WAIT and retaken afterwards.
 			 */
 			PMAP_UNLOCK(pmap);
 			vm_page_unlock_queues();
 			VM_WAIT;
 			vm_page_lock_queues();
 			PMAP_LOCK(pmap);
 		}
 
 		/*
 		 * Indicate the need to retry.  While waiting, the page table
 		 * page may have been allocated.
 		 */
 		return (NULL);
 	}
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 *
 	 * "ptepindex" encodes the level of the new page: indices at or
 	 * above NUPDE + NUPDPE are PDP pages, indices at or above NUPDE
 	 * are PD pages, and everything below is a PT page.
 	 */
 
 	if (ptepindex >= (NUPDE + NUPDPE)) {
 		pml4_entry_t *pml4;
 		vm_pindex_t pml4index;
 
 		/* Wire up a new PDPE page */
 		pml4index = ptepindex - (NUPDE + NUPDPE);
 		pml4 = &pmap->pm_pml4[pml4index];
 		*pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
 
 	} else if (ptepindex >= NUPDE) {
 		vm_pindex_t pml4index;
 		vm_pindex_t pdpindex;
 		pml4_entry_t *pml4;
 		pdp_entry_t *pdp;
 
 		/* Wire up a new PDE page */
 		pdpindex = ptepindex - NUPDE;
 		pml4index = pdpindex >> NPML4EPGSHIFT;
 
 		pml4 = &pmap->pm_pml4[pml4index];
 		if ((*pml4 & PG_V) == 0) {
 			/* Have to allocate a new pdp, recurse */
 			if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
 			    flags) == NULL) {
 				/* Undo the allocation of "m" and fail. */
 				--m->wire_count;
 				atomic_subtract_int(&cnt.v_wire_count, 1);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
 		} else {
 			/* Add reference to pdp page */
 			pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME);
 			pdppg->wire_count++;
 		}
 		pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
 
 		/* Now find the pdp page */
 		pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 		*pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
 
 	} else {
 		vm_pindex_t pml4index;
 		vm_pindex_t pdpindex;
 		pml4_entry_t *pml4;
 		pdp_entry_t *pdp;
 		pd_entry_t *pd;
 
 		/* Wire up a new PTE page */
 		pdpindex = ptepindex >> NPDPEPGSHIFT;
 		pml4index = pdpindex >> NPML4EPGSHIFT;
 
 		/* First, find the pdp and check that it's valid. */
 		pml4 = &pmap->pm_pml4[pml4index];
 		if ((*pml4 & PG_V) == 0) {
 			/* Have to allocate a new pd, recurse */
 			if (_pmap_allocpte(pmap, NUPDE + pdpindex,
 			    flags) == NULL) {
 				/* Undo the allocation of "m" and fail. */
 				--m->wire_count;
 				atomic_subtract_int(&cnt.v_wire_count, 1);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
 			/* The recursion filled in *pml4; recompute pdp. */
 			pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
 			pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 		} else {
 			pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
 			pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 			if ((*pdp & PG_V) == 0) {
 				/* Have to allocate a new pd, recurse */
 				if (_pmap_allocpte(pmap, NUPDE + pdpindex,
 				    flags) == NULL) {
 					/* Undo the allocation of "m". */
 					--m->wire_count;
 					atomic_subtract_int(&cnt.v_wire_count,
 					    1);
 					vm_page_free_zero(m);
 					return (NULL);
 				}
 			} else {
 				/* Add reference to the pd page */
 				pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME);
 				pdpg->wire_count++;
 			}
 		}
 		pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME);
 
 		/* Now we know where the page directory page is */
 		pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
 		*pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
 	}
 
 	/* The new page table page counts towards the resident count. */
 	pmap_resident_count_inc(pmap, 1);
 
 	return (m);
 }
 
 /*
  * Return the page directory page that covers "va" with one more reference
  * on it, allocating the page if necessary.  With M_WAITOK the allocation is
  * retried until it succeeds; otherwise NULL may be returned.
  */
 static vm_page_t
 pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags)
 {
 	vm_pindex_t pdpindex;
 	pdp_entry_t *pdpe;
 	vm_page_t pdpg;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("pmap_allocpde: flags is neither M_NOWAIT nor M_WAITOK"));
 	for (;;) {
 		pdpe = pmap_pdpe(pmap, va);
 		if (pdpe != NULL && (*pdpe & PG_V) != 0) {
 			/* Add a reference to the pd page. */
 			pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME);
 			pdpg->wire_count++;
 			return (pdpg);
 		}
 
 		/* Allocate a pd page. */
 		pdpindex = pmap_pde_pindex(va) >> NPDPEPGSHIFT;
 		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags);
 		/* Look again only after a failed M_WAITOK allocation. */
 		if (pdpg != NULL || (flags & M_WAITOK) == 0)
 			return (pdpg);
 	}
 }
 
 /*
  * Return the page table page that covers "va" with one more reference on
  * it, allocating the page if necessary.  With M_WAITOK the allocation is
  * retried until it succeeds; otherwise NULL may be returned.
  */
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
 	vm_pindex_t ptepindex;
 	pd_entry_t *pd;
 	vm_page_t m;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 
 	/* Index of the page table page; it does not change across retries. */
 	ptepindex = pmap_pde_pindex(va);
 	for (;;) {
 		/* Get the page directory entry. */
 		pd = pmap_pde(pmap, va);
 
 		/*
 		 * This supports switching from a 2MB page to a normal 4K
 		 * page.  If the demotion fails, invalidation of the 2MB
 		 * page mapping may have caused the deallocation of the
 		 * underlying PD page, so the entry is not used any further.
 		 */
 		if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V) &&
 		    !pmap_demote_pde(pmap, pd, va))
 			pd = NULL;
 
 		/*
 		 * If the page table page is mapped, just take another
 		 * reference on it.
 		 */
 		if (pd != NULL && (*pd & PG_V) != 0) {
 			m = PHYS_TO_VM_PAGE(*pd & PG_FRAME);
 			m->wire_count++;
 			return (m);
 		}
 
 		/*
 		 * Here if the pte page isn't mapped, or if it has been
 		 * deallocated.
 		 */
 		m = _pmap_allocpte(pmap, ptepindex, flags);
 		/* Look again only after a failed M_WAITOK allocation. */
 		if (m != NULL || (flags & M_WAITOK) == 0)
 			return (m);
 	}
 }
 
 
 /***************************************************
  * Pmap allocation/deallocation routines.
  ***************************************************/
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  */
 void
 pmap_release(pmap_t pmap)
 {
 	pml4_entry_t *pml4;
 	vm_page_t pml4pg;
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
 	KASSERT(pmap->pm_root == NULL,
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
 	/*
 	 * Find the PML4 page through the recursive slot before that slot
 	 * is cleared below.
 	 */
 	pml4 = pmap->pm_pml4;
 	pml4pg = PHYS_TO_VM_PAGE(pml4[PML4PML4I] & PG_FRAME);
 
 	pml4[KPML4I] = 0;	/* KVA */
 	pml4[DMPML4I] = 0;	/* Direct Map */
 	pml4[PML4PML4I] = 0;	/* Recursive Mapping */
 
 	/* Unwire the PML4 page and free it. */
 	--pml4pg->wire_count;
 	atomic_subtract_int(&cnt.v_wire_count, 1);
 	vm_page_free_zero(pml4pg);
 	PMAP_LOCK_DESTROY(pmap);
 }
 
 /*
  * Sysctl handler for vm.kvm_size: reports VM_MAX_KERNEL_ADDRESS minus
  * VM_MIN_KERNEL_ADDRESS.
  */
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long span;
 
 	span = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
 	return (sysctl_handle_long(oidp, &span, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_size, "LU", "Size of KVM");
 
 /*
  * Sysctl handler for vm.kvm_free: reports VM_MAX_KERNEL_ADDRESS minus the
  * current kernel_vm_end.
  */
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long avail;
 
 	avail = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 	return (sysctl_handle_long(oidp, &avail, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "LU", "Amount of KVM free");
 
 /*
  * grow the number of kernel page table entries, if needed
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	vm_paddr_t paddr;
 	vm_page_t nkpg;
 	pd_entry_t *pde, newpdir;
 	pdp_entry_t *pdpe;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 
 	/*
 	 * Return if "addr" is within the range of kernel page table pages
 	 * that were preallocated during pmap bootstrap.  Moreover, leave
 	 * "kernel_vm_end" and the kernel page table as they were.
 	 *
 	 * The correctness of this action is based on the following
 	 * argument: vm_map_findspace() allocates contiguous ranges of the
 	 * kernel virtual address space.  It calls this function if a range
 	 * ends after "kernel_vm_end".  If the kernel is mapped between
 	 * "kernel_vm_end" and "addr", then the range cannot begin at
 	 * "kernel_vm_end".  In fact, its beginning address cannot be less
 	 * than the kernel.  Thus, there is no immediate need to allocate
 	 * any new kernel page table pages between "kernel_vm_end" and
 	 * "KERNBASE".
 	 */
 	if (KERNBASE < addr && addr <= KERNBASE + NKPT * NBPDR)
 		return;
 
 	/*
 	 * Round the target up to a multiple of NBPDR and clamp it to the
 	 * kernel map's max_offset.  The "addr - 1" form also catches an
 	 * "addr" that wrapped around to 0 when it was rounded up.
 	 */
 	addr = roundup2(addr, NBPDR);
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
 	/* Advance kernel_vm_end one NBPDR-sized step per iteration. */
 	while (kernel_vm_end < addr) {
 		pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
 		if ((*pdpe & PG_V) == 0) {
 			/* We need a new PDP entry */
 			nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (nkpg == NULL)
 				panic("pmap_growkernel: no memory to grow kernel");
 			if ((nkpg->flags & PG_ZERO) == 0)
 				pmap_zero_page(nkpg);
 			paddr = VM_PAGE_TO_PHYS(nkpg);
 			*pdpe = (pdp_entry_t)
 				(paddr | PG_V | PG_RW | PG_A | PG_M);
 			continue; /* try again */
 		}
 		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
 		if ((*pde & PG_V) != 0) {
 			/* Already backed by a page table page: skip ahead. */
 			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 				kernel_vm_end = kernel_map->max_offset;
 				break;                       
 			}
 			continue;
 		}
 
 		/* Allocate, zero if needed, and install a page table page. */
 		nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end),
 		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 		    VM_ALLOC_ZERO);
 		if (nkpg == NULL)
 			panic("pmap_growkernel: no memory to grow kernel");
 		if ((nkpg->flags & PG_ZERO) == 0)
 			pmap_zero_page(nkpg);
 		paddr = VM_PAGE_TO_PHYS(nkpg);
 		newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M);
 		pde_store(pde, newpdir);
 
 		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
 			break;                       
 		}
 	}
 }
 
 
 /***************************************************
  * page management routines.
  ***************************************************/
 
 /*
  * Compile-time checks on the pv chunk layout that the code below relies
  * on: pv_to_chunk() masks off the PAGE_MASK bits of an entry's address, so
  * a chunk must be exactly one page; the free bitmap has three 64-bit words
  * (PC_FREE0..PC_FREE2) whose set bits total 64 + 64 + 40 = 168 entries.
  */
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 3);
 CTASSERT(_NPCPV == 168);
 
 /*
  * Map a pv entry to the chunk that contains it by clearing the PAGE_MASK
  * bits of the entry's address.
  */
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 	uintptr_t base;
 
 	base = (uintptr_t)pv & ~(uintptr_t)PAGE_MASK;
 	return ((struct pv_chunk *)base);
 }
 
 /* The pmap that owns a pv entry, taken from the entry's chunk header. */
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 /*
  * Bitmap values of a completely free chunk, one per pc_map[] word.  A set
  * bit marks a free entry (see free_pv_entry() and get_pv_entry()).  The
  * last word has only its low 40 bits set.
  */
 #define	PC_FREE0	0xfffffffffffffffful
 #define	PC_FREE1	0xfffffffffffffffful
 #define	PC_FREE2	0x000000fffffffffful
 
 /* The three masks above, in pc_map[] order. */
 static uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 
 /* Read-only export of pv_entry_count under vm.pmap. */
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 
 /*
  * Optional statistics, compiled in only with PV_STATS.  The counters are
  * updated through PV_STAT() in get_pv_entry() and free_pv_entry().
  */
 #ifdef PV_STATS
 /* Chunk-level counters. */
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 	"Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 	"Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 	"Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 	"Number of times tried to get a chunk page but failed.");
 
 /* Entry-level counters. */
 static long pv_entry_frees, pv_entry_allocs;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 	"Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 	"Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 	"Current number of spare pv entries");
 
 /* How often get_pv_entry() resorted to pmap_collect() on each queue. */
 static int pmap_collect_inactive, pmap_collect_active;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
 	"Current number times pmap_collect called on inactive queue");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
 	"Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
  * another pv entry chunk.  This is normally called to
  * unmap inactive pages, and if necessary, active pages.
  *
  * We do not, however, unmap 2mpages because subsequent accesses will
  * allocate per-page pv entries until repromotion occurs, thereby
  * exacerbating the shortage of free pv entries.
  */
 static void
 pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 {
 	pd_entry_t *pde;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	pv_entry_t next_pv, pv;
 	vm_offset_t va;
 	vm_page_t m, free;
 
 	/* Visit every page on the given queue. */
 	TAILQ_FOREACH(m, &vpq->pl, pageq) {
 		/* Held or busy pages are left alone. */
 		if (m->hold_count || m->busy)
 			continue;
 		/* Tear down each 4KB mapping recorded on the page's pv list. */
 		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 			va = pv->pv_va;
 			pmap = PV_PMAP(pv);
 			/*
 			 * Avoid deadlock and lock recursion.  A pmap at a
 			 * higher address than "locked_pmap" is locked
 			 * unconditionally; any other pmap is only try-locked
 			 * and the mapping is skipped when that fails.
 			 * "locked_pmap" itself is not locked again here.
 			 */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
 			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
 				continue;
 			pmap_resident_count_dec(pmap, 1);
 			pde = pmap_pde(pmap, va);
 			KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
 			    " a 2mpage in page %p's pv list", m));
 			pte = pmap_pde_to_pte(pde, va);
 			/* Fetch the old PTE and clear it in one step. */
 			tpte = pte_load_clear(pte);
 			KASSERT((tpte & PG_W) == 0,
 			    ("pmap_collect: wired pte %#lx", tpte));
 			/* Carry the accessed/modified state over to the page. */
 			if (tpte & PG_A)
 				vm_page_flag_set(m, PG_REFERENCED);
 			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
 			/*
 			 * Page table pages released by pmap_unuse_pt() are
 			 * collected on "free" and handed back only after
 			 * the TLB entry has been invalidated.
 			 */
 			free = NULL;
 			pmap_unuse_pt(pmap, va, *pde, &free);
 			pmap_invalidate_page(pmap, va);
 			pmap_free_zero_pages(free);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			free_pv_entry(pmap, pv);
 			if (pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 		}
 		/*
 		 * With neither the page's own pv list nor the pv list of
 		 * pa_to_pvh() for its physical address holding an entry,
 		 * the page is no longer marked writeable.
 		 */
 		if (TAILQ_EMPTY(&m->md.pv_list) &&
 		    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
 			vm_page_flag_clear(m, PG_WRITEABLE);
 	}
 }
 
 
 /*
  * free the pv_entry back to the free list
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *chunk;
 	vm_page_t chunk_page;
 	int slot;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_frees++);
 	PV_STAT(pv_entry_spare++);
 	pv_entry_count--;
 
 	/* Mark the entry's slot free in the bitmap of its chunk. */
 	chunk = pv_to_chunk(pv);
 	slot = pv - &chunk->pc_pventry[0];
 	chunk->pc_map[slot / 64] |= 1ul << (slot % 64);
 
 	TAILQ_REMOVE(&pmap->pm_pvchunk, chunk, pc_list);
 	if (chunk->pc_map[0] == PC_FREE0 && chunk->pc_map[1] == PC_FREE1 &&
 	    chunk->pc_map[2] == PC_FREE2) {
 		/* Entire chunk is free: return its page. */
 		PV_STAT(pv_entry_spare -= _NPCPV);
 		PV_STAT(pc_chunk_count--);
 		PV_STAT(pc_chunk_frees++);
 		chunk_page = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)chunk));
 		dump_drop_page(chunk_page->phys_addr);
 		vm_page_unwire(chunk_page, 0);
 		vm_page_free(chunk_page);
 	} else {
 		/* Still partly in use: move it to the head of the list. */
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, chunk, pc_list);
 	}
 }
 
 /*
  * get a new pv_entry, allocating a block from the system
  * when needed.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, int try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
 	static vm_pindex_t colour;
 	struct vpgqueues *pq;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PV_STAT(pv_entry_allocs++);
 	pv_entry_count++;
 	/* The warning is rate limited through ratecheck(). */
 	if (pv_entry_count > pv_entry_high_water)
 		if (ratecheck(&lastprint, &printinterval))
 			printf("Approaching the limit on PV entries, consider "
 			    "increasing either the vm.pmap.shpgperproc or the "
 			    "vm.pmap.pv_entry_max sysctl.\n");
 	/*
 	 * "pq" records how far reclamation has escalated: NULL (none yet),
 	 * then the inactive queue, then the active queue.
 	 */
 	pq = NULL;
 retry:
 	/* Only the first chunk on the pmap's list is examined. */
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		/* Find the first bitmap word with a free (set) bit. */
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = bsfq(pc->pc_map[field]);
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 64 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
 			    pc->pc_map[2] == 0) {
 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 			}
 			PV_STAT(pv_entry_spare--);
 			return (pv);
 		}
 	}
 	/*
 	 * No free items, allocate another chunk.  Once reclamation has
 	 * reached the active queue the request is made with
 	 * VM_ALLOC_SYSTEM instead of VM_ALLOC_NORMAL.
 	 */
 	m = vm_page_alloc(NULL, colour, (pq == &vm_page_queues[PQ_ACTIVE] ?
 	    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED);
 	if (m == NULL) {
 		if (try) {
 			/* Caller accepts failure: undo the count and bail. */
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		/*
 		 * Reclaim pv entries: At first, destroy mappings to inactive
 		 * pages.  After that, if a pv chunk entry is still needed,
 		 * destroy mappings to active pages.
 		 */
 		if (pq == NULL) {
 			PV_STAT(pmap_collect_inactive++);
 			pq = &vm_page_queues[PQ_INACTIVE];
 		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
 			PV_STAT(pmap_collect_active++);
 			pq = &vm_page_queues[PQ_ACTIVE];
 		} else
 			panic("get_pv_entry: increase vm.pmap.shpgperproc");
 		pmap_collect(pmap, pq);
 		goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
 	colour++;
 	dump_add_page(m->phys_addr);
 	/* The new chunk is accessed through the direct map. */
 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
 	pc->pc_map[1] = PC_FREE1;
 	pc->pc_map[2] = PC_FREE2;
 	/* Entry 0 of the new chunk is the one handed to the caller. */
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
 	return (pv);
 }
 
 /*
  * Searches the given pv list for the entry that belongs to (pmap, va).
  * A matching entry is unlinked from the list and handed back to the
  * caller; NULL is returned when the list holds no such entry.  The list
  * may belong to either a 4KB page or a 2MB page mapping.
  */
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t entry;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	for (entry = TAILQ_FIRST(&pvh->pv_list); entry != NULL;
 	    entry = TAILQ_NEXT(entry, pv_list)) {
 		if (PV_PMAP(entry) != pmap || entry->pv_va != va)
 			continue;
 		/* Found it: detach the entry and stop searching. */
 		TAILQ_REMOVE(&pvh->pv_list, entry, pv_list);
 		return (entry);
 	}
 	return (NULL);
 }
 
 /*
  * Converts the pv bookkeeping of a demoted 2MB page mapping: the single
  * pv entry of the 2MB mapping is reused for the first 4KB page, and a new
  * pv entry is created for every other 4KB page of the 2MB range.
  */
 static void
 pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	vm_page_t page;
 	pv_entry_t moved;
 	vm_offset_t last_va;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_demote_pde: pa is not 2mpage aligned"));
 
 	/*
 	 * Move the 2mpage's pv entry for this mapping over to the pv list
 	 * of the first 4KB page.
 	 */
 	va = trunc_2mpage(va);
 	moved = pmap_pvh_remove(pa_to_pvh(pa), pmap, va);
 	KASSERT(moved != NULL, ("pmap_pv_demote_pde: pv not found"));
 	page = PHYS_TO_VM_PAGE(pa);
 	TAILQ_INSERT_TAIL(&page->md.pv_list, moved, pv_list);
 
 	/* Create pv entries for the 4KB pages that follow the first one. */
 	last_va = va + NBPDR - PAGE_SIZE;
 	do {
 		page++;
 		KASSERT((page->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 		    ("pmap_pv_demote_pde: page %p is not managed", page));
 		va += PAGE_SIZE;
 		pmap_insert_entry(pmap, va, page);
 	} while (va < last_va);
 }
 
 /*
  * Converts the pv bookkeeping of a promoted mapping: the pv entries of the
  * 512 4KB page mappings are collapsed into the one pv entry that represents
  * the resulting 2MB page mapping.
  */
 static void
 pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	vm_page_t page;
 	pv_entry_t kept;
 	struct md_page *superpage_pvh;
 	vm_offset_t last_va;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_promote_pde: pa is not 2mpage aligned"));
 
 	/*
 	 * Reuse the first 4KB page's pv entry as the 2mpage's pv entry
 	 * instead of allocating one.  Besides being cheaper, this keeps
 	 * get_pv_entry() from calling pmap_collect(), which could remove
 	 * one of the mappings that is in the middle of being promoted.
 	 */
 	page = PHYS_TO_VM_PAGE(pa);
 	va = trunc_2mpage(va);
 	kept = pmap_pvh_remove(&page->md, pmap, va);
 	KASSERT(kept != NULL, ("pmap_pv_promote_pde: pv not found"));
 	superpage_pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&superpage_pvh->pv_list, kept, pv_list);
 
 	/* Release the pv entries of the 4KB pages after the first one. */
 	last_va = va + NBPDR - PAGE_SIZE;
 	do {
 		page++;
 		va += PAGE_SIZE;
 		pmap_pvh_free(&page->md, pmap, va);
 	} while (va < last_va);
 }
 
 /*
  * Unlinks the pv entry for (pmap, va) from the given pv list and releases
  * it with free_pv_entry().  The entry is expected to exist.  Works on the
  * pv list of either a 4KB page or a 2MB page mapping.
  */
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t victim;
 
 	victim = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(victim != NULL, ("pmap_pvh_free: pv not found"));
 	free_pv_entry(pmap, victim);
 }
 
 /*
  * Destroys the pv entry for (pmap, va) on page m's pv list.  When this
  * leaves both the page's own pv list and the pv list of its 2MB frame
  * empty, PG_WRITEABLE is cleared on the page.
  */
 static void
 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 {
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	pmap_pvh_free(&m->md, pmap, va);
 	if (TAILQ_EMPTY(&m->md.pv_list) &&
 	    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
 		vm_page_flag_clear(m, PG_WRITEABLE);
 }
 
 /*
  * Allocates a pv entry for the mapping (pmap, va) and appends it to the
  * pv list of page m.  The allocation is made with try == FALSE.
  */
 static void
 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	entry = get_pv_entry(pmap, FALSE);
 	entry->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
 }
 
 /*
  * Creates a pv entry for (pmap, va) on page m's pv list, but only if the
  * pv entry count is below the high water mark and get_pv_entry() can
  * supply an entry in "try" mode.  Returns TRUE if the entry was created
  * and FALSE otherwise.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	if (pv_entry_count >= pv_entry_high_water)
 		return (FALSE);
 	entry = get_pv_entry(pmap, TRUE);
 	if (entry == NULL)
 		return (FALSE);
 	entry->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
 	return (TRUE);
 }
 
 /*
  * Creates the pv entry for a 2MB page mapping of physical address pa at
  * (pmap, va).  Nothing is created, and FALSE is returned, when the pv entry
  * count has reached the high water mark or get_pv_entry() fails in "try"
  * mode.  Returns TRUE on success.
  */
 static boolean_t
 pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *head;
 	pv_entry_t entry;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	if (pv_entry_count >= pv_entry_high_water)
 		return (FALSE);
 	entry = get_pv_entry(pmap, TRUE);
 	if (entry == NULL)
 		return (FALSE);
 	entry->pv_va = va;
 	head = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&head->pv_list, entry, pv_list);
 	return (TRUE);
 }
 
 /*
  * Writes NPTEPG page table entries starting at firstpte.  The first entry
  * receives newpte and each following entry receives the previous value
  * advanced by PAGE_SIZE, so the entries map consecutive physical pages.
  */
 static void
 pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
 {
 	pt_entry_t *cur, *end;
 
 	cur = firstpte;
 	end = firstpte + NPTEPG;
 	while (cur < end) {
 		*cur++ = newpte;
 		newpte += PAGE_SIZE;
 	}
 }
 
 /*
  * Tries to demote a 2MB page mapping.  If demotion fails, the 2MB page
  * mapping is invalidated.
  *
  * pmap is the (locked) pmap that owns the mapping, pde points at the page
  * directory entry holding the 2MB mapping, and va is any virtual address
  * within that mapping.  Returns TRUE when the PDE now references a page
  * table page, and FALSE when the mapping was removed instead.
  */
 static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
 	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldpde = *pde;
 	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 	/*
 	 * Prefer a page table page that pmap_lookup_pt_page() already knows
 	 * for this va; only allocate a new one when none is found.
 	 */
 	mpte = pmap_lookup_pt_page(pmap, va);
 	if (mpte != NULL)
 		pmap_remove_pt_page(pmap, mpte);
 	else {
 		KASSERT((oldpde & PG_W) == 0,
 		    ("pmap_demote_pde: page table page for a wired mapping"
 		    " is missing"));
 
 		/*
 		 * Invalidate the 2MB page mapping and return "failure" if the
 		 * mapping was never accessed or the allocation of the new
 		 * page table page fails.  If the 2MB page mapping belongs to
 		 * the direct map region of the kernel's address space, then
 		 * the page allocation request specifies the highest possible
 		 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the priority is
 		 * normal.  Page table pages are preallocated for every other
 		 * part of the kernel address space, so the direct map region
 		 * is the only part of the kernel address space that must be
 		 * handled here.
 		 */
 		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
 		    pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va <
 		    DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 			free = NULL;
 			pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free);
 			pmap_invalidate_page(pmap, trunc_2mpage(va));
 			pmap_free_zero_pages(free);
 			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return (FALSE);
 		}
 		/* A newly allocated user page table page is counted as resident. */
 		if (va < VM_MAXUSER_ADDRESS)
 			pmap_resident_count_inc(pmap, 1);
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
 	/*
 	 * The new PDE references the page table page.  Of the old PDE's
 	 * bits, only PG_U is carried over.
 	 */
 	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
 	KASSERT((oldpde & PG_A) != 0,
 	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pde: oldpde is missing PG_M"));
 	/*
 	 * The PTE template is the old PDE without PG_PS.  If the PDE's PAT
 	 * bit was set, clear it and set the PTE's PAT bit instead.
 	 */
 	newpte = oldpde & ~PG_PS;
 	if ((newpte & PG_PDE_PAT) != 0)
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
 	 * If the page table page is new, initialize it.
 	 */
 	if (mpte->wire_count == 1) {
 		mpte->wire_count = NPTEPG;
 		pmap_fill_ptp(firstpte, newpte);
 	}
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 	    ("pmap_demote_pde: firstpte and newpte map different physical"
 	    " addresses"));
 
 	/*
 	 * If the mapping has changed attributes, update the page table
 	 * entries.
 	 */
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
 		pmap_fill_ptp(firstpte, newpte);
 
 	/*
 	 * Demote the mapping.  This pmap is locked.  The old PDE has
 	 * PG_A set.  If the old PDE has PG_RW set, it also has PG_M
 	 * set.  Thus, there is no danger of a race with another
 	 * processor changing the setting of PG_A and/or PG_M between
 	 * the read above and the store below.
 	 */
 	if (workaround_erratum383)
 		pmap_update_pde(pmap, va, pde, newpde);
 	else
 		pde_store(pde, newpde);
 
 	/*
 	 * Invalidate a stale recursive mapping of the page table page.
 	 */
 	if (va >= VM_MAXUSER_ADDRESS)
 		pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
 
 	/*
 	 * Demote the pv entry.  This depends on the earlier demotion
 	 * of the mapping.  Specifically, the (re)creation of a per-
 	 * page pv entry might trigger the execution of pmap_collect(),
 	 * which might reclaim a newly (re)created per-page pv entry
 	 * and destroy the associated mapping.  In order to destroy
 	 * the mapping, the PDE must have already changed from mapping
 	 * the 2mpage to referencing the page table page.
 	 */
 	if ((oldpde & PG_MANAGED) != 0)
 		pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
 
 	pmap_pde_demotions++;
 	CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
 	return (TRUE);
 }
 
 /*
  * pmap_remove_pde: do the things to unmap a superpage in a process
  *
  * pdq points at the PDE holding the 2MB mapping and sva is the 2MB-aligned
  * virtual address that it maps.  Page table pages that become unused are
  * chained onto *free for the caller to release.  The return value is the
  * one produced by pmap_unuse_pt().
  */
 static int
 pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     vm_page_t *free)
 {
 	struct md_page *pvh;
 	pd_entry_t oldpde;
 	vm_offset_t eva, va;
 	vm_page_t m, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PDRMASK) == 0,
 	    ("pmap_remove_pde: sva is not 2mpage aligned"));
 	/* Fetch the old PDE and clear the entry. */
 	oldpde = pte_load_clear(pdq);
 	/* The statistics are kept in units of 4KB pages. */
 	if (oldpde & PG_W)
 		pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
 
 	/*
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
 	if (oldpde & PG_G)
 		pmap_invalidate_page(kernel_pmap, sva);
 	pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
 	if (oldpde & PG_MANAGED) {
 		pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + NBPDR;
 		/*
 		 * Propagate the 2MB mapping's modified and referenced state
 		 * to each of its 4KB pages, and clear PG_WRITEABLE on pages
 		 * that no longer have any pv entries.
 		 */
 		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 		    va < eva; va += PAGE_SIZE, m++) {
 			if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
 			if (oldpde & PG_A)
 				vm_page_flag_set(m, PG_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_flag_clear(m, PG_WRITEABLE);
 		}
 	}
 	if (pmap == kernel_pmap) {
 		/*
 		 * NOTE(review): *pdq was cleared by pte_load_clear() above,
 		 * yet pmap_demote_pde() asserts that the PDE it is given has
 		 * PG_PS and PG_V set -- confirm that this path is correct.
 		 */
 		if (!pmap_demote_pde(pmap, pdq, sva))
 			panic("pmap_remove_pde: failed demotion");
 	} else {
 		/*
 		 * Release the page table page, if any, that
 		 * pmap_lookup_pt_page() finds for this address.
 		 */
 		mpte = pmap_lookup_pt_page(pmap, sva);
 		if (mpte != NULL) {
 			pmap_remove_pt_page(pmap, mpte);
 			pmap_resident_count_dec(pmap, 1);
 			KASSERT(mpte->wire_count == NPTEPG,
 			    ("pmap_remove_pde: pte page wire count error"));
 			mpte->wire_count = 0;
 			pmap_add_delayed_free_list(mpte, free, FALSE);
 			atomic_subtract_int(&cnt.v_wire_count, 1);
 		}
 	}
 	return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free));
 }
 
 /*
  * pmap_remove_pte: tears down the single 4KB mapping held by *ptq.
  *
  * The PTE is cleared, the pmap's wired and resident counts are adjusted,
  * and, for a managed page, the modified/referenced state is passed on to
  * the vm_page before its pv entry is destroyed.  The return value is the
  * one produced by pmap_unuse_pt().
  */
 static int
 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
     pd_entry_t ptepde, vm_page_t *free)
 {
 	pt_entry_t old;
 	vm_page_t page;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	old = pte_load_clear(ptq);
 	if ((old & PG_W) != 0)
 		pmap->pm_stats.wired_count -= 1;
 	pmap_resident_count_dec(pmap, 1);
 	if ((old & PG_MANAGED) != 0) {
 		page = PHYS_TO_VM_PAGE(old & PG_FRAME);
 		/* Only a writable mapping with PG_M set dirties the page. */
 		if ((old & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(page);
 		if ((old & PG_A) != 0)
 			vm_page_flag_set(page, PG_REFERENCED);
 		pmap_remove_entry(pmap, page, va);
 	}
 	return (pmap_unuse_pt(pmap, va, ptepde, free));
 }
 
 /*
  * Removes the 4KB mapping at va, if there is one, from the given pmap and
  * invalidates the page's TLB entry.  Nothing happens when either the PDE
  * or the PTE is not valid.
  */
 static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free)
 {
 	pt_entry_t *entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if ((*pde & PG_V) != 0) {
 		entry = pmap_pde_to_pte(pde, va);
 		if ((*entry & PG_V) != 0) {
 			pmap_remove_pte(pmap, entry, va, *pde, free);
 			pmap_invalidate_page(pmap, va);
 		}
 	}
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	The range is [sva, eva).  Page table pages that become unused
  *	are collected on a local list and released only after the locks
  *	have been dropped.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t va, va_next;
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
 	pd_entry_t ptpaddr, *pde;
 	pt_entry_t *pte;
 	vm_page_t free = NULL;
 	int anyvalid;
 
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	/* Set once a non-PG_G mapping is removed; triggers the final flush. */
 	anyvalid = 0;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if (sva + PAGE_SIZE == eva) {
 		pde = pmap_pde(pmap, sva);
 		/* A 2MB mapping is left to the general loop below. */
 		if (pde && (*pde & PG_PS) == 0) {
 			pmap_remove_page(pmap, sva, pde, &free);
 			goto out;
 		}
 	}
 
 	for (; sva < eva; sva = va_next) {
 
 		/* Nothing is left to remove: stop walking early. */
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		/*
 		 * Skip ahead to the next boundary whenever an upper level
 		 * entry is invalid.  If that computation wraps around,
 		 * va_next is set to eva so that the loop terminates.
 		 */
 		pml4e = pmap_pml4e(pmap, sva);
 		if ((*pml4e & PG_V) == 0) {
 			va_next = (sva + NBPML4) & ~PML4MASK;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
 		if ((*pdpe & PG_V) == 0) {
 			va_next = (sva + NBPDP) & ~PDPMASK;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		va_next = (sva + NBPDR) & ~PDRMASK;
 		if (va_next < sva)
 			va_next = eva;
 
 		pde = pmap_pdpe_to_pde(pdpe, sva);
 		ptpaddr = *pde;
 
 		/*
 		 * Weed out invalid mappings.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
 			/*
 			 * Are we removing the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + NBPDR == va_next && eva >= va_next) {
 				/*
 				 * The TLB entry for a PG_G mapping is
 				 * invalidated by pmap_remove_pde().
 				 */
 				if ((ptpaddr & PG_G) == 0)
 					anyvalid = 1;
 				pmap_remove_pde(pmap, pde, sva, &free);
 				continue;
 			} else if (!pmap_demote_pde(pmap, pde, sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
 			} else
 				ptpaddr = *pde;
 		}
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (va_next > eva)
 			va_next = eva;
 
 		/*
 		 * va marks the start of a pending run of removed PG_G
 		 * mappings that still needs a range invalidation;
 		 * va == va_next means that no such run is pending.
 		 */
 		va = va_next;
 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
 		    sva += PAGE_SIZE) {
 			if (*pte == 0) {
 				/* An empty PTE ends the pending run: flush it. */
 				if (va != va_next) {
 					pmap_invalidate_range(pmap, va, sva);
 					va = va_next;
 				}
 				continue;
 			}
 			if ((*pte & PG_G) == 0)
 				anyvalid = 1;
 			else if (va == va_next)
 				va = sva;
 			/*
 			 * A non-zero return from pmap_remove_pte() ends the
 			 * scan of this page table page; sva is advanced first
 			 * so that the flush below covers the page just removed.
 			 */
 			if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free)) {
 				sva += PAGE_SIZE;
 				break;
 			}
 		}
 		if (va != va_next)
 			pmap_invalidate_range(pmap, va, sva);
 	}
 out:
 	if (anyvalid)
 		pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();	
 	PMAP_UNLOCK(pmap);
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  *
  *		The page must not be fictitious.  PG_WRITEABLE is
  *		cleared on the page once every mapping is gone.
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	pd_entry_t *pde;
 	vm_offset_t va;
 	vm_page_t free;
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_remove_all: page %p is fictitious", m));
 	free = NULL;
 	vm_page_lock_queues();
 	/*
 	 * First pass: demote every 2MB mapping that covers this page.  The
 	 * result of pmap_demote_pde() is deliberately ignored; presumably
 	 * either outcome takes the pv entry off pvh's list (see
 	 * pmap_pv_demote_pde() and pmap_remove_pde()), which is what lets
 	 * this loop make progress.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		va = pv->pv_va;
 		pde = pmap_pde(pmap, va);
 		(void)pmap_demote_pde(pmap, pde, va);
 		PMAP_UNLOCK(pmap);
 	}
 	/* Second pass: destroy each remaining 4KB mapping of the page. */
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pmap_resident_count_dec(pmap, 1);
 		pde = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
 		tpte = pte_load_clear(pte);
 		if (tpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, *pde, &free);
 		pmap_invalidate_page(pmap, pv->pv_va);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
 	vm_page_unlock_queues();
 	/* Freed page table pages are released after the queues are unlocked. */
 	pmap_free_zero_pages(free);
 }
 
 /*
  * pmap_protect_pde: applies a protection change to one 2MB page mapping.
  *
  * Write permission (together with PG_M) is removed when prot lacks
  * VM_PROT_WRITE, and pg_nx is added when prot lacks VM_PROT_EXECUTE.  The
  * PDE is updated with a compare-and-set that is retried until it succeeds.
  * Returns TRUE if the PDE was changed and the caller still has to
  * invalidate the TLB; a PG_G mapping is invalidated here and reports FALSE.
  */
 static boolean_t
 pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
 {
 	pd_entry_t cur, want;
 	vm_offset_t addr;
 	vm_page_t page;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PDRMASK) == 0,
 	    ("pmap_protect_pde: sva is not 2mpage aligned"));
 	for (;;) {
 		cur = want = *pde;
 		if ((cur & PG_MANAGED) != 0) {
 			/* Pass the modified state on to each 4KB page. */
 			page = PHYS_TO_VM_PAGE(cur & PG_PS_FRAME);
 			for (addr = sva; addr < sva + NBPDR;
 			    addr += PAGE_SIZE, page++) {
 				if ((cur & (PG_M | PG_RW)) == (PG_M | PG_RW))
 					vm_page_dirty(page);
 			}
 		}
 		if ((prot & VM_PROT_WRITE) == 0)
 			want &= ~(PG_RW | PG_M);
 		if ((prot & VM_PROT_EXECUTE) == 0)
 			want |= pg_nx;
 		/* Nothing to change: no TLB work is needed either. */
 		if (want == cur)
 			return (FALSE);
 		/* Start over if the PDE changed underneath us. */
 		if (atomic_cmpset_long(pde, cur, want))
 			break;
 	}
 	if ((cur & PG_G) != 0) {
 		pmap_invalidate_page(pmap, sva);
 		return (FALSE);
 	}
 	return (TRUE);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	The range is [sva, eva).  A request without VM_PROT_READ removes
  *	the mappings, and a request that grants both write and execute
  *	access changes nothing.  Otherwise write access and/or execute
  *	access is taken away from every valid mapping in the range.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	vm_offset_t va_next;
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
 	pd_entry_t ptpaddr, *pde;
 	pt_entry_t *pte;
 	int anychanged;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	/* This function only ever removes write and/or execute access. */
 	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
 		return;
 
 	/* Set once a non-PG_G entry is modified; triggers the final flush. */
 	anychanged = 0;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 
 		/*
 		 * Skip ahead to the next boundary whenever an upper level
 		 * entry is invalid.  If that computation wraps around,
 		 * va_next is set to eva so that the loop terminates.
 		 */
 		pml4e = pmap_pml4e(pmap, sva);
 		if ((*pml4e & PG_V) == 0) {
 			va_next = (sva + NBPML4) & ~PML4MASK;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
 		if ((*pdpe & PG_V) == 0) {
 			va_next = (sva + NBPDP) & ~PDPMASK;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		va_next = (sva + NBPDR) & ~PDRMASK;
 		if (va_next < sva)
 			va_next = eva;
 
 		pde = pmap_pdpe_to_pde(pdpe, sva);
 		ptpaddr = *pde;
 
 		/*
 		 * Weed out invalid mappings.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
 			/*
 			 * Are we protecting the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + NBPDR == va_next && eva >= va_next) {
 				/*
 				 * The TLB entry for a PG_G mapping is
 				 * invalidated by pmap_protect_pde().
 				 */
 				if (pmap_protect_pde(pmap, pde, sva, prot))
 					anychanged = 1;
 				continue;
 			} else if (!pmap_demote_pde(pmap, pde, sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
 			}
 		}
 
 		/* Do not scan beyond the end of the requested range. */
 		if (va_next > eva)
 			va_next = eva;
 
 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
 		    sva += PAGE_SIZE) {
 			pt_entry_t obits, pbits;
 			vm_page_t m;
 
 retry:
 			/* obits is the value read; pbits is the desired value. */
 			obits = pbits = *pte;
 			if ((pbits & PG_V) == 0)
 				continue;
 
 			if ((prot & VM_PROT_WRITE) == 0) {
 				/*
 				 * Record the modification in the vm_page
 				 * before PG_M is dropped from the PTE.
 				 */
 				if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
 				    (PG_MANAGED | PG_M | PG_RW)) {
 					m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 					vm_page_dirty(m);
 				}
 				pbits &= ~(PG_RW | PG_M);
 			}
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				pbits |= pg_nx;
 
 			if (pbits != obits) {
 				/* The PTE changed since it was read: redo it. */
 				if (!atomic_cmpset_long(pte, obits, pbits))
 					goto retry;
 				if (obits & PG_G)
 					pmap_invalidate_page(pmap, sva);
 				else
 					anychanged = 1;
 			}
 		}
 	}
 	if (anychanged)
 		pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single page table page (PTP) to a single 2MB page mapping.  For promotion
  * to occur, two conditions must be met: (1) the 4KB page mappings must map
  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
  * identical characteristics.
  *
  * pde points at the PDE that references the PTP and va is a virtual address
  * mapped through it.  Nothing is returned; success and failure are only
  * reflected in the pmap_pde_promotions and pmap_pde_p_failures counters and
  * in the KTR trace.
  */
 static void
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
 	vm_offset_t oldpteva;
 	vm_page_t mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Examine the first PTE in the specified PTP.  Abort if this PTE is
 	 * either invalid, unused, or does not map the first 4KB physical page
 	 * within a 2MB page.
 	 */
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
 setpde:
 	newpde = *firstpte;
 	/*
 	 * PG_A and PG_V must be set, and the frame bits below the 2MB
 	 * boundary (PG_FRAME & PDRMASK) must all be clear.
 	 */
 	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
 		pmap_pde_p_failures++;
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return;
 	}
 	if ((newpde & (PG_M | PG_RW)) == PG_RW) {
 		/*
 		 * When PG_M is already clear, PG_RW can be cleared without
 		 * a TLB invalidation.
 		 */
 		/* Reread the PTE if it changed before the update landed. */
 		if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW))
 			goto setpde;
 		newpde &= ~PG_RW;
 	}
 
 	/*
 	 * Examine each of the other PTEs in the specified PTP.  Abort if this
 	 * PTE maps an unexpected 4KB physical page or does not have identical
 	 * characteristics to the first PTE.
 	 */
 	/*
 	 * pa is the frame address, together with PG_A and PG_V, that the
 	 * PTE under examination must hold.  The scan runs from the last
 	 * PTE down to the second, lowering pa by PAGE_SIZE at each step.
 	 */
 	pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
 	for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
 setpte:
 		oldpte = *pte;
 		if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
 			pmap_pde_p_failures++;
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return;
 		}
 		if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
 			/*
 			 * When PG_M is already clear, PG_RW can be cleared
 			 * without a TLB invalidation.
 			 */
 			if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW))
 				goto setpte;
 			oldpte &= ~PG_RW;
 			/* oldpteva is computed only for the trace record. */
 			oldpteva = (oldpte & PG_FRAME & PDRMASK) |
 			    (va & ~PDRMASK);
 			CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
 			    " in pmap %p", oldpteva, pmap);
 		}
 		if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
 			pmap_pde_p_failures++;
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return;
 		}
 		pa -= PAGE_SIZE;
 	}
 
 	/*
 	 * Save the page table page in its current state until the PDE
 	 * mapping the superpage is demoted by pmap_demote_pde() or
 	 * destroyed by pmap_remove_pde().
 	 */
 	mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
 	KASSERT(mpte >= vm_page_array &&
 	    mpte < &vm_page_array[vm_page_array_size],
 	    ("pmap_promote_pde: page table page is out of range"));
 	KASSERT(mpte->pindex == pmap_pde_pindex(va),
 	    ("pmap_promote_pde: page table page's pindex is wrong"));
 	pmap_insert_pt_page(pmap, mpte);
 
 	/*
 	 * Promote the pv entries.
 	 */
 	if ((newpde & PG_MANAGED) != 0)
 		pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
 
 	/*
 	 * Propagate the PAT index to its proper position.
 	 */
 	if ((newpde & PG_PTE_PAT) != 0)
 		newpde ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
 	 * Map the superpage.
 	 */
 	if (workaround_erratum383)
 		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
 	else
 		pde_store(pde, PG_PS | newpde);
 
 	pmap_pde_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  *
  *	In this implementation the page is the parameter m and the
  *	virtual address is va.  prot selects the permissions placed in
  *	the PTE, while access is only consulted to decide whether PG_M
  *	is preset in a newly written PTE.
  */
 void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
     vm_prot_t prot, boolean_t wired)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	pt_entry_t newpte, origpte;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte, om;
 	boolean_t invlva;
 
 	va = trunc_page(va);
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
 	    va));
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
 	    (m->oflags & VPO_BUSY) != 0,
 	    ("pmap_enter: page %p is not busy", m));
 
 	/* mpte remains NULL for addresses at or above VM_MAXUSER_ADDRESS. */
 	mpte = NULL;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS)
 		mpte = pmap_allocpte(pmap, va, M_WAITOK);
 
 	pde = pmap_pde(pmap, va);
 	if (pde != NULL && (*pde & PG_V) != 0) {
 		if ((*pde & PG_PS) != 0)
 			panic("pmap_enter: attempted pmap_enter on 2MB page");
 		pte = pmap_pde_to_pte(pde, va);
 	} else
 		panic("pmap_enter: invalid page directory va=%#lx", va);
 
 	pa = VM_PAGE_TO_PHYS(m);
 	/* om is the managed page, if any, that the old PTE referred to. */
 	om = NULL;
 	origpte = *pte;
 	opa = origpte & PG_FRAME;
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (origpte && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && ((origpte & PG_W) == 0))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && (origpte & PG_W))
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove extra pte reference
 		 */
 		if (mpte)
 			mpte->wire_count--;
 
 		if (origpte & PG_MANAGED) {
 			om = m;
 			pa |= PG_MANAGED;
 		}
 		goto validate;
 	} 
 
 	/* Holds a pv entry recycled from the old mapping, if there is one. */
 	pv = NULL;
 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		if (origpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (origpte & PG_MANAGED) {
 			om = PHYS_TO_VM_PAGE(opa);
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 		}
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			KASSERT(mpte->wire_count > 0,
 			    ("pmap_enter: missing reference to page table page,"
 			     " va: 0x%lx", va));
 		}
 	} else
 		pmap_resident_count_inc(pmap, 1);
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
 		    ("pmap_enter: managed mapping within the clean submap"));
 		if (pv == NULL)
 			pv = get_pv_entry(pmap, FALSE);
 		pv->pv_va = va;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 		pa |= PG_MANAGED;
 	} else if (pv != NULL)
 		/* The new page is unmanaged: the recycled entry is not needed. */
 		free_pv_entry(pmap, pv);
 
 	/*
 	 * Increment counters
 	 */
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
 	if ((prot & VM_PROT_WRITE) != 0) {
 		newpte |= PG_RW;
 		if ((newpte & PG_MANAGED) != 0)
 			vm_page_flag_set(m, PG_WRITEABLE);
 	}
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpte |= pg_nx;
 	if (wired)
 		newpte |= PG_W;
 	if (va < VM_MAXUSER_ADDRESS)
 		newpte |= PG_U;
 	if (pmap == kernel_pmap)
 		newpte |= PG_G;
 
 	/*
 	 * if the mapping or permission bits are different, we need
 	 * to update the pte.
 	 */
 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 		newpte |= PG_A;
 		if ((access & VM_PROT_WRITE) != 0)
 			newpte |= PG_M;
 		if (origpte & PG_V) {
 			/*
 			 * invlva is set when the old entry had PG_A set and
 			 * either the physical page changed or execute
 			 * permission was revoked, or when a dirty, writable
 			 * entry becomes read-only.
 			 */
 			invlva = FALSE;
 			origpte = pte_load_store(pte, newpte);
 			if (origpte & PG_A) {
 				if (origpte & PG_MANAGED)
 					vm_page_flag_set(om, PG_REFERENCED);
 				if (opa != VM_PAGE_TO_PHYS(m) || ((origpte &
 				    PG_NX) == 0 && (newpte & PG_NX)))
 					invlva = TRUE;
 			}
 			if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 				if ((origpte & PG_MANAGED) != 0)
 					vm_page_dirty(om);
 				if ((newpte & PG_RW) == 0)
 					invlva = TRUE;
 			}
 			/*
 			 * The old page loses PG_WRITEABLE once neither its
 			 * own pv list nor its 2MB frame's pv list has any
 			 * entries left.
 			 */
 			if ((origpte & PG_MANAGED) != 0 &&
 			    TAILQ_EMPTY(&om->md.pv_list) &&
 			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))
 				vm_page_flag_clear(om, PG_WRITEABLE);
 			if (invlva)
 				pmap_invalidate_page(pmap, va);
 		} else
 			pte_store(pte, newpte);
 	}
 
 	/*
 	 * If both the page table page and the reservation are fully
 	 * populated, then attempt promotion.
 	 */
 	if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
 	    pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
 		pmap_promote_pde(pmap, pde, va);
 
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Tries to create a 2MB page mapping.  Returns TRUE if successful and FALSE
  * otherwise.  Fails if (1) a page table page cannot be allocated without
  * blocking, (2) a mapping already exists at the specified virtual address, or
  * (3) a pv entry cannot be allocated without reclaiming another pv entry.
  *
  * m is the first page of the physical range to map at va.  The mapping is
  * created without PG_RW; of prot, only VM_PROT_EXECUTE is examined.
  */
 static boolean_t
 pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	pd_entry_t *pde, newpde;
 	vm_page_t free, mpde;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* M_NOWAIT: fail rather than wait for a page directory page. */
 	if ((mpde = pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (FALSE);
 	}
 	/* Locate va's PDE within the page directory page via the direct map. */
 	pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpde));
 	pde = &pde[pmap_pde_index(va)];
 	if ((*pde & PG_V) != 0) {
 		/*
 		 * Something is already mapped here.  Presumably the
 		 * decrement gives back the reference that pmap_allocpde()
 		 * took above.
 		 */
 		KASSERT(mpde->wire_count > 1,
 		    ("pmap_enter_pde: mpde's wire count is too low"));
 		mpde->wire_count--;
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (FALSE);
 	}
 	newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
 	    PG_PS | PG_V;
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		newpde |= PG_MANAGED;
 
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
 			free = NULL;
 			if (pmap_unwire_pte_hold(pmap, va, mpde, &free)) {
 				pmap_invalidate_page(pmap, va);
 				pmap_free_zero_pages(free);
 			}
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return (FALSE);
 		}
 	}
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpde |= pg_nx;
 	if (va < VM_MAXUSER_ADDRESS)
 		newpde |= PG_U;
 
 	/*
 	 * Increment counters.
 	 */
 	pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
 
 	/*
 	 * Map the superpage.
 	 */
 	pde_store(pde, newpde);
 
 	pmap_pde_mappings++;
 	CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
 	return (TRUE);
 }
 
 /*
  * Maps a run of resident pages that belong to one object.  The run starts
  * with "m_start", which is mapped at virtual address "start".  Every later
  * page is mapped at "start" plus that page's offset from "m_start" within
  * the object.  The run ends with the last page whose virtual address is
  * still below "end".  Holes are left wherever the object has no resident
  * page at the corresponding offset.
  *
  * Where alignment, size, pg_ps_enabled and the reservation level allow, a
  * 2MB mapping is attempted through pmap_enter_pde(); otherwise, or if that
  * attempt fails, the page is entered individually.
  */
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	vm_offset_t va;
 	vm_page_t m, mpte;
 	vm_pindex_t diff, psize;
 
 	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
 	psize = atop(end - start);
 	mpte = NULL;
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	for (m = m_start;
 	    m != NULL && (diff = m->pindex - m_start->pindex) < psize;
 	    m = TAILQ_NEXT(m, listq)) {
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) != 0 || va + NBPDR > end ||
 		    (VM_PAGE_TO_PHYS(m) & PDRMASK) != 0 ||
 		    !pg_ps_enabled || vm_reserv_level_iffullpop(m) != 0 ||
 		    !pmap_enter_pde(pmap, va, m, prot)) {
 			/* No superpage here: enter the single 4KB page. */
 			mpte = pmap_enter_quick_locked(pmap, va, m, prot,
 			    mpte);
 		} else {
 			/*
 			 * A 2MB mapping was created.  Advance to the last
 			 * page it covers so that the loop step moves past it.
 			 */
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		}
 	}
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Lightweight alternative to pmap_enter() for a single page.  It relies on
  * several significant assumptions:
  *   1. The current pmap and the given pmap exist.
  *   2. The mapping is not wired.
  *   3. Only read access is required.
  *   4. No page table pages are being mapped.
  * In exchange it is considerably faster than pmap_enter().
  *
  * This wrapper acquires the page queues lock and the pmap lock, delegates
  * to pmap_enter_quick_locked() with no page table page hint, and ignores
  * the page table page that call returns.
  */
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Enters a read-only 4KB mapping of page "m" at "va" without blocking.
  * Both the page queues lock and the pmap lock must already be held.
  *
  * "mpte" is an optional hint: the page table page returned by a previous
  * call.  It is reused, with its wire count bumped, when its pindex matches
  * the page table page needed for "va".
  *
  * Returns the page table page holding the new mapping for user addresses.
  * Returns NULL for kernel addresses and whenever no mapping was created:
  * "va" is covered by a 2MB mapping, the page table page cannot be
  * allocated, the PTE is already non-zero, or a pv entry cannot be
  * obtained for a managed page.
  */
 static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte)
 {
 	vm_page_t freelist;
 	pt_entry_t *ptep;
 	vm_paddr_t newpte;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	if (va >= VM_MAXUSER_ADDRESS) {
 		/* Kernel address: no page table page is tracked. */
 		mpte = NULL;
 		ptep = vtopte(va);
 	} else {
 		vm_pindex_t ptepindex;
 		pd_entry_t *pdep;
 
 		/*
 		 * User address: find, or create if it is not resident, the
 		 * page table page that will hold the mapping.
 		 */
 		ptepindex = pmap_pde_pindex(va);
 		if (mpte != NULL && mpte->pindex == ptepindex) {
 			/* The caller's hint is the page we need. */
 			mpte->wire_count++;
 		} else {
 			pdep = pmap_pde(pmap, va);
 			if (pdep != NULL && (*pdep & PG_V) != 0) {
 				/* Never disturb an existing 2MB mapping. */
 				if ((*pdep & PG_PS) != 0)
 					return (NULL);
 				mpte = PHYS_TO_VM_PAGE(*pdep & PG_FRAME);
 				mpte->wire_count++;
 			} else {
 				mpte = _pmap_allocpte(pmap, ptepindex,
 				    M_NOWAIT);
 				if (mpte == NULL)
 					return (NULL);
 			}
 		}
 		ptep = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
 		ptep = &ptep[pmap_pte_index(va)];
 	}
 
 	/* An entry is already present: drop our reference and bail out. */
 	if (*ptep != 0) {
 		if (mpte != NULL)
 			mpte->wire_count--;
 		return (NULL);
 	}
 
 	/*
 	 * A managed page needs a pv entry.  If none can be had, undo the
 	 * page table page reference and give up.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m)) {
 		if (mpte != NULL) {
 			freelist = NULL;
 			if (pmap_unwire_pte_hold(pmap, va, mpte, &freelist)) {
 				pmap_invalidate_page(pmap, va);
 				pmap_free_zero_pages(freelist);
 			}
 		}
 		return (NULL);
 	}
 
 	pmap_resident_count_inc(pmap, 1);
 
 	/* Build the entry; PG_RW is never set, so the mapping is read-only. */
 	newpte = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpte |= pg_nx;
 	newpte |= PG_V | PG_U;
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
 		newpte |= PG_MANAGED;
 	pte_store(ptep, newpte);
 	return (mpte);
 }
 
 /*
  * Creates a temporary kernel mapping of physical address "pa" in page slot
  * "i" of the crash dump map.  Intended solely for panic dumps.
  *
  * Note that the value returned is always the base of the crash dump map,
  * not the address of slot "i".
  */
 void *
 pmap_kenter_temporary(vm_paddr_t pa, int i)
 {
 	vm_offset_t slot;
 
 	slot = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 	pmap_kenter(slot, pa);
 	invlpg(slot);
 	return ((void *)crashdumpmap);
 }
 
 /*
  * Pre-maps a large physical (device or scatter/gather) object into the
  * given pmap using 2MB pages.  This is purely an optimization: the routine
  * returns silently, having mapped nothing, unless "addr" and "size" are
  * both 2MB aligned, the object can be populated, and the backing pages are
  * physically contiguous, 2MB aligned, and share one memory attribute.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 	pd_entry_t *pdep;
 	vm_paddr_t base_pa, pa;
 	vm_page_t pg, pdpg;
 	int pat_mode;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 
 	/* Only requests that are a whole number of aligned 2MB pages. */
 	if ((addr & (NBPDR - 1)) != 0 || (size & (NBPDR - 1)) != 0)
 		return;
 	if (!vm_object_populate(object, pindex, pindex + atop(size)))
 		return;
 	pg = vm_page_lookup(object, pindex);
 	KASSERT(pg->valid == VM_PAGE_BITS_ALL,
 	    ("pmap_object_init_pt: invalid page %p", pg));
 	pat_mode = pg->md.pat_mode;
 
 	/*
 	 * Give up if the first page does not start on a 2MB physical
 	 * boundary.
 	 */
 	base_pa = VM_PAGE_TO_PHYS(pg);
 	if ((base_pa & (NBPDR - 1)) != 0)
 		return;
 
 	/*
 	 * Starting with the second page, verify that the remaining pages
 	 * are physically contiguous with the first and have the same
 	 * memory attribute.  Give up otherwise.
 	 */
 	for (pa = base_pa + PAGE_SIZE, pg = TAILQ_NEXT(pg, listq);
 	    pa < base_pa + size;
 	    pa += PAGE_SIZE, pg = TAILQ_NEXT(pg, listq)) {
 		KASSERT(pg->valid == VM_PAGE_BITS_ALL,
 		    ("pmap_object_init_pt: invalid page %p", pg));
 		if (pa != VM_PAGE_TO_PHYS(pg) ||
 		    pat_mode != pg->md.pat_mode)
 			return;
 	}
 
 	/*
 	 * Create the 2MB mappings.  "base_pa" is 2MB aligned and "size" is
 	 * a multiple of 2MB, so folding the PAT bits into "pa" does not
 	 * change when this loop terminates.
 	 */
 	PMAP_LOCK(pmap);
 	for (pa = base_pa | pmap_cache_bits(pat_mode, 1);
 	    pa < base_pa + size; pa += NBPDR, addr += NBPDR) {
 		pdpg = pmap_allocpde(pmap, addr, M_NOWAIT);
 		if (pdpg == NULL) {
 			/*
 			 * These mappings are only an optimization.  When a
 			 * page directory page cannot be had without
 			 * blocking, skip this 2MB region instead of waiting.
 			 */
 			continue;
 		}
 		pdep = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
 		pdep = &pdep[pmap_pde_index(addr)];
 		if ((*pdep & PG_V) != 0) {
 			/* Already mapped: drop our reference and move on. */
 			pdpg->wire_count--;
 			KASSERT(pdpg->wire_count > 0,
 			    ("pmap_object_init_pt: missing reference "
 			    "to page directory page, va: 0x%lx", addr));
 		} else {
 			pde_store(pdep, pa | PG_PS | PG_M | PG_A |
 			    PG_U | PG_RW | PG_V);
 			pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
 			pmap_pde_mappings++;
 		}
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Set or clear the wired attribute of the mapping at
  *			the given virtual address in the given pmap.
  *	In/out conditions:
  *			The mapping must already exist in the pmap.
  *
  *	If the address is covered by a 2MB mapping whose wired state has
  *	to change, the mapping is first demoted.  That requires the page
  *	queues lock, which is acquired here without violating the lock
  *	order (see the retry logic below).
  */
 void
 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 {
 	pd_entry_t *pdep;
 	pt_entry_t *ptep;
 	boolean_t queues_locked;
 
 	queues_locked = FALSE;
 
 	/*
 	 * Wiring is a software attribute, not a hardware one, so the TLB
 	 * does not need to be invalidated.
 	 */
 retry:
 	PMAP_LOCK(pmap);
 	pdep = pmap_pde(pmap, va);
 	if ((*pdep & PG_PS) != 0) {
 		/* Nothing to do if the 2MB mapping is already as requested. */
 		if (!wired == ((*pdep & PG_W) == 0))
 			goto out;
 		if (!queues_locked) {
 			queues_locked = TRUE;
 			if (!mtx_trylock(&vm_page_queue_mtx)) {
 				/*
 				 * Could not get the page queues lock while
 				 * holding the pmap lock.  Release the pmap
 				 * lock, block on the page queues lock, and
 				 * start over.
 				 */
 				PMAP_UNLOCK(pmap);
 				vm_page_lock_queues();
 				goto retry;
 			}
 		}
 		if (!pmap_demote_pde(pmap, pdep, va))
 			panic("pmap_change_wiring: demotion failed");
 	}
 	ptep = pmap_pde_to_pte(pdep, va);
 	if (wired) {
 		if ((*ptep & PG_W) == 0) {
 			pmap->pm_stats.wired_count++;
 			atomic_set_long(ptep, PG_W);
 		}
 	} else if ((*ptep & PG_W) != 0) {
 		pmap->pm_stats.wired_count--;
 		atomic_clear_long(ptep, PG_W);
 	}
 out:
 	if (queues_locked)
 		vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Copies the mappings in the range src_addr/len of the source pmap
  *	to the range dst_addr/len of the destination pmap.
  *
  *	This routine is advisory only: it may copy fewer mappings than
  *	requested, or none at all.  In particular it does nothing unless
  *	dst_addr equals src_addr, it copies only managed 4KB mappings, and
  *	it stops at the first resource shortage.
  */
 
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
 	vm_page_t freelist;
 	vm_offset_t addr, end_addr, va_next;
 
 	if (dst_addr != src_addr)
 		return;
 	end_addr = src_addr + len;
 
 	vm_page_lock_queues();
 	/* Take the two pmap locks in address order. */
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	for (addr = src_addr; addr < end_addr; addr = va_next) {
 		pt_entry_t *src_pte, *dst_pte, ptetemp;
 		vm_page_t dstmpde, dstmpte, srcmpte;
 		pml4_entry_t *pml4e;
 		pdp_entry_t *pdpe;
 		pd_entry_t srcentry, *src_pdep, *dst_pdep;
 
 		KASSERT(addr < UPT_MIN_ADDRESS,
 		    ("pmap_copy: invalid to pmap_copy page tables"));
 
 		/* Skip regions with no PML4 or PDP entry in the source. */
 		pml4e = pmap_pml4e(src_pmap, addr);
 		if ((*pml4e & PG_V) == 0) {
 			va_next = (addr + NBPML4) & ~PML4MASK;
 			if (va_next < addr)
 				va_next = end_addr;
 			continue;
 		}
 
 		pdpe = pmap_pml4e_to_pdpe(pml4e, addr);
 		if ((*pdpe & PG_V) == 0) {
 			va_next = (addr + NBPDP) & ~PDPMASK;
 			if (va_next < addr)
 				va_next = end_addr;
 			continue;
 		}
 
 		/* Next 2MB boundary, guarding against address wraparound. */
 		va_next = (addr + NBPDR) & ~PDRMASK;
 		if (va_next < addr)
 			va_next = end_addr;
 
 		src_pdep = pmap_pdpe_to_pde(pdpe, addr);
 		srcentry = *src_pdep;
 		if (srcentry == 0)
 			continue;
 
 		if ((srcentry & PG_PS) != 0) {
 			/*
 			 * 2MB source mapping.  Copy it whole, minus the
 			 * wired bit, provided that the destination slot is
 			 * empty and, for a managed mapping, a pv entry can
 			 * be inserted.
 			 */
 			dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT);
 			if (dstmpde == NULL)
 				break;
 			dst_pdep = (pd_entry_t *)
 			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde));
 			dst_pdep = &dst_pdep[pmap_pde_index(addr)];
 			if (*dst_pdep == 0 &&
 			    ((srcentry & PG_MANAGED) == 0 ||
 			    pmap_pv_insert_pde(dst_pmap, addr, srcentry &
 			    PG_PS_FRAME))) {
 				*dst_pdep = srcentry & ~PG_W;
 				pmap_resident_count_inc(dst_pmap,
 				    NBPDR / PAGE_SIZE);
 			} else
 				dstmpde->wire_count--;
 			continue;
 		}
 
 		/* From here on "srcentry" is the source page table page's PA. */
 		srcentry &= PG_FRAME;
 		srcmpte = PHYS_TO_VM_PAGE(srcentry);
 		KASSERT(srcmpte->wire_count > 0,
 		    ("pmap_copy: source page table page is unused"));
 
 		if (va_next > end_addr)
 			va_next = end_addr;
 
 		src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcentry);
 		src_pte = &src_pte[pmap_pte_index(addr)];
 		dstmpte = NULL;
 		for (; addr < va_next; addr += PAGE_SIZE, src_pte++) {
 			ptetemp = *src_pte;
 
 			/* Only managed pages are virtually copied. */
 			if ((ptetemp & PG_MANAGED) == 0)
 				continue;
 
 			/* Reuse or allocate the destination page table page. */
 			if (dstmpte != NULL &&
 			    dstmpte->pindex == pmap_pde_pindex(addr))
 				dstmpte->wire_count++;
 			else if ((dstmpte = pmap_allocpte(dst_pmap,
 			    addr, M_NOWAIT)) == NULL)
 				goto out;
 			dst_pte = (pt_entry_t *)
 			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
 			dst_pte = &dst_pte[pmap_pte_index(addr)];
 			if (*dst_pte != 0 ||
 			    !pmap_try_insert_pv_entry(dst_pmap, addr,
 			    PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
 				/*
 				 * The slot is occupied or no pv entry is
 				 * available.  Release the reference just
 				 * taken and stop copying altogether.
 				 */
 				freelist = NULL;
 				if (pmap_unwire_pte_hold(dst_pmap,
 				    addr, dstmpte, &freelist)) {
 					pmap_invalidate_page(dst_pmap,
 					    addr);
 					pmap_free_zero_pages(freelist);
 				}
 				goto out;
 			}
 
 			/*
 			 * Clear the wired, modified, and accessed
 			 * (referenced) bits during the copy.
 			 */
 			*dst_pte = ptetemp & ~(PG_W | PG_M | PG_A);
 			pmap_resident_count_inc(dst_pmap, 1);
 
 			/*
 			 * Stop scanning this page table page once the
 			 * destination's wire count has reached the source's.
 			 */
 			if (dstmpte->wire_count >= srcmpte->wire_count)
 				break;
 		}
 	}
 out:
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }
 
 /*
  *	pmap_zero_page zeros the specified hardware page.  The page is
  *	reached through its direct map address and cleared with pagezero().
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 
 	pagezero((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 }
 
 /*
  *	pmap_zero_page_area zeros "size" bytes starting at byte offset
  *	"off" within the specified hardware page, which is reached through
  *	its direct map address.  A request covering the entire page is
  *	handed to pagezero(); anything smaller is cleared with bzero().
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	vm_offset_t page_va;
 
 	page_va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 	if (off != 0 || size != PAGE_SIZE)
 		bzero((char *)page_va + off, size);
 	else
 		pagezero((void *)page_va);
 }
 
 /*
  *	pmap_zero_page_idle zeros the specified hardware page.  The page
  *	is reached through its direct map address and cleared with
  *	pagezero().  This is intended to be called from the vm_pagezero
  *	process only and outside of Giant.
  */
 void
 pmap_zero_page_idle(vm_page_t m)
 {
 
 	pagezero((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 }
 
 /*
  *	pmap_copy_page copies the contents of the specified (machine
  *	independent) source page to the destination page.  Both pages are
  *	reached through their direct map addresses and copied with
  *	pagecopy().
  */
 void
 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 {
 	void *from, *to;
 
 	from = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
 	to = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
 	pagecopy(from, to);
 }
 
 /*
  * Returns TRUE if one of the first 16 pv entries linked from the page
  * belongs to the given pmap.  The page's own pv list is searched first,
  * then the pv list of the 2MB frame containing it; the limit of 16 is
  * shared between the two lists.  The limit may be raised or lowered in
  * the future; for proper page aging it is only necessary that TRUE be
  * returned for a small subset of pmaps.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	int checked;
 	boolean_t found;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	found = FALSE;
 	checked = 0;
 	vm_page_lock_queues();
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		if (PV_PMAP(pv) == pmap) {
 			found = TRUE;
 			goto done;
 		}
 		if (++checked >= 16)
 			goto done;
 	}
 
 	/* 4KB list exhausted within budget: continue with the 2MB list. */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 		if (PV_PMAP(pv) == pmap) {
 			found = TRUE;
 			break;
 		}
 		if (++checked >= 16)
 			break;
 	}
 done:
 	vm_page_unlock_queues();
 	return (found);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Return the number of managed mappings to the given physical page
  *	that are wired.  Both the page's own pv list and the pv list of
  *	its containing 2MB frame are counted.  Fictitious pages always
  *	yield zero.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	int nwired;
 
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return (0);
 	vm_page_lock_queues();
 	nwired = pmap_pvh_wired_mappings(&m->md, 0);
 	nwired = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
 	    nwired);
 	vm_page_unlock_queues();
 	return (nwired);
 }
 
 /*
  *	pmap_pvh_wired_mappings:
  *
  *	Return the updated number "count" of managed mappings that are wired.
  */
 static int
 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
 {
 	pmap_t pmap;
 	pt_entry_t *pte;
 	pv_entry_t pv;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte(pmap, pv->pv_va);
 		if ((*pte & PG_W) != 0)
 			count++;
 		PMAP_UNLOCK(pmap);
 	}
 	return (count);
 }
 
 /*
  * Returns TRUE if the given page has at least one managed mapping, either
  * individually (its own pv list is non-empty) or as part of a 2mpage (the
  * pv list of its containing 2MB frame is non-empty).  Otherwise, and for
  * any fictitious or unmanaged page, returns FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
 	boolean_t mapped;
 
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
 		return (FALSE);
 	vm_page_lock_queues();
 	mapped = !TAILQ_EMPTY(&m->md.pv_list);
 	if (!mapped)
 		mapped = !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list);
 	vm_page_unlock_queues();
 	return (mapped);
 }
 
 /*
  * Remove all non-wired managed mappings from the specified address space.
  * This aids process exit speeds.  The code is special cased for the
  * current process only: when called with any other pmap it prints a
  * warning and returns without doing anything.  It is much faster than
  * pmap_remove in the case of running down an entire address space
  * because it walks the pmap's pv chunks rather than its page tables.
  *
  * Wired mappings are left in place, and a pv chunk that still contains
  * one is not freed.
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pd_entry_t ptepde;
 	pt_entry_t *pte, tpte;
 	vm_page_t free = NULL;
 	vm_page_t m, mpte, mt;
 	pv_entry_t pv;
 	struct md_page *pvh;
 	struct pv_chunk *pc, *npc;
 	int field, idx;
 	int64_t bit;
 	uint64_t inuse, bitmask;
 	int allfree;
 
 	if (pmap != PCPU_GET(curpmap)) {
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		/* Cleared below if any entry in this chunk must be kept. */
 		allfree = 1;
 		for (field = 0; field < _NPCM; field++) {
 			/*
 			 * A clear bit in pc_map marks an allocated pv
 			 * entry; pc_freemask limits the test to the bits
 			 * that correspond to real entries.
 			 */
 			inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 			while (inuse != 0) {
 				bit = bsfq(inuse);
 				bitmask = 1UL << bit;
 				idx = field * 64 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				/*
 				 * Walk from the PDP entry to the PDE for
 				 * this va.  "ptepde" tracks the entry one
 				 * level above the final mapping and is
 				 * passed to pmap_unuse_pt() below.
 				 */
 				pte = pmap_pdpe(pmap, pv->pv_va);
 				ptepde = *pte;
 				pte = pmap_pdpe_to_pde(pte, pv->pv_va);
 				tpte = *pte;
 				if ((tpte & (PG_PS | PG_V)) == PG_V) {
 					/*
 					 * The PDE points to a page table
 					 * page: descend to the 4KB PTE.
 					 * PG_PTE_PAT is masked off,
 					 * presumably so that the later
 					 * (tpte & PG_PS) tests cannot
 					 * mistake a 4KB PTE's PAT bit for
 					 * PG_PS -- TODO confirm against
 					 * the PG_* definitions.
 					 */
 					ptepde = tpte;
 					pte = (pt_entry_t *)PHYS_TO_DMAP(tpte &
 					    PG_FRAME);
 					pte = &pte[pmap_pte_index(pv->pv_va)];
 					tpte = *pte & ~PG_PTE_PAT;
 				}
 				if ((tpte & PG_V) == 0)
 					panic("bad pte");
 
 				/*
 				 * We cannot remove wired pages from a
 				 * process' mapping at this time.  Keep the
 				 * mapping and its pv entry, and remember
 				 * that this chunk cannot be freed.
 				 */
 				if (tpte & PG_W) {
 					allfree = 0;
 					continue;
 				}
 
 				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 				KASSERT(m->phys_addr == (tpte & PG_FRAME),
 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 				    m, (uintmax_t)m->phys_addr,
 				    (uintmax_t)tpte));
 
 				KASSERT(m < &vm_page_array[vm_page_array_size],
 					("pmap_remove_pages: bad tpte %#jx",
 					(uintmax_t)tpte));
 
 				pte_clear(pte);
 
 				/*
 				 * Update the vm_page_t clean/reference bits.
 				 * A modified, writable 2MB mapping dirties
 				 * every 4KB page that it covers.
 				 */
 				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 					if ((tpte & PG_PS) != 0) {
 						for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 							vm_page_dirty(mt);
 					} else
 						vm_page_dirty(m);
 				}
 
 				/* Mark free */
 				PV_STAT(pv_entry_frees++);
 				PV_STAT(pv_entry_spare++);
 				pv_entry_count--;
 				pc->pc_map[field] |= bitmask;
 				if ((tpte & PG_PS) != 0) {
 					/*
 					 * 2MB mapping: unlink the pv entry
 					 * from the 2MB frame's list.  If that
 					 * list is now empty, clear
 					 * PG_WRITEABLE on each constituent
 					 * page that has no 4KB mappings
 					 * either.
 					 */
 					pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
 					pvh = pa_to_pvh(tpte & PG_PS_FRAME);
 					TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 							if (TAILQ_EMPTY(&mt->md.pv_list))
 								vm_page_flag_clear(mt, PG_WRITEABLE);
 					}
 					/*
 					 * If pmap_lookup_pt_page() finds a
 					 * page table page recorded for this
 					 * va, remove it from the pmap and
 					 * queue it for freeing.
 					 */
 					mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
 					if (mpte != NULL) {
 						pmap_remove_pt_page(pmap, mpte);
 						pmap_resident_count_dec(pmap, 1);
 						KASSERT(mpte->wire_count == NPTEPG,
 						    ("pmap_remove_pages: pte page wire count error"));
 						mpte->wire_count = 0;
 						pmap_add_delayed_free_list(mpte, &free, FALSE);
 						atomic_subtract_int(&cnt.v_wire_count, 1);
 					}
 				} else {
 					/*
 					 * 4KB mapping: unlink the pv entry
 					 * from the page's own list.  Clear
 					 * PG_WRITEABLE once neither the page
 					 * nor its 2MB frame has any mapping
 					 * left.
 					 */
 					pmap_resident_count_dec(pmap, 1);
 					TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 					if (TAILQ_EMPTY(&m->md.pv_list)) {
 						pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 						if (TAILQ_EMPTY(&pvh->pv_list))
 							vm_page_flag_clear(m, PG_WRITEABLE);
 					}
 				}
 				pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free);
 			}
 		}
 		if (allfree) {
 			/*
 			 * Every entry in the chunk was released: unlink the
 			 * chunk and free the page that backs it.
 			 */
 			PV_STAT(pv_entry_spare -= _NPCPV);
 			PV_STAT(pc_chunk_count--);
 			PV_STAT(pc_chunk_frees++);
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 			dump_drop_page(m->phys_addr);
 			vm_page_unwire(m, 0);
 			vm_page_free(m);
 		}
 	}
 	/* One global invalidation instead of one per removed mapping. */
 	pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 	/* The queued page table pages are freed after the locks are dropped. */
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.  Both 4KB mappings of the page and 2MB
  *	mappings of its containing frame are considered.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	boolean_t modified;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
 	 * While the object is locked, PG_WRITEABLE cannot be concurrently
 	 * set on a page that is not VPO_BUSY.  So, if the page is not busy
 	 * and PG_WRITEABLE is clear, no PTE can have PG_M set and the pv
 	 * lists need not be examined.
 	 */
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	if ((m->oflags & VPO_BUSY) != 0 ||
 	    (m->flags & PG_WRITEABLE) != 0) {
 		vm_page_lock_queues();
 		modified = pmap_is_modified_pvh(&m->md);
 		if (!modified)
 			modified = pmap_is_modified_pvh(
 			    pa_to_pvh(VM_PAGE_TO_PHYS(m)));
 		vm_page_unlock_queues();
 		return (modified);
 	}
 	return (FALSE);
 }
 
 /*
  * Returns TRUE if any mapping on the given pv list has both PG_M and PG_RW
  * set, i.e., was used to modify physical memory.  Otherwise, returns FALSE.
  * Both page and 2mpage mappings are supported.  The page queues lock must
  * be held.
  */
 static boolean_t
 pmap_is_modified_pvh(struct md_page *pvh)
 {
 	pv_entry_t pv;
 	pt_entry_t *pte;
 	pmap_t pmap;
 	boolean_t dirty;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte(pmap, pv->pv_va);
 		dirty = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW);
 		PMAP_UNLOCK(pmap);
 		if (dirty)
 			return (TRUE);
 	}
 	return (FALSE);
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.  An address is eligible when its page directory
  *	entry is valid and is not a 2MB mapping, and the page table entry
  *	for the address is not yet valid.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	boolean_t eligible;
 
 	PMAP_LOCK(pmap);
 	pde = pmap_pde(pmap, addr);
 	if (pde == NULL || (*pde & (PG_PS | PG_V)) != PG_V) {
 		eligible = FALSE;
 	} else {
 		pte = pmap_pde_to_pte(pde, addr);
 		eligible = (*pte & PG_V) == 0;
 	}
 	PMAP_UNLOCK(pmap);
 	return (eligible);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Return whether or not the specified physical page was referenced
  *	in any physical maps.  Both 4KB mappings of the page and 2MB
  *	mappings of its containing frame are considered.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 	boolean_t referenced;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_is_referenced: page %p is not managed", m));
 	vm_page_lock_queues();
 	referenced = pmap_is_referenced_pvh(&m->md);
 	if (!referenced)
 		referenced = pmap_is_referenced_pvh(
 		    pa_to_pvh(VM_PAGE_TO_PHYS(m)));
 	vm_page_unlock_queues();
 	return (referenced);
 }
 
 /*
  * Returns TRUE if any mapping on the given pv list is valid and has PG_A
  * set, i.e., was referenced.  Otherwise, returns FALSE.  Both page and
  * 2mpage mappings are supported.  The page queues lock must be held.
  */
 static boolean_t
 pmap_is_referenced_pvh(struct md_page *pvh)
 {
 	pv_entry_t pv;
 	pt_entry_t *pte;
 	pmap_t pmap;
 	boolean_t accessed;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte(pmap, pv->pv_va);
 		accessed = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
 		PMAP_UNLOCK(pmap);
 		if (accessed)
 			return (TRUE);
 	}
 	return (FALSE);
 }
 
 /*
  * Clear the write and modified bits in each of the given page's mappings.
  *
  * Writable 2MB mappings that include the page are first demoted, so that
  * afterwards only 4KB mappings need to be write protected.  If a mapping
  * had PG_M set, the modification is transferred to the page with
  * vm_page_dirty() before the bit is lost.  On return PG_WRITEABLE is clear
  * on the page.
  *
  * The page must be managed and its object must be locked by the caller.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct md_page *pvh;
 	pmap_t pmap;
 	pv_entry_t next_pv, pv;
 	pd_entry_t *pde;
 	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
 	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	if ((m->oflags & VPO_BUSY) == 0 &&
 	    (m->flags & PG_WRITEABLE) == 0)
 		return;
 	vm_page_lock_queues();
 
 	/*
 	 * Pass 1: demote every writable 2MB mapping of the containing
 	 * frame.  The _SAFE iterator is used because the list may change
 	 * under demotion.  A failed demotion is deliberately ignored here.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		va = pv->pv_va;
 		pde = pmap_pde(pmap, va);
 		if ((*pde & PG_RW) != 0)
 			(void)pmap_demote_pde(pmap, pde, va);
 		PMAP_UNLOCK(pmap);
 	}
 
 	/* Pass 2: write protect each 4KB mapping of the page. */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
 		/* The message now names this function, not "pmap_clear_write". */
 		KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
 retry:
 		oldpte = *pte;
 		if (oldpte & PG_RW) {
 			/*
 			 * Clear PG_RW and PG_M with a compare-and-set and
 			 * retry if the entry changed underneath us, so that
 			 * a concurrently set PG_M is not lost.
 			 */
 			if (!atomic_cmpset_long(pte, oldpte, oldpte &
 			    ~(PG_RW | PG_M)))
 				goto retry;
 			if ((oldpte & PG_M) != 0)
 				vm_page_dirty(m);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
 	vm_page_unlock_queues();
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	The scan stops once more than four referenced mappings have been
  *	handled, so the value returned never exceeds five.
  *
  *	XXX: The exact number of bits to check and clear is a matter that
  *	should be tested and standardized at some point in the future for
  *	optimal aging of shared pages.
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv, pvf, pvn;
 	pmap_t pmap;
 	pd_entry_t oldpde, *pde;
 	pt_entry_t *pte;
 	vm_offset_t va;
 	int rtval = 0;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	vm_page_lock_queues();
 	/*
 	 * First handle 2MB mappings of the frame containing the page.  The
 	 * _SAFE iterator is used because the pv list may change during
 	 * demotion.
 	 */
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		va = pv->pv_va;
 		pde = pmap_pde(pmap, va);
 		oldpde = *pde;
 		if ((oldpde & PG_A) != 0) {
 			if (pmap_demote_pde(pmap, pde, va)) {
 				/*
 				 * A wired mapping is demoted but not removed
 				 * and is not counted.
 				 */
 				if ((oldpde & PG_W) == 0) {
 					/*
 					 * Remove the mapping to a single page
 					 * so that a subsequent access may
 					 * repromote.  Since the underlying
 					 * page table page is fully populated,
 					 * this removal never frees a page
 					 * table page.
 					 */
 					va += VM_PAGE_TO_PHYS(m) - (oldpde &
 					    PG_PS_FRAME);
 					pmap_remove_page(pmap, va, pde, NULL);
 					rtval++;
 					if (rtval > 4) {
 						PMAP_UNLOCK(pmap);
 						goto out;
 					}
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	/*
 	 * Then handle the page's 4KB mappings.  Each pv entry visited is
 	 * moved to the tail of the list, so that the next call begins with
 	 * different mappings.  "pvf" remembers the original head so that
 	 * the walk ends after one full rotation.
 	 */
 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pvf = pv;
 		do {
 			/* Fetch the successor before "pv" is requeued. */
 			pvn = TAILQ_NEXT(pv, pv_list);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 			pmap = PV_PMAP(pv);
 			PMAP_LOCK(pmap);
 			pde = pmap_pde(pmap, pv->pv_va);
 			KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
 			    " found a 2mpage in page %p's pv list", m));
 			pte = pmap_pde_to_pte(pde, pv->pv_va);
 			if ((*pte & PG_A) != 0) {
 				atomic_clear_long(pte, PG_A);
 				pmap_invalidate_page(pmap, pv->pv_va);
 				rtval++;
 				/* Enough found: force the loop to end. */
 				if (rtval > 4)
 					pvn = NULL;
 			}
 			PMAP_UNLOCK(pmap);
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
 out:
 	vm_page_unlock_queues();
 	return (rtval);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  *
  *	The page must be managed and not VPO_BUSY, and its object must be
  *	locked by the caller.  Writable 2MB mappings that include the page
  *	are demoted.  4KB mappings of the page have PG_M cleared.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct md_page *pvh;
 	pmap_t pmap;
 	pv_entry_t next_pv, pv;
 	pd_entry_t oldpde, *pde;
 	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	KASSERT((m->oflags & VPO_BUSY) == 0,
 	    ("pmap_clear_modify: page %p is busy", m));
 
 	/*
 	 * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
 	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->flags & PG_WRITEABLE) == 0)
 		return;
 	vm_page_lock_queues();
 	/*
 	 * First handle writable 2MB mappings of the frame containing the
 	 * page.  The _SAFE iterator is used because the pv list may change
 	 * during demotion.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		va = pv->pv_va;
 		pde = pmap_pde(pmap, va);
 		oldpde = *pde;
 		if ((oldpde & PG_RW) != 0) {
 			if (pmap_demote_pde(pmap, pde, va)) {
 				/* A wired mapping is only demoted. */
 				if ((oldpde & PG_W) == 0) {
 					/*
 					 * Write protect the mapping to a
 					 * single page so that a subsequent
 					 * write access may repromote.
 					 */
 					va += VM_PAGE_TO_PHYS(m) - (oldpde &
 					    PG_PS_FRAME);
 					pte = pmap_pde_to_pte(pde, va);
 					oldpte = *pte;
 					if ((oldpte & PG_V) != 0) {
 						/*
 						 * Retry the compare-and-set
 						 * until PG_M and PG_RW are
 						 * cleared without losing a
 						 * concurrent update.
 						 */
 						while (!atomic_cmpset_long(pte,
 						    oldpte,
 						    oldpte & ~(PG_M | PG_RW)))
 							oldpte = *pte;
 						/*
 						 * NOTE(review): the page is
 						 * dirtied here regardless of
 						 * whether PG_M was set in the
 						 * old entry.
 						 */
 						vm_page_dirty(m);
 						pmap_invalidate_page(pmap, va);
 					}
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	/*
 	 * Then clear PG_M in each 4KB mapping of the page that is both
 	 * modified and writable.
 	 */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
 		    " a 2mpage in page %p's pv list", m));
 		pte = pmap_pde_to_pte(pde, pv->pv_va);
 		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 			atomic_clear_long(pte, PG_M);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_unlock_queues();
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the reference bit on the specified physical page.  A
  *	referenced 2MB mapping that includes the page is demoted, and the
  *	resulting 4KB mapping of the page is removed.  For 4KB mappings of
  *	the page, PG_A is cleared in place.
  */
 void
 pmap_clear_reference(vm_page_t m)
 {
 	struct md_page *pvh;
 	pmap_t pmap;
 	pv_entry_t pv, pv_next;
 	pd_entry_t oldpde, *pdep;
 	pt_entry_t *ptep;
 	vm_offset_t va;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_clear_reference: page %p is not managed", m));
 	vm_page_lock_queues();
 
 	/* 2MB mappings of the frame that contains the page. */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		va = pv->pv_va;
 		pdep = pmap_pde(pmap, va);
 		oldpde = *pdep;
 		if ((oldpde & PG_A) != 0 &&
 		    pmap_demote_pde(pmap, pdep, va)) {
 			/*
 			 * Remove the mapping to a single page so that a
 			 * subsequent access may repromote.  Since the
 			 * underlying page table page is fully populated,
 			 * this removal never frees a page table page.
 			 */
 			va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME);
 			pmap_remove_page(pmap, va, pdep, NULL);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 
 	/* 4KB mappings of the page itself. */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pdep = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pdep & PG_PS) == 0, ("pmap_clear_reference: found"
 		    " a 2mpage in page %p's pv list", m));
 		ptep = pmap_pde_to_pte(pdep, pv->pv_va);
 		if ((*ptep & PG_A) != 0) {
 			atomic_clear_long(ptep, PG_A);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_unlock_queues();
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /*
  * Adjust the cache mode for a 4KB page mapped via a PTE: replace the
  * PG_PTE_CACHE bits of the entry with "cache_bits".
  */
 static __inline void
 pmap_pte_attr(pt_entry_t *pte, int cache_bits)
 {
 	u_int *lowp, cur, want;
 
 	/*
 	 * All of the cache mode bits live in the low 32 bits of the PTE,
 	 * so it suffices to spin on a 32-bit compare-and-set of that half.
 	 * Nothing is written if the entry already has the requested mode.
 	 */
 	lowp = (u_int *)pte;
 	for (;;) {
 		cur = *lowp;
 		want = (cur & ~PG_PTE_CACHE) | cache_bits;
 		if (want == cur || atomic_cmpset_int(lowp, cur, want))
 			break;
 	}
 }
 
 /*
  * Adjust the cache mode for a 2MB page mapped via a PDE: replace the
  * PG_PDE_CACHE bits of the entry with "cache_bits".
  */
 static __inline void
 pmap_pde_attr(pd_entry_t *pde, int cache_bits)
 {
 	u_int *lowp, cur, want;
 
 	/*
 	 * All of the cache mode bits live in the low 32 bits of the PDE,
 	 * so it suffices to spin on a 32-bit compare-and-set of that half.
 	 * Nothing is written if the entry already has the requested mode.
 	 */
 	lowp = (u_int *)pde;
 	for (;;) {
 		cur = *lowp;
 		want = (cur & ~PG_PDE_CACHE) | cache_bits;
 		if (want == cur || atomic_cmpset_int(lowp, cur, want))
 			break;
 	}
 }
 
 /*
  * Map a set of physical memory pages into the kernel virtual address
  * space with the given memory attribute "mode" and return a pointer to
  * where the range is mapped.  This routine is intended to be used for
  * mapping device memory, NOT real memory.
  *
  * The direct map address is returned when the range lies below dmaplimit
  * and pmap_change_attr() returns zero for it.  Otherwise fresh kernel
  * virtual address space is allocated and each page is entered
  * individually; failure to allocate that space is fatal.
  */
 void *
 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 {
 	vm_offset_t va, offset;
 	vm_size_t mapped;
 
 	/*
 	 * If the specified range of physical addresses fits within the
 	 * direct map window, try to use the direct map.
 	 */
 	if (pa < dmaplimit && pa + size < dmaplimit) {
 		va = PHYS_TO_DMAP(pa);
 		if (!pmap_change_attr(va, size, mode))
 			return ((void *)va);
 	}
 
 	/* Widen the request to whole pages, remembering the page offset. */
 	offset = pa & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 	va = kmem_alloc_nofault(kernel_map, size);
 	if (!va)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 	pa = trunc_page(pa);
 	mapped = 0;
 	while (mapped < size) {
 		pmap_kenter_attr(va + mapped, pa + mapped, mode);
 		mapped += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, va, va + mapped);
 	pmap_invalidate_cache_range(va, va + mapped);
 	return ((void *)(va + offset));
 }
 
 /*
  * Map device memory with the PAT_UNCACHEABLE memory type.  Thin wrapper
  * around pmap_mapdev_attr(); returns whatever that function returns.
  */
 void *
 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 {
 
 	return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
 }
 
 /*
  * Map a physical range with the PAT_WRITE_BACK memory type.  Thin wrapper
  * around pmap_mapdev_attr(); returns whatever that function returns.
  */
 void *
 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
 
 	return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
 }
 
 /*
  * Undo a mapping of "size" bytes at "va" that was set up by pmap_mapdev*().
  * Addresses inside the direct map are left alone; otherwise every page of
  * the range is unmapped and the kernel virtual address space is freed.
  */
 void
 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t first, last, cur;
 
 	/* A direct map address was handed out as-is: nothing to tear down. */
 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
 		return;
 
 	/* Widen the range to whole pages, as was done when mapping it. */
 	first = trunc_page(va);
 	size = roundup((va & PAGE_MASK) + size, PAGE_SIZE);
 	last = first + size;
 
 	cur = first;
 	while (cur < last) {
 		pmap_kremove(cur);
 		cur += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, va, cur);
 	kmem_free(kernel_map, first, size);
 }
 
 /*
  * Tries to demote a 1GB page mapping.
  *
  * A page directory page is allocated and filled with NPDEPG entries, the
  * first a copy of the old PDPE and each later one advanced by NBPDR from
  * its predecessor; "*pdpe" is then rewritten to point at that page.
  * Returns TRUE on success, or FALSE (leaving "*pdpe" untouched) if the
  * page directory page could not be allocated.  The caller must hold the
  * pmap lock (asserted below).
  */
 static boolean_t
 pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
 {
 	pdp_entry_t newpdpe, oldpdpe;
 	pd_entry_t *firstpde, newpde, *pde;
 	vm_paddr_t mpdepa;
 	vm_page_t mpde;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldpdpe = *pdpe;
 	KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V),
 	    ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V"));
 	/* Allocate the page that will hold the new page directory. */
 	if ((mpde = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (FALSE);
 	}
 	mpdepa = VM_PAGE_TO_PHYS(mpde);
 	/* The new page is filled in through its direct map address. */
 	firstpde = (pd_entry_t *)PHYS_TO_DMAP(mpdepa);
 	/*
 	 * The replacement PDPE references the new page directory page; of
 	 * the old entry's bits only PG_U is carried over.
 	 */
 	newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V;
 	KASSERT((oldpdpe & PG_A) != 0,
 	    ("pmap_demote_pdpe: oldpdpe is missing PG_A"));
 	KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pdpe: oldpdpe is missing PG_M"));
 	/* Each PDE starts out as a copy of the old PDPE (PG_PS included). */
 	newpde = oldpdpe;
 
 	/*
 	 * Initialize the page directory page.
 	 */
 	for (pde = firstpde; pde < firstpde + NPDEPG; pde++) {
 		*pde = newpde;
 		newpde += NBPDR;
 	}
 
 	/*
 	 * Demote the mapping.
 	 */
 	*pdpe = newpdpe;
 
 	/*
 	 * Invalidate a stale recursive mapping of the page directory page.
 	 */
 	pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va));
 
 	pmap_pdpe_demotions++;
 	CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx"
 	    " in pmap %p", va, pmap);
 	return (TRUE);
 }
 
 /*
  * Record "ma" as the memory attribute of page "m" and, unless the page is
  * fictitious, apply it to the page's mapping in the direct map.  Panics if
  * that direct map update fails.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 
 	m->md.pat_mode = ma;
 
 	/* Only the stored attribute is updated for a fictitious page. */
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 
 	/*
 	 * "m" is a normal page, so bring its direct mapping in line.  The
 	 * update can be relied upon to perform any cache operations that
 	 * are required for data coherence.
 	 */
 	if (pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
 	    m->md.pat_mode) != 0)
 		panic("memory attribute change on the direct map failed");
 }
 
 /*
  * Changes the specified virtual address range's memory type to that given by
  * the parameter "mode".  The specified virtual address range must be
  * completely contained within either the direct map or the kernel map.  If
  * the virtual address range is contained within the kernel map, then the
  * memory type for each of the corresponding ranges of the direct map is also
  * changed.  (The corresponding ranges of the direct map are those ranges that
  * map the same physical pages as the specified virtual address range.)  These
  * changes to the direct map are necessary because Intel describes the
  * behavior of their processors as "undefined" if two or more mappings to the
  * same physical page have different memory types.
  *
  * Returns zero if the change completed successfully, and either EINVAL or
  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
  * of the virtual address range was not mapped, and ENOMEM is returned if
  * there was insufficient memory available to complete the change.  In the
  * latter case, the memory type may have been changed on some part of the
  * virtual address range or the direct map.
  *
  * This is the unlocked entry point: it takes the kernel pmap lock around a
  * call to pmap_change_attr_locked(), which does the actual work.
  */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	int error;
 
 	PMAP_LOCK(kernel_pmap);
 	error = pmap_change_attr_locked(va, size, mode);
 	PMAP_UNLOCK(kernel_pmap);
 	return (error);
 }
 
 /*
  * Worker for pmap_change_attr(); the caller must already hold the kernel
  * pmap lock (asserted below).
  *
  * The range is first widened to whole pages and then walked twice.  The
  * first pass checks that every page is mapped and demotes any 1GB or 2MB
  * mapping that is only partly covered by the range and does not already
  * have the requested memory type.  The second pass rewrites the cache bits
  * and, for addresses at or above VM_MIN_KERNEL_ADDRESS, collects runs of
  * contiguous physical addresses and calls itself on the matching direct
  * map range.
  *
  * Returns 0 on success, EINVAL if the range starts below DMAP_MIN_ADDRESS
  * or contains an unmapped page, ENOMEM if a demotion fails, or the error
  * returned by a recursive call.
  */
 static int
 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
 	vm_paddr_t pa_start, pa_end;
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	int cache_bits_pte, cache_bits_pde, error;
 	boolean_t changed;
 
 	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 
 	/*
 	 * Only supported on kernel virtual addresses, including the direct
 	 * map but excluding the recursive map.
 	 */
 	if (base < DMAP_MIN_ADDRESS)
 		return (EINVAL);
 
 	/* The cache bits are computed separately for PDE and PTE entries. */
 	cache_bits_pde = pmap_cache_bits(mode, 1);
 	cache_bits_pte = pmap_cache_bits(mode, 0);
 	changed = FALSE;
 
 	/*
 	 * Pages that aren't mapped aren't supported.  Also break down 2MB pages
 	 * into 4KB pages if required.
 	 */
 	for (tmpva = base; tmpva < base + size; ) {
 		pdpe = pmap_pdpe(kernel_pmap, tmpva);
 		if (*pdpe == 0)
 			return (EINVAL);
 		if (*pdpe & PG_PS) {
 			/*
 			 * If the current 1GB page already has the required
 			 * memory type, then we need not demote this page. Just
 			 * increment tmpva to the next 1GB page frame.
 			 */
 			if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) {
 				tmpva = trunc_1gpage(tmpva) + NBPDP;
 				continue;
 			}
 
 			/*
 			 * If the current offset aligns with a 1GB page frame
 			 * and there is at least 1GB left within the range, then
 			 * we need not break down this page into 2MB pages.
 			 */
 			if ((tmpva & PDPMASK) == 0 &&
 			    tmpva + PDPMASK < base + size) {
 				tmpva += NBPDP;
 				continue;
 			}
 			if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva))
 				return (ENOMEM);
 		}
 		pde = pmap_pdpe_to_pde(pdpe, tmpva);
 		if (*pde == 0)
 			return (EINVAL);
 		if (*pde & PG_PS) {
 			/*
 			 * If the current 2MB page already has the required
 			 * memory type, then we need not demote this page. Just
 			 * increment tmpva to the next 2MB page frame.
 			 */
 			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
 				tmpva = trunc_2mpage(tmpva) + NBPDR;
 				continue;
 			}
 
 			/*
 			 * If the current offset aligns with a 2MB page frame
 			 * and there is at least 2MB left within the range, then
 			 * we need not break down this page into 4KB pages.
 			 */
 			if ((tmpva & PDRMASK) == 0 &&
 			    tmpva + PDRMASK < base + size) {
 				tmpva += NBPDR;
 				continue;
 			}
 			if (!pmap_demote_pde(kernel_pmap, pde, tmpva))
 				return (ENOMEM);
 		}
 		pte = pmap_pde_to_pte(pde, tmpva);
 		if (*pte == 0)
 			return (EINVAL);
 		tmpva += PAGE_SIZE;
 	}
 	error = 0;
 
 	/*
 	 * Ok, all the pages exist, so run through them updating their
 	 * cache mode if required.
 	 *
 	 * [pa_start, pa_end) is the run of contiguous physical addresses
 	 * collected so far for addresses at or above VM_MIN_KERNEL_ADDRESS;
 	 * pa_start == pa_end means that no run is pending.  When the next
 	 * mapping does not continue the run, the run's direct map range is
 	 * changed by a recursive call and a new run is started.
 	 */
 	pa_start = pa_end = 0;
 	for (tmpva = base; tmpva < base + size; ) {
 		pdpe = pmap_pdpe(kernel_pmap, tmpva);
 		if (*pdpe & PG_PS) {
 			/* 1GB mapping: the cache bits live in the PDPE. */
 			if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pdpe, cache_bits_pde);
 				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
 					/* Start physical address run. */
 					pa_start = *pdpe & PG_PS_FRAME;
 					pa_end = pa_start + NBPDP;
 				} else if (pa_end == (*pdpe & PG_PS_FRAME))
 					pa_end += NBPDP;
 				else {
 					/* Run ended, update direct map. */
 					error = pmap_change_attr_locked(
 					    PHYS_TO_DMAP(pa_start),
 					    pa_end - pa_start, mode);
 					if (error != 0)
 						break;
 					/* Start physical address run. */
 					pa_start = *pdpe & PG_PS_FRAME;
 					pa_end = pa_start + NBPDP;
 				}
 			}
 			tmpva = trunc_1gpage(tmpva) + NBPDP;
 			continue;
 		}
 		pde = pmap_pdpe_to_pde(pdpe, tmpva);
 		if (*pde & PG_PS) {
 			/* 2MB mapping: the cache bits live in the PDE. */
 			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pde, cache_bits_pde);
 				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
 					/* Start physical address run. */
 					pa_start = *pde & PG_PS_FRAME;
 					pa_end = pa_start + NBPDR;
 				} else if (pa_end == (*pde & PG_PS_FRAME))
 					pa_end += NBPDR;
 				else {
 					/* Run ended, update direct map. */
 					error = pmap_change_attr_locked(
 					    PHYS_TO_DMAP(pa_start),
 					    pa_end - pa_start, mode);
 					if (error != 0)
 						break;
 					/* Start physical address run. */
 					pa_start = *pde & PG_PS_FRAME;
 					pa_end = pa_start + NBPDR;
 				}
 			}
 			tmpva = trunc_2mpage(tmpva) + NBPDR;
 		} else {
 			/* 4KB mapping: the cache bits live in the PTE. */
 			pte = pmap_pde_to_pte(pde, tmpva);
 			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
 				pmap_pte_attr(pte, cache_bits_pte);
 				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
 					/* Start physical address run. */
 					pa_start = *pte & PG_FRAME;
 					pa_end = pa_start + PAGE_SIZE;
 				} else if (pa_end == (*pte & PG_FRAME))
 					pa_end += PAGE_SIZE;
 				else {
 					/* Run ended, update direct map. */
 					error = pmap_change_attr_locked(
 					    PHYS_TO_DMAP(pa_start),
 					    pa_end - pa_start, mode);
 					if (error != 0)
 						break;
 					/* Start physical address run. */
 					pa_start = *pte & PG_FRAME;
 					pa_end = pa_start + PAGE_SIZE;
 				}
 			}
 			tmpva += PAGE_SIZE;
 		}
 	}
 	/* Update the direct map for the run still pending, if any. */
 	if (error == 0 && pa_start != pa_end)
 		error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start),
 		    pa_end - pa_start, mode);
 
 	/*
 	 * Flush CPU caches if required to make sure any data isn't cached that
 	 * shouldn't be, etc.  This is done even when the loop above stopped
 	 * early with an error; tmpva marks how far the walk got.
 	 */
 	if (changed) {
 		pmap_invalidate_range(kernel_pmap, base, tmpva);
 		pmap_invalidate_cache_range(base, tmpva);
 	}
 	return (error);
 }
 
 /*
  * Demotes any mapping within the direct map region that covers more than the
  * specified range of physical addresses.  This range's size must be a power
  * of two and its starting address must be a multiple of its size.  Since the
  * demotion does not change any attributes of the mapping, a TLB invalidation
  * is not mandatory.  The caller may, however, request a TLB invalidation.
  *
  * A zero "len" is a no-op.  An invalid entry or a failed demotion is fatal
  * (panic).
  */
 void
 pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate)
 {
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde;
 	vm_offset_t va;
 	boolean_t changed;
 
 	if (len == 0)
 		return;
 	KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2"));
 	KASSERT((base & (len - 1)) == 0,
 	    ("pmap_demote_DMAP: base is not a multiple of len"));
 	/*
 	 * Nothing is done for a range of NBPDP bytes or more, or for one
 	 * that starts at or above dmaplimit.
 	 */
 	if (len < NBPDP && base < dmaplimit) {
 		va = PHYS_TO_DMAP(base);
 		changed = FALSE;
 		PMAP_LOCK(kernel_pmap);
 		pdpe = pmap_pdpe(kernel_pmap, va);
 		if ((*pdpe & PG_V) == 0)
 			panic("pmap_demote_DMAP: invalid PDPE");
 		/* Break up a PG_PS mapping at the PDPE level first. */
 		if ((*pdpe & PG_PS) != 0) {
 			if (!pmap_demote_pdpe(kernel_pmap, pdpe, va))
 				panic("pmap_demote_DMAP: PDPE failed");
 			changed = TRUE;
 		}
 		/* A range smaller than NBPDR also needs the PDE demoted. */
 		if (len < NBPDR) {
 			pde = pmap_pdpe_to_pde(pdpe, va);
 			if ((*pde & PG_V) == 0)
 				panic("pmap_demote_DMAP: invalid PDE");
 			if ((*pde & PG_PS) != 0) {
 				if (!pmap_demote_pde(kernel_pmap, pde, va))
 					panic("pmap_demote_DMAP: PDE failed");
 				changed = TRUE;
 			}
 		}
 		/* Invalidate only if something changed and it was requested. */
 		if (changed && invalidate)
 			pmap_invalidate_page(kernel_pmap, va);
 		PMAP_UNLOCK(kernel_pmap);
 	}
 }
 
 /*
  * perform the pmap work for mincore
  *
  * Looks up the mapping of "addr" in "pmap" and returns a mask of MINCORE_*
  * flags describing it; 0 is returned when there is no valid mapping.  The
  * pmap lock is taken and released internally.  "locked_pa" is passed on to
  * vm_page_pa_tryrelock() or released with PA_UNLOCK_COND(), as described
  * in the body.
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 {
 	pd_entry_t *pdep;
 	pt_entry_t pte;
 	vm_paddr_t pa;
 	int val;
 
 	PMAP_LOCK(pmap);
 retry:
 	pdep = pmap_pde(pmap, addr);
 	if (pdep != NULL && (*pdep & PG_V)) {
 		if (*pdep & PG_PS) {
 			/* Superpage: the PDE itself serves as the "pte". */
 			pte = *pdep;
 			/* Compute the physical address of the 4KB page. */
 			pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
 			    PG_FRAME;
 			val = MINCORE_SUPER;
 		} else {
 			pte = *pmap_pde_to_pte(pdep, addr);
 			pa = pte & PG_FRAME;
 			val = 0;
 		}
 	} else {
 		/* No valid page directory entry: report nothing. */
 		pte = 0;
 		pa = 0;
 		val = 0;
 	}
 	if ((pte & PG_V) != 0) {
 		val |= MINCORE_INCORE;
 		/* Modified is reported only for a dirty, writable mapping. */
 		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if ((pte & PG_A) != 0)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	}
 	/*
 	 * For a valid managed mapping that is not already both modified and
 	 * referenced, try to lock the page's physical address; a nonzero
 	 * return from vm_page_pa_tryrelock() restarts the lookup.  In every
 	 * other case any held physical address lock is released instead.
 	 */
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
 	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 			goto retry;
 	} else
 		PA_UNLOCK_COND(*locked_pa);
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 /*
  * Make the pmap of thread "td"'s process the current pmap of this CPU.
  * Inside a critical section, the CPU's bit is moved from the old pmap's
  * pm_active mask to the new one's, %cr3 is loaded with the physical address
  * of the new pmap's PML4 page (also saved in the thread's pcb), and the
  * per-CPU curpmap pointer is updated.
  */
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t	pmap, oldpmap;
 	u_int64_t  cr3;
 
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 	/* SMP kernels update the pm_active masks with atomic operations. */
 #ifdef SMP
 	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 #else
 	oldpmap->pm_active &= ~PCPU_GET(cpumask);
 	pmap->pm_active |= PCPU_GET(cpumask);
 #endif
 	/* pm_pml4 is a direct map address; %cr3 takes the physical one. */
 	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
 	td->td_pcb->pcb_cr3 = cr3;
 	load_cr3(cr3);
 	PCPU_SET(curpmap, pmap);
 	critical_exit();
 }
 
 /*
  * Instruction cache synchronization hook.  The body is intentionally
  * empty: this implementation does nothing with its arguments.
  * NOTE(review): presumably no explicit instruction cache maintenance is
  * needed on this architecture -- confirm.
  */
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
 }
 
 /*
  *	Possibly raise "*addr", the proposed starting virtual address of a
  *	mapping, so that its offset within a superpage matches that of the
  *	object offset being mapped; a different alignment might then allow
  *	more superpage mappings.  "*addr" is never decreased below the start
  *	of the superpage that contains it, and is left alone when the
  *	mapping is too small to benefit or is already suitably aligned.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t aligned, cur_off, lead, want_off;
 
 	/* A mapping smaller than one superpage cannot contain one. */
 	if (size < NBPDR)
 		return;
 
 	/* Take the object's page color into account, if it has one. */
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	want_off = offset & PDRMASK;
 
 	/*
 	 * "lead" is the number of bytes before the first superpage boundary
 	 * of the object offset; give up if less than a full superpage
 	 * remains after it.
 	 */
 	lead = (NBPDR - want_off) & PDRMASK;
 	if (size - lead < NBPDR)
 		return;
 
 	/* Already aligned the way we want. */
 	cur_off = *addr & PDRMASK;
 	if (cur_off == want_off)
 		return;
 
 	/*
 	 * Move forward to the wanted offset, either within the current
 	 * superpage or, if that offset has already been passed, within the
 	 * next one.
 	 */
 	if (cur_off < want_off)
 		aligned = *addr & ~PDRMASK;
 	else
 		aligned = (*addr + PDRMASK) & ~PDRMASK;
 	*addr = aligned + want_off;
 }
Index: projects/binutils-2.17/sys/amd64/include/specialreg.h
===================================================================
--- projects/binutils-2.17/sys/amd64/include/specialreg.h	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/include/specialreg.h	(revision 215830)
@@ -1,555 +1,564 @@
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)specialreg.h	7.1 (Berkeley) 5/9/91
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_SPECIALREG_H_
 #define	_MACHINE_SPECIALREG_H_
 
 /*
  * Bits in 386 special registers:
  */
 #define	CR0_PE	0x00000001	/* Protected mode Enable */
 #define	CR0_MP	0x00000002	/* "Math" (fpu) Present */
 #define	CR0_EM	0x00000004	/* EMulate FPU instructions. (trap ESC only) */
 #define	CR0_TS	0x00000008	/* Task Switched (if MP, trap ESC and WAIT) */
 #define	CR0_PG	0x80000000	/* PaGing enable */
 
 /*
  * Bits in 486 special registers:
  */
 #define	CR0_NE	0x00000020	/* Numeric Error enable (EX16 vs IRQ13) */
 #define	CR0_WP	0x00010000	/* Write Protect (honor page protect in
 							   all modes) */
 #define	CR0_AM	0x00040000	/* Alignment Mask (set to enable AC flag) */
 #define	CR0_NW  0x20000000	/* Not Write-through */
 #define	CR0_CD  0x40000000	/* Cache Disable */
 
 /*
  * Bits in PPro special registers
  */
 #define	CR4_VME	0x00000001	/* Virtual 8086 mode extensions */
 #define	CR4_PVI	0x00000002	/* Protected-mode virtual interrupts */
 #define	CR4_TSD	0x00000004	/* Time stamp disable */
 #define	CR4_DE	0x00000008	/* Debugging extensions */
 #define	CR4_PSE	0x00000010	/* Page size extensions */
 #define	CR4_PAE	0x00000020	/* Physical address extension */
 #define	CR4_MCE	0x00000040	/* Machine check enable */
 #define	CR4_PGE	0x00000080	/* Page global enable */
 #define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
 #define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
 #define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
 
 /*
  * Bits in AMD64 special registers.  EFER is 64 bits wide.
  */
 #define	EFER_SCE 0x000000001	/* System Call Extensions (R/W) */
 #define	EFER_LME 0x000000100	/* Long mode enable (R/W) */
 #define	EFER_LMA 0x000000400	/* Long mode active (R) */
 #define	EFER_NXE 0x000000800	/* PTE No-Execute bit enable (R/W) */
 
 /*
  * CPUID instruction features register
  */
 #define	CPUID_FPU	0x00000001
 #define	CPUID_VME	0x00000002
 #define	CPUID_DE	0x00000004
 #define	CPUID_PSE	0x00000008
 #define	CPUID_TSC	0x00000010
 #define	CPUID_MSR	0x00000020
 #define	CPUID_PAE	0x00000040
 #define	CPUID_MCE	0x00000080
 #define	CPUID_CX8	0x00000100
 #define	CPUID_APIC	0x00000200
 #define	CPUID_B10	0x00000400
 #define	CPUID_SEP	0x00000800
 #define	CPUID_MTRR	0x00001000
 #define	CPUID_PGE	0x00002000
 #define	CPUID_MCA	0x00004000
 #define	CPUID_CMOV	0x00008000
 #define	CPUID_PAT	0x00010000
 #define	CPUID_PSE36	0x00020000
 #define	CPUID_PSN	0x00040000
 #define	CPUID_CLFSH	0x00080000
 #define	CPUID_B20	0x00100000
 #define	CPUID_DS	0x00200000
 #define	CPUID_ACPI	0x00400000
 #define	CPUID_MMX	0x00800000
 #define	CPUID_FXSR	0x01000000
 #define	CPUID_SSE	0x02000000
 #define	CPUID_XMM	0x02000000
 #define	CPUID_SSE2	0x04000000
 #define	CPUID_SS	0x08000000
 #define	CPUID_HTT	0x10000000
 #define	CPUID_TM	0x20000000
 #define	CPUID_IA64	0x40000000
 #define	CPUID_PBE	0x80000000
 
 #define	CPUID2_SSE3	0x00000001
 #define	CPUID2_PCLMULQDQ 0x00000002
 #define	CPUID2_DTES64	0x00000004
 #define	CPUID2_MON	0x00000008
 #define	CPUID2_DS_CPL	0x00000010
 #define	CPUID2_VMX	0x00000020
 #define	CPUID2_SMX	0x00000040
 #define	CPUID2_EST	0x00000080
 #define	CPUID2_TM2	0x00000100
 #define	CPUID2_SSSE3	0x00000200
 #define	CPUID2_CNXTID	0x00000400
 #define	CPUID2_CX16	0x00002000
 #define	CPUID2_XTPR	0x00004000
 #define	CPUID2_PDCM	0x00008000
 #define	CPUID2_PCID	0x00020000
 #define	CPUID2_DCA	0x00040000
 #define	CPUID2_SSE41	0x00080000
 #define	CPUID2_SSE42	0x00100000
 #define	CPUID2_X2APIC	0x00200000
 #define	CPUID2_MOVBE	0x00400000
 #define	CPUID2_POPCNT	0x00800000
 #define	CPUID2_AESNI	0x02000000
 
 /*
+ * Important bits in the Thermal and Power Management flags
+ * CPUID.6 EAX and ECX.
+ */
+#define	CPUTPM1_SENSOR	0x00000001
+#define	CPUTPM1_TURBO	0x00000002
+#define	CPUTPM1_ARAT	0x00000004
+#define	CPUTPM2_EFFREQ	0x00000001
+
+/*
  * Important bits in the AMD extended cpuid flags
  */
 #define	AMDID_SYSCALL	0x00000800
 #define	AMDID_MP	0x00080000
 #define	AMDID_NX	0x00100000
 #define	AMDID_EXT_MMX	0x00400000
 #define	AMDID_FFXSR	0x01000000
 #define	AMDID_PAGE1GB	0x04000000
 #define	AMDID_RDTSCP	0x08000000
 #define	AMDID_LM	0x20000000
 #define	AMDID_EXT_3DNOW	0x40000000
 #define	AMDID_3DNOW	0x80000000
 
 #define	AMDID2_LAHF	0x00000001
 #define	AMDID2_CMP	0x00000002
 #define	AMDID2_SVM	0x00000004
 #define	AMDID2_EXT_APIC	0x00000008
 #define	AMDID2_CR8	0x00000010
 #define	AMDID2_ABM	0x00000020
 #define	AMDID2_SSE4A	0x00000040
 #define	AMDID2_MAS	0x00000080
 #define	AMDID2_PREFETCH	0x00000100
 #define	AMDID2_OSVW	0x00000200
 #define	AMDID2_IBS	0x00000400
 #define	AMDID2_SSE5	0x00000800
 #define	AMDID2_SKINIT	0x00001000
 #define	AMDID2_WDT	0x00002000
 
 /*
  * CPUID instruction 1 eax info
  *
  * The masks below select the individual fields of the value; the two
  * CPUID_TO_*() macros fold the extended fields into the base ones.
  */
 #define	CPUID_STEPPING		0x0000000f
 #define	CPUID_MODEL		0x000000f0
 #define	CPUID_FAMILY		0x00000f00
 #define	CPUID_EXT_MODEL		0x000f0000
 #define	CPUID_EXT_FAMILY	0x0ff00000
 /* Extended model supplies the high nibble, base model the low nibble. */
 #define	CPUID_TO_MODEL(id) \
     ((((id) & CPUID_EXT_MODEL) >> 12) | \
     (((id) & CPUID_MODEL) >> 4))
 /* Extended family and base family are added together. */
 #define	CPUID_TO_FAMILY(id) \
     ((((id) & CPUID_EXT_FAMILY) >> 20) + \
     (((id) & CPUID_FAMILY) >> 8))
 
 /*
  * CPUID instruction 1 ebx info
  */
 #define	CPUID_BRAND_INDEX	0x000000ff
 #define	CPUID_CLFUSH_SIZE	0x0000ff00
 #define	CPUID_HTT_CORES		0x00ff0000
 #define	CPUID_LOCAL_APIC_ID	0xff000000
 
 /* 
  * CPUID instruction 0xb ebx info.
  */
 #define	CPUID_TYPE_INVAL	0
 #define	CPUID_TYPE_SMT		1
 #define	CPUID_TYPE_CORE		2
 
 /*
  * AMD extended function 8000_0007h edx info
  */
 #define	AMDPM_TS		0x00000001
 #define	AMDPM_FID		0x00000002
 #define	AMDPM_VID		0x00000004
 #define	AMDPM_TTP		0x00000008
 #define	AMDPM_TM		0x00000010
 #define	AMDPM_STC		0x00000020
 #define	AMDPM_100MHZ_STEPS	0x00000040
 #define	AMDPM_HW_PSTATE		0x00000080
 #define	AMDPM_TSC_INVARIANT	0x00000100
 #define	AMDPM_CPB		0x00000200
 
 /*
  * AMD extended function 8000_0008h ecx info
  */
 #define	AMDID_CMP_CORES		0x000000ff
 
 /*
  * CPUID manufacturers identifiers
  */
 #define	AMD_VENDOR_ID		"AuthenticAMD"
 #define	CENTAUR_VENDOR_ID	"CentaurHauls"
 #define	INTEL_VENDOR_ID		"GenuineIntel"
 
 /*
  * Model-specific registers for the i386 family
  */
 #define	MSR_P5_MC_ADDR		0x000
 #define	MSR_P5_MC_TYPE		0x001
 #define	MSR_TSC			0x010
 #define	MSR_P5_CESR		0x011
 #define	MSR_P5_CTR0		0x012
 #define	MSR_P5_CTR1		0x013
 #define	MSR_IA32_PLATFORM_ID	0x017
 #define	MSR_APICBASE		0x01b
 #define	MSR_EBL_CR_POWERON	0x02a
 #define	MSR_TEST_CTL		0x033
 #define	MSR_BIOS_UPDT_TRIG	0x079
 #define	MSR_BBL_CR_D0		0x088
 #define	MSR_BBL_CR_D1		0x089
 #define	MSR_BBL_CR_D2		0x08a
 #define	MSR_BIOS_SIGN		0x08b
 #define	MSR_PERFCTR0		0x0c1
 #define	MSR_PERFCTR1		0x0c2
 #define	MSR_MPERF		0x0e7
 #define	MSR_APERF		0x0e8
 #define	MSR_IA32_EXT_CONFIG	0x0ee	/* Undocumented. Core Solo/Duo only */
 #define	MSR_MTRRcap		0x0fe
 #define	MSR_BBL_CR_ADDR		0x116
 #define	MSR_BBL_CR_DECC		0x118
 #define	MSR_BBL_CR_CTL		0x119
 #define	MSR_BBL_CR_TRIG		0x11a
 #define	MSR_BBL_CR_BUSY		0x11b
 #define	MSR_BBL_CR_CTL3		0x11e
 #define	MSR_SYSENTER_CS_MSR	0x174
 #define	MSR_SYSENTER_ESP_MSR	0x175
 #define	MSR_SYSENTER_EIP_MSR	0x176
 #define	MSR_MCG_CAP		0x179
 #define	MSR_MCG_STATUS		0x17a
 #define	MSR_MCG_CTL		0x17b
 #define	MSR_EVNTSEL0		0x186
 #define	MSR_EVNTSEL1		0x187
 #define	MSR_THERM_CONTROL	0x19a
 #define	MSR_THERM_INTERRUPT	0x19b
 #define	MSR_THERM_STATUS	0x19c
 #define	MSR_IA32_MISC_ENABLE	0x1a0
 #define	MSR_IA32_TEMPERATURE_TARGET	0x1a2
 #define	MSR_DEBUGCTLMSR		0x1d9
 #define	MSR_LASTBRANCHFROMIP	0x1db
 #define	MSR_LASTBRANCHTOIP	0x1dc
 #define	MSR_LASTINTFROMIP	0x1dd
 #define	MSR_LASTINTTOIP		0x1de
 #define	MSR_ROB_CR_BKUPTMPDR6	0x1e0
 #define	MSR_MTRRVarBase		0x200
 #define	MSR_MTRR64kBase		0x250
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
 #define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
 #define	MSR_MC0_ADDR		0x402
 #define	MSR_MC0_MISC		0x403
 #define	MSR_MC1_CTL		0x404
 #define	MSR_MC1_STATUS		0x405
 #define	MSR_MC1_ADDR		0x406
 #define	MSR_MC1_MISC		0x407
 #define	MSR_MC2_CTL		0x408
 #define	MSR_MC2_STATUS		0x409
 #define	MSR_MC2_ADDR		0x40a
 #define	MSR_MC2_MISC		0x40b
 #define	MSR_MC3_CTL		0x40c
 #define	MSR_MC3_STATUS		0x40d
 #define	MSR_MC3_ADDR		0x40e
 #define	MSR_MC3_MISC		0x40f
 #define	MSR_MC4_CTL		0x410
 #define	MSR_MC4_STATUS		0x411
 #define	MSR_MC4_ADDR		0x412
 #define	MSR_MC4_MISC		0x413
 
 /*
  * Constants related to MSR's.
  */
 #define	APICBASE_RESERVED	0x000006ff
 #define	APICBASE_BSP		0x00000100
 #define	APICBASE_ENABLED	0x00000800
 #define	APICBASE_ADDRESS	0xfffff000
 
 /*
  * PAT modes.
  *
  * PAT_VALUE(i, m) shifts mode "m" into byte "i" of a long value, and
  * PAT_MASK(i) is the 0xff mask covering that same byte.
  */
 #define	PAT_UNCACHEABLE		0x00
 #define	PAT_WRITE_COMBINING	0x01
 #define	PAT_WRITE_THROUGH	0x04
 #define	PAT_WRITE_PROTECTED	0x05
 #define	PAT_WRITE_BACK		0x06
 #define	PAT_UNCACHED		0x07
 #define	PAT_VALUE(i, m)		((long)(m) << (8 * (i)))
 #define	PAT_MASK(i)		PAT_VALUE(i, 0xff)
 
 /*
  * Constants related to MTRRs
  */
 #define	MTRR_UNCACHEABLE	0x00
 #define	MTRR_WRITE_COMBINING	0x01
 #define	MTRR_WRITE_THROUGH	0x04
 #define	MTRR_WRITE_PROTECTED	0x05
 #define	MTRR_WRITE_BACK		0x06
 #define	MTRR_N64K		8	/* numbers of fixed-size entries */
 #define	MTRR_N16K		16
 #define	MTRR_N4K		64
 #define	MTRR_CAP_WC		0x0000000000000400
 #define	MTRR_CAP_FIXED		0x0000000000000100
 #define	MTRR_CAP_VCNT		0x00000000000000ff
 #define	MTRR_DEF_ENABLE		0x0000000000000800
 #define	MTRR_DEF_FIXED_ENABLE	0x0000000000000400
 #define	MTRR_DEF_TYPE		0x00000000000000ff
 #define	MTRR_PHYSBASE_PHYSBASE	0x000ffffffffff000
 #define	MTRR_PHYSBASE_TYPE	0x00000000000000ff
 #define	MTRR_PHYSMASK_PHYSMASK	0x000ffffffffff000
 #define	MTRR_PHYSMASK_VALID	0x0000000000000800
 
 /* Performance Control Register (5x86 only). */
 #define	PCR0			0x20
 #define	PCR0_RSTK		0x01	/* Enables return stack */
 #define	PCR0_BTB		0x02	/* Enables branch target buffer */
 #define	PCR0_LOOP		0x04	/* Enables loop */
 #define	PCR0_AIS		0x08	/* Enables all instructions stalled to
 								   serialize pipe. */
 #define	PCR0_MLR		0x10	/* Enables reordering of misaligned loads */
 #define	PCR0_BTBRT		0x40	/* Enables BTB test register. */
 #define	PCR0_LSSER		0x80	/* Disable reorder */
 
 /* Device Identification Registers */
 #define	DIR0			0xfe
 #define	DIR1			0xff
 
 /*
  * Machine Check register constants.
  */
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
 #define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
 #define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
 #define	MCG_CTL_ENABLE		0xffffffffffffffff
 #define	MCG_CTL_DISABLE		0x0000000000000000
 #define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
 #define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffff
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
 #define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_PCC		0x0200000000000000
 #define	MC_STATUS_ADDRV		0x0400000000000000
 #define	MC_STATUS_MISCV		0x0800000000000000
 #define	MC_STATUS_EN		0x1000000000000000
 #define	MC_STATUS_UC		0x2000000000000000
 #define	MC_STATUS_OVER		0x4000000000000000
 #define	MC_STATUS_VAL		0x8000000000000000
 #define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
 #define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
 #define	MC_CTL2_THRESHOLD	0x0000000000007fff
 #define	MC_CTL2_CMCI_EN		0x0000000040000000
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.
  * These registers must be written as three separate bytes.
  *
  * NCRx+0: A31-A24 of starting address
  * NCRx+1: A23-A16 of starting address
  * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
  *
  * The non-cacheable region's starting address must be aligned to the
  * size indicated by the NCR_SIZE_xx field.
  */
 #define	NCR1	0xc4
 #define	NCR2	0xc7
 #define	NCR3	0xca
 #define	NCR4	0xcd
 
 #define	NCR_SIZE_0K	0
 #define	NCR_SIZE_4K	1
 #define	NCR_SIZE_8K	2
 #define	NCR_SIZE_16K	3
 #define	NCR_SIZE_32K	4
 #define	NCR_SIZE_64K	5
 #define	NCR_SIZE_128K	6
 #define	NCR_SIZE_256K	7
 #define	NCR_SIZE_512K	8
 #define	NCR_SIZE_1M	9
 #define	NCR_SIZE_2M	10
 #define	NCR_SIZE_4M	11
 #define	NCR_SIZE_8M	12
 #define	NCR_SIZE_16M	13
 #define	NCR_SIZE_32M	14
 #define	NCR_SIZE_4G	15
 
 /*
  * The address region registers are used to specify the location and
  * size for the eight address regions.
  *
  * ARRx + 0: A31-A24 of start address
  * ARRx + 1: A23-A16 of start address
  * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
  */
 #define	ARR0	0xc4
 #define	ARR1	0xc7
 #define	ARR2	0xca
 #define	ARR3	0xcd
 #define	ARR4	0xd0
 #define	ARR5	0xd3
 #define	ARR6	0xd6
 #define	ARR7	0xd9
 
 #define	ARR_SIZE_0K		0
 #define	ARR_SIZE_4K		1
 #define	ARR_SIZE_8K		2
 #define	ARR_SIZE_16K	3
 #define	ARR_SIZE_32K	4
 #define	ARR_SIZE_64K	5
 #define	ARR_SIZE_128K	6
 #define	ARR_SIZE_256K	7
 #define	ARR_SIZE_512K	8
 #define	ARR_SIZE_1M		9
 #define	ARR_SIZE_2M		10
 #define	ARR_SIZE_4M		11
 #define	ARR_SIZE_8M		12
 #define	ARR_SIZE_16M	13
 #define	ARR_SIZE_32M	14
 #define	ARR_SIZE_4G		15
 
 /*
  * The region control registers specify the attributes associated with
  * the ARRx address regions.
  */
 #define	RCR0	0xdc
 #define	RCR1	0xdd
 #define	RCR2	0xde
 #define	RCR3	0xdf
 #define	RCR4	0xe0
 #define	RCR5	0xe1
 #define	RCR6	0xe2
 #define	RCR7	0xe3
 
 #define	RCR_RCD	0x01	/* Disables caching for ARRx (x = 0-6). */
 #define	RCR_RCE	0x01	/* Enables caching for ARR7. */
 #define	RCR_WWO	0x02	/* Weak write ordering. */
 #define	RCR_WL	0x04	/* Weak locking. */
 #define	RCR_WG	0x08	/* Write gathering. */
 #define	RCR_WT	0x10	/* Write-through. */
 #define	RCR_NLB	0x20	/* LBA# pin is not asserted. */
 
 /* AMD Write Allocate Top-Of-Memory and Control Register */
 #define	AMD_WT_ALLOC_TME	0x40000	/* top-of-memory enable */
 #define	AMD_WT_ALLOC_PRE	0x20000	/* programmable range enable */
 #define	AMD_WT_ALLOC_FRE	0x10000	/* fixed (A0000-FFFFF) range enable */
 
 /* AMD64 MSR's */
 #define	MSR_EFER	0xc0000080	/* extended features */
 #define	MSR_STAR	0xc0000081	/* legacy mode SYSCALL target/cs/ss */
 #define	MSR_LSTAR	0xc0000082	/* long mode SYSCALL target rip */
 #define	MSR_CSTAR	0xc0000083	/* compat mode SYSCALL target rip */
 #define	MSR_SF_MASK	0xc0000084	/* syscall flags mask */
 #define	MSR_FSBASE	0xc0000100	/* base address of the %fs "segment" */
 #define	MSR_GSBASE	0xc0000101	/* base address of the %gs "segment" */
 #define	MSR_KGSBASE	0xc0000102	/* base address of the kernel %gs */
 #define	MSR_PERFEVSEL0	0xc0010000
 #define	MSR_PERFEVSEL1	0xc0010001
 #define	MSR_PERFEVSEL2	0xc0010002
 #define	MSR_PERFEVSEL3	0xc0010003
 #undef MSR_PERFCTR0
 #undef MSR_PERFCTR1
 #define	MSR_PERFCTR0	0xc0010004
 #define	MSR_PERFCTR1	0xc0010005
 #define	MSR_PERFCTR2	0xc0010006
 #define	MSR_PERFCTR3	0xc0010007
 #define	MSR_SYSCFG	0xc0010010
 #define	MSR_HWCR	0xc0010015
 #define	MSR_IORRBASE0	0xc0010016
 #define	MSR_IORRMASK0	0xc0010017
 #define	MSR_IORRBASE1	0xc0010018
 #define	MSR_IORRMASK1	0xc0010019
 #define	MSR_TOP_MEM	0xc001001a	/* boundary for ram below 4G */
 #define	MSR_TOP_MEM2	0xc001001d	/* boundary for ram above 4G */
 #define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
 #define	MSR_MC0_CTL_MASK	0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG		1	/* cpu has RNG */
 
 /* VIA ACE crypto featureset: for via_feature_xcrypt */
 #define	VIA_HAS_AES		1	/* cpu has AES */
 #define	VIA_HAS_SHA		2	/* cpu has SHA1 & SHA256 */
 #define	VIA_HAS_MM		4	/* cpu has RSA instructions */
 #define	VIA_HAS_AESCTR		8	/* cpu has AES-CTR instructions */
 
 /* Centaur Extended Feature flags */
 #define	VIA_CPUID_HAS_RNG	0x000004
 #define	VIA_CPUID_DO_RNG	0x000008
 #define	VIA_CPUID_HAS_ACE	0x000040
 #define	VIA_CPUID_DO_ACE	0x000080
 #define	VIA_CPUID_HAS_ACE2	0x000100
 #define	VIA_CPUID_DO_ACE2	0x000200
 #define	VIA_CPUID_HAS_PHE	0x000400
 #define	VIA_CPUID_DO_PHE	0x000800
 #define	VIA_CPUID_HAS_PMM	0x001000
 #define	VIA_CPUID_DO_PMM	0x002000
 
 /*
  * VIA ACE xcrypt-* instruction context control options.
  *
  * In the VIA_CRYPT_CWLO_KEYnnn values the low four bits (the field covered
  * by VIA_CRYPT_CWLO_ROUND_M) hold the round count: 0xa = 10, 0xc = 12 and
  * 0xe = 14.
  */
 #define	VIA_CRYPT_CWLO_ROUND_M		0x0000000f
 #define	VIA_CRYPT_CWLO_ALG_M		0x00000070
 #define	VIA_CRYPT_CWLO_ALG_AES		0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_M		0x00000080
 #define	VIA_CRYPT_CWLO_KEYGEN_HW	0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_SW	0x00000080
 #define	VIA_CRYPT_CWLO_NORMAL		0x00000000
 #define	VIA_CRYPT_CWLO_INTERMEDIATE	0x00000100
 #define	VIA_CRYPT_CWLO_ENCRYPT		0x00000000
 #define	VIA_CRYPT_CWLO_DECRYPT		0x00000200
 #define	VIA_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
 #define	VIA_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
 #define	VIA_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
 
 #endif /* !_MACHINE_SPECIALREG_H_ */
Index: projects/binutils-2.17/sys/amd64/include/xen
===================================================================
--- projects/binutils-2.17/sys/amd64/include/xen	(revision 215829)
+++ projects/binutils-2.17/sys/amd64/include/xen	(revision 215830)

Property changes on: projects/binutils-2.17/sys/amd64/include/xen
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/amd64/include/xen:r215709-215824
Index: projects/binutils-2.17/sys/boot/common/load_elf.c
===================================================================
--- projects/binutils-2.17/sys/boot/common/load_elf.c	(revision 215829)
+++ projects/binutils-2.17/sys/boot/common/load_elf.c	(revision 215830)
@@ -1,788 +1,789 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 1998 Peter Wemm <peter@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/exec.h>
 #include <sys/linker.h>
 #include <sys/module.h>
 #include <sys/stdint.h>
 #include <string.h>
 #include <machine/elf.h>
 #include <stand.h>
 #define FREEBSD_ELF
 #include <link.h>
 
 #include "bootstrap.h"
 
 #define COPYOUT(s,d,l)	archsw.arch_copyout((vm_offset_t)(s), d, l)
 
 #if defined(__i386__) && __ELF_WORD_SIZE == 64
 #undef ELF_TARG_CLASS
 #undef ELF_TARG_MACH
 #define ELF_TARG_CLASS  ELFCLASS64
 #define ELF_TARG_MACH   EM_X86_64
 #endif
 
 /*
  * Per-file loader state shared by the routines in this file.
  *
  * The table pointers (symtab, hashtab, buckets, chains, rel, rela, strtab)
  * are filled in from the PT_DYNAMIC entries with the load offset already
  * added; the code in this file only reads through them with COPYOUT() or
  * strdupout(), never by direct dereference.
  */
 typedef struct elf_file {
     Elf_Phdr 	*ph;		/* not referenced by the code in this file */
     Elf_Ehdr	*ehdr;		/* ELF header; points into firstpage */
     Elf_Sym	*symtab;	/* DT_SYMTAB + off */
     Elf_Hashelt	*hashtab;	/* DT_HASH + off */
     Elf_Hashelt	nbuckets;	/* hashtab[0], copied out of the image */
     Elf_Hashelt	nchains;	/* hashtab[1], copied out of the image */
     Elf_Hashelt	*buckets;	/* hashtab + 2 */
     Elf_Hashelt	*chains;	/* buckets + nbuckets */
     Elf_Rel	*rel;		/* DT_REL + off */
     size_t	relsz;		/* DT_RELSZ, in bytes */
     Elf_Rela	*rela;		/* DT_RELA + off */
     size_t	relasz;		/* DT_RELASZ, in bytes */
     char	*strtab;	/* DT_STRTAB + off */
     size_t	strsz;		/* DT_STRSZ, in bytes */
     int		fd;		/* open descriptor on the image file */
     caddr_t	firstpage;	/* malloc'ed copy of the first PAGE_SIZE bytes */
     size_t	firstlen;	/* number of bytes actually read into firstpage */
     int		kernel;		/* 1 for an ET_EXEC kernel, 0 for an ET_DYN module */
     u_int64_t	off;		/* offset added to image addresses when loading */
 } *elf_file_t;
 
 static int __elfN(loadimage)(struct preloaded_file *mp, elf_file_t ef, u_int64_t loadaddr);
 static int __elfN(lookup_symbol)(struct preloaded_file *mp, elf_file_t ef, const char* name, Elf_Sym* sym);
 static int __elfN(reloc_ptr)(struct preloaded_file *mp, elf_file_t ef,
     Elf_Addr p, void *val, size_t len);
 static int __elfN(parse_modmetadata)(struct preloaded_file *mp, elf_file_t ef);
 static symaddr_fn __elfN(symaddr);
 static char	*fake_modname(const char *name);
 
 const char	*__elfN(kerneltype) = "elf kernel";
 const char	*__elfN(moduletype) = "elf module";
 
 u_int64_t	__elfN(relocation_offset) = 0;
 
 /*
  * Attempt to load the file (file) as an ELF module.  It will be stored at
  * (dest), and a pointer to a module structure describing the loaded object
  * will be saved in (result).
  *
  * An ET_DYN image is treated as a module: a kernel of type
  * __elfN(kerneltype) must already be loaded and (dest) is rounded up to a
  * page boundary.  An ET_EXEC image is treated as the kernel: none may be
  * loaded yet and (dest) is replaced by the image's entry point.
  *
  * Returns 0 on success, otherwise EFTYPE (no name, short read, not a
  * matching ELF image), the errno from open(), ENOMEM, EPERM (ordering
  * problems, zero entry point, file_alloc() failure) or EIO (loadimage
  * failed).  Once the file is open, every exit path closes the descriptor
  * and frees the first-page buffer.
  */
 int
 __elfN(loadfile)(char *filename, u_int64_t dest, struct preloaded_file **result)
 {
     struct preloaded_file	*fp, *kfp;
     struct elf_file		ef;
     Elf_Ehdr 			*ehdr;
     int				err;
     u_int			pad;
     ssize_t			bytes_read;
 
     fp = NULL;
     bzero(&ef, sizeof(struct elf_file));
 
     /*
      * Open the image, read and validate the ELF header 
      */
     if (filename == NULL)	/* can't handle nameless */
 	return(EFTYPE);
     if ((ef.fd = open(filename, O_RDONLY)) == -1)
 	return(errno);
     ef.firstpage = malloc(PAGE_SIZE);
     if (ef.firstpage == NULL) {
 	close(ef.fd);
 	return(ENOMEM);
     }
     bytes_read = read(ef.fd, ef.firstpage, PAGE_SIZE);
     ef.firstlen = (size_t)bytes_read;
     /* bytes_read is tested first, so a wrapped firstlen is never trusted */
     if (bytes_read < 0 || ef.firstlen <= sizeof(Elf_Ehdr)) {
 	err = EFTYPE;		/* could be EIO, but may be small file */
 	goto oerr;
     }
     ehdr = ef.ehdr = (Elf_Ehdr *)ef.firstpage;
 
     /* Is it ELF? */
     if (!IS_ELF(*ehdr)) {
 	err = EFTYPE;
 	goto oerr;
     }
     if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||	/* Layout ? */
 	ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
 	ehdr->e_ident[EI_VERSION] != EV_CURRENT ||	/* Version ? */
 	ehdr->e_version != EV_CURRENT ||
 	ehdr->e_machine != ELF_TARG_MACH) {		/* Machine ? */
 	err = EFTYPE;
 	goto oerr;
     }
 
 
     /*
      * Check to see what sort of module we are.
      */
     kfp = file_findfile(NULL, NULL);
     if (ehdr->e_type == ET_DYN) {
 	/* Looks like a kld module */
 	if (kfp == NULL) {
 	    printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: can't load module before kernel\n");
 	    err = EPERM;
 	    goto oerr;
 	}
 	if (strcmp(__elfN(kerneltype), kfp->f_type)) {
 	    printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: can't load module with kernel type '%s'\n", kfp->f_type);
 	    err = EPERM;
 	    goto oerr;
 	}
 	/* Looks OK, go ahead */
 	ef.kernel = 0;
 
 	/* Page-align the load address */
 	pad = (u_int)dest & PAGE_MASK;
 	if (pad != 0) {
 	    pad = PAGE_SIZE - pad;
 	    dest += pad;
 	}
     } else if (ehdr->e_type == ET_EXEC) {
 	/* Looks like a kernel */
 	if (kfp != NULL) {
 	    printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: kernel already loaded\n");
 	    err = EPERM;
 	    goto oerr;
 	}
 	/* 
 	 * Calculate destination address based on kernel entrypoint 	
 	 */
 	dest = ehdr->e_entry;
 	if (dest == 0) {
 	    printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: not a kernel (maybe static binary?)\n");
 	    err = EPERM;
 	    goto oerr;
 	}
 	ef.kernel = 1;
 
     } else {
 	err = EFTYPE;
 	goto oerr;
     }
 
     /* 
      * Ok, we think we should handle this.
      */
     fp = file_alloc();
     if (fp == NULL) {
 	    printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: cannot allocate module info\n");
 	    err = EPERM;
 	    goto out;
     }
     if (ef.kernel)
 	setenv("kernelname", filename, 1);
     /* NOTE(review): the strdup() results below are not checked for NULL */
     fp->f_name = strdup(filename);
     fp->f_type = strdup(ef.kernel ? __elfN(kerneltype) : __elfN(moduletype));
 
 #ifdef ELF_VERBOSE
     if (ef.kernel)
 	printf("%s entry at 0x%jx\n", filename, (uintmax_t)dest);
 #else
     printf("%s ", filename);
 #endif
 
     fp->f_size = __elfN(loadimage)(fp, &ef, dest);
     if (fp->f_size == 0 || fp->f_addr == 0)
 	goto ioerr;
 
     /* save exec header as metadata */
     file_addmetadata(fp, MODINFOMD_ELFHDR, sizeof(*ehdr), ehdr);
 
     /* Load OK, return module pointer */
     *result = (struct preloaded_file *)fp;
     err = 0;
     goto out;
     
     /*
      * Cleanup ladder: ioerr sets EIO and falls into oerr, which discards
      * fp and falls into out, which releases the buffer and descriptor.
      * NOTE(review): oerr can be reached with fp still NULL; this relies on
      * file_discard() accepting NULL -- confirm.
      */
  ioerr:
     err = EIO;
  oerr:
     file_discard(fp);
  out:
     if (ef.firstpage)
 	free(ef.firstpage);
     close(ef.fd);
     return(err);
 }
 
 /*
  * With the file (fd) open on the image, and (ehdr) containing
  * the Elf header, load the image at (off)
  *
  * Steps, in order:
  *  1. For a kernel, turn (off) into the relocation offset for this
  *     architecture and publish it in __elfN(relocation_offset).
  *  2. Copy every PT_LOAD segment to p_vaddr + off and zero its bss tail.
  *  3. If a symbol table lies outside the loaded segments, append it and
  *     its string table after the image (each preceded by its size) and
  *     record MODINFOMD_SSYM / MODINFOMD_ESYM.
  *  4. Set fp->f_addr, record MODINFOMD_DYNAMIC, read the dynamic section
  *     and, if hash/symbol/string tables are present, parse the module
  *     metadata.
  *
  * Returns lastaddr - firstaddr (the span occupied, including any appended
  * symbols), or 0 if the program headers do not fit in the first page or a
  * segment read fails.  fp->f_addr is only set once step 4 is reached.
  */
 static int
 __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off)
 {
     int 	i;
     u_int	j;
     Elf_Ehdr	*ehdr;
     Elf_Phdr	*phdr, *php;
     Elf_Shdr	*shdr;
     int		ret;
     vm_offset_t firstaddr;
     vm_offset_t lastaddr;
     size_t	chunk;
     ssize_t	result;
     Elf_Addr	ssym, esym;
     Elf_Dyn	*dp;
     Elf_Addr	adp;
     int		ndp;
     int		symstrindex;
     int		symtabindex;
     Elf_Size	size;
     u_int	fpcopy;
 
     dp = NULL;
     shdr = NULL;
     ret = 0;
     firstaddr = lastaddr = 0;
     ehdr = ef->ehdr;
     if (ef->kernel) {
 #ifdef __i386__
 #if __ELF_WORD_SIZE == 64
 	off = - (off & 0xffffffffff000000ull);/* x86_64 relocates after locore */
 #else
 	off = - (off & 0xff000000u);	/* i386 relocates after locore */
 #endif
 #elif defined(__powerpc__)
 	/*
 	 * On the purely virtual memory machines like e500, the kernel is
 	 * linked against its final VA range, which is most often not
 	 * available at the loader stage, but only after kernel initializes
 	 * and completes its VM settings. In such cases we cannot use p_vaddr
 	 * field directly to load ELF segments, but put them at some
 	 * 'load-time' locations.
 	 */
 	if (off & 0xf0000000u) {
 	    off = -(off & 0xf0000000u);
 	    /*
 	     * XXX the physical load address should not be hardcoded. Note
 	     * that the Book-E kernel assumes that it's loaded at a 16MB
 	     * boundary for now...
 	     */
 	    off += 0x01000000;
 	    ehdr->e_entry += off;
 #ifdef ELF_VERBOSE
 	    printf("Converted entry 0x%08x\n", ehdr->e_entry);
 #endif
 	} else
 	    off = 0;
 #elif defined(__arm__)
 	if (off & 0xf0000000u) {
 	    off = -(off & 0xf0000000u);
 	    ehdr->e_entry += off;
 #ifdef ELF_VERBOSE
 	    printf("Converted entry 0x%08x\n", ehdr->e_entry);
 #endif
 	} else
 	    off = 0;
 #else
 	off = 0;		/* other archs use direct mapped kernels */
 #endif
 	__elfN(relocation_offset) = off;
     }
     ef->off = off;
 
     /* The program headers are only ever read from the cached first page. */
     if ((ehdr->e_phoff + ehdr->e_phnum * sizeof(*phdr)) > ef->firstlen) {
 	printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: program header not within first page\n");
 	goto out;
     }
     phdr = (Elf_Phdr *)(ef->firstpage + ehdr->e_phoff);
 
     for (i = 0; i < ehdr->e_phnum; i++) {
 	/* We want to load PT_LOAD segments only.. */
 	if (phdr[i].p_type != PT_LOAD)
 	    continue;
 
 #ifdef ELF_VERBOSE
 	printf("Segment: 0x%lx@0x%lx -> 0x%lx-0x%lx",
 	    (long)phdr[i].p_filesz, (long)phdr[i].p_offset,
 	    (long)(phdr[i].p_vaddr + off),
 	    (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1));
 #else
 	if ((phdr[i].p_flags & PF_W) == 0) {
 	    printf("text=0x%lx ", (long)phdr[i].p_filesz);
 	} else {
 	    printf("data=0x%lx", (long)phdr[i].p_filesz);
 	    if (phdr[i].p_filesz < phdr[i].p_memsz)
 		printf("+0x%lx", (long)(phdr[i].p_memsz -phdr[i].p_filesz));
 	    printf(" ");
 	}
 #endif
 	/*
 	 * Bytes of this segment already sitting in the cached first page are
 	 * copied from memory; only the remainder is read from the file.
 	 * NOTE(review): fpcopy is not clamped to p_filesz, so a segment
 	 * shorter than the rest of the first page gets extra bytes copied --
 	 * confirm this is intended.
 	 */
 	fpcopy = 0;
 	if (ef->firstlen > phdr[i].p_offset) {
 	    fpcopy = ef->firstlen - phdr[i].p_offset;
 	    archsw.arch_copyin(ef->firstpage + phdr[i].p_offset,
 			       phdr[i].p_vaddr + off, fpcopy);
 	}
 	if (phdr[i].p_filesz > fpcopy) {
 	    if (kern_pread(ef->fd, phdr[i].p_vaddr + off + fpcopy,
 		phdr[i].p_filesz - fpcopy, phdr[i].p_offset + fpcopy) != 0) {
 		printf("\nelf" __XSTRING(__ELF_WORD_SIZE)
 		    "_loadimage: read failed\n");
 		goto out;
 	    }
 	}
 	/* clear space from oversized segments; eg: bss */
 	if (phdr[i].p_filesz < phdr[i].p_memsz) {
 #ifdef ELF_VERBOSE
 	    printf(" (bss: 0x%lx-0x%lx)",
 		(long)(phdr[i].p_vaddr + off + phdr[i].p_filesz),
 		(long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1));
 #endif
 
 	    kern_bzero(phdr[i].p_vaddr + off + phdr[i].p_filesz,
 		phdr[i].p_memsz - phdr[i].p_filesz);
 	}
 #ifdef ELF_VERBOSE
 	printf("\n");
 #endif
 
 	/* Track the lowest and highest addresses touched by any segment. */
 	if (firstaddr == 0 || firstaddr > (phdr[i].p_vaddr + off))
 	    firstaddr = phdr[i].p_vaddr + off;
 	if (lastaddr == 0 || lastaddr < (phdr[i].p_vaddr + off + phdr[i].p_memsz))
 	    lastaddr = phdr[i].p_vaddr + off + phdr[i].p_memsz;
     }
     lastaddr = roundup(lastaddr, sizeof(long));
 
     /*
      * Now grab the symbol tables.  This isn't easy if we're reading a
      * .gz file.  I think the rule is going to have to be that you must
      * strip a file to remove symbols before gzipping it so that we do not
      * try to lseek() on it.
      */
     chunk = ehdr->e_shnum * ehdr->e_shentsize;
     if (chunk == 0 || ehdr->e_shoff == 0)
 	goto nosyms;
     shdr = alloc_pread(ef->fd, ehdr->e_shoff, chunk);
     if (shdr == NULL) {
 	printf("\nelf" __XSTRING(__ELF_WORD_SIZE)
 	    "_loadimage: failed to read section headers");
 	goto nosyms;
     }
     symtabindex = -1;
     symstrindex = -1;
     for (i = 0; i < ehdr->e_shnum; i++) {
 	if (shdr[i].sh_type != SHT_SYMTAB)
 	    continue;
 	/* A symtab fully inside a PT_LOAD segment is zeroed out and skipped. */
 	for (j = 0; j < ehdr->e_phnum; j++) {
 	    if (phdr[j].p_type != PT_LOAD)
 		continue;
 	    if (shdr[i].sh_offset >= phdr[j].p_offset &&
 		(shdr[i].sh_offset + shdr[i].sh_size <=
 		 phdr[j].p_offset + phdr[j].p_filesz)) {
 		shdr[i].sh_offset = 0;
 		shdr[i].sh_size = 0;
 		break;
 	    }
 	}
 	if (shdr[i].sh_offset == 0 || shdr[i].sh_size == 0)
 	    continue;		/* already loaded in a PT_LOAD above */
 	/* Save it for loading below */
 	symtabindex = i;
 	symstrindex = shdr[i].sh_link;
     }
     if (symtabindex < 0 || symstrindex < 0)
 	goto nosyms;
 
     /* Ok, committed to a load. */
 #ifndef ELF_VERBOSE
     printf("syms=[");
 #endif
     ssym = lastaddr;
     /*
      * Two passes: the symbol table first, then its string table.  Each
      * index is set to -1 once its section is loaded, which ends the loop
      * after the second pass.
      */
     for (i = symtabindex; i >= 0; i = symstrindex) {
 #ifdef ELF_VERBOSE
 	char	*secname;
 
 	switch(shdr[i].sh_type) {
 	    case SHT_SYMTAB:		/* Symbol table */
 		secname = "symtab";
 		break;
 	    case SHT_STRTAB:		/* String table */
 		secname = "strtab";
 		break;
 	    default:
 		secname = "WHOA!!";
 		break;
 	}
 #endif
 
 	/* Each table is stored as its size followed by its contents. */
 	size = shdr[i].sh_size;
 	archsw.arch_copyin(&size, lastaddr, sizeof(size));
 	lastaddr += sizeof(size);
 
 #ifdef ELF_VERBOSE
 	printf("\n%s: 0x%jx@0x%jx -> 0x%jx-0x%jx", secname,
 	    (uintmax_t)shdr[i].sh_size, (uintmax_t)shdr[i].sh_offset,
 	    (uintmax_t)lastaddr, (uintmax_t)(lastaddr + shdr[i].sh_size));
 #else
 	if (i == symstrindex)
 	    printf("+");
 	printf("0x%lx+0x%lx", (long)sizeof(size), (long)size);
 #endif
 
 	if (lseek(ef->fd, (off_t)shdr[i].sh_offset, SEEK_SET) == -1) {
 	    printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: could not seek for symbols - skipped!");
 	    lastaddr = ssym;
 	    ssym = 0;
 	    goto nosyms;
 	}
 	result = archsw.arch_readin(ef->fd, lastaddr, shdr[i].sh_size);
 	/*
 	 * NOTE(review): a negative result is printed through (uintmax_t) in
 	 * the added message below and so shows up as a very large number.
 	 */
 	if (result < 0 || (size_t)result != shdr[i].sh_size) {
-	    printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: could not read symbols - skipped!");
+	    printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: could not read symbols - skipped! (%ju != %ju)", (uintmax_t)result,
+		(uintmax_t)shdr[i].sh_size);
 	    lastaddr = ssym;
 	    ssym = 0;
 	    goto nosyms;
 	}
 	/* Reset offsets relative to ssym */
 	lastaddr += shdr[i].sh_size;
 	lastaddr = roundup(lastaddr, sizeof(size));
 	if (i == symtabindex)
 	    symtabindex = -1;
 	else if (i == symstrindex)
 	    symstrindex = -1;
     }
     esym = lastaddr;
 #ifndef ELF_VERBOSE
     printf("]");
 #endif
 
     file_addmetadata(fp, MODINFOMD_SSYM, sizeof(ssym), &ssym);
     file_addmetadata(fp, MODINFOMD_ESYM, sizeof(esym), &esym);
 
 nosyms:
     printf("\n");
 
     /* From here on the load counts as successful: ret is non-zero. */
     ret = lastaddr - firstaddr;
     fp->f_addr = firstaddr;
 
     php = NULL;
     for (i = 0; i < ehdr->e_phnum; i++) {
 	if (phdr[i].p_type == PT_DYNAMIC) {
 	    php = phdr + i;
 	    adp = php->p_vaddr;
 	    file_addmetadata(fp, MODINFOMD_DYNAMIC, sizeof(adp), &adp);
 	    break;
 	}
     }
 
     if (php == NULL)	/* this is bad, we cannot get to symbols or _DYNAMIC */
 	goto out;
 
     ndp = php->p_filesz / sizeof(Elf_Dyn);
     if (ndp == 0)
 	goto out;
     dp = malloc(php->p_filesz);
     if (dp == NULL)
 	goto out;
     /* Pull the dynamic section back out of the loaded image. */
     archsw.arch_copyout(php->p_vaddr + off, dp, php->p_filesz);
 
     ef->strsz = 0;
     for (i = 0; i < ndp; i++) {
 	if (dp[i].d_tag == 0)
 	    break;
 	switch (dp[i].d_tag) {
 	case DT_HASH:
 	    ef->hashtab = (Elf_Hashelt*)(uintptr_t)(dp[i].d_un.d_ptr + off);
 	    break;
 	case DT_STRTAB:
 	    ef->strtab = (char *)(uintptr_t)(dp[i].d_un.d_ptr + off);
 	    break;
 	case DT_STRSZ:
 	    ef->strsz = dp[i].d_un.d_val;
 	    break;
 	case DT_SYMTAB:
 	    ef->symtab = (Elf_Sym*)(uintptr_t)(dp[i].d_un.d_ptr + off);
 	    break;
 	case DT_REL:
 	    ef->rel = (Elf_Rel *)(uintptr_t)(dp[i].d_un.d_ptr + off);
 	    break;
 	case DT_RELSZ:
 	    ef->relsz = dp[i].d_un.d_val;
 	    break;
 	case DT_RELA:
 	    ef->rela = (Elf_Rela *)(uintptr_t)(dp[i].d_un.d_ptr + off);
 	    break;
 	case DT_RELASZ:
 	    ef->relasz = dp[i].d_un.d_val;
 	    break;
 	default:
 	    break;
 	}
     }
     if (ef->hashtab == NULL || ef->symtab == NULL ||
 	ef->strtab == NULL || ef->strsz == 0)
 	goto out;
     /* The hash table starts with nbuckets and nchains, then the arrays. */
     COPYOUT(ef->hashtab, &ef->nbuckets, sizeof(ef->nbuckets));
     COPYOUT(ef->hashtab + 1, &ef->nchains, sizeof(ef->nchains));
     ef->buckets = ef->hashtab + 2;
     ef->chains = ef->buckets + ef->nbuckets;
     /* The metadata result does not change ret; both outcomes reach out. */
     if (__elfN(parse_modmetadata)(fp, ef) == 0)
 	goto out;
 
     if (ef->kernel)			/* kernel must not depend on anything */
 	goto out;
 
 out:
     if (dp)
 	free(dp);
     if (shdr)
 	free(shdr);
     return ret;
 }
 
 /* Placeholder module name used when the file name has no usable stem. */
 static char invalid_name[] = "bad";
 
 /*
  * Build a module name from a file path: the last path component with its
  * final ".suffix" removed, e.g. "/boot/kernel/foo.ko" -> "foo".
  *
  * The suffix is searched for inside the last path component only.  Looking
  * for the last '.' in the whole path would find a dot in a directory name
  * ("/boot.d/kernel"), put the end pointer before the start pointer and turn
  * the length into a huge unsigned value.
  *
  * A last component that begins with '.' (nothing in front of the suffix)
  * yields the placeholder "bad".
  *
  * Returns a malloc'ed string the caller must free, or NULL if the
  * allocation fails.
  */
 char *
 fake_modname(const char *name)
 {
     const char *sp, *ep;
     char *fp;
     size_t len;
 
     /* sp: first character after the last '/', or the whole name. */
     sp = strrchr(name, '/');
     if (sp)
 	sp++;
     else
 	sp = name;
     /* ep: last '.' of that component, or its terminating NUL. */
     ep = strrchr(sp, '.');
     if (ep) {
 	    if (ep == sp) {
 		sp = invalid_name;
 		ep = invalid_name + sizeof(invalid_name) - 1;
 	    }
     } else
 	ep = sp + strlen(sp);
     len = ep - sp;		/* ep >= sp is guaranteed from here on */
     fp = malloc(len + 1);
     if (fp == NULL)
 	return NULL;
     memcpy(fp, sp, len);
     fp[len] = '\0';
     return fp;
 }
 
 #if defined(__i386__) && __ELF_WORD_SIZE == 64
 /*
  * Copy of struct mod_metadata with 64-bit md_data/md_cval fields, used when
  * an i386 build handles a 64-bit ELF image.  parse_modmetadata() copies the
  * record out in this form and narrows the two fields to native pointers.
  */
 struct mod_metadata64 {
 	int		md_version;	/* structure version MDTV_* */  
 	int		md_type;	/* type of entry MDT_* */
 	u_int64_t	md_data;	/* specific data */
 	u_int64_t	md_cval;	/* common string label */
 };
 #endif
 
 /*
  * Walk the image's modmetadata linker set (between the symbols
  * __start_set_modmetadata_set and __stop_set_modmetadata_set) and record
  * what it describes on (fp):
  *   MDT_DEPEND  -> a MODINFOMD_DEPLIST metadata record (modules only)
  *   MDT_VERSION -> a module entry via file_addmodule()
  * If no MDT_VERSION entry is found, one module named after the file
  * (see fake_modname()) is added with version 1.
  *
  * Returns 0 on success, ENOENT if either set symbol is missing, ENOMEM if
  * an allocation fails, or the error from __elfN(reloc_ptr)().
  */
 int
 __elfN(parse_modmetadata)(struct preloaded_file *fp, elf_file_t ef)
 {
     struct mod_metadata md;
 #if defined(__i386__) && __ELF_WORD_SIZE == 64
     struct mod_metadata64 md64;
 #endif
     struct mod_depend *mdepend;
     struct mod_version mver;
     Elf_Sym sym;
     char *s;
     int error, modcnt, minfolen;
     Elf_Addr v, p, p_stop;
 
     if (__elfN(lookup_symbol)(fp, ef, "__start_set_modmetadata_set", &sym) != 0)
 	return ENOENT;
     p = sym.st_value + ef->off;
     if (__elfN(lookup_symbol)(fp, ef, "__stop_set_modmetadata_set", &sym) != 0)
 	return ENOENT;
     p_stop = sym.st_value + ef->off;
 
     modcnt = 0;
     while (p < p_stop) {
 	/* Each set element is a pointer to a struct mod_metadata. */
 	COPYOUT(p, &v, sizeof(v));
 	error = __elfN(reloc_ptr)(fp, ef, p, &v, sizeof(v));
 	/* EOPNOTSUPP: nothing was relocated, so only add the load offset. */
 	if (error == EOPNOTSUPP)
 	    v += ef->off;
 	else if (error != 0)
 	    return (error);
 #if defined(__i386__) && __ELF_WORD_SIZE == 64
 	COPYOUT(v, &md64, sizeof(md64));
 	error = __elfN(reloc_ptr)(fp, ef, v, &md64, sizeof(md64));
 	if (error == EOPNOTSUPP) {
 	    md64.md_cval += ef->off;
 	    md64.md_data += ef->off;
 	} else if (error != 0)
 	    return (error);
 	md.md_version = md64.md_version;
 	md.md_type = md64.md_type;
 	md.md_cval = (const char *)(uintptr_t)md64.md_cval;
 	md.md_data = (void *)(uintptr_t)md64.md_data;
 #else
 	COPYOUT(v, &md, sizeof(md));
 	error = __elfN(reloc_ptr)(fp, ef, v, &md, sizeof(md));
 	if (error == EOPNOTSUPP) {
 	    md.md_cval += ef->off;
 	    md.md_data += ef->off;
 	} else if (error != 0)
 	    return (error);
 #endif
 	p += sizeof(Elf_Addr);
 	switch(md.md_type) {
 	  case MDT_DEPEND:
 	    if (ef->kernel)		/* kernel must not depend on anything */
 	      break;
 	    /* strdupout() is assumed to allocate, so it may return NULL. */
 	    s = strdupout((vm_offset_t)md.md_cval);
 	    if (s == NULL)
 		return ENOMEM;
 	    /* Record layout: struct mod_depend followed by the name string. */
 	    minfolen = sizeof(*mdepend) + strlen(s) + 1;
 	    mdepend = malloc(minfolen);
 	    if (mdepend == NULL) {
 		free(s);		/* do not leak the name on failure */
 		return ENOMEM;
 	    }
 	    COPYOUT((vm_offset_t)md.md_data, mdepend, sizeof(*mdepend));
 	    strcpy((char*)(mdepend + 1), s);
 	    free(s);
 	    file_addmetadata(fp, MODINFOMD_DEPLIST, minfolen, mdepend);
 	    free(mdepend);
 	    break;
 	  case MDT_VERSION:
 	    s = strdupout((vm_offset_t)md.md_cval);
 	    if (s == NULL)
 		return ENOMEM;
 	    COPYOUT((vm_offset_t)md.md_data, &mver, sizeof(mver));
 	    file_addmodule(fp, s, mver.mv_version, NULL);
 	    free(s);
 	    modcnt++;
 	    break;
 	  default:
 	    break;
 	}
     }
     if (modcnt == 0) {
 	/* No version record: invent a module named after the file. */
 	s = fake_modname(fp->f_name);
 	if (s == NULL)
 	    return ENOMEM;
 	file_addmodule(fp, s, 1, NULL);
 	free(s);
     }
     return 0;
 }
 
 /*
  * Standard ELF symbol-name hash: fold each byte into the value four bits
  * at a time, mixing whatever reaches bits 28-31 back into the low bits and
  * then clearing it, so the result always fits in 28 bits.
  */
 static unsigned long
 elf_hash(const char *name)
 {
     const unsigned char *cp;
     unsigned long hash, top;
 
     hash = 0;
     for (cp = (const unsigned char *)name; *cp != '\0'; cp++) {
 	hash = (hash << 4) + *cp;
 	top = hash & 0xf0000000;
 	if (top != 0)
 	    hash ^= top >> 24;
 	hash &= ~top;
     }
     return hash;
 }
 
 static const char __elfN(bad_symtable)[] = "elf" __XSTRING(__ELF_WORD_SIZE) "_lookup_symbol: corrupt symbol table\n";
 
 /*
  * Look (name) up in the loaded image's ELF hash table and copy the matching
  * symbol into (*symp).
  *
  * The tables live in the loaded image, so every access goes through
  * COPYOUT()/strdupout() rather than a direct dereference.
  *
  * Returns 0 when a defined symbol (or an undefined function symbol with a
  * non-zero value) is found.  Returns ENOENT when the name is absent, when
  * the match does not qualify, or when the table is unusable: no buckets, a
  * chain index past nchains, or a nameless symbol on a chain.  Returns
  * ENOMEM when a symbol name cannot be copied out.
  */
 int
 __elfN(lookup_symbol)(struct preloaded_file *fp, elf_file_t ef, const char* name,
 		  Elf_Sym *symp)
 {
     Elf_Hashelt symnum;
     Elf_Sym sym;
     char *strp;
     unsigned long hash;
 
     /* A table without buckets would make the modulo below divide by zero. */
     if (ef->nbuckets == 0) {
 	printf("%s", __elfN(bad_symtable));
 	return ENOENT;
     }
 
     hash = elf_hash(name);
     COPYOUT(&ef->buckets[hash % ef->nbuckets], &symnum, sizeof(symnum));
 
     /* Follow the bucket's chain until the name matches or the chain ends. */
     while (symnum != STN_UNDEF) {
 	if (symnum >= ef->nchains) {
 	    printf("%s", __elfN(bad_symtable));
 	    return ENOENT;
 	}
 
 	COPYOUT(ef->symtab + symnum, &sym, sizeof(sym));
 	if (sym.st_name == 0) {
 	    printf("%s", __elfN(bad_symtable));
 	    return ENOENT;
 	}
 
 	/* strdupout() is assumed to allocate, so it may return NULL. */
 	strp = strdupout((vm_offset_t)(ef->strtab + sym.st_name));
 	if (strp == NULL)
 	    return ENOMEM;
 	if (strcmp(name, strp) == 0) {
 	    free(strp);
 	    if (sym.st_shndx != SHN_UNDEF ||
 		(sym.st_value != 0 &&
 		 ELF_ST_TYPE(sym.st_info) == STT_FUNC)) {
 		*symp = sym;
 		return 0;
 	    }
 	    return ENOENT;
 	}
 	free(strp);
 	COPYOUT(&ef->chains[symnum], &symnum, sizeof(symnum));
     }
     return ENOENT;
 }
 
 /*
  * Run the module's own relocation records over a value that was copied out
  * of the image.  (p) is the address the value was loaded at; (val)/(len)
  * describe the local copy that gets patched.  The loaded image itself is
  * left alone, because kern_linker relocates it in place later on.
  *
  * Every Elf_Rel record and then every Elf_Rela record is handed to
  * __elfN(reloc)(); the first non-zero result ends the walk and is returned.
  *
  * Returns EOPNOTSUPP for the kernel, so that the caller applies the plain
  * load offset instead; otherwise 0 or the first relocation error.
  */
 static int
 __elfN(reloc_ptr)(struct preloaded_file *mp, elf_file_t ef,
     Elf_Addr p, void *val, size_t len)
 {
 	Elf_Rel rel;
 	Elf_Rela rela;
 	size_t idx;
 	int rc;
 
 	/*
 	 * The kernel is already relocated, but we still want to apply
 	 * offset adjustments.
 	 */
 	if (ef->kernel)
 		return (EOPNOTSUPP);
 
 	/* Records without an addend. */
 	idx = 0;
 	while (idx < ef->relsz / sizeof(rel)) {
 		COPYOUT(ef->rel + idx, &rel, sizeof(rel));
 
 		rc = __elfN(reloc)(ef, __elfN(symaddr), &rel, ELF_RELOC_REL,
 		    ef->off, p, val, len);
 		if (rc != 0)
 			return (rc);
 		idx++;
 	}
 
 	/* Records with an explicit addend. */
 	idx = 0;
 	while (idx < ef->relasz / sizeof(rela)) {
 		COPYOUT(ef->rela + idx, &rela, sizeof(rela));
 
 		rc = __elfN(reloc)(ef, __elfN(symaddr), &rela, ELF_RELOC_RELA,
 		    ef->off, p, val, len);
 		if (rc != 0)
 			return (rc);
 		idx++;
 	}
 
 	return (0);
 }
 
 /*
  * Symbol-address callback passed to __elfN(reloc)() by reloc_ptr().
  * Lookup by symbol index is not needed for what the loader relocates, so
  * both arguments are ignored and every index resolves to address 0.
  */
 static Elf_Addr
 __elfN(symaddr)(struct elf_file *ef, Elf_Size symidx)
 {
 
 	(void)ef;
 	(void)symidx;
 	return (0);
 }
Index: projects/binutils-2.17/sys/cddl/contrib/opensolaris
===================================================================
--- projects/binutils-2.17/sys/cddl/contrib/opensolaris	(revision 215829)
+++ projects/binutils-2.17/sys/cddl/contrib/opensolaris	(revision 215830)

Property changes on: projects/binutils-2.17/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/cddl/contrib/opensolaris:r215709-215824
Index: projects/binutils-2.17/sys/compat/freebsd32/freebsd32_misc.c
===================================================================
--- projects/binutils-2.17/sys/compat/freebsd32/freebsd32_misc.c	(revision 215829)
+++ projects/binutils-2.17/sys/compat/freebsd32/freebsd32_misc.c	(revision 215830)
@@ -1,2667 +1,2668 @@
 /*-
  * Copyright (c) 2002 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #define __ELF_WORD_SIZE 32
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/clock.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/imgact.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/file.h>		/* Must come after sys/malloc.h */
 #include <sys/imgact.h>
 #include <sys/mbuf.h>
 #include <sys/mman.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/selinfo.h>
 #include <sys/eventvar.h>	/* Must come after sys/selinfo.h */
 #include <sys/pipe.h>		/* Must come after sys/selinfo.h */
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/thr.h>
 #include <sys/unistd.h>
 #include <sys/ucontext.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 #include <sys/ipc.h>
 #include <sys/msg.h>
 #include <sys/sem.h>
 #include <sys/shm.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/elf.h>
 
 #include <security/audit/audit.h>
 
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_ipc.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 CTASSERT(sizeof(struct timeval32) == 8);
 CTASSERT(sizeof(struct timespec32) == 8);
 CTASSERT(sizeof(struct itimerval32) == 16);
 CTASSERT(sizeof(struct statfs32) == 256);
 CTASSERT(sizeof(struct rusage32) == 72);
 CTASSERT(sizeof(struct sigaltstack32) == 12);
 CTASSERT(sizeof(struct kevent32) == 20);
 CTASSERT(sizeof(struct iovec32) == 8);
 CTASSERT(sizeof(struct msghdr32) == 28);
 CTASSERT(sizeof(struct stat32) == 96);
 CTASSERT(sizeof(struct sigaction32) == 24);
 
 static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count);
 static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count);
 
 #if BYTE_ORDER == BIG_ENDIAN
 #define PAIR32TO64(type, name) ((name ## 2) | ((type)(name ## 1) << 32))
 #define RETVAL_HI 0	
 #define RETVAL_LO 1	
 #else
 #define PAIR32TO64(type, name) ((name ## 1) | ((type)(name ## 2) << 32))
 #define RETVAL_HI 1	
 #define RETVAL_LO 0	
 #endif
 
 /*
  * Fill the 32-bit rusage structure (*s32) from the native one (*s).
  * The two timeval members go through TV_CP and every other member through
  * CP, one field at a time.
  * NOTE(review): CP/TV_CP are defined outside this view; presumably plain
  * member-wise assignments, so values too wide for the 32-bit fields would
  * be truncated -- confirm.
  */
 void
 freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32)
 {
 
 	TV_CP(*s, *s32, ru_utime);
 	TV_CP(*s, *s32, ru_stime);
 	CP(*s, *s32, ru_maxrss);
 	CP(*s, *s32, ru_ixrss);
 	CP(*s, *s32, ru_idrss);
 	CP(*s, *s32, ru_isrss);
 	CP(*s, *s32, ru_minflt);
 	CP(*s, *s32, ru_majflt);
 	CP(*s, *s32, ru_nswap);
 	CP(*s, *s32, ru_inblock);
 	CP(*s, *s32, ru_oublock);
 	CP(*s, *s32, ru_msgsnd);
 	CP(*s, *s32, ru_msgrcv);
 	CP(*s, *s32, ru_nsignals);
 	CP(*s, *s32, ru_nvcsw);
 	CP(*s, *s32, ru_nivcsw);
 }
 
 int
 freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap)
 {
 	int error, status;
 	struct rusage32 ru32;
 	struct rusage ru, *rup;
 
 	if (uap->rusage != NULL)
 		rup = &ru;
 	else
 		rup = NULL;
 	error = kern_wait(td, uap->pid, &status, uap->options, rup);
 	if (error)
 		return (error);
 	if (uap->status != NULL)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->rusage != NULL && error == 0) {
 		freebsd32_rusage_out(&ru, &ru32);
 		error = copyout(&ru32, uap->rusage, sizeof(ru32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Convert a native struct statfs (*in) to the 32-bit layout (*out).
  *
  * (*in) is modified: statfs_scale_blocks() is applied to it first, with
  * INT32_MAX as the limit.  (*out) is zeroed before the fields are filled.
  * Counters copied with MIN(..., INT32_MAX) are capped at INT32_MAX, and
  * the mount name strings are truncated to the shorter of MNAMELEN and
  * FREEBSD4_MNAMELEN.
  */
 static void
 copy_statfs(struct statfs *in, struct statfs32 *out)
 {
 
 	statfs_scale_blocks(in, INT32_MAX);
 	bzero(out, sizeof(*out));
 	CP(*in, *out, f_bsize);
 	out->f_iosize = MIN(in->f_iosize, INT32_MAX);
 	CP(*in, *out, f_blocks);
 	CP(*in, *out, f_bfree);
 	CP(*in, *out, f_bavail);
 	out->f_files = MIN(in->f_files, INT32_MAX);
 	out->f_ffree = MIN(in->f_ffree, INT32_MAX);
 	CP(*in, *out, f_fsid);
 	CP(*in, *out, f_owner);
 	CP(*in, *out, f_type);
 	CP(*in, *out, f_flags);
 	out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX);
 	out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX);
 	strlcpy(out->f_fstypename,
 	      in->f_fstypename, MFSNAMELEN);
 	strlcpy(out->f_mntonname,
 	      in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN));
 	out->f_syncreads = MIN(in->f_syncreads, INT32_MAX);
 	out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX);
 	strlcpy(out->f_mntfromname,
 	      in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN));
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 getfsstat() for 32-bit processes.
  *
  * The user buffer size is converted to a number of entries, that many
  * native struct statfs records are requested from kern_getfsstat() into a
  * kernel buffer, and each returned record is converted with copy_statfs()
  * and copied out in turn.  uap->buf is advanced as entries are written.
  * The kernel buffer is freed whenever a non-zero size was requested.
  */
 int
 freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap)
 {
 	struct statfs32 stat32;
 	struct statfs *buf, *sp;
 	size_t count, size;
 	int error;
 
 	count = uap->bufsize / sizeof(struct statfs32);
 	size = count * sizeof(struct statfs);
 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
 	if (size > 0) {
 		sp = buf;
 		/* td_retval[0] holds the entry count set by kern_getfsstat() */
 		for (count = td->td_retval[0]; count > 0 && error == 0;
 		    count--) {
 			copy_statfs(sp, &stat32);
 			error = copyout(&stat32, uap->buf, sizeof(stat32));
 			sp++;
 			uap->buf++;
 		}
 		free(buf, M_TEMP);
 	}
 	return (error);
 }
 #endif
 
 int
 freebsd32_sigaltstack(struct thread *td,
 		      struct freebsd32_sigaltstack_args *uap)
 {
 	struct sigaltstack32 s32;
 	struct sigaltstack ss, oss, *ssp;
 	int error;
 
 	if (uap->ss != NULL) {
 		error = copyin(uap->ss, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		PTRIN_CP(s32, ss, ss_sp);
 		CP(s32, ss, ss_size);
 		CP(s32, ss, ss_flags);
 		ssp = &ss;
 	} else
 		ssp = NULL;
 	error = kern_sigaltstack(td, ssp, &oss);
 	if (error == 0 && uap->oss != NULL) {
 		PTROUT_CP(oss, s32, ss_sp);
 		CP(oss, s32, ss_size);
 		CP(oss, s32, ss_flags);
 		error = copyout(&s32, uap->oss, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * 32-bit flavour of exec_copyin_args(): argv and envv are arrays of 32-bit
  * user pointers, so each entry is fetched with copyin() and widened with
  * PTRIN() before the string it points to is copied.
  *
  * The argument buffer is laid out as: file name (if any), then the
  * argument strings, then the environment strings.  args->endp and
  * args->stringspace track the write position and the space left.
  *
  * Returns 0 on success.  Returns EFAULT when argv is NULL, and E2BIG when
  * a string does not fit in the remaining space.  After the buffer has been
  * allocated, any failure releases it with exec_free_args().
  */
 int
 freebsd32_exec_copyin_args(struct image_args *args, char *fname,
     enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv)
 {
 	u_int32_t *vec, uptr;
 	char *ustr;
 	size_t len;
 	int rc;
 
 	bzero(args, sizeof(*args));
 	if (argv == NULL)
 		return (EFAULT);
 
 	/*
 	 * Allocate demand-paged memory for the file name, argument, and
 	 * environment strings.
 	 */
 	rc = exec_alloc_args(args);
 	if (rc != 0)
 		return (rc);
 
 	/* The file name, when given, goes first in the buffer. */
 	len = 0;
 	if (fname != NULL) {
 		args->fname = args->buf;
 		if (segflg == UIO_SYSSPACE)
 			rc = copystr(fname, args->fname, PATH_MAX, &len);
 		else
 			rc = copyinstr(fname, args->fname, PATH_MAX, &len);
 		if (rc != 0)
 			goto fail;
 	}
 
 	args->begin_argv = args->buf + len;
 	args->endp = args->begin_argv;
 	args->stringspace = ARG_MAX;
 
 	/* Argument strings: stop at the first zero pointer. */
 	for (vec = argv; ; vec++) {
 		rc = copyin(vec, &uptr, sizeof(uptr));
 		if (rc)
 			goto fail;
 		if (uptr == 0)
 			break;
 		ustr = PTRIN(uptr);
 		rc = copyinstr(ustr, args->endp, args->stringspace, &len);
 		if (rc) {
 			if (rc == ENAMETOOLONG)
 				rc = E2BIG;
 			goto fail;
 		}
 		args->stringspace -= len;
 		args->endp += len;
 		args->argc++;
 	}
 
 	args->begin_envv = args->endp;
 
 	/* Environment strings follow in the same way; envv is optional. */
 	if (envv) {
 		for (vec = envv; ; vec++) {
 			rc = copyin(vec, &uptr, sizeof(uptr));
 			if (rc)
 				goto fail;
 			if (uptr == 0)
 				break;
 			ustr = PTRIN(uptr);
 			rc = copyinstr(ustr, args->endp, args->stringspace,
 			    &len);
 			if (rc) {
 				if (rc == ENAMETOOLONG)
 					rc = E2BIG;
 				goto fail;
 			}
 			args->stringspace -= len;
 			args->endp += len;
 			args->envc++;
 		}
 	}
 
 	return (0);
 
 fail:
 	exec_free_args(args);
 	return (rc);
 }
 
 int
 freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap)
 {
 	struct image_args eargs;
 	int error;
 
 	error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &eargs, NULL);
 	return (error);
 }
 
 int
 freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap)
 {
 	struct image_args eargs;
 	int error;
 
 	error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0) {
 		eargs.fd = uap->fd;
 		error = kern_execve(td, &eargs, NULL);
 	}
 	return (error);
 }
 
 #ifdef __ia64__
 /*
  * Emulate a mapping of the byte range [start, end) that cannot be
  * mapped directly.  Makes sure the containing range is present in the
  * process map with the requested protection, then either reads the
  * file contents into it (fd != -1) or zero-fills it (fd == -1).
  *
  * Returns 0 on success, EINVAL if the map could not be adjusted, or
  * the result of pread() for the file-backed case.
  */
 static int
 freebsd32_mmap_partial(struct thread *td, vm_offset_t start, vm_offset_t end,
 		       int prot, int fd, off_t pos)
 {
 	vm_map_t map;
 	vm_map_entry_t entry;
 	int rv;
 
 	map = &td->td_proc->p_vmspace->vm_map;
 	/* File data is copied in below, so the range must be writable. */
 	if (fd != -1)
 		prot |= VM_PROT_WRITE;
 
 	if (vm_map_lookup_entry(map, start, &entry)) {
 		/* An entry already covers start: widen its protection if needed. */
 		if ((entry->protection & prot) != prot) {
 			rv = vm_map_protect(map,
 					    trunc_page(start),
 					    round_page(end),
 					    entry->protection | prot,
 					    FALSE);
 			if (rv != KERN_SUCCESS)
 				return (EINVAL);
 		}
 	} else {
 		/* Nothing mapped at start: create one page at its page base. */
 		vm_offset_t addr = trunc_page(start);
 		rv = vm_map_find(map, 0, 0,
 				 &addr, PAGE_SIZE, FALSE, prot,
 				 VM_PROT_ALL, 0);
 		if (rv != KERN_SUCCESS)
 			return (EINVAL);
 	}
 
 	if (fd != -1) {
 		/* Fill the range with file data starting at offset pos. */
 		struct pread_args r;
 		r.fd = fd;
 		r.buf = (void *) start;
 		r.nbyte = end - start;
 		r.offset = pos;
 		return (pread(td, &r));
 	} else {
 		/* Anonymous request: zero the range one byte at a time. */
 		while (start < end) {
 			subyte((void *) start, 0);
 			start++;
 		}
 		return (0);
 	}
 }
 #endif
 
 /*
  * 32-bit mmap: unpack the 32-bit arguments (combining the split file
  * offset into an off_t) and call mmap().
  *
  * On ia64 a MAP_FIXED request is first adjusted: unaligned leading and
  * trailing pieces are handled by freebsd32_mmap_partial(), and a
  * file-backed request whose offset is not page aligned is emulated by
  * reading the file into a freshly created region.
  */
 int
 freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap)
 {
 	struct mmap_args ap;
 	vm_offset_t addr = (vm_offset_t) uap->addr;
 	vm_size_t len	 = uap->len;
 	int prot	 = uap->prot;
 	int flags	 = uap->flags;
 	int fd		 = uap->fd;
 	off_t pos	 = PAIR32TO64(off_t,uap->pos);
 #ifdef __ia64__
 	vm_size_t pageoff;
 	int error;
 
 	/*
 	 * Attempt to handle page size hassles.
 	 */
 	/* NOTE(review): pageoff is assigned here but not read in this function. */
 	pageoff = (pos & PAGE_MASK);
 	if (flags & MAP_FIXED) {
 		vm_offset_t start, end;
 		start = addr;
 		end = addr + len;
 
 		/*
 		 * Ragged leading edge: emulate up to the next page boundary.
 		 * NOTE(review): the value returned by freebsd32_mmap_partial()
 		 * is stored in error but not examined before continuing.
 		 */
 		if (start != trunc_page(start)) {
 			error = freebsd32_mmap_partial(td, start,
 						       round_page(start), prot,
 						       fd, pos);
 			if (fd != -1)
 				pos += round_page(start) - start;
 			start = round_page(start);
 		}
 		/* Ragged trailing edge: emulate from the last page boundary. */
 		if (end != round_page(end)) {
 			vm_offset_t t = trunc_page(end);
 			error = freebsd32_mmap_partial(td, t, end,
 						  prot, fd,
 						  pos + t - start);
 			end = trunc_page(end);
 		}
 		if (end > start && fd != -1 && (pos & PAGE_MASK)) {
 			/*
 			 * We can't map this region at all. The specified
 			 * address doesn't have the same alignment as the file
 			 * position. Fake the mapping by simply reading the
 			 * entire region into memory. First we need to make
 			 * sure the region exists.
 			 */
 			vm_map_t map;
 			struct pread_args r;
 			int rv;
 
 			prot |= VM_PROT_WRITE;
 			map = &td->td_proc->p_vmspace->vm_map;
 			/* Replace whatever is mapped there with a new region. */
 			rv = vm_map_remove(map, start, end);
 			if (rv != KERN_SUCCESS)
 				return (EINVAL);
 			rv = vm_map_find(map, 0, 0,
 					 &start, end - start, FALSE,
 					 prot, VM_PROT_ALL, 0);
 			if (rv != KERN_SUCCESS)
 				return (EINVAL);
 			r.fd = fd;
 			r.buf = (void *) start;
 			r.nbyte = end - start;
 			r.offset = pos;
 			error = pread(td, &r);
 			if (error)
 				return (error);
 
 			/* Report the originally requested address to the caller. */
 			td->td_retval[0] = addr;
 			return (0);
 		}
 		if (end == start) {
 			/*
 			 * After dealing with the ragged ends, there
 			 * might be none left.
 			 */
 			td->td_retval[0] = addr;
 			return (0);
 		}
 		/* Map only the page-aligned middle with the regular mmap(). */
 		addr = start;
 		len = end - start;
 	}
 #endif
 
 	ap.addr = (void *) addr;
 	ap.len = len;
 	ap.prot = prot;
 	ap.flags = flags;
 	ap.fd = fd;
 	ap.pos = pos;
 
 	return (mmap(td, &ap));
 }
 
 #ifdef COMPAT_FREEBSD6
 /*
  * FreeBSD 6 flavour of the 32-bit mmap: repack the arguments into the
  * current freebsd32_mmap_args layout and forward to freebsd32_mmap().
  */
 int
 freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap)
 {
 	struct freebsd32_mmap_args cur;
 
 	/* Mapping description. */
 	cur.addr = uap->addr;
 	cur.len = uap->len;
 	cur.prot = uap->prot;
 	cur.flags = uap->flags;
 
 	/* Backing object and the two halves of the file offset. */
 	cur.fd = uap->fd;
 	cur.pos1 = uap->pos1;
 	cur.pos2 = uap->pos2;
 
 	return (freebsd32_mmap(td, &cur));
 }
 #endif
 
 int
 freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap)
 {
 	struct itimerval itv, oitv, *itvp;	
 	struct itimerval32 i32;
 	int error;
 
 	if (uap->itv != NULL) {
 		error = copyin(uap->itv, &i32, sizeof(i32));
 		if (error)
 			return (error);
 		TV_CP(i32, itv, it_interval);
 		TV_CP(i32, itv, it_value);
 		itvp = &itv;
 	} else
 		itvp = NULL;
 	error = kern_setitimer(td, uap->which, itvp, &oitv);
 	if (error || uap->oitv == NULL)
 		return (error);
 	TV_CP(oitv, i32, it_interval);
 	TV_CP(oitv, i32, it_value);
 	return (copyout(&i32, uap->oitv, sizeof(i32)));
 }
 
 int
 freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap)
 {
 	struct itimerval itv;
 	struct itimerval32 i32;
 	int error;
 
 	error = kern_getitimer(td, uap->which, &itv);
 	if (error || uap->itv == NULL)
 		return (error);
 	TV_CP(itv, i32, it_interval);
 	TV_CP(itv, i32, it_value);
 	return (copyout(&i32, uap->itv, sizeof(i32)));
 }
 
 int
 freebsd32_select(struct thread *td, struct freebsd32_select_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval tv, *tvp;
 	int error;
 
 	if (uap->tv != NULL) {
 		error = copyin(uap->tv, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, tv, tv_sec);
 		CP(tv32, tv, tv_usec);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	/*
 	 * XXX Do pointers need PTRIN()?
 	 */
 	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    sizeof(int32_t) * 8));
 }
 
 int
 freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts;
 	struct timeval tv, *tvp;
 	sigset_t set, *uset;
 	int error;
 
 	if (uap->ts != NULL) {
 		error = copyin(uap->ts, &ts32, sizeof(ts32));
 		if (error != 0)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		TIMESPEC_TO_TIMEVAL(&tv, &ts);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	if (uap->sm != NULL) {
 		error = copyin(uap->sm, &set, sizeof(set));
 		if (error != 0)
 			return (error);
 		uset = &set;
 	} else
 		uset = NULL;
 	/*
 	 * XXX Do pointers need PTRIN()?
 	 */
 	error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    uset, sizeof(int32_t) * 8);
 	return (error);
 }
 
 /*
  * Copy 'count' items into the destination list pointed to by uap->eventlist.
  */
 static int
 freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count)
 {
 	struct freebsd32_kevent_args *uap;
 	struct kevent32	ks32[KQ_NEVENTS];
 	int i, error = 0;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = (struct freebsd32_kevent_args *)arg;
 
 	for (i = 0; i < count; i++) {
 		CP(kevp[i], ks32[i], ident);
 		CP(kevp[i], ks32[i], filter);
 		CP(kevp[i], ks32[i], flags);
 		CP(kevp[i], ks32[i], fflags);
 		CP(kevp[i], ks32[i], data);
 		PTROUT_CP(kevp[i], ks32[i], udata);
 	}
 	error = copyout(ks32, uap->eventlist, count * sizeof *ks32);
 	if (error == 0)
 		uap->eventlist += count;
 	return (error);
 }
 
 /*
  * Copy 'count' items from the list pointed to by uap->changelist.
  */
 static int
 freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count)
 {
 	struct freebsd32_kevent_args *uap;
 	struct kevent32	ks32[KQ_NEVENTS];
 	int i, error = 0;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = (struct freebsd32_kevent_args *)arg;
 
 	error = copyin(uap->changelist, ks32, count * sizeof *ks32);
 	if (error)
 		goto done;
 	uap->changelist += count;
 
 	for (i = 0; i < count; i++) {
 		CP(ks32[i], kevp[i], ident);
 		CP(ks32[i], kevp[i], filter);
 		CP(ks32[i], kevp[i], flags);
 		CP(ks32[i], kevp[i], fflags);
 		CP(ks32[i], kevp[i], data);
 		PTRIN_CP(ks32[i], kevp[i], udata);
 	}
 done:
 	return (error);
 }
 
 int
 freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	struct kevent_copyops k_ops = { uap,
 					freebsd32_kevent_copyout,
 					freebsd32_kevent_copyin};
 	int error;
 
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
 	    &k_ops, tsp);
 	return (error);
 }
 
 int
 freebsd32_gettimeofday(struct thread *td,
 		       struct freebsd32_gettimeofday_args *uap)
 {
 	struct timeval atv;
 	struct timeval32 atv32;
 	struct timezone rtz;
 	int error = 0;
 
 	if (uap->tp) {
 		microtime(&atv);
 		CP(atv, atv32, tv_sec);
 		CP(atv, atv32, tv_usec);
 		error = copyout(&atv32, uap->tp, sizeof (atv32));
 	}
 	if (error == 0 && uap->tzp != NULL) {
 		rtz.tz_minuteswest = tz_minuteswest;
 		rtz.tz_dsttime = tz_dsttime;
 		error = copyout(&rtz, uap->tzp, sizeof (rtz));
 	}
 	return (error);
 }
 
 int
 freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap)
 {
 	struct rusage32 s32;
 	struct rusage s;
 	int error;
 
 	error = kern_getrusage(td, uap->who, &s);
 	if (error)
 		return (error);
 	if (uap->rusage != NULL) {
 		freebsd32_rusage_out(&s, &s32);
 		error = copyout(&s32, uap->rusage, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * Build a struct uio from a user array of 'iovcnt' 32-bit iovecs.
  *
  * The uio and its iovec array are allocated as one M_IOV chunk, with
  * the iovec array placed directly behind the uio.  On success *uiop
  * points at the new uio; the caller releases it with free(..., M_IOV).
  *
  * Returns EINVAL when iovcnt exceeds UIO_MAXIOV or the summed lengths
  * would exceed INT_MAX, or the copyin() error; *uiop is NULL in every
  * failure case.
  */
 static int
 freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop)
 {
 	struct iovec32 src32;
 	struct iovec *vec;
 	struct uio *newuio;
 	u_int veclen;
 	int error, idx;
 
 	*uiop = NULL;
 	if (iovcnt > UIO_MAXIOV)
 		return (EINVAL);
 
 	veclen = iovcnt * sizeof(struct iovec);
 	newuio = malloc(veclen + sizeof *newuio, M_IOV, M_WAITOK);
 	vec = (struct iovec *)(newuio + 1);
 
 	/* First pass: fetch and widen every 32-bit iovec. */
 	for (idx = 0; idx < iovcnt; idx++) {
 		error = copyin(&iovp[idx], &src32, sizeof(struct iovec32));
 		if (error != 0) {
 			free(newuio, M_IOV);
 			return (error);
 		}
 		vec[idx].iov_base = PTRIN(src32.iov_base);
 		vec[idx].iov_len = src32.iov_len;
 	}
 
 	newuio->uio_iov = vec;
 	newuio->uio_iovcnt = iovcnt;
 	newuio->uio_segflg = UIO_USERSPACE;
 	newuio->uio_offset = -1;
 	newuio->uio_resid = 0;
 
 	/* Second pass: total the lengths, rejecting sums above INT_MAX. */
 	for (idx = 0; idx < iovcnt; idx++) {
 		if (vec[idx].iov_len > INT_MAX - newuio->uio_resid) {
 			free(newuio, M_IOV);
 			return (EINVAL);
 		}
 		newuio->uio_resid += vec[idx].iov_len;
 	}
 
 	*uiop = newuio;
 	return (0);
 }
 
 int
 freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_readv(td, uap->fd, auio);
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_writev(td, uap->fd, auio);
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset));
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset));
 	free(auio, M_IOV);
 	return (error);
 }
 
 /*
  * Copy in a user array of 'iovcnt' 32-bit iovecs and widen it into a
  * newly allocated (M_IOV) array of native iovecs stored in *iovp.
  *
  * 'error' is the value returned when iovcnt exceeds UIO_MAXIOV.  A
  * copyin() failure returns that call's error.  *iovp is NULL on every
  * failure; on success the caller frees it with free(..., M_IOV).
  */
 int
 freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp,
     int error)
 {
 	struct iovec32 src32;
 	struct iovec *vec;
 	u_int veclen;
 	int idx;
 
 	*iovp = NULL;
 	if (iovcnt > UIO_MAXIOV)
 		return (error);
 
 	veclen = iovcnt * sizeof(struct iovec);
 	vec = malloc(veclen, M_IOV, M_WAITOK);
 	for (idx = 0; idx < iovcnt; idx++) {
 		error = copyin(&iovp32[idx], &src32, sizeof(struct iovec32));
 		if (error != 0) {
 			free(vec, M_IOV);
 			return (error);
 		}
 		vec[idx].iov_base = PTRIN(src32.iov_base);
 		vec[idx].iov_len = src32.iov_len;
 	}
 
 	*iovp = vec;
 	return (0);
 }
 
 static int
 freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg)
 {
 	struct msghdr32 m32;
 	int error;
 
 	error = copyin(msg32, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	msg->msg_name = PTRIN(m32.msg_name);
 	msg->msg_namelen = m32.msg_namelen;
 	msg->msg_iov = PTRIN(m32.msg_iov);
 	msg->msg_iovlen = m32.msg_iovlen;
 	msg->msg_control = PTRIN(m32.msg_control);
 	msg->msg_controllen = m32.msg_controllen;
 	msg->msg_flags = m32.msg_flags;
 	return (0);
 }
 
 static int
 freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32)
 {
 	struct msghdr32 m32;
 	int error;
 
 	m32.msg_name = PTROUT(msg->msg_name);
 	m32.msg_namelen = msg->msg_namelen;
 	m32.msg_iov = PTROUT(msg->msg_iov);
 	m32.msg_iovlen = msg->msg_iovlen;
 	m32.msg_control = PTROUT(msg->msg_control);
 	m32.msg_controllen = msg->msg_controllen;
 	m32.msg_flags = msg->msg_flags;
 	error = copyout(&m32, msg32, sizeof(m32));
 	return (error);
 }
 
 /*
  * Control-message layout helpers for 32-bit processes.  Alignment is to
  * sizeof(int) rather than the native alignment used by CMSG_SPACE() and
  * CMSG_DATA().
  */
 /* Mask of the low bits that must be clear in an aligned value. */
 #define FREEBSD32_ALIGNBYTES	(sizeof(int) - 1)
 /* Round p up to the next multiple of sizeof(int). */
 #define FREEBSD32_ALIGN(p)	\
 	(((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES)
 /* Space taken by a control message with l bytes of data, both parts aligned. */
 #define	FREEBSD32_CMSG_SPACE(l)	\
 	(FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l))
 
 /* Address of the data that follows the aligned cmsghdr at cmsg. */
 #define	FREEBSD32_CMSG_DATA(cmsg)	((unsigned char *)(cmsg) + \
 				 FREEBSD32_ALIGN(sizeof(struct cmsghdr)))
 /*
  * Copy the control messages held in the mbuf chain 'control' out to the
  * user buffer msg->msg_control, re-laying them out with 32-bit
  * (FREEBSD32_ALIGN) alignment.
  *
  * On entry msg->msg_controllen is the size of the user buffer.  On
  * normal exit it is set to the number of bytes used, or to the original
  * size if the buffer was exhausted.  MSG_CTRUNC is set in
  * msg->msg_flags when a header or data part had to be cut short.
  *
  * Returns 0 or an error.  When a copyout() fails the function returns
  * immediately and msg->msg_controllen is left at 0.
  *
  * Note that cmsg_len of each message in the mbuf is rewritten in place.
  */
 static int
 freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control)
 {
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 	int error;
 	caddr_t ctlbuf;
 	int len, maxlen, copylen;
 	struct mbuf *m;
 	error = 0;
 
 	/* len counts down the remaining user space; maxlen keeps the total. */
 	len    = msg->msg_controllen;
 	maxlen = msg->msg_controllen;
 	msg->msg_controllen = 0;
 
 	m = control;
 	ctlbuf = msg->msg_control;
       
 	/* Walk every mbuf in the chain while user space remains. */
 	while (m && len > 0) {
 		cm = mtod(m, struct cmsghdr *);
 		clen = m->m_len;
 
 		/* Walk every control message stored in this mbuf. */
 		while (cm != NULL) {
 
 			/*
 			 * NOTE(review): this break leaves only the inner
 			 * loop; the outer loop moves on to the next mbuf and
 			 * a later copyout() may overwrite the EINVAL.
 			 */
 			if (sizeof(struct cmsghdr) > clen ||
 			    cm->cmsg_len > clen) {
 				error = EINVAL;
 				break;
 			}	
 
 			/* Locate the data using the native layout. */
 			data   = CMSG_DATA(cm);
 			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 			/* Adjust message length */
 			cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) +
 			    datalen;
 
 
 			/* Copy cmsghdr */
 			copylen = sizeof(struct cmsghdr);
 			if (len < copylen) {
 				msg->msg_flags |= MSG_CTRUNC;
 				copylen = len;
 			}
 
 			error = copyout(cm,ctlbuf,copylen);
 			if (error)
 				goto exit;
 
 			/* Advance by the 32-bit aligned size of what was written. */
 			ctlbuf += FREEBSD32_ALIGN(copylen);
 			len    -= FREEBSD32_ALIGN(copylen);
 
 			if (len <= 0)
 				break;
 
 			/* Copy data */
 			copylen = datalen;
 			if (len < copylen) {
 				msg->msg_flags |= MSG_CTRUNC;
 				copylen = len;
 			}
 
 			error = copyout(data,ctlbuf,copylen);
 			if (error)
 				goto exit;
 
 			ctlbuf += FREEBSD32_ALIGN(copylen);
 			len    -= FREEBSD32_ALIGN(copylen);
 
 			/* Step to the next message using the native spacing. */
 			if (CMSG_SPACE(datalen) < clen) {
 				clen -= CMSG_SPACE(datalen);
 				cm = (struct cmsghdr *)
 					((caddr_t)cm + CMSG_SPACE(datalen));
 			} else {
 				clen = 0;
 				cm = NULL;
 			}
 		}	
 		m = m->m_next;
 	}
 
 	/* Buffer exhausted: report it full; otherwise report the bytes used. */
 	msg->msg_controllen = (len <= 0) ? maxlen :  ctlbuf - (caddr_t)msg->msg_control;
 	
 exit:
 	return (error);
 
 }
 
 /*
  * 32-bit recvmsg.
  *
  * uap->s is the socket, uap->msg the user's 32-bit msghdr and
  * uap->flags the receive flags.  The header and its iovec array are
  * widened to native layout, kern_recvit() does the receive, and any
  * control data plus the updated header are converted back and copied
  * out.
  *
  * Returns 0 or an error from copyin()/copyout(), freebsd32_copyiniov()
  * (EMSGSIZE for too many iovecs), kern_recvit() or
  * freebsd32_copy_msg_out().
  */
 int
 freebsd32_recvmsg(struct thread *td, struct freebsd32_recvmsg_args *uap)
 {
 	struct msghdr msg;
 	struct msghdr32 m32;
 	struct iovec *uiov, *iov;
 	struct mbuf *control = NULL;
 	struct mbuf **controlp;
 	int error;
 
 	error = copyin(uap->msg, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	error = freebsd32_copyinmsghdr(uap->msg, &msg);
 	if (error)
 		return (error);
 	error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov,
 	    EMSGSIZE);
 	if (error)
 		return (error);
 	msg.msg_flags = uap->flags;
 	/* Remember the user's iovec pointer so it can be restored below. */
 	uiov = msg.msg_iov;
 	msg.msg_iov = iov;
 	/*
 	 * The header was fetched from user space twice (m32 above and
 	 * again inside freebsd32_copyinmsghdr()), and the user may have
 	 * changed it in between.  The iovec array was sized from
 	 * m32.msg_iovlen, so use that same count from here on.
 	 */
 	msg.msg_iovlen = m32.msg_iovlen;
 
 	/* Only ask for control data when the caller supplied a buffer. */
 	controlp = (msg.msg_control != NULL) ?  &control : NULL;
 	error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp);
 	if (error == 0) {
 		msg.msg_iov = uiov;
 
 		if (control != NULL)
 			error = freebsd32_copy_msg_out(&msg, control);
 		else
 			msg.msg_controllen = 0;
 
 		if (error == 0)
 			error = freebsd32_copyoutmsghdr(&msg, uap->msg);
 	}
 	free(iov, M_IOV);
 
 	if (control != NULL)
 		m_freem(control);
 
 	return (error);
 }
 
 
 static int
 freebsd32_convert_msg_in(struct mbuf **controlp)
 {
 	struct mbuf *control = *controlp;
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error;
 
 	error = 0;
 	*controlp = NULL;
 
 	while (cm != NULL) {
 		if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) {
 			error = EINVAL;
 			break;
 		}
 
 		data = FREEBSD32_CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 		*controlp = sbcreatecontrol(data, datalen, cm->cmsg_type,
 		    cm->cmsg_level);
 		controlp = &(*controlp)->m_next;
 
 		if (FREEBSD32_CMSG_SPACE(datalen) < clen) {
 			clen -= FREEBSD32_CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 				((caddr_t)cm + FREEBSD32_CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 	m_freem(control);
 	return (error);
 }
 
 
 /*
  * 32-bit sendmsg.
  *
  * Widens the user's 32-bit msghdr and iovec array, copies in the
  * optional destination address and control data (converting the latter
  * with freebsd32_convert_msg_in()), and sends with kern_sendit().
  *
  * Returns 0 or an error; EINVAL is returned when control data is
  * supplied but shorter than a cmsghdr.
  */
 int
 freebsd32_sendmsg(struct thread *td,
 		  struct freebsd32_sendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct msghdr32 m32;
 	struct iovec *iov;
 	struct mbuf *control = NULL;
 	struct sockaddr *to = NULL;
 	int error;
 
 	error = copyin(uap->msg, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	error = freebsd32_copyinmsghdr(uap->msg, &msg);
 	if (error)
 		return (error);
 	error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov,
 	    EMSGSIZE);
 	if (error)
 		return (error);
 	msg.msg_iov = iov;
 	/*
 	 * The header was fetched from user space twice (m32 above and
 	 * again inside freebsd32_copyinmsghdr()), and the user may have
 	 * changed it in between.  The iovec array was sized from
 	 * m32.msg_iovlen, so use that same count from here on.
 	 */
 	msg.msg_iovlen = m32.msg_iovlen;
 	if (msg.msg_name != NULL) {
 		error = getsockaddr(&to, msg.msg_name, msg.msg_namelen);
 		if (error) {
 			to = NULL;
 			goto out;
 		}
 		msg.msg_name = to;
 	}
 
 	if (msg.msg_control) {
 		if (msg.msg_controllen < sizeof(struct cmsghdr)) {
 			error = EINVAL;
 			goto out;
 		}
 
 		error = sockargs(&control, msg.msg_control,
 		    msg.msg_controllen, MT_CONTROL);
 		if (error)
 			goto out;
 
 		/* Re-lay the 32-bit control messages out in native form. */
 		error = freebsd32_convert_msg_in(&control);
 		if (error)
 			goto out;
 	}
 
 	error = kern_sendit(td, uap->s, &msg, uap->flags, control,
 	    UIO_USERSPACE);
 
 out:
 	free(iov, M_IOV);
 	if (to)
 		free(to, M_SONAME);
 	return (error);
 }
 
 int
 freebsd32_recvfrom(struct thread *td,
 		   struct freebsd32_recvfrom_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 	int error;
 
 	if (uap->fromlenaddr) {
 		error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen,
 		    sizeof(msg.msg_namelen));
 		if (error)
 			return (error);
 	} else {
 		msg.msg_namelen = 0;
 	}
 
 	msg.msg_name = PTRIN(uap->from);
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = PTRIN(uap->buf);
 	aiov.iov_len = uap->len;
 	msg.msg_control = NULL;
 	msg.msg_flags = uap->flags;
 	error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL);
 	if (error == 0 && uap->fromlenaddr)
 		error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr),
 		    sizeof (msg.msg_namelen));
 	return (error);
 }
 
 int
 freebsd32_settimeofday(struct thread *td,
 		       struct freebsd32_settimeofday_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval tv, *tvp;
 	struct timezone tz, *tzp;
 	int error;
 
 	if (uap->tv) {
 		error = copyin(uap->tv, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, tv, tv_sec);
 		CP(tv32, tv, tv_usec);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	if (uap->tzp) {
 		error = copyin(uap->tzp, &tz, sizeof(tz));
 		if (error)
 			return (error);
 		tzp = &tz;
 	} else
 		tzp = NULL;
 	return (kern_settimeofday(td, tvp, tzp));
 }
 
 int
 freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error;
 
 	if (uap->tptr != NULL) {
 		error = copyin(uap->tptr, s32, sizeof(s32));
 		if (error)
 			return (error);
 		CP(s32[0], s[0], tv_sec);
 		CP(s32[0], s[0], tv_usec);
 		CP(s32[1], s[1], tv_sec);
 		CP(s32[1], s[1], tv_usec);
 		sp = s;
 	} else
 		sp = NULL;
 	return (kern_utimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error;
 
 	if (uap->tptr != NULL) {
 		error = copyin(uap->tptr, s32, sizeof(s32));
 		if (error)
 			return (error);
 		CP(s32[0], s[0], tv_sec);
 		CP(s32[0], s[0], tv_usec);
 		CP(s32[1], s[1], tv_sec);
 		CP(s32[1], s[1], tv_usec);
 		sp = s;
 	} else
 		sp = NULL;
 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error;
 
 	if (uap->tptr != NULL) {
 		error = copyin(uap->tptr, s32, sizeof(s32));
 		if (error)
 			return (error);
 		CP(s32[0], s[0], tv_sec);
 		CP(s32[0], s[0], tv_usec);
 		CP(s32[1], s[1], tv_sec);
 		CP(s32[1], s[1], tv_usec);
 		sp = s;
 	} else
 		sp = NULL;
 	return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error;
 
 	if (uap->times != NULL) {
 		error = copyin(uap->times, s32, sizeof(s32));
 		if (error)
 			return (error);
 		CP(s32[0], s[0], tv_sec);
 		CP(s32[0], s[0], tv_usec);
 		CP(s32[1], s[1], tv_sec);
 		CP(s32[1], s[1], tv_usec);
 		sp = s;
 	} else
 		sp = NULL;
 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 		sp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval delta, olddelta, *deltap;
 	int error;
 
 	if (uap->delta) {
 		error = copyin(uap->delta, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, delta, tv_sec);
 		CP(tv32, delta, tv_usec);
 		deltap = &delta;
 	} else
 		deltap = NULL;
 	error = kern_adjtime(td, deltap, &olddelta);
 	if (uap->olddelta && error == 0) {
 		CP(olddelta, tv32, tv_sec);
 		CP(olddelta, tv32, tv_usec);
 		error = copyout(&tv32, uap->olddelta, sizeof(tv32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compat statfs for 32-bit processes: look up the file system
  * by path, convert with copy_statfs(), and copy the result out.
  */
 int
 freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap)
 {
 	struct statfs32 fs32;
 	struct statfs fs;
 	int error;
 
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &fs);
 	if (error == 0) {
 		copy_statfs(&fs, &fs32);
 		error = copyout(&fs32, uap->buf, sizeof(fs32));
 	}
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compat fstatfs for 32-bit processes: look up the file
  * system by descriptor, convert with copy_statfs(), and copy the result
  * out.
  */
 int
 freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap)
 {
 	struct statfs32 fs32;
 	struct statfs fs;
 	int error;
 
 	error = kern_fstatfs(td, uap->fd, &fs);
 	if (error == 0) {
 		copy_statfs(&fs, &fs32);
 		error = copyout(&fs32, uap->buf, sizeof(fs32));
 	}
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compat fhstatfs for 32-bit processes: copy in the file
  * handle, look up the file system with kern_fhstatfs(), convert with
  * copy_statfs(), and copy the result out.
  */
 int
 freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap)
 {
 	struct statfs32 fs32;
 	struct statfs fs;
 	fhandle_t handle;
 	int error;
 
 	error = copyin(uap->u_fhp, &handle, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 
 	error = kern_fhstatfs(td, handle, &fs);
 	if (error != 0)
 		return (error);
 
 	copy_statfs(&fs, &fs32);
 	return (copyout(&fs32, uap->buf, sizeof(fs32)));
 }
 #endif
 
 int
 freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap)
 {
 	struct pread_args ap;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.nbyte = uap->nbyte;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	return (pread(td, &ap));
 }
 
 int
 freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap)
 {
 	struct pwrite_args ap;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.nbyte = uap->nbyte;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	return (pwrite(td, &ap));
 }
 
 int
 freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap)
 {
 	int error;
 	struct lseek_args ap;
 	off_t pos;
 
 	ap.fd = uap->fd;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	ap.whence = uap->whence;
 	error = lseek(td, &ap);
 	/* Expand the quad return into two parts for eax and edx */
 	pos = *(off_t *)(td->td_retval);
 	td->td_retval[RETVAL_LO] = pos & 0xffffffff;	/* %eax */
 	td->td_retval[RETVAL_HI] = pos >> 32;		/* %edx */
 	return error;
 }
 
 int
 freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap)
 {
 	struct truncate_args ap;
 
 	ap.path = uap->path;
 	ap.length = PAIR32TO64(off_t,uap->length);
 	return (truncate(td, &ap));
 }
 
 int
 freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap)
 {
 	struct ftruncate_args ap;
 
 	ap.fd = uap->fd;
 	ap.length = PAIR32TO64(off_t,uap->length);
 	return (ftruncate(td, &ap));
 }
 
 int
 freebsd32_getdirentries(struct thread *td,
     struct freebsd32_getdirentries_args *uap)
 {
 	long base;
 	int32_t base32;
 	int error;
 
 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
 	if (error)
 		return (error);
 	if (uap->basep != NULL) {
 		base32 = base;
 		error = copyout(&base32, uap->basep, sizeof(int32_t));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD6
 /* versions with the 'int pad' argument */
 int
 freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap)
 {
 	struct pread_args ap;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.nbyte = uap->nbyte;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	return (pread(td, &ap));
 }
 
 int
 freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap)
 {
 	struct pwrite_args ap;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.nbyte = uap->nbyte;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	return (pwrite(td, &ap));
 }
 
 int
 freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap)
 {
 	int error;
 	struct lseek_args ap;
 	off_t pos;
 
 	ap.fd = uap->fd;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	ap.whence = uap->whence;
 	error = lseek(td, &ap);
 	/* Expand the quad return into two parts for eax and edx */
 	pos = *(off_t *)(td->td_retval);
 	td->td_retval[RETVAL_LO] = pos & 0xffffffff;	/* %eax */
 	td->td_retval[RETVAL_HI] = pos >> 32;		/* %edx */
 	return error;
 }
 
 int
 freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap)
 {
 	struct truncate_args ap;
 
 	ap.path = uap->path;
 	ap.length = PAIR32TO64(off_t,uap->length);
 	return (truncate(td, &ap));
 }
 
 int
 freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap)
 {
 	struct ftruncate_args ap;
 
 	ap.fd = uap->fd;
 	ap.length = PAIR32TO64(off_t,uap->length);
 	return (ftruncate(td, &ap));
 }
 #endif /* COMPAT_FREEBSD6 */
 
 /*
  * 32-bit layout of struct sf_hdtr as copied in by
  * freebsd32_do_sendfile(); the two pointer members are carried as
  * 32-bit integers and widened with PTRIN_CP().
  */
 struct sf_hdtr32 {
 	uint32_t headers;	/* 32-bit user pointer to the header iovecs */
 	int hdr_cnt;		/* number of header iovecs */
 	uint32_t trailers;	/* 32-bit user pointer to the trailer iovecs */
 	int trl_cnt;		/* number of trailer iovecs */
 };
 
 static int
 freebsd32_do_sendfile(struct thread *td,
     struct freebsd32_sendfile_args *uap, int compat)
 {
 	struct sendfile_args ap;
 	struct sf_hdtr32 hdtr32;
 	struct sf_hdtr hdtr;
 	struct uio *hdr_uio, *trl_uio;
 	struct iovec32 *iov32;
 	int error;
 
 	hdr_uio = trl_uio = NULL;
 
 	ap.fd = uap->fd;
 	ap.s = uap->s;
 	ap.offset = PAIR32TO64(off_t,uap->offset);
 	ap.nbytes = uap->nbytes;
 	ap.hdtr = (struct sf_hdtr *)uap->hdtr;		/* XXX not used */
 	ap.sbytes = uap->sbytes;
 	ap.flags = uap->flags;
 
 	if (uap->hdtr != NULL) {
 		error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32));
 		if (error)
 			goto out;
 		PTRIN_CP(hdtr32, hdtr, headers);
 		CP(hdtr32, hdtr, hdr_cnt);
 		PTRIN_CP(hdtr32, hdtr, trailers);
 		CP(hdtr32, hdtr, trl_cnt);
 
 		if (hdtr.headers != NULL) {
 			iov32 = PTRIN(hdtr32.headers);
 			error = freebsd32_copyinuio(iov32,
 			    hdtr32.hdr_cnt, &hdr_uio);
 			if (error)
 				goto out;
 		}
 		if (hdtr.trailers != NULL) {
 			iov32 = PTRIN(hdtr32.trailers);
 			error = freebsd32_copyinuio(iov32,
 			    hdtr32.trl_cnt, &trl_uio);
 			if (error)
 				goto out;
 		}
 	}
 
 	error = kern_sendfile(td, &ap, hdr_uio, trl_uio, compat);
 out:
 	if (hdr_uio)
 		free(hdr_uio, M_IOV);
 	if (trl_uio)
 		free(trl_uio, M_IOV);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compat sendfile for 32-bit processes: the argument
  * structure is reinterpreted as freebsd32_sendfile_args and handed to
  * freebsd32_do_sendfile() with compat set to 1.
  */
 int
 freebsd4_freebsd32_sendfile(struct thread *td,
     struct freebsd4_freebsd32_sendfile_args *uap)
 {
 	struct freebsd32_sendfile_args *cur;
 
 	cur = (struct freebsd32_sendfile_args *)uap;
 	return (freebsd32_do_sendfile(td, cur, 1));
 }
 #endif
 
 /*
  * 32-bit sendfile: freebsd32_do_sendfile() with compat set to 0.
  */
 int
 freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap)
 {
 	int error;
 
 	error = freebsd32_do_sendfile(td, uap, 0);
 	return (error);
 }
 
 /*
  * Convert a native struct stat into the 32-bit struct stat32, member by
  * member.  Only the members listed here are written.
  */
 static void
 copy_stat(struct stat *in, struct stat32 *out)
 {
 
 	/* File identity and type. */
 	CP(*in, *out, st_dev);
 	CP(*in, *out, st_ino);
 	CP(*in, *out, st_rdev);
 	CP(*in, *out, st_mode);
 	CP(*in, *out, st_nlink);
 	CP(*in, *out, st_gen);
 	CP(*in, *out, st_flags);
 
 	/* Ownership. */
 	CP(*in, *out, st_uid);
 	CP(*in, *out, st_gid);
 
 	/* Size information. */
 	CP(*in, *out, st_size);
 	CP(*in, *out, st_blocks);
 	CP(*in, *out, st_blksize);
 
 	/* Timestamps. */
 	TS_CP(*in, *out, st_atim);
 	TS_CP(*in, *out, st_mtim);
 	TS_CP(*in, *out, st_ctim);
 	TS_CP(*in, *out, st_birthtim);
 }
 
 int
 freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap)
 {
 	struct stat sb;
 	struct stat32 sb32;
 	int error;
 
 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
 	if (error)
 		return (error);
 	copy_stat(&sb, &sb32);
 	error = copyout(&sb32, uap->ub, sizeof (sb32));
 	return (error);
 }
 
 int
 freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap)
 {
 	struct stat ub;
 	struct stat32 ub32;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error)
 		return (error);
 	copy_stat(&ub, &ub32);
 	error = copyout(&ub32, uap->ub, sizeof(ub32));
 	return (error);
 }
 
 int
 freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap)
 {
 	struct stat ub;
 	struct stat32 ub32;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub);
 	if (error)
 		return (error);
 	copy_stat(&ub, &ub32);
 	error = copyout(&ub32, uap->buf, sizeof(ub32));
 	return (error);
 }
 
 int
 freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap)
 {
 	struct stat sb;
 	struct stat32 sb32;
 	int error;
 
 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
 	if (error)
 		return (error);
 	copy_stat(&sb, &sb32);
 	error = copyout(&sb32, uap->ub, sizeof (sb32));
 	return (error);
 }
 
 /*
  * MPSAFE
  */
 int
 freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap)
 {
 	int error, name[CTL_MAXNAME];
 	size_t j, oldlen;
 
 	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 		return (EINVAL);
  	error = copyin(uap->name, name, uap->namelen * sizeof(int));
  	if (error)
 		return (error);
 	if (uap->oldlenp)
 		oldlen = fuword32(uap->oldlenp);
 	else
 		oldlen = 0;
 	error = userland_sysctl(td, name, uap->namelen,
 		uap->old, &oldlen, 1,
 		uap->new, uap->newlen, &j, SCTL_MASK32);
 	if (error && error != ENOMEM)
 		return (error);
 	if (uap->oldlenp)
 		suword32(uap->oldlenp, j);
 	return (0);
 }
 
 /*
  * 32-bit jail: read the version word at the start of the user's
  * structure, copy in and widen the matching 32-bit layout, and call
  * kern_jail().  Versions other than 0 and 2 are rejected with EINVAL.
  */
 int
 freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
 {
 	struct jail32_v0 j32_v0;
 	struct jail32 j32;
 	struct jail j;
 	uint32_t version;
 	int error;
 
 	error = copyin(uap->jail, &version, sizeof(uint32_t));
 	if (error != 0)
 		return (error);
 
 	if (version == 0) {
 		/* FreeBSD single IPv4 jails. */
 		bzero(&j, sizeof(struct jail));
 		error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0));
 		if (error != 0)
 			return (error);
 		CP(j32_v0, j, version);
 		PTRIN_CP(j32_v0, j, path);
 		PTRIN_CP(j32_v0, j, hostname);
 		j.ip4s = j32_v0.ip_number;
 	} else if (version == 2) {	/* JAIL_API_VERSION */
 		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
 		error = copyin(uap->jail, &j32, sizeof(struct jail32));
 		if (error != 0)
 			return (error);
 		CP(j32, j, version);
 		PTRIN_CP(j32, j, path);
 		PTRIN_CP(j32, j, hostname);
 		PTRIN_CP(j32, j, jailname);
 		CP(j32, j, ip4s);
 		CP(j32, j, ip6s);
 		PTRIN_CP(j32, j, ip4);
 		PTRIN_CP(j32, j, ip6);
 	} else {
 		/*
 		 * Version 1 was used by multi-IPv4 jail implementations
 		 * that never made it into the official kernel, and
 		 * Sci-Fi jails are not supported, sorry.
 		 */
 		return (EINVAL);
 	}
 
 	return (kern_jail(td, &j));
 }
 
 /*
  * 32-bit compat jail_set(2).  Builds a native uio from the caller's
  * 32-bit iovec array, passes it to kern_jail_set() and releases it.
  * An odd iovec count is rejected with EINVAL.
  */
 int
 freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap)
 {
 	struct uio *params;
 	int rv;
 
 	/* Reject an odd number of iovecs. */
 	if ((uap->iovcnt & 1) != 0)
 		return (EINVAL);
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &params);
 	if (rv == 0) {
 		rv = kern_jail_set(td, params, uap->flags);
 		free(params, M_IOV);
 	}
 	return (rv);
 }
 
 int
 freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap)
 {
 	struct iovec32 iov32;
 	struct uio *auio;
 	int error, i;
 
 	/* Check that we have an even number of iovecs. */
 	if (uap->iovcnt & 1)
 		return (EINVAL);
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_jail_get(td, auio, uap->flags);
 	if (error == 0)
 		for (i = 0; i < uap->iovcnt; i++) {
 			PTROUT_CP(auio->uio_iov[i], iov32, iov_base);
 			CP(auio->uio_iov[i], iov32, iov_len);
 			error = copyout(&iov32, uap->iovp + i, sizeof(iov32));
 			if (error != 0)
 				break;
 		}
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->act) {
 		error = copyin(uap->act, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		CP(s32, sa, sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->sig, sap, &osa, 0);
 	if (error == 0 && uap->oact != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		CP(osa, s32, sa_mask);
 		error = copyout(&s32, uap->oact, sizeof(s32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_sigaction(struct thread *td,
 			     struct freebsd4_freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->act) {
 		error = copyin(uap->act, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		CP(s32, sa, sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4);
 	if (error == 0 && uap->oact != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		CP(osa, s32, sa_mask);
 		error = copyout(&s32, uap->oact, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_43
 /*
  * 32-bit image of the old-style sigaction structure exchanged with user
  * space by ofreebsd32_sigaction().
  */
 struct osigaction32 {
 	u_int32_t	sa_u;		/* handler, as a 32-bit user pointer */
 	osigset_t	sa_mask;	/* old-format signal mask */
 	int		sa_flags;
 };
 
 /* Signal numbers must be below this for the old-style signal calls here. */
 #define	ONSIG	32
 
 int
 ofreebsd32_sigaction(struct thread *td,
 			     struct ofreebsd32_sigaction_args *uap)
 {
 	struct osigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	if (uap->nsa) {
 		error = copyin(uap->nsa, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		OSIG2SIG(s32.sa_mask, sa.sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET);
 	if (error == 0 && uap->osa != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		SIG2OSIG(osa.sa_mask, s32.sa_mask);
 		error = copyout(&s32, uap->osa, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * Old-style 32-bit sigprocmask.  The mask is passed by value in
  * uap->mask; the previous mask is returned in td_retval[0] in old
  * format.  kern_sigprocmask() is called with SIGPROCMASK_OLD.
  */
 int
 ofreebsd32_sigprocmask(struct thread *td,
 			       struct ofreebsd32_sigprocmask_args *uap)
 {
 	sigset_t newmask, prevmask;
 	int rv;
 
 	OSIG2SIG(uap->mask, newmask);
 	rv = kern_sigprocmask(td, uap->how, &newmask, &prevmask,
 	    SIGPROCMASK_OLD);
 	SIG2OSIG(prevmask, td->td_retval[0]);
 	return (rv);
 }
 
 /*
  * Old-style 32-bit sigpending.  Under the process lock, takes the union
  * of the process and thread pending sets, then returns it in old format
  * through td_retval[0].  Always returns 0.
  */
 int
 ofreebsd32_sigpending(struct thread *td,
 			      struct ofreebsd32_sigpending_args *uap)
 {
 	struct proc *proc;
 	sigset_t pending;
 
 	proc = td->td_proc;
 	PROC_LOCK(proc);
 	pending = proc->p_siglist;
 	SIGSETOR(pending, td->td_siglist);
 	PROC_UNLOCK(proc);
 
 	SIG2OSIG(pending, td->td_retval[0]);
 	return (0);
 }
 
 /*
  * 32-bit image of the sigvec structure exchanged with user space by
  * ofreebsd32_sigvec().
  */
 struct sigvec32 {
 	u_int32_t	sv_handler;	/* handler, as a 32-bit user pointer */
 	int		sv_mask;	/* old-format signal mask */
 	int		sv_flags;
 };
 
 int
 ofreebsd32_sigvec(struct thread *td,
 			  struct ofreebsd32_sigvec_args *uap)
 {
 	struct sigvec32 vec;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	if (uap->nsv) {
 		error = copyin(uap->nsv, &vec, sizeof(vec));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(vec.sv_handler);
 		OSIG2SIG(vec.sv_mask, sa.sa_mask);
 		sa.sa_flags = vec.sv_flags;
 		sa.sa_flags ^= SA_RESTART;
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET);
 	if (error == 0 && uap->osv != NULL) {
 		vec.sv_handler = PTROUT(osa.sa_handler);
 		SIG2OSIG(osa.sa_mask, vec.sv_mask);
 		vec.sv_flags = osa.sa_flags;
 		vec.sv_flags &= ~SA_NOCLDWAIT;
 		vec.sv_flags ^= SA_RESTART;
 		error = copyout(&vec, uap->osv, sizeof(vec));
 	}
 	return (error);
 }
 
 /*
  * Old-style 32-bit sigblock.  Adds uap->mask to the blocked set with
  * SIG_BLOCK and returns the previous mask, in old format, through
  * td_retval[0].  The kern_sigprocmask() result is not checked.
  */
 int
 ofreebsd32_sigblock(struct thread *td,
 			    struct ofreebsd32_sigblock_args *uap)
 {
 	sigset_t toblock, prevmask;
 
 	OSIG2SIG(uap->mask, toblock);
 	kern_sigprocmask(td, SIG_BLOCK, &toblock, &prevmask, 0);
 	SIG2OSIG(prevmask, td->td_retval[0]);
 	return (0);
 }
 
 /*
  * Old-style 32-bit sigsetmask.  Replaces the blocked set with uap->mask
  * (SIG_SETMASK) and returns the previous mask, in old format, through
  * td_retval[0].  The kern_sigprocmask() result is not checked.
  */
 int
 ofreebsd32_sigsetmask(struct thread *td,
 			      struct ofreebsd32_sigsetmask_args *uap)
 {
 	sigset_t newmask, prevmask;
 
 	OSIG2SIG(uap->mask, newmask);
 	kern_sigprocmask(td, SIG_SETMASK, &newmask, &prevmask, 0);
 	SIG2OSIG(prevmask, td->td_retval[0]);
 	return (0);
 }
 
 /*
  * Old-style 32-bit sigsuspend.  Expands the old-format mask passed by
  * value and hands it to kern_sigsuspend().
  */
 int
 ofreebsd32_sigsuspend(struct thread *td,
 			      struct ofreebsd32_sigsuspend_args *uap)
 {
 	sigset_t waitmask;
 
 	OSIG2SIG(uap->mask, waitmask);
 	return (kern_sigsuspend(td, waitmask));
 }
 
 /*
  * 32-bit image of struct sigstack exchanged with user space by
  * ofreebsd32_sigstack().
  */
 struct sigstack32 {
 	u_int32_t	ss_sp;		/* stack pointer, as a 32-bit user pointer */
 	int		ss_onstack;
 };
 
 int
 ofreebsd32_sigstack(struct thread *td,
 			    struct ofreebsd32_sigstack_args *uap)
 {
 	struct sigstack32 s32;
 	struct sigstack nss, oss;
 	int error = 0, unss;
 
 	if (uap->nss != NULL) {
 		error = copyin(uap->nss, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		nss.ss_sp = PTRIN(s32.ss_sp);
 		CP(s32, nss, ss_onstack);
 		unss = 1;
 	} else {
 		unss = 0;
 	}
 	oss.ss_sp = td->td_sigstk.ss_sp;
 	oss.ss_onstack = sigonstack(cpu_getstack(td));
 	if (unss) {
 		td->td_sigstk.ss_sp = nss.ss_sp;
 		td->td_sigstk.ss_size = 0;
 		td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK);
 		td->td_pflags |= TDP_ALTSTACK;
 	}
 	if (uap->oss != NULL) {
 		s32.ss_sp = PTROUT(oss.ss_sp);
 		CP(oss, s32, ss_onstack);
 		error = copyout(&s32, uap->oss, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 /*
  * 32-bit compat nanosleep(2).
  *
  * Copies in the requested interval as a struct timespec32, widens it and
  * calls kern_nanosleep().  If the sleep returns an error and the caller
  * supplied uap->rmtp, the remaining time is narrowed and copied out; a
  * failure of that copyout replaces the sleep error.
  *
  * Returns 0 on success, EFAULT if uap->rmtp is not writable, or the
  * error from copyin()/kern_nanosleep()/copyout().
  */
 int
 freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap)
 {
 	struct timespec32 rmt32, rqt32;
 	struct timespec rmt, rqt;
 	int error;
 
 	error = copyin(uap->rqtp, &rqt32, sizeof(rqt32));
 	if (error)
 		return (error);
 
 	CP(rqt32, rqt, tv_sec);
 	CP(rqt32, rqt, tv_nsec);
 
 	/*
 	 * Validate the remainder buffer before sleeping.  Only a
 	 * struct timespec32 is ever written there, so check exactly
 	 * sizeof(rmt32) bytes; checking the native struct timespec size
 	 * could reject a valid 32-bit buffer that ends at a mapping
 	 * boundary.
 	 */
 	if (uap->rmtp &&
 	    !useracc((caddr_t)uap->rmtp, sizeof(rmt32), VM_PROT_WRITE))
 		return (EFAULT);
 	error = kern_nanosleep(td, &rqt, &rmt);
 	if (error && uap->rmtp) {
 		int error2;
 
 		CP(rmt, rmt32, tv_sec);
 		CP(rmt, rmt32, tv_nsec);
 
 		error2 = copyout(&rmt32, uap->rmtp, sizeof(rmt32));
 		if (error2)
 			error = error2;
 	}
 	return (error);
 }
 
 int
 freebsd32_clock_gettime(struct thread *td,
 			struct freebsd32_clock_gettime_args *uap)
 {
 	struct timespec	ats;
 	struct timespec32 ats32;
 	int error;
 
 	error = kern_clock_gettime(td, uap->clock_id, &ats);
 	if (error == 0) {
 		CP(ats, ats32, tv_sec);
 		CP(ats, ats32, tv_nsec);
 		error = copyout(&ats32, uap->tp, sizeof(ats32));
 	}
 	return (error);
 }
 
 int
 freebsd32_clock_settime(struct thread *td,
 			struct freebsd32_clock_settime_args *uap)
 {
 	struct timespec	ats;
 	struct timespec32 ats32;
 	int error;
 
 	error = copyin(uap->tp, &ats32, sizeof(ats32));
 	if (error)
 		return (error);
 	CP(ats32, ats, tv_sec);
 	CP(ats32, ats, tv_nsec);
 
 	return (kern_clock_settime(td, uap->clock_id, &ats));
 }
 
 int
 freebsd32_clock_getres(struct thread *td,
 		       struct freebsd32_clock_getres_args *uap)
 {
 	struct timespec	ts;
 	struct timespec32 ts32;
 	int error;
 
 	if (uap->tp == NULL)
 		return (0);
 	error = kern_clock_getres(td, uap->clock_id, &ts);
 	if (error == 0) {
 		CP(ts, ts32, tv_sec);
 		CP(ts, ts32, tv_nsec);
 		error = copyout(&ts32, uap->tp, sizeof(ts32));
 	}
 	return (error);
 }
 
 int
 freebsd32_thr_new(struct thread *td,
 		  struct freebsd32_thr_new_args *uap)
 {
 	struct thr_param32 param32;
 	struct thr_param param;
 	int error;
 
 	if (uap->param_size < 0 ||
 	    uap->param_size > sizeof(struct thr_param32))
 		return (EINVAL);
 	bzero(&param, sizeof(struct thr_param));
 	bzero(&param32, sizeof(struct thr_param32));
 	error = copyin(uap->param, &param32, uap->param_size);
 	if (error != 0)
 		return (error);
 	param.start_func = PTRIN(param32.start_func);
 	param.arg = PTRIN(param32.arg);
 	param.stack_base = PTRIN(param32.stack_base);
 	param.stack_size = param32.stack_size;
 	param.tls_base = PTRIN(param32.tls_base);
 	param.tls_size = param32.tls_size;
 	param.child_tid = PTRIN(param32.child_tid);
 	param.parent_tid = PTRIN(param32.parent_tid);
 	param.flags = param32.flags;
 	param.rtp = PTRIN(param32.rtp);
 	param.spare[0] = PTRIN(param32.spare[0]);
 	param.spare[1] = PTRIN(param32.spare[1]);
 	param.spare[2] = PTRIN(param32.spare[2]);
 
 	return (kern_thr_new(td, &param));
 }
 
 int
 freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	int error;
 
 	error = 0;
 	tsp = NULL;
 	if (uap->timeout != NULL) {
 		error = copyin((const void *)uap->timeout, (void *)&ts32,
 		    sizeof(struct timespec32));
 		if (error != 0)
 			return (error);
 		ts.tv_sec = ts32.tv_sec;
 		ts.tv_nsec = ts32.tv_nsec;
 		tsp = &ts;
 	}
 	return (kern_thr_suspend(td, tsp));
 }
 
 /*
  * Convert a native siginfo_t into its 32-bit image.  The destination is
  * zeroed first, so any field not assigned below reads as 0.  si_addr is
  * stored via a uintptr_t cast.
  */
 void
 siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst)
 {
 	bzero(dst, sizeof(*dst));
 
 	/* Signal identification. */
 	dst->si_signo = src->si_signo;
 	dst->si_errno = src->si_errno;
 	dst->si_code = src->si_code;
 
 	/* Sender and status. */
 	dst->si_pid = src->si_pid;
 	dst->si_uid = src->si_uid;
 	dst->si_status = src->si_status;
 
 	/* Address, value and timer data. */
 	dst->si_addr = (uintptr_t)src->si_addr;
 	dst->si_value.sigval_int = src->si_value.sival_int;
 	dst->si_timerid = src->si_timerid;
 	dst->si_overrun = src->si_overrun;
 }
 
 int
 freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts;
 	struct timespec *timeout;
 	sigset_t set;
 	ksiginfo_t ksi;
 	struct siginfo32 si32;
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		ts.tv_sec = ts32.tv_sec;
 		ts.tv_nsec = ts32.tv_nsec;
 		timeout = &ts;
 	} else
 		timeout = NULL;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, timeout);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
 		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
 	}
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 /*
  * MPSAFE
  */
 int
 freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap)
 {
 	ksiginfo_t ksi;
 	struct siginfo32 si32;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
 		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
 	}	
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 int
 freebsd32_cpuset_setid(struct thread *td,
     struct freebsd32_cpuset_setid_args *uap)
 {
 	struct cpuset_setid_args ap;
 
 	ap.which = uap->which;
 	ap.id = PAIR32TO64(id_t,uap->id);
 	ap.setid = uap->setid;
 
 	return (cpuset_setid(td, &ap));
 }
 
 int
 freebsd32_cpuset_getid(struct thread *td,
     struct freebsd32_cpuset_getid_args *uap)
 {
 	struct cpuset_getid_args ap;
 
 	ap.level = uap->level;
 	ap.which = uap->which;
 	ap.id = PAIR32TO64(id_t,uap->id);
 	ap.setid = uap->setid;
 
 	return (cpuset_getid(td, &ap));
 }
 
 int
 freebsd32_cpuset_getaffinity(struct thread *td,
     struct freebsd32_cpuset_getaffinity_args *uap)
 {
 	struct cpuset_getaffinity_args ap;
 
 	ap.level = uap->level;
 	ap.which = uap->which;
 	ap.id = PAIR32TO64(id_t,uap->id);
 	ap.cpusetsize = uap->cpusetsize;
 	ap.mask = uap->mask;
 
 	return (cpuset_getaffinity(td, &ap));
 }
 
 int
 freebsd32_cpuset_setaffinity(struct thread *td,
     struct freebsd32_cpuset_setaffinity_args *uap)
 {
 	struct cpuset_setaffinity_args ap;
 
 	ap.level = uap->level;
 	ap.which = uap->which;
 	ap.id = PAIR32TO64(id_t,uap->id);
 	ap.cpusetsize = uap->cpusetsize;
 	ap.mask = uap->mask;
 
 	return (cpuset_setaffinity(td, &ap));
 }
 
 /*
  * 32-bit compat nmount(2).  Audits the flags, clears MNT_ROOTFS from
  * them, validates the iovec count, builds a native uio from the 32-bit
  * iovec array and hands it to vfs_donmount().  The uio is freed before
  * returning vfs_donmount()'s result.
  */
 int
 freebsd32_nmount(struct thread *td,
     struct freebsd32_nmount_args /* {
     	struct iovec *iovp;
     	unsigned int iovcnt;
     	int flags;
     } */ *uap)
 {
 	struct uio *auio;
 	int error;
 
 	AUDIT_ARG_FFLAGS(uap->flags);
 
 	/*
 	 * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
 	 * userspace to set this flag, but we must filter it out if we want
 	 * MNT_UPDATE on the root file system to work.
-	 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
+	 * MNT_ROOTFS should only be set by the kernel when mounting its
+	 * root file system.
 	 */
 	uap->flags &= ~MNT_ROOTFS;
 
 	/*
 	 * check that we have an even number of iovec's
 	 * and that we have at least two options.
 	 * (The test below requires iovcnt >= 4; presumably each option
 	 * occupies a pair of iovecs -- confirm against vfs_donmount().)
 	 */
 	if ((uap->iovcnt & 1) || (uap->iovcnt < 4))
 		return (EINVAL);
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = vfs_donmount(td, uap->flags, auio);
 
 	/* auio came from freebsd32_copyinuio(); release it on every path. */
 	free(auio, M_IOV);
 	return error;
 }
 
 #if 0
 /*
  * Skeleton for a new 32-bit compat wrapper.  It sits inside "#if 0" and
  * is never compiled; the xxx/yyy/zzz names are placeholders.  It shows
  * the usual shape: copy in the 32-bit structure, translate, call the
  * kern_*() routine, translate back and copy out.
  */
 int
 freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap)
 {
 	struct yyy32 *p32, s32;
 	struct yyy *p = NULL, s;
 	struct xxx_arg ap;
 	int error;
 
 	if (uap->zzz) {
 		error = copyin(uap->zzz, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		/* translate in */
 		p = &s;
 	}
 	error = kern_xxx(td, p);
 	if (error)
 		return (error);
 	if (uap->zzz) {
 		/* translate out */
 		/* NOTE(review): p32 is never assigned in this skeleton. */
 		error = copyout(&s32, p32, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 /*
  * Install new_sysent into the 32-bit syscall table.
  *
  * If *offset is NO_SYSCALL, the first slot at index >= 1 whose handler
  * is lkmnosys is chosen and written back to *offset; ENFILE is returned
  * when no such slot exists.  Otherwise *offset must be inside the table
  * (EINVAL if not) and the slot must currently hold lkmnosys or
  * lkmressys (EEXIST if not).  The previous entry is saved in
  * *old_sysent before being overwritten.
  */
 int
 syscall32_register(int *offset, struct sysent *new_sysent,
     struct sysent *old_sysent)
 {
 	struct sysent *slot;
 	int idx;
 
 	if (*offset == NO_SYSCALL) {
 		idx = 1;
 		while (idx < SYS_MAXSYSCALL &&
 		    freebsd32_sysent[idx].sy_call != (sy_call_t *)lkmnosys)
 			idx++;
 		if (idx == SYS_MAXSYSCALL)
 			return (ENFILE);
 		*offset = idx;
 	} else {
 		if (*offset < 0 || *offset >= SYS_MAXSYSCALL)
 			return (EINVAL);
 		slot = &freebsd32_sysent[*offset];
 		if (slot->sy_call != (sy_call_t *)lkmnosys &&
 		    slot->sy_call != (sy_call_t *)lkmressys)
 			return (EEXIST);
 	}
 
 	slot = &freebsd32_sysent[*offset];
 	*old_sysent = *slot;
 	*slot = *new_sysent;
 	return 0;
 }
 
 /*
  * Restore the saved entry for slot *offset in the 32-bit syscall table.
  * A zero *offset is left untouched.  Always returns 0.
  */
 int
 syscall32_deregister(int *offset, struct sysent *old_sysent)
 {
 
 	if (*offset != 0)
 		freebsd32_sysent[*offset] = *old_sysent;
 	return 0;
 }
 
 /*
  * Module event handler for modules that provide a 32-bit system call.
  *
  * MOD_LOAD registers the syscall, records the chosen slot as the
  * module-specific value and then runs the chained handler, if any.
  * MOD_UNLOAD runs the chained handler first and deregisters only if it
  * succeeded.  Any other event goes to the chained handler, or yields
  * EOPNOTSUPP when there is none.
  */
 int
 syscall32_module_handler(struct module *mod, int what, void *arg)
 {
 	struct syscall_module_data *smd;
 	modspecific_t spec;
 	int rv;
 
 	smd = (struct syscall_module_data*)arg;
 
 	if (what == MOD_LOAD) {
 		rv = syscall32_register(smd->offset, smd->new_sysent,
 		    &smd->old_sysent);
 		if (rv != 0) {
 			/* Leave a mark so we know to safely unload below. */
 			smd->offset = NULL;
 			return rv;
 		}
 		spec.intval = *smd->offset;
 		MOD_XLOCK;
 		module_setspecific(mod, &spec);
 		MOD_XUNLOCK;
 		if (smd->chainevh != NULL)
 			rv = smd->chainevh(mod, what, smd->chainarg);
 		return (rv);
 	}
 
 	if (what == MOD_UNLOAD) {
 		/*
 		 * A NULL offset means MOD_LOAD failed; the chained handler
 		 * never saw the load event, so it must not see the unload.
 		 */
 		if (smd->offset == NULL)
 			return (0);
 		if (smd->chainevh != NULL) {
 			rv = smd->chainevh(mod, what, smd->chainarg);
 			if (rv != 0)
 				return (rv);
 		}
 		return (syscall32_deregister(smd->offset, &smd->old_sysent));
 	}
 
 	rv = EOPNOTSUPP;
 	if (smd->chainevh != NULL)
 		rv = smd->chainevh(mod, what, smd->chainarg);
 	return (rv);
 }
 
 /*
  * Register every entry of a syscall_helper_data array, which is
  * terminated by an element whose syscall_no is NO_SYSCALL.  On the first
  * failure, syscall32_helper_unregister() is run on the array and the
  * error is returned.
  */
 int
 syscall32_helper_register(struct syscall_helper_data *sd)
 {
 	struct syscall_helper_data *cur;
 	int rv;
 
 	cur = sd;
 	while (cur->syscall_no != NO_SYSCALL) {
 		rv = syscall32_register(&cur->syscall_no, &cur->new_sysent,
 		    &cur->old_sysent);
 		if (rv != 0) {
 			syscall32_helper_unregister(sd);
 			return (rv);
 		}
 		cur->registered = 1;
 		cur++;
 	}
 	return (0);
 }
 
 /*
  * Deregister entries of a syscall_helper_data array, starting at sd and
  * stopping at the first element whose 'registered' flag is clear.  Each
  * processed element has its flag reset.  Always returns 0.
  */
 int
 syscall32_helper_unregister(struct syscall_helper_data *sd)
 {
 	struct syscall_helper_data *cur;
 
 	cur = sd;
 	while (cur->registered != 0) {
 		syscall32_deregister(&cur->syscall_no, &cur->old_sysent);
 		cur->registered = 0;
 		cur++;
 	}
 	return (0);
 }
 
 /*
  * Lay out the initial user stack for a 32-bit image: signal trampoline,
  * executable path, stack-protector canary, page-size array, argument and
  * environment strings, and the argv/envp vector table made of 32-bit
  * pointers.  Also fills in the freebsd32_ps_strings structure.
  *
  * Returns the address of the vector table, which is also the initial
  * stack base.  Note that none of the copyout()/suword32() results in
  * this function are checked.
  */
 register_t *
 freebsd32_copyout_strings(struct image_params *imgp)
 {
 	int argc, envc, i;
 	u_int32_t *vectp;
 	char *stringp, *destp;
 	u_int32_t *stack_base;
 	struct freebsd32_ps_strings *arginfo;
 	char canary[sizeof(long) * 8];
 	int32_t pagesizes32[MAXPAGESIZES];
 	size_t execpath_len;
 	int szsigcode;
 
 	/*
 	 * Calculate string base and vector table pointers.
 	 * Also deal with signal trampoline code for this exec type.
 	 */
 	/* The path is only copied out when auxargs are present as well. */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
 		execpath_len = strlen(imgp->execpath) + 1;
 	else
 		execpath_len = 0;
 	arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent->
 	    sv_psstrings;
 	szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
 	/*
 	 * destp is where the argument/environment strings will start:
 	 * below ps_strings, the sigcode, the spare area, and the
 	 * pointer-size-rounded path, canary and pagesizes regions.
 	 */
 	destp =	(caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup(sizeof(canary), sizeof(char *)) -
 	    roundup(sizeof(pagesizes32), sizeof(char *)) -
 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
 
 	/*
 	 * install sigcode
 	 */
 	if (szsigcode)
 		copyout(imgp->proc->p_sysent->sv_sigcode,
 			((caddr_t)arginfo - szsigcode), szsigcode);
 
 	/*
 	 * Copy the image path for the rtld.
 	 */
 	if (execpath_len != 0) {
 		imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len;
 		copyout(imgp->execpath, (void *)imgp->execpathp,
 		    execpath_len);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	imgp->canary = (uintptr_t)arginfo - szsigcode - execpath_len -
 	    sizeof(canary);
 	copyout(canary, (void *)imgp->canary, sizeof(canary));
 	imgp->canarylen = sizeof(canary);
 
 	/*
 	 * Prepare the pagesizes array.
 	 */
 	/* Each native page size is truncated to 32 bits for the image. */
 	for (i = 0; i < MAXPAGESIZES; i++)
 		pagesizes32[i] = (uint32_t)pagesizes[i];
 	imgp->pagesizes = (uintptr_t)arginfo - szsigcode - execpath_len -
 	    roundup(sizeof(canary), sizeof(char *)) - sizeof(pagesizes32);
 	copyout(pagesizes32, (void *)imgp->pagesizes, sizeof(pagesizes32));
 	imgp->pagesizeslen = sizeof(pagesizes32);
 
 	/*
 	 * If we have a valid auxargs ptr, prepare some room
 	 * on the stack.
 	 */
 	if (imgp->auxargs) {
 		/*
 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
 		 * lower compatibility.
 		 */
 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
 			: (AT_COUNT * 2);
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets,and imgp->auxarg_size is room
 		 * for argument of Runtime loader.
 		 *
 		 * NOTE(review): execpath_len is a byte count, yet it is
 		 * added to the slot count that is scaled by
 		 * sizeof(u_int32_t) below -- confirm that is intended.
 		 */
 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
 		    imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) *
 		    sizeof(u_int32_t));
 	} else
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets
 		 */
 		vectp = (u_int32_t *)
 			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	stack_base = vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
 	suword32(&arginfo->ps_nargvstr, argc);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	/*
 	 * stringp walks the kernel copy of the strings while destp tracks
 	 * the matching user address; both advance past each NUL.
 	 */
 	for (; argc > 0; --argc) {
 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	suword32(vectp++, 0);
 
 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
 	suword32(&arginfo->ps_nenvstr, envc);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* end of vector table is a null pointer */
 	suword32(vectp, 0);
 
 	return ((register_t *)stack_base);
 }
 
Index: projects/binutils-2.17/sys/compat/ia32/ia32_signal.h
===================================================================
--- projects/binutils-2.17/sys/compat/ia32/ia32_signal.h	(revision 215829)
+++ projects/binutils-2.17/sys/compat/ia32/ia32_signal.h	(revision 215830)
@@ -1,190 +1,195 @@
 /*-
  * Copyright (c) 1999 Marcel Moolenaar
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer 
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
+#ifndef	_COMPAT_IA32_IA32_SIGNAL_H
+#define	_COMPAT_IA32_IA32_SIGNAL_H
+
 /*
  * Machine context as laid out for 32-bit (ia32) processes.  All members
  * are fixed-width 32-bit words; the floating-point save area is aligned
  * to 16 bytes.
  */
 struct ia32_mcontext {
 	u_int32_t	mc_onstack;		/* XXX - sigcontext compat. */
 	u_int32_t	mc_gs;			/* machine state (struct trapframe) */
 	u_int32_t	mc_fs;
 	u_int32_t	mc_es;
 	u_int32_t	mc_ds;
 	u_int32_t	mc_edi;
 	u_int32_t	mc_esi;
 	u_int32_t	mc_ebp;
 	u_int32_t	mc_isp;
 	u_int32_t	mc_ebx;
 	u_int32_t	mc_edx;
 	u_int32_t	mc_ecx;
 	u_int32_t	mc_eax;
 	u_int32_t	mc_trapno;
 	u_int32_t	mc_err;
 	u_int32_t	mc_eip;
 	u_int32_t	mc_cs;
 	u_int32_t	mc_eflags;
 	u_int32_t	mc_esp;
 	u_int32_t	mc_ss;
 	u_int32_t	mc_len;			/* sizeof(struct ia32_mcontext) */
 	/* We use the same values for fpformat and ownedfp */
 	u_int32_t	mc_fpformat;
 	u_int32_t	mc_ownedfp;
 	u_int32_t	mc_spare1[1];		/* align next field to 16 bytes */
 	/*
 	 * See <i386/include/npx.h> for the internals of mc_fpstate[].
 	 */
 	u_int32_t	mc_fpstate[128] __aligned(16);
 	u_int32_t	mc_fsbase;
 	u_int32_t	mc_gsbase;
 	u_int32_t	mc_spare2[6];		/* spare words at the end of the layout */
 };
 
 /*
  * User context for 32-bit processes: signal mask, machine context,
  * a 32-bit link word, the alternate-stack description and flags.
  */
 struct ia32_ucontext {
 	sigset_t		uc_sigmask;
 	struct ia32_mcontext	uc_mcontext;
 	u_int32_t		uc_link;	/* 32-bit word; presumably a user pointer */
 	struct sigaltstack32	uc_stack;
 	u_int32_t		uc_flags;
 	u_int32_t		__spare__[4];
 };
 
 
 #if defined(COMPAT_FREEBSD4)
 /*
  * Machine context layout used for FreeBSD 4 compatibility
  * (COMPAT_FREEBSD4).  Unlike struct ia32_mcontext it carries a 28-word
  * mc_fpregs[] array instead of the mc_len/mc_fpformat/mc_fpstate group.
  */
 struct ia32_mcontext4 {
 	u_int32_t	mc_onstack;		/* XXX - sigcontext compat. */
 	u_int32_t	mc_gs;			/* machine state (struct trapframe) */
 	u_int32_t	mc_fs;
 	u_int32_t	mc_es;
 	u_int32_t	mc_ds;
 	u_int32_t	mc_edi;
 	u_int32_t	mc_esi;
 	u_int32_t	mc_ebp;
 	u_int32_t	mc_isp;
 	u_int32_t	mc_ebx;
 	u_int32_t	mc_edx;
 	u_int32_t	mc_ecx;
 	u_int32_t	mc_eax;
 	u_int32_t	mc_trapno;
 	u_int32_t	mc_err;
 	u_int32_t	mc_eip;
 	u_int32_t	mc_cs;
 	u_int32_t	mc_eflags;
 	u_int32_t	mc_esp;	
 	u_int32_t	mc_ss;
 	u_int32_t	mc_fpregs[28];
 	u_int32_t	__spare__[17];
 };
 
 /*
  * User context layout used for FreeBSD 4 compatibility.  It embeds
  * struct ia32_mcontext4 and has no uc_flags member.
  */
 struct ia32_ucontext4 {
 	sigset_t		uc_sigmask;
 	struct ia32_mcontext4	uc_mcontext;
 	u_int32_t		uc_link;
 	struct sigaltstack32	uc_stack;
 	u_int32_t		__spare__[8];
 };
 #endif
 
 #ifdef COMPAT_FREEBSD3
 /*
  * Signal context layout used for FreeBSD 3 compatibility
  * (COMPAT_FREEBSD3).  The signal mask is a single 32-bit word.
  */
 struct ia32_sigcontext3 {
 	u_int32_t	sc_onstack;
 	u_int32_t	sc_mask;
 	u_int32_t	sc_esp;	
 	u_int32_t	sc_ebp;
 	u_int32_t	sc_isp;
 	u_int32_t	sc_eip;
 	u_int32_t	sc_eflags;
 	u_int32_t	sc_es;
 	u_int32_t	sc_ds;
 	u_int32_t	sc_cs;
 	u_int32_t	sc_ss;
 	u_int32_t	sc_edi;
 	u_int32_t	sc_esi;
 	u_int32_t	sc_ebx;
 	u_int32_t	sc_edx;
 	u_int32_t	sc_ecx;
 	u_int32_t	sc_eax;
 	u_int32_t	sc_gs;
 	u_int32_t	sc_fs;
 	u_int32_t	sc_trapno;
 	u_int32_t	sc_err;
 };
 #endif
 
 /*
  * Signal frames, arguments passed to application signal handlers.
  */
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Signal frame for FreeBSD 4 compatible 32-bit processes: five 32-bit
  * argument words followed by the FreeBSD 4 ucontext and the siginfo.
  */
 struct ia32_sigframe4 {
 	u_int32_t		sf_signum;
 	u_int32_t		sf_siginfo;	/* code or pointer to sf_si */
 	u_int32_t		sf_ucontext;	/* points to sf_uc */
 	u_int32_t		sf_addr;	/* undocumented 4th arg */
 	u_int32_t		sf_ah;		/* action/handler pointer */
 	struct ia32_ucontext4	sf_uc;		/* = *sf_ucontext */
 	struct siginfo32	sf_si;		/* = *sf_siginfo (SA_SIGINFO case) */
 };
 #endif
 
 /*
  * Signal frame for current 32-bit processes: five 32-bit argument words
  * followed by the ucontext and the siginfo they refer to.
  */
 struct ia32_sigframe {
 	u_int32_t		sf_signum;
 	u_int32_t		sf_siginfo;	/* code or pointer to sf_si */
 	u_int32_t		sf_ucontext;	/* points to sf_uc */
 	u_int32_t		sf_addr;	/* undocumented 4th arg */
 	u_int32_t		sf_ah;		/* action/handler pointer */
 	/* Beware, hole due to ucontext being 16 byte aligned! */
 	struct ia32_ucontext	sf_uc;		/* = *sf_ucontext */
 	struct siginfo32	sf_si;		/* = *sf_siginfo (SA_SIGINFO case) */
 };
 
 #ifdef COMPAT_FREEBSD3
 /*
  * FreeBSD 3 compatible siginfo: the old signal context followed by the
  * signal number, code and 32-bit value.
  */
 struct ia32_siginfo3 {
 	struct ia32_sigcontext3 si_sc;
 	int			si_signo;
 	int			si_code;
 	union sigval32		si_value;
 };
 /*
  * Signal frame for FreeBSD 3 compatible 32-bit processes: five argument
  * words followed by the embedded FreeBSD 3 siginfo.
  */
 struct ia32_sigframe3 {
 	int			sf_signum;
 	u_int32_t		sf_arg2;	/* int or siginfo_t */
 	u_int32_t		sf_scp;
 	u_int32_t		sf_addr;
 	u_int32_t		sf_ah;		/* action/handler pointer */
 	struct ia32_siginfo3	sf_siginfo;
 };
 #endif
 
 /* Forward declarations so the prototypes below need no extra headers. */
 struct ksiginfo;
 struct image_params;
 /* Signal trampoline code blobs and their sizes, defined elsewhere. */
 extern char ia32_sigcode[];
 extern char freebsd4_ia32_sigcode[];
 extern int sz_ia32_sigcode;
 extern int sz_freebsd4_ia32_sigcode;
 /* Signal delivery and register setup entry points for 32-bit images. */
 extern void ia32_sendsig(sig_t, struct ksiginfo *, sigset_t *);
 extern void ia32_setregs(struct thread *td, struct image_params *imgp,
     u_long stack);
+
+#endif
Index: projects/binutils-2.17/sys/compat/ia32/ia32_util.h
===================================================================
--- projects/binutils-2.17/sys/compat/ia32/ia32_util.h	(revision 215829)
+++ projects/binutils-2.17/sys/compat/ia32/ia32_util.h	(revision 215830)
@@ -1,53 +1,58 @@
 /*-
  * Copyright (c) 1998-1999 Andrew Gallatin
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software withough specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
+#ifndef	_COMPAT_IA32_IA32_UTIL_H
+#define	_COMPAT_IA32_IA32_UTIL_H
+
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 
 
 #include <sys/exec.h>
 #include <sys/sysent.h>
 #include <sys/cdefs.h>
 
 /*
  * Top of the user stack for 32-bit processes: one IA32 page below 4GB,
  * or two pages below on ia64.  IA32_PAGE_SIZE is defined further down;
  * that is fine because the macro is only expanded at its point of use.
  */
 #ifdef __ia64__
 #define FREEBSD32_USRSTACK	((1ul << 32) - IA32_PAGE_SIZE * 2)
 #else
 #define FREEBSD32_USRSTACK	((1ul << 32) - IA32_PAGE_SIZE)
 #endif
 
 /* Page size and resource limits for 32-bit processes. */
 #define	IA32_PAGE_SIZE	4096
 #define	IA32_MAXDSIZ	(512*1024*1024)		/* 512MB */
 #define	IA32_MAXSSIZ	(64*1024*1024)		/* 64MB */
 #define IA32_MAXVMEM	0			/* Unlimited */
 
 /* System call argument fetch and return-value hooks for 32-bit images. */
 struct syscall_args;
 int ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa);
 void ia32_set_syscall_retval(struct thread *, int);
+
+#endif
Index: projects/binutils-2.17/sys/compat/ndis/ntoskrnl_var.h
===================================================================
--- projects/binutils-2.17/sys/compat/ndis/ntoskrnl_var.h	(revision 215829)
+++ projects/binutils-2.17/sys/compat/ndis/ntoskrnl_var.h	(revision 215830)
@@ -1,1527 +1,1528 @@
 /*-
  * Copyright (c) 2003
  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Bill Paul.
  * 4. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NTOSKRNL_VAR_H_
 #define	_NTOSKRNL_VAR_H_
 
 #define	MTX_NTOSKRNL_SPIN_LOCK "NDIS thread lock"
 
 /*
  * us_buf is really a wchar_t *, but it's inconvenient to include
  * all the necessary header goop needed to define it, and it's a
  * pointer anyway, so for now, just make it a uint16_t *.
  */
 struct unicode_string {
 	uint16_t		us_len;
 	uint16_t		us_maxlen;
 	uint16_t		*us_buf;
 };
 
 typedef struct unicode_string unicode_string;
 
 struct ansi_string {
 	uint16_t		as_len;
 	uint16_t		as_maxlen;
 	char			*as_buf;
 };
 
 typedef struct ansi_string ansi_string;
 
 /*
  * Windows memory descriptor list. In Windows, it's possible for
  * buffers to be passed between user and kernel contexts without
  * copying. Buffers may also be allocated in either paged or
  * non-paged memory regions. An MDL describes the pages of memory
  * used to contain a particular buffer. Note that a single MDL
  * may describe a buffer that spans multiple pages. An array of
  * page addresses appears immediately after the MDL structure itself.
  * MDLs are therefore implicitly variably sized, even though they
  * don't look it.
  *
  * Note that in FreeBSD, we can take many shortcuts in the way
  * we handle MDLs because:
  *
  * - We are only concerned with pages in kernel context. This means
  *   we will only ever use the kernel's memory map, and remapping
  *   of buffers is never needed.
  *
  * - Kernel pages can never be paged out, so we don't have to worry
  *   about whether or not a page is actually mapped before going to
  *   touch it.
  */
 
 struct mdl {
 	struct mdl		*mdl_next;
 	uint16_t		mdl_size;
 	uint16_t		mdl_flags;
 	void			*mdl_process;
 	void			*mdl_mappedsystemva;
 	void			*mdl_startva;
 	uint32_t		mdl_bytecount;
 	uint32_t		mdl_byteoffset;
 };
 
 typedef struct mdl mdl, ndis_buffer;
 
 /* MDL flags */
 
 #define	MDL_MAPPED_TO_SYSTEM_VA		0x0001
 #define	MDL_PAGES_LOCKED		0x0002
 #define	MDL_SOURCE_IS_NONPAGED_POOL	0x0004
 #define	MDL_ALLOCATED_FIXED_SIZE	0x0008
 #define	MDL_PARTIAL			0x0010
 #define	MDL_PARTIAL_HAS_BEEN_MAPPED	0x0020
 #define	MDL_IO_PAGE_READ		0x0040
 #define	MDL_WRITE_OPERATION		0x0080
 #define	MDL_PARENT_MAPPED_SYSTEM_VA	0x0100
 #define	MDL_FREE_EXTRA_PTES		0x0200
 #define	MDL_IO_SPACE			0x0800
 #define	MDL_NETWORK_HEADER		0x1000
 #define	MDL_MAPPING_CAN_FAIL		0x2000
 #define	MDL_ALLOCATED_MUST_SUCCEED	0x4000
 #define	MDL_ZONE_ALLOCED		0x8000	/* BSD private */
 
 #define	MDL_ZONE_PAGES 16
 #define	MDL_ZONE_SIZE (sizeof(mdl) + (sizeof(vm_offset_t) * MDL_ZONE_PAGES))
 
 /* Note: assumes x86 page size of 4K. */
 
 #ifndef PAGE_SHIFT
 #if PAGE_SIZE == 4096
 #define	PAGE_SHIFT	12
 #elif PAGE_SIZE == 8192
 #define	PAGE_SHIFT	13
 #else
 #error PAGE_SHIFT undefined!
 #endif
 #endif
 
 #define	SPAN_PAGES(ptr, len)					\
 	((uint32_t)((((uintptr_t)(ptr) & (PAGE_SIZE - 1)) +	\
 	(len) + (PAGE_SIZE - 1)) >> PAGE_SHIFT))
 
 #define	PAGE_ALIGN(ptr)						\
 	((void *)((uintptr_t)(ptr) & ~(PAGE_SIZE - 1)))
 
 #define	BYTE_OFFSET(ptr)					\
 	((uint32_t)((uintptr_t)(ptr) & (PAGE_SIZE - 1)))
 
 /*
  * Address of the first vm_offset_t slot located directly behind the
  * MDL header `m' points at.  Both the argument and the whole expansion
  * are parenthesized so that conditional arguments and postfix use such
  * as MDL_PAGES(m)[i] bind as expected.
  */
 #define	MDL_PAGES(m)	((vm_offset_t *)((m) + 1))
 
 /*
  * Fill in MDL `b' for the `len'-byte buffer starting at `baseva':
  * clear the link and flags, record the page-aligned start address,
  * the offset of the buffer within its first page and the byte count,
  * and set mdl_size to the header size plus one vm_offset_t per page
  * the buffer spans.
  *
  * The body is wrapped in do { } while (0) so that the macro behaves as
  * one statement (for instance as the body of an unbraced if/else);
  * invoke it with a trailing semicolon.  `b', `baseva' and `len' are
  * evaluated more than once.
  */
 #define	MmInitializeMdl(b, baseva, len)					\
 	do {								\
 		(b)->mdl_next = NULL;					\
 		(b)->mdl_size = (uint16_t)(sizeof(mdl) +		\
 		    (sizeof(vm_offset_t) *				\
 		    SPAN_PAGES((baseva), (len))));			\
 		(b)->mdl_flags = 0;					\
 		(b)->mdl_startva = (void *)PAGE_ALIGN((baseva));	\
 		(b)->mdl_byteoffset = BYTE_OFFSET((baseva));		\
 		(b)->mdl_bytecount = (uint32_t)(len);			\
 	} while (0)
 
 #define	MmGetMdlByteOffset(mdl)		((mdl)->mdl_byteoffset)
 #define	MmGetMdlByteCount(mdl)		((mdl)->mdl_bytecount)
 #define	MmGetMdlVirtualAddress(mdl)					\
 	((void *)((char *)((mdl)->mdl_startva) + (mdl)->mdl_byteoffset))
 #define	MmGetMdlStartVa(mdl)		((mdl)->mdl_startva)
 #define	MmGetMdlPfnArray(mdl)		MDL_PAGES(mdl)
 
 #define	WDM_MAJOR		1
 #define	WDM_MINOR_WIN98		0x00
 #define	WDM_MINOR_WINME		0x05
 #define	WDM_MINOR_WIN2000	0x10
 #define	WDM_MINOR_WINXP		0x20
 #define	WDM_MINOR_WIN2003	0x30
 
 enum nt_caching_type {
 	MmNonCached			= 0,
 	MmCached			= 1,
 	MmWriteCombined			= 2,
 	MmHardwareCoherentCached	= 3,
 	MmNonCachedUnordered		= 4,
 	MmUSWCCached			= 5,
 	MmMaximumCacheType		= 6
 };
 
 /*-
  * The ndis_kspin_lock type is called KSPIN_LOCK in MS-Windows.
  * According to the Windows DDK header files, KSPIN_LOCK is defined like this:
  *	typedef ULONG_PTR KSPIN_LOCK;
  *
  * From basetsd.h (SDK, Feb. 2003):
  *	typedef [public] unsigned __int3264 ULONG_PTR, *PULONG_PTR;
  *	typedef unsigned __int64 ULONG_PTR, *PULONG_PTR;
  *	typedef _W64 unsigned long ULONG_PTR, *PULONG_PTR;
  *
  * The keyword __int3264 specifies an integral type that has the following
  * properties:
  *	+ It is 32-bit on 32-bit platforms
  *	+ It is 64-bit on 64-bit platforms
  *	+ It is 32-bit on the wire for backward compatibility.
  *	  It gets truncated on the sending side and extended appropriately
  *	  (signed or unsigned) on the receiving side.
  *
  * Thus register_t seems the proper mapping onto FreeBSD for spin locks.
  */
 
 typedef register_t kspin_lock;
 
 struct slist_entry {
 	struct slist_entry	*sl_next;
 };
 
 typedef struct slist_entry slist_entry;
 
 union slist_header {
 	uint64_t		slh_align;
 	struct {
 		struct slist_entry	*slh_next;
 		uint16_t		slh_depth;
 		uint16_t		slh_seq;
 	} slh_list;
 };
 
 typedef union slist_header slist_header;
 
 struct list_entry {
 	struct list_entry *nle_flink;
 	struct list_entry *nle_blink;
 };
 
 typedef struct list_entry list_entry;
 
 #define	InitializeListHead(l)			\
 	(l)->nle_flink = (l)->nle_blink = (l)
 
 #define	IsListEmpty(h)				\
 	((h)->nle_flink == (h))
 
 /*
  * Unlink entry `e' from the doubly linked list it sits on by joining
  * its two neighbours.  `e' itself is not modified.
  *
  * The argument is evaluated once into a prefixed local, so a caller
  * variable that happens to be called `b' or `f' is no longer shadowed
  * by the macro's own temporaries.
  */
 #define	RemoveEntryList(e)				\
 	do {						\
 		list_entry	*rel_entry_ = (e);	\
 		list_entry	*rel_prev_;		\
 		list_entry	*rel_next_;		\
 							\
 		rel_next_ = rel_entry_->nle_flink;	\
 		rel_prev_ = rel_entry_->nle_blink;	\
 		rel_prev_->nle_flink = rel_next_;	\
 		rel_next_->nle_blink = rel_prev_;	\
 	} while (0)
 
 /* These two have to be inlined since they return things. */
 
 /*
  * Detach the entry that follows list head `l' and return it.  The
  * removed entry's own links are left untouched.  When the head points
  * back at itself, the head is what gets returned.
  */
 static __inline__ list_entry *
 RemoveHeadList(list_entry *l)
 {
 	list_entry		*first = l->nle_flink;
 	list_entry		*second = first->nle_flink;
 
 	/* Splice the head directly onto the former second element. */
 	l->nle_flink = second;
 	second->nle_blink = l;
 
 	return (first);
 }
 
 /*
  * Detach the entry that precedes list head `l' and return it.  The
  * removed entry's own links are left untouched.  When the head points
  * back at itself, the head is what gets returned.
  */
 static __inline__ list_entry *
 RemoveTailList(list_entry *l)
 {
 	list_entry		*last = l->nle_blink;
 	list_entry		*penultimate = last->nle_blink;
 
 	/* Splice the head directly onto the former next-to-last element. */
 	l->nle_blink = penultimate;
 	penultimate->nle_flink = l;
 
 	return (last);
 }
 
 /*
  * Link entry `e' in just before list head `l', i.e. at the tail of
  * the list.
  *
  * Both arguments are evaluated exactly once into prefixed locals, so
  * expressions such as &head work without extra parentheses and a
  * caller variable called `b' cannot be captured by the macro.
  */
 #define	InsertTailList(l, e)					\
 	do {							\
 		list_entry	*itl_head_ = (l);		\
 		list_entry	*itl_new_ = (e);		\
 		list_entry	*itl_last_;			\
 								\
 		itl_last_ = itl_head_->nle_blink;		\
 		itl_new_->nle_flink = itl_head_;		\
 		itl_new_->nle_blink = itl_last_;		\
 		itl_last_->nle_flink = itl_new_;		\
 		itl_head_->nle_blink = itl_new_;		\
 	} while (0)
 
 /*
  * Link entry `e' in just after list head `l', i.e. at the front of
  * the list.
  *
  * Both arguments are evaluated exactly once into prefixed locals, so
  * expressions such as &head work without extra parentheses and a
  * caller variable called `f' cannot be captured by the macro.
  */
 #define	InsertHeadList(l, e)					\
 	do {							\
 		list_entry	*ihl_head_ = (l);		\
 		list_entry	*ihl_new_ = (e);		\
 		list_entry	*ihl_first_;			\
 								\
 		ihl_first_ = ihl_head_->nle_flink;		\
 		ihl_new_->nle_flink = ihl_first_;		\
 		ihl_new_->nle_blink = ihl_head_;		\
 		ihl_first_->nle_blink = ihl_new_;		\
 		ihl_head_->nle_flink = ihl_new_;		\
 	} while (0)
 
 #define	CONTAINING_RECORD(addr, type, field)	\
 	((type *)((vm_offset_t)(addr) - (vm_offset_t)(&((type *)0)->field)))
 
 struct nt_dispatch_header {
 	uint8_t			dh_type;
 	uint8_t			dh_abs;
 	uint8_t			dh_size;
 	uint8_t			dh_inserted;
 	int32_t			dh_sigstate;
 	list_entry		dh_waitlisthead;
 };
 
 typedef struct nt_dispatch_header nt_dispatch_header;
 
 /* Dispatcher object types */
 
 #define	DISP_TYPE_NOTIFICATION_EVENT	0	/* KEVENT */
 #define	DISP_TYPE_SYNCHRONIZATION_EVENT	1	/* KEVENT */
 #define	DISP_TYPE_MUTANT		2	/* KMUTANT/KMUTEX */
 #define	DISP_TYPE_PROCESS		3	/* KPROCESS */
 #define	DISP_TYPE_QUEUE			4	/* KQUEUE */
 #define	DISP_TYPE_SEMAPHORE		5	/* KSEMAPHORE */
 #define	DISP_TYPE_THREAD		6	/* KTHREAD */
 #define	DISP_TYPE_NOTIFICATION_TIMER	8	/* KTIMER */
 #define	DISP_TYPE_SYNCHRONIZATION_TIMER	9	/* KTIMER */
 
 #define	OTYPE_EVENT		0
 #define	OTYPE_MUTEX		1
 #define	OTYPE_THREAD		2
 #define	OTYPE_TIMER		3
 
 /* Windows dispatcher levels. */
 
 #define	PASSIVE_LEVEL		0
 #define	LOW_LEVEL		0
 #define	APC_LEVEL		1
 #define	DISPATCH_LEVEL		2
 #define	DEVICE_LEVEL		(DISPATCH_LEVEL + 1)
 #define	PROFILE_LEVEL		27
 #define	CLOCK1_LEVEL		28
 #define	CLOCK2_LEVEL		28
 #define	IPI_LEVEL		29
 #define	POWER_LEVEL		30
 #define	HIGH_LEVEL		31
 
 #define	SYNC_LEVEL_UP		DISPATCH_LEVEL
 #define	SYNC_LEVEL_MP		(IPI_LEVEL - 1)
 
 /*
  * True when the process owning thread `td' does not have P_KTHREAD
  * set in p_flag.  The mask is parenthesized explicitly: `==' binds
  * tighter than `&', so without the inner parentheses the expression
  * collapses to p_flag & (P_KTHREAD == FALSE) and can never be true.
  */
 #define	AT_PASSIVE_LEVEL(td)		\
 	(((td)->td_proc->p_flag & P_KTHREAD) == FALSE)
 
 #define	AT_DISPATCH_LEVEL(td)		\
 	((td)->td_base_pri == PI_REALTIME)
 
 #define	AT_DIRQL_LEVEL(td)		\
 	((td)->td_priority <= PI_NET)
 
 #define	AT_HIGH_LEVEL(td)		\
 	((td)->td_critnest != 0)
 
 struct nt_objref {
 	nt_dispatch_header	no_dh;
 	void			*no_obj;
 	TAILQ_ENTRY(nt_objref)	link;
 };
 
 TAILQ_HEAD(nt_objref_head, nt_objref);
 
 typedef struct nt_objref nt_objref;
 
 #define	EVENT_TYPE_NOTIFY	0
 #define	EVENT_TYPE_SYNC		1
 
 /*
  * We need to use the timeout()/untimeout() API for ktimers
  * since timers can be initialized, but not destroyed (so
  * malloc()ing our own callout structures would mean a leak,
  * since there'd be no way to free() them). This means we
  * need to use struct callout_handle, which is really just a
  * pointer. To make it easier to deal with, we use a union
  * to overlay the callout_handle over the k_timerlistentry.
  * The latter is a list_entry, which is two pointers, so
  * there's enough space available to hide a callout_handle
  * there.
  */
 
 struct ktimer {
 	nt_dispatch_header	k_header;
 	uint64_t		k_duetime;
 	union {
 		list_entry		k_timerlistentry;
 		struct callout		*k_callout;
 	} u;
 	void			*k_dpc;
 	uint32_t		k_period;
 };
 
 #define	k_timerlistentry	u.k_timerlistentry
 #define	k_callout		u.k_callout
 
 typedef struct ktimer ktimer;
 
 struct nt_kevent {
 	nt_dispatch_header	k_header;
 };
 
 typedef struct nt_kevent nt_kevent;
 
 /* Kernel deferred procedure call (i.e. timer callback) */
 
 struct kdpc;
 typedef void (*kdpc_func)(struct kdpc *, void *, void *, void *);
 
 struct kdpc {
 	uint16_t		k_type;
 	uint8_t			k_num;		/* CPU number */
 	uint8_t			k_importance;	/* priority */
 	list_entry		k_dpclistentry;
 	void			*k_deferedfunc;
 	void			*k_deferredctx;
 	void			*k_sysarg1;
 	void			*k_sysarg2;
 	void			*k_lock;
 };
 
 #define	KDPC_IMPORTANCE_LOW	0
 #define	KDPC_IMPORTANCE_MEDIUM	1
 #define	KDPC_IMPORTANCE_HIGH	2
 
 #define	KDPC_CPU_DEFAULT	255
 
 typedef struct kdpc kdpc;
 
 /*
  * Note: the acquisition count is BSD-specific. The Microsoft
  * documentation says that mutexes can be acquired recursively
  * by a given thread, but that you must release the mutex as
  * many times as you acquired it before it will be set to the
  * signalled state (i.e. before any other threads waiting on
  * the object will be woken up). However the Windows KMUTANT
  * structure has no field for keeping track of the number of
  * acquisitions, so we need to add one ourselves. As long as
  * driver code treats the mutex as opaque, we should be ok.
  */
 struct kmutant {
 	nt_dispatch_header	km_header;
 	list_entry		km_listentry;
 	void			*km_ownerthread;
 	uint8_t			km_abandoned;
 	uint8_t			km_apcdisable;
 };
 
 typedef struct kmutant kmutant;
 
 #define	LOOKASIDE_DEPTH 256
 
 struct general_lookaside {
 	slist_header		gl_listhead;
 	uint16_t		gl_depth;
 	uint16_t		gl_maxdepth;
 	uint32_t		gl_totallocs;
 	union {
 		uint32_t		gl_allocmisses;
 		uint32_t		gl_allochits;
 	} u_a;
 	uint32_t		gl_totalfrees;
 	union {
 		uint32_t		gl_freemisses;
 		uint32_t		gl_freehits;
 	} u_m;
 	uint32_t		gl_type;
 	uint32_t		gl_tag;
 	uint32_t		gl_size;
 	void			*gl_allocfunc;
 	void			*gl_freefunc;
 	list_entry		gl_listent;
 	uint32_t		gl_lasttotallocs;
 	union {
 		uint32_t		gl_lastallocmisses;
 		uint32_t		gl_lastallochits;
 	} u_l;
 	uint32_t		gl_rsvd[2];
 };
 
 typedef struct general_lookaside general_lookaside;
 
 struct npaged_lookaside_list {
 	general_lookaside	nll_l;
 #ifdef __i386__
 	kspin_lock		nll_obsoletelock;
 #endif
 };
 
 typedef struct npaged_lookaside_list npaged_lookaside_list;
 typedef struct npaged_lookaside_list paged_lookaside_list;
 
 typedef void * (*lookaside_alloc_func)(uint32_t, size_t, uint32_t);
 typedef void (*lookaside_free_func)(void *);
 
 struct irp;
 
 struct kdevice_qentry {
 	list_entry		kqe_devlistent;
 	uint32_t		kqe_sortkey;
 	uint8_t			kqe_inserted;
 };
 
 typedef struct kdevice_qentry kdevice_qentry;
 
 struct kdevice_queue {
 	uint16_t		kq_type;
 	uint16_t		kq_size;
 	list_entry		kq_devlisthead;
 	kspin_lock		kq_lock;
 	uint8_t			kq_busy;
 };
 
 typedef struct kdevice_queue kdevice_queue;
 
 struct wait_ctx_block {
 	kdevice_qentry		wcb_waitqueue;
 	void			*wcb_devfunc;
 	void			*wcb_devctx;
 	uint32_t		wcb_mapregcnt;
 	void			*wcb_devobj;
 	void			*wcb_curirp;
 	void			*wcb_bufchaindpc;
 };
 
 typedef struct wait_ctx_block wait_ctx_block;
 
 struct wait_block {
 	list_entry		wb_waitlist;
 	void			*wb_kthread;
 	nt_dispatch_header	*wb_object;
 	struct wait_block	*wb_next;
 #ifdef notdef
 	uint16_t		wb_waitkey;
 	uint16_t		wb_waittype;
 #endif
 	uint8_t			wb_waitkey;
 	uint8_t			wb_waittype;
 	uint8_t			wb_awakened;
 	uint8_t			wb_oldpri;
 };
 
 typedef struct wait_block wait_block;
 
 #define	wb_ext wb_kthread
 
 #define	THREAD_WAIT_OBJECTS	3
 #define	MAX_WAIT_OBJECTS	64
 
 #define	WAITTYPE_ALL		0
 #define	WAITTYPE_ANY		1
 
 #define	WAITKEY_VALID		0x8000
 
 /* kthread priority  */
 #define	LOW_PRIORITY		0
 #define	LOW_REALTIME_PRIORITY	16
 #define	HIGH_PRIORITY		31
 
 struct thread_context {
 	void			*tc_thrctx;
 	void			*tc_thrfunc;
 };
 
 typedef struct thread_context thread_context;
 
 /* Forward declaration */
 struct driver_object;
 struct devobj_extension;
 
 struct driver_extension {
 	struct driver_object	*dre_driverobj;
 	void			*dre_adddevicefunc;
 	uint32_t		dre_reinitcnt;
 	unicode_string		dre_srvname;
 
 	/*
 	 * Drivers are allowed to add one or more custom extensions
 	 * to the driver object, but there's no special pointer
 	 * for them. Hang them off here for now.
 	 */
 
 	list_entry		dre_usrext;
 };
 
 typedef struct driver_extension driver_extension;
 
 struct custom_extension {
 	list_entry		ce_list;
 	void			*ce_clid;
 };
 
 typedef struct custom_extension custom_extension;
 
 /*
  * The KINTERRUPT structure in Windows is opaque to drivers.
  * We define our own custom version with things we need.
  */
 
 struct kinterrupt {
 	list_entry		ki_list;
 	device_t		ki_dev;
 	int			ki_rid;
 	void			*ki_cookie;
 	struct resource		*ki_irq;
 	kspin_lock		ki_lock_priv;
 	kspin_lock		*ki_lock;
 	void			*ki_svcfunc;
 	void			*ki_svcctx;
 };
 
 typedef struct kinterrupt kinterrupt;
 
 struct ksystem_time {
 	uint32_t	low_part;
 	int32_t		high1_time;
 	int32_t		high2_time;
 };
 
 enum nt_product_type {
 	NT_PRODUCT_WIN_NT = 1,
 	NT_PRODUCT_LAN_MAN_NT,
 	NT_PRODUCT_SERVER
 };
 
 enum alt_arch_type {
 	STANDARD_DESIGN,
 	NEC98x86,
 	END_ALTERNATIVES
 };
 
 struct kuser_shared_data {
 	uint32_t		tick_count;
 	uint32_t		tick_count_multiplier;
 	volatile struct		ksystem_time interrupt_time;
 	volatile struct		ksystem_time system_time;
 	volatile struct		ksystem_time time_zone_bias;
 	uint16_t		image_number_low;
 	uint16_t		image_number_high;
 	int16_t			nt_system_root[260];
 	uint32_t		max_stack_trace_depth;
 	uint32_t		crypto_exponent;
 	uint32_t		time_zone_id;
 	uint32_t		large_page_min;
 	uint32_t		reserved2[7];
 	enum nt_product_type	nt_product_type;
 	uint8_t			product_type_is_valid;
 	uint32_t		nt_major_version;
 	uint32_t		nt_minor_version;
 	uint8_t			processor_features[64];
 	uint32_t		reserved1;
 	uint32_t		reserved3;
 	volatile uint32_t	time_slip;
 	enum alt_arch_type	alt_arch_type;
 	int64_t			system_expiration_date;
 	uint32_t		suite_mask;
 	uint8_t			kdbg_enabled;
 	volatile uint32_t	active_console;
 	volatile uint32_t	dismount_count;
 	uint32_t		com_plus_package;
 	uint32_t		last_system_rit_event_tick_count;
 	uint32_t		num_phys_pages;
 	uint8_t			safe_boot_mode;
 	uint32_t		trace_log;
 	uint64_t		fill0;
 	uint64_t		sys_call[4];
 	union {
 		volatile struct	ksystem_time	tick_count;
 		volatile uint64_t		tick_count_quad;
 	} tick;
 };
 
 /*
  * In Windows, there are Physical Device Objects (PDOs) and
  * Functional Device Objects (FDOs). Physical Device Objects are
  * created and maintained by bus drivers. For example, the PCI
  * bus driver might detect two PCI ethernet cards on a given
  * bus. The PCI bus driver will then allocate two device_objects
  * for its own internal bookkeeping purposes. This is analogous
  * to the device_t that the FreeBSD PCI code allocates and passes
  * into each PCI driver's probe and attach routines.
  *
  * When an ethernet driver claims one of the ethernet cards
  * on the bus, it will create its own device_object. This is
  * the Functional Device Object. This object is analogous to the
  * device-specific softc structure.
  */
 
 struct device_object {
 	uint16_t		do_type;
 	uint16_t		do_size;
 	uint32_t		do_refcnt;
 	struct driver_object	*do_drvobj;
 	struct device_object	*do_nextdev;
 	struct device_object	*do_attacheddev;
 	struct irp		*do_currirp;
 	void			*do_iotimer;
 	uint32_t		do_flags;
 	uint32_t		do_characteristics;
 	void			*do_vpb;
 	void			*do_devext;
 	uint32_t		do_devtype;
 	uint8_t			do_stacksize;
 	union {
 		list_entry		do_listent;
 		wait_ctx_block		do_wcb;
 	} queue;
 	uint32_t		do_alignreq;
 	kdevice_queue		do_devqueue;
 	struct kdpc		do_dpc;
 	uint32_t		do_activethreads;
 	void			*do_securitydesc;
 	struct nt_kevent	do_devlock;
 	uint16_t		do_sectorsz;
 	uint16_t		do_spare1;
 	struct devobj_extension	*do_devobj_ext;
 	void			*do_rsvd;
 };
 
 typedef struct device_object device_object;
 
 struct devobj_extension {
 	uint16_t		dve_type;
 	uint16_t		dve_size;
 	device_object		*dve_devobj;
 };
 
 typedef struct devobj_extension devobj_extension;
 
 /* Device object flags */
 
 #define	DO_VERIFY_VOLUME		0x00000002
 #define	DO_BUFFERED_IO			0x00000004
 #define	DO_EXCLUSIVE			0x00000008
 #define	DO_DIRECT_IO			0x00000010
 #define	DO_MAP_IO_BUFFER		0x00000020
 #define	DO_DEVICE_HAS_NAME		0x00000040
 #define	DO_DEVICE_INITIALIZING		0x00000080
 #define	DO_SYSTEM_BOOT_PARTITION	0x00000100
 #define	DO_LONG_TERM_REQUESTS		0x00000200
 #define	DO_NEVER_LAST_DEVICE		0x00000400
 #define	DO_SHUTDOWN_REGISTERED		0x00000800
 #define	DO_BUS_ENUMERATED_DEVICE	0x00001000
 #define	DO_POWER_PAGABLE		0x00002000
 #define	DO_POWER_INRUSH			0x00004000
 #define	DO_LOW_PRIORITY_FILESYSTEM	0x00010000
 
 /* Priority boosts */
 
 #define	IO_NO_INCREMENT			0
 #define	IO_CD_ROM_INCREMENT		1
 #define	IO_DISK_INCREMENT		1
 #define	IO_KEYBOARD_INCREMENT		6
 #define	IO_MAILSLOT_INCREMENT		2
 #define	IO_MOUSE_INCREMENT		6
 #define	IO_NAMED_PIPE_INCREMENT		2
 #define	IO_NETWORK_INCREMENT		2
 #define	IO_PARALLEL_INCREMENT		1
 #define	IO_SERIAL_INCREMENT		2
 #define	IO_SOUND_INCREMENT		8
 #define	IO_VIDEO_INCREMENT		1
 
 /* IRP major codes */
 
 #define	IRP_MJ_CREATE                   0x00
 #define	IRP_MJ_CREATE_NAMED_PIPE        0x01
 #define	IRP_MJ_CLOSE                    0x02
 #define	IRP_MJ_READ                     0x03
 #define	IRP_MJ_WRITE                    0x04
 #define	IRP_MJ_QUERY_INFORMATION        0x05
 #define	IRP_MJ_SET_INFORMATION          0x06
 #define	IRP_MJ_QUERY_EA                 0x07
 #define	IRP_MJ_SET_EA                   0x08
 #define	IRP_MJ_FLUSH_BUFFERS            0x09
 #define	IRP_MJ_QUERY_VOLUME_INFORMATION 0x0a
 #define	IRP_MJ_SET_VOLUME_INFORMATION   0x0b
 #define	IRP_MJ_DIRECTORY_CONTROL        0x0c
 #define	IRP_MJ_FILE_SYSTEM_CONTROL      0x0d
 #define	IRP_MJ_DEVICE_CONTROL           0x0e
 #define	IRP_MJ_INTERNAL_DEVICE_CONTROL  0x0f
 #define	IRP_MJ_SHUTDOWN                 0x10
 #define	IRP_MJ_LOCK_CONTROL             0x11
 #define	IRP_MJ_CLEANUP                  0x12
 #define	IRP_MJ_CREATE_MAILSLOT          0x13
 #define	IRP_MJ_QUERY_SECURITY           0x14
 #define	IRP_MJ_SET_SECURITY             0x15
 #define	IRP_MJ_POWER                    0x16
 #define	IRP_MJ_SYSTEM_CONTROL           0x17
 #define	IRP_MJ_DEVICE_CHANGE            0x18
 #define	IRP_MJ_QUERY_QUOTA              0x19
 #define	IRP_MJ_SET_QUOTA                0x1a
 #define	IRP_MJ_PNP                      0x1b
 #define	IRP_MJ_PNP_POWER                IRP_MJ_PNP      // Obsolete....
 #define	IRP_MJ_MAXIMUM_FUNCTION         0x1b
 #define	IRP_MJ_SCSI                     IRP_MJ_INTERNAL_DEVICE_CONTROL
 
 /* IRP minor codes */
 
 #define	IRP_MN_QUERY_DIRECTORY          0x01
 #define	IRP_MN_NOTIFY_CHANGE_DIRECTORY  0x02
 #define	IRP_MN_USER_FS_REQUEST          0x00
 
 #define	IRP_MN_MOUNT_VOLUME             0x01
 #define	IRP_MN_VERIFY_VOLUME            0x02
 #define	IRP_MN_LOAD_FILE_SYSTEM         0x03
 #define	IRP_MN_TRACK_LINK               0x04
 #define	IRP_MN_KERNEL_CALL              0x04
 
 #define	IRP_MN_LOCK                     0x01
 #define	IRP_MN_UNLOCK_SINGLE            0x02
 #define	IRP_MN_UNLOCK_ALL               0x03
 #define	IRP_MN_UNLOCK_ALL_BY_KEY        0x04
 
 #define	IRP_MN_NORMAL                   0x00
 #define	IRP_MN_DPC                      0x01
 #define	IRP_MN_MDL                      0x02
 #define	IRP_MN_COMPLETE                 0x04
 #define	IRP_MN_COMPRESSED               0x08
 
 #define	IRP_MN_MDL_DPC                  (IRP_MN_MDL | IRP_MN_DPC)
 #define	IRP_MN_COMPLETE_MDL             (IRP_MN_COMPLETE | IRP_MN_MDL)
 #define	IRP_MN_COMPLETE_MDL_DPC         (IRP_MN_COMPLETE_MDL | IRP_MN_DPC)
 
 #define	IRP_MN_SCSI_CLASS               0x01
 
 #define	IRP_MN_START_DEVICE                 0x00
 #define	IRP_MN_QUERY_REMOVE_DEVICE          0x01
 #define	IRP_MN_REMOVE_DEVICE                0x02
 #define	IRP_MN_CANCEL_REMOVE_DEVICE         0x03
 #define	IRP_MN_STOP_DEVICE                  0x04
 #define	IRP_MN_QUERY_STOP_DEVICE            0x05
 #define	IRP_MN_CANCEL_STOP_DEVICE           0x06
 
 #define	IRP_MN_QUERY_DEVICE_RELATIONS       0x07
 #define	IRP_MN_QUERY_INTERFACE              0x08
 #define	IRP_MN_QUERY_CAPABILITIES           0x09
 #define	IRP_MN_QUERY_RESOURCES              0x0A
 #define	IRP_MN_QUERY_RESOURCE_REQUIREMENTS  0x0B
 #define	IRP_MN_QUERY_DEVICE_TEXT            0x0C
 #define	IRP_MN_FILTER_RESOURCE_REQUIREMENTS 0x0D
 
 #define	IRP_MN_READ_CONFIG                  0x0F
 #define	IRP_MN_WRITE_CONFIG                 0x10
 #define	IRP_MN_EJECT                        0x11
 #define	IRP_MN_SET_LOCK                     0x12
 #define	IRP_MN_QUERY_ID                     0x13
 #define	IRP_MN_QUERY_PNP_DEVICE_STATE       0x14
 #define	IRP_MN_QUERY_BUS_INFORMATION        0x15
 #define	IRP_MN_DEVICE_USAGE_NOTIFICATION    0x16
 #define	IRP_MN_SURPRISE_REMOVAL             0x17
 #define	IRP_MN_QUERY_LEGACY_BUS_INFORMATION 0x18
 
 #define	IRP_MN_WAIT_WAKE                    0x00
 #define	IRP_MN_POWER_SEQUENCE               0x01
 #define	IRP_MN_SET_POWER                    0x02
 #define	IRP_MN_QUERY_POWER                  0x03
 
 #define	IRP_MN_QUERY_ALL_DATA               0x00
 #define	IRP_MN_QUERY_SINGLE_INSTANCE        0x01
 #define	IRP_MN_CHANGE_SINGLE_INSTANCE       0x02
 #define	IRP_MN_CHANGE_SINGLE_ITEM           0x03
 #define	IRP_MN_ENABLE_EVENTS                0x04
 #define	IRP_MN_DISABLE_EVENTS               0x05
 #define	IRP_MN_ENABLE_COLLECTION            0x06
 #define	IRP_MN_DISABLE_COLLECTION           0x07
 #define	IRP_MN_REGINFO                      0x08
 #define	IRP_MN_EXECUTE_METHOD               0x09
 #define	IRP_MN_REGINFO_EX                   0x0b
 
 /* IRP flags */
 
 #define	IRP_NOCACHE                     0x00000001
 #define	IRP_PAGING_IO                   0x00000002
 #define	IRP_MOUNT_COMPLETION            0x00000002
 #define	IRP_SYNCHRONOUS_API             0x00000004
 #define	IRP_ASSOCIATED_IRP              0x00000008
 #define	IRP_BUFFERED_IO                 0x00000010
 #define	IRP_DEALLOCATE_BUFFER           0x00000020
 #define	IRP_INPUT_OPERATION             0x00000040
 #define	IRP_SYNCHRONOUS_PAGING_IO       0x00000040
 #define	IRP_CREATE_OPERATION            0x00000080
 #define	IRP_READ_OPERATION              0x00000100
 #define	IRP_WRITE_OPERATION             0x00000200
 #define	IRP_CLOSE_OPERATION             0x00000400
 #define	IRP_DEFER_IO_COMPLETION         0x00000800
 #define	IRP_OB_QUERY_NAME               0x00001000
 #define	IRP_HOLD_DEVICE_QUEUE           0x00002000
 #define	IRP_RETRY_IO_COMPLETION         0x00004000
 #define	IRP_CLASS_CACHE_OPERATION       0x00008000
 #define	IRP_SET_USER_EVENT              IRP_CLOSE_OPERATION
 
 /* IRP I/O control flags */
 
 #define	IRP_QUOTA_CHARGED               0x01
 #define	IRP_ALLOCATED_MUST_SUCCEED      0x02
 #define	IRP_ALLOCATED_FIXED_SIZE        0x04
 #define	IRP_LOOKASIDE_ALLOCATION        0x08
 
 /* I/O method types */
 
 #define	METHOD_BUFFERED			0
 #define	METHOD_IN_DIRECT		1
 #define	METHOD_OUT_DIRECT		2
 #define	METHOD_NEITHER			3
 
 /* File access types */
 
 #define	FILE_ANY_ACCESS			0x0000
 #define	FILE_SPECIAL_ACCESS		FILE_ANY_ACCESS
 #define	FILE_READ_ACCESS		0x0001
 #define	FILE_WRITE_ACCESS		0x0002
 
 /*
  * Recover I/O access method from IOCTL code.  IOCTL_CODE() stores the
  * method unshifted in the two lowest bits, so the result is one of the
  * METHOD_* values (0..3).
  */
 
 #define	IO_METHOD(x)			((x) & 0x3)
 
 /*
  * Recover function code from IOCTL code.  IOCTL_CODE() places the
  * function at bit 2 and the access type at bit 14, so only bits 2..13
  * belong to the function code.
  */
 
 #define	IO_FUNC(x)			(((x) & 0x3FFC) >> 2)
 
 /*
  * Macro to construct an IOCTL code: device type from bit 16 up, access
  * type at bit 14, function code at bit 2 and the I/O method in the two
  * lowest bits.  Note the argument order: (dev, func, iomethod, acc).
  */
 
 #define	IOCTL_CODE(dev, func, iomethod, acc)	\
 	(((dev) << 16) | ((acc) << 14) | ((func) << 2) | (iomethod))
 
 
 struct io_status_block {
 	union {
 		uint32_t		isb_status;
 		void			*isb_ptr;
 	} u;
 	register_t		isb_info;
 };
 #define	isb_status		u.isb_status
 #define	isb_ptr			u.isb_ptr
 
 typedef struct io_status_block io_status_block;
 
 struct kapc {
 	uint16_t		apc_type;
 	uint16_t		apc_size;
 	uint32_t		apc_spare0;
 	void			*apc_thread;
 	list_entry		apc_list;
 	void			*apc_kernfunc;
 	void			*apc_rundownfunc;
 	void			*apc_normalfunc;
 	void			*apc_normctx;
 	void			*apc_sysarg1;
 	void			*apc_sysarg2;
 	uint8_t			apc_stateidx;
 	uint8_t			apc_cpumode;
 	uint8_t			apc_inserted;
 };
 
 typedef struct kapc kapc;
 
 typedef uint32_t (*completion_func)(device_object *,
 	struct irp *, void *);
 typedef uint32_t (*cancel_func)(device_object *,
 	struct irp *);
 
 struct io_stack_location {
 	uint8_t			isl_major;
 	uint8_t			isl_minor;
 	uint8_t			isl_flags;
 	uint8_t			isl_ctl;
 
 	/*
 	 * There's a big-ass union here in the actual Windows
 	 * definition of the structure, but it contains stuff
 	 * that doesn't really apply to BSD, and defining it
 	 * all properly would require duplicating over a dozen
 	 * other structures that we'll never use. Since the
 	 * io_stack_location structure is opaque to drivers
 	 * anyway, I'm not going to bother with the extra crap.
 	 */
 
 	union {
 		struct {
 			uint32_t		isl_len;
 			uint32_t		*isl_key;
 			uint64_t		isl_byteoff;
 		} isl_read;
 		struct {
 			uint32_t		isl_len;
 			uint32_t		*isl_key;
 			uint64_t		isl_byteoff;
 		} isl_write;
 		struct {
 			uint32_t		isl_obuflen;
 			uint32_t		isl_ibuflen;
 			uint32_t		isl_iocode;
 			void			*isl_type3ibuf;
 		} isl_ioctl;
 		struct {
 			void			*isl_arg1;
 			void			*isl_arg2;
 			void			*isl_arg3;
 			void			*isl_arg4;
 		} isl_others;
 	} isl_parameters __attribute__((packed));
 
 	void			*isl_devobj;
 	void			*isl_fileobj;
 	completion_func		isl_completionfunc;
 	void			*isl_completionctx;
 };
 
 typedef struct io_stack_location io_stack_location;
 
 /* Stack location control flags */
 
 #define	SL_PENDING_RETURNED		0x01
 #define	SL_INVOKE_ON_CANCEL		0x20
 #define	SL_INVOKE_ON_SUCCESS		0x40
 #define	SL_INVOKE_ON_ERROR		0x80
 
 struct irp {
 	uint16_t		irp_type;
 	uint16_t		irp_size;
 	mdl			*irp_mdl;
 	uint32_t		irp_flags;
 	union {
 		struct irp		*irp_master;
 		uint32_t		irp_irpcnt;
 		void			*irp_sysbuf;
 	} irp_assoc;
 	list_entry		irp_thlist;
 	io_status_block		irp_iostat;
 	uint8_t			irp_reqmode;
 	uint8_t			irp_pendingreturned;
 	uint8_t			irp_stackcnt;
 	uint8_t			irp_currentstackloc;
 	uint8_t			irp_cancel;
 	uint8_t			irp_cancelirql;
 	uint8_t			irp_apcenv;
 	uint8_t			irp_allocflags;
 	io_status_block		*irp_usriostat;
 	nt_kevent		*irp_usrevent;
 	union {
 		struct {
 			void			*irp_apcfunc;
 			void			*irp_apcctx;
 		} irp_asyncparms;
 		uint64_t			irp_allocsz;
 	} irp_overlay;
 	cancel_func		irp_cancelfunc;
 	void			*irp_userbuf;
 
 	/* Windows kernel info */
 
 	union {
 		struct {
 			union {
 				kdevice_qentry			irp_dqe;
 				struct {
 					void			*irp_drvctx[4];
 				} s1;
 			} u1;
 			void			*irp_thread;
 			char			*irp_auxbuf;
 			struct {
 				list_entry			irp_list;
 				union {
 					io_stack_location	*irp_csl;
 					uint32_t		irp_pkttype;
 				} u2;
 			} s2;
 			void			*irp_fileobj;
 		} irp_overlay;
 		union {
 			kapc			irp_apc;
 			struct {
 				void		*irp_ep;
 				void		*irp_dev;
 			} irp_usb;
 		} irp_misc;
 		void			*irp_compkey;
 	} irp_tail;
 };
 
 #define	irp_csl			s2.u2.irp_csl
 #define	irp_pkttype		s2.u2.irp_pkttype
 
 #define	IRP_NDIS_DEV(irp)	(irp)->irp_tail.irp_misc.irp_usb.irp_dev
 #define	IRP_NDISUSB_EP(irp)	(irp)->irp_tail.irp_misc.irp_usb.irp_ep
 
 typedef struct irp irp;
 
 /*
  * Pointer-flavoured wrapper: hands `dst' and `val' to
  * InterlockedExchange() and casts its result to void *.
  * NOTE(review): `dst' is cast to uint32_t *, so presumably only 32
  * bits are exchanged; confirm this is safe where pointers are 64 bits
  * wide.
  */
 #define	InterlockedExchangePointer(dst, val)				\
 	(void *)InterlockedExchange((uint32_t *)(dst), (uintptr_t)(val))
 
 #define	IoSizeOfIrp(ssize)						\
 	((uint16_t) (sizeof(irp) + ((ssize) * (sizeof(io_stack_location)))))
 
 /*
  * Swap `func' into the irp_cancelfunc slot of `irp' through
  * InterlockedExchangePointer() and yield that call's result cast to
  * cancel_func.  The body now dereferences the `irp' parameter itself;
  * it previously named `ip', which only compiled when the caller
  * happened to have a variable of that name in scope.
  */
 #define	IoSetCancelRoutine(irp, func)					\
 	(cancel_func)InterlockedExchangePointer(			\
 	(void *)&(irp)->irp_cancelfunc, (void *)(func))
 
 /*
  * Swap `val' into the irp_cancel slot of `irp' through
  * InterlockedExchangePointer() and yield that call's result cast to
  * u_long.  The body now dereferences the `irp' parameter itself; it
  * previously named `ip', which only compiled when the caller happened
  * to have a variable of that name in scope.
  * NOTE(review): irp_cancel is declared uint8_t while the exchange goes
  * through a wider pointer cast; presumably the neighbouring byte-sized
  * fields are rewritten as well -- confirm this is intended.
  */
 #define	IoSetCancelValue(irp, val)					\
 	(u_long)InterlockedExchangePointer(				\
 	(void *)&(irp)->irp_cancel, (void *)(val))
 
 #define	IoGetCurrentIrpStackLocation(irp)				\
 	(irp)->irp_tail.irp_overlay.irp_csl
 
 #define	IoGetNextIrpStackLocation(irp)					\
 	((irp)->irp_tail.irp_overlay.irp_csl - 1)
 
 /*
  * Step `irp' to its next stack location: both the current stack
  * location index and the current stack location pointer are
  * decremented.  The argument is parenthesized so that expressions
  * other than a plain identifier expand correctly.
  */
 #define	IoSetNextIrpStackLocation(irp)					\
 	do {								\
 		(irp)->irp_currentstackloc--;				\
 		(irp)->irp_tail.irp_overlay.irp_csl--;			\
 	} while(0)
 
 /*
  * Register completion callback `func' with context `ctx' in the next
  * stack location of `irp'.  isl_ctl is first cleared and then receives
  * SL_INVOKE_ON_SUCCESS, SL_INVOKE_ON_ERROR and SL_INVOKE_ON_CANCEL
  * according to whether `ok', `err' and `cancel' are non-zero.
  *
  * The temporary carries a prefixed name so that an argument called
  * `s' is not captured by the macro's own local.
  */
 #define	IoSetCompletionRoutine(irp, func, ctx, ok, err, cancel)		\
 	do {								\
 		io_stack_location	*iscr_sl_;			\
 		iscr_sl_ = IoGetNextIrpStackLocation((irp));		\
 		iscr_sl_->isl_completionfunc = (func);			\
 		iscr_sl_->isl_completionctx = (ctx);			\
 		iscr_sl_->isl_ctl = 0;					\
 		if (ok)							\
 			iscr_sl_->isl_ctl = SL_INVOKE_ON_SUCCESS;	\
 		if (err)						\
 			iscr_sl_->isl_ctl |= SL_INVOKE_ON_ERROR;	\
 		if (cancel)						\
 			iscr_sl_->isl_ctl |= SL_INVOKE_ON_CANCEL;	\
 	} while(0)
 
 #define	IoMarkIrpPending(irp)						\
 	IoGetCurrentIrpStackLocation(irp)->isl_ctl |= SL_PENDING_RETURNED
 #define	IoUnmarkIrpPending(irp)						\
 	IoGetCurrentIrpStackLocation(irp)->isl_ctl &= ~SL_PENDING_RETURNED
 
 /*
  * Copy the leading part of the current stack location into the next
  * one. Only the bytes that precede isl_completionfunc are copied, so
  * the destination's isl_completionfunc member and everything laid out
  * after it are left as they were.
  * The scratch pointers are named src and dst; arguments that refer to
  * caller variables with those names would be captured by them.
  */
 #define	IoCopyCurrentIrpStackLocationToNext(irp)			\
 	do {								\
 		io_stack_location *src, *dst;				\
 		src = IoGetCurrentIrpStackLocation(irp);		\
 		dst = IoGetNextIrpStackLocation(irp);			\
 		bcopy((char *)src, (char *)dst,				\
 		    offsetof(io_stack_location, isl_completionfunc));	\
 	} while(0)
 
 /*
  * Step the IRP's stack position up by one: both the numeric index
  * (irp_currentstackloc) and the current-location pointer (irp_csl)
  * are incremented. This is the opposite adjustment to the one made
  * by IoSetNextIrpStackLocation().
  */
 #define	IoSkipCurrentIrpStackLocation(irp)				\
 	do {								\
 		(irp)->irp_currentstackloc++;				\
 		(irp)->irp_tail.irp_overlay.irp_csl++;			\
 	} while(0)
 
 #define	IoInitializeDpcRequest(dobj, dpcfunc)				\
 	KeInitializeDpc(&(dobj)->do_dpc, dpcfunc, dobj)
 
 #define	IoRequestDpc(dobj, irp, ctx)					\
 	KeInsertQueueDpc(&(dobj)->do_dpc, irp, ctx)
 
 typedef uint32_t (*driver_dispatch)(device_object *, irp *);
 
 /*
  * The driver_object is allocated once for each driver that's loaded
  * into the system. A new one is allocated for each driver and
  * populated a bit via the driver's DriverEntry function.
  * In general, a Windows DriverEntry() function will provide a pointer
  * to its AddDevice() method and set up the dispatch table.
  * For NDIS drivers, this is all done behind the scenes in the
  * NdisInitializeWrapper() and/or NdisMRegisterMiniport() routines.
  */
 
 struct driver_object {
 	uint16_t		dro_type;
 	uint16_t		dro_size;
 	device_object		*dro_devobj;
 	uint32_t		dro_flags;
 	void			*dro_driverstart;
 	uint32_t		dro_driversize;
 	void			*dro_driversection;
 	driver_extension	*dro_driverext;
 	unicode_string		dro_drivername;
 	unicode_string		*dro_hwdb;
 	void			*dro_pfastiodispatch;
 	void			*dro_driverinitfunc;
 	void			*dro_driverstartiofunc;
 	void			*dro_driverunloadfunc;
 	driver_dispatch		dro_dispatch[IRP_MJ_MAXIMUM_FUNCTION + 1];
 };
 
 typedef struct driver_object driver_object;
 
 #define	DEVPROP_DEVICE_DESCRIPTION	0x00000000
 #define	DEVPROP_HARDWARE_ID		0x00000001
 #define	DEVPROP_COMPATIBLE_IDS		0x00000002
 #define	DEVPROP_BOOTCONF		0x00000003
 #define	DEVPROP_BOOTCONF_TRANSLATED	0x00000004
 #define	DEVPROP_CLASS_NAME		0x00000005
 #define	DEVPROP_CLASS_GUID		0x00000006
 #define	DEVPROP_DRIVER_KEYNAME		0x00000007
 #define	DEVPROP_MANUFACTURER		0x00000008
 #define	DEVPROP_FRIENDLYNAME		0x00000009
 #define	DEVPROP_LOCATION_INFO		0x0000000A
 #define	DEVPROP_PHYSDEV_NAME		0x0000000B
 #define	DEVPROP_BUSTYPE_GUID		0x0000000C
 #define	DEVPROP_LEGACY_BUSTYPE		0x0000000D
 #define	DEVPROP_BUS_NUMBER		0x0000000E
 #define	DEVPROP_ENUMERATOR_NAME		0x0000000F
 #define	DEVPROP_ADDRESS			0x00000010
 #define	DEVPROP_UINUMBER		0x00000011
 #define	DEVPROP_INSTALL_STATE		0x00000012
 #define	DEVPROP_REMOVAL_POLICY		0x00000013
 
 /* Various supported device types (used with IoCreateDevice()) */
 
 #define	FILE_DEVICE_BEEP		0x00000001
 #define	FILE_DEVICE_CD_ROM		0x00000002
 #define	FILE_DEVICE_CD_ROM_FILE_SYSTEM	0x00000003
 #define	FILE_DEVICE_CONTROLLER		0x00000004
 #define	FILE_DEVICE_DATALINK		0x00000005
 #define	FILE_DEVICE_DFS			0x00000006
 #define	FILE_DEVICE_DISK		0x00000007
 #define	FILE_DEVICE_DISK_FILE_SYSTEM	0x00000008
 #define	FILE_DEVICE_FILE_SYSTEM		0x00000009
 #define	FILE_DEVICE_INPORT_PORT		0x0000000A
 #define	FILE_DEVICE_KEYBOARD		0x0000000B
 #define	FILE_DEVICE_MAILSLOT		0x0000000C
 #define	FILE_DEVICE_MIDI_IN		0x0000000D
 #define	FILE_DEVICE_MIDI_OUT		0x0000000E
 #define	FILE_DEVICE_MOUSE		0x0000000F
 #define	FILE_DEVICE_MULTI_UNC_PROVIDER	0x00000010
 #define	FILE_DEVICE_NAMED_PIPE		0x00000011
 #define	FILE_DEVICE_NETWORK		0x00000012
 #define	FILE_DEVICE_NETWORK_BROWSER	0x00000013
 #define	FILE_DEVICE_NETWORK_FILE_SYSTEM	0x00000014
 #define	FILE_DEVICE_NULL		0x00000015
 #define	FILE_DEVICE_PARALLEL_PORT	0x00000016
 #define	FILE_DEVICE_PHYSICAL_NETCARD	0x00000017
 #define	FILE_DEVICE_PRINTER		0x00000018
 #define	FILE_DEVICE_SCANNER		0x00000019
 #define	FILE_DEVICE_SERIAL_MOUSE_PORT	0x0000001A
 #define	FILE_DEVICE_SERIAL_PORT		0x0000001B
 #define	FILE_DEVICE_SCREEN		0x0000001C
 #define	FILE_DEVICE_SOUND		0x0000001D
 #define	FILE_DEVICE_STREAMS		0x0000001E
 #define	FILE_DEVICE_TAPE		0x0000001F
 #define	FILE_DEVICE_TAPE_FILE_SYSTEM	0x00000020
 #define	FILE_DEVICE_TRANSPORT		0x00000021
 #define	FILE_DEVICE_UNKNOWN		0x00000022
 #define	FILE_DEVICE_VIDEO		0x00000023
 #define	FILE_DEVICE_VIRTUAL_DISK	0x00000024
 #define	FILE_DEVICE_WAVE_IN		0x00000025
 #define	FILE_DEVICE_WAVE_OUT		0x00000026
 #define	FILE_DEVICE_8042_PORT		0x00000027
 #define	FILE_DEVICE_NETWORK_REDIRECTOR	0x00000028
 #define	FILE_DEVICE_BATTERY		0x00000029
 #define	FILE_DEVICE_BUS_EXTENDER	0x0000002A
 #define	FILE_DEVICE_MODEM		0x0000002B
 #define	FILE_DEVICE_VDM			0x0000002C
 #define	FILE_DEVICE_MASS_STORAGE	0x0000002D
 #define	FILE_DEVICE_SMB			0x0000002E
 #define	FILE_DEVICE_KS			0x0000002F
 #define	FILE_DEVICE_CHANGER		0x00000030
 #define	FILE_DEVICE_SMARTCARD		0x00000031
 #define	FILE_DEVICE_ACPI		0x00000032
 #define	FILE_DEVICE_DVD			0x00000033
 #define	FILE_DEVICE_FULLSCREEN_VIDEO	0x00000034
 #define	FILE_DEVICE_DFS_FILE_SYSTEM	0x00000035
 #define	FILE_DEVICE_DFS_VOLUME		0x00000036
 #define	FILE_DEVICE_SERENUM		0x00000037
 #define	FILE_DEVICE_TERMSRV		0x00000038
 #define	FILE_DEVICE_KSEC		0x00000039
 #define	FILE_DEVICE_FIPS		0x0000003A
 
 /* Device characteristics */
 
 #define	FILE_REMOVABLE_MEDIA		0x00000001
 #define	FILE_READ_ONLY_DEVICE		0x00000002
 #define	FILE_FLOPPY_DISKETTE		0x00000004
 #define	FILE_WRITE_ONCE_MEDIA		0x00000008
 #define	FILE_REMOTE_DEVICE		0x00000010
 #define	FILE_DEVICE_IS_MOUNTED		0x00000020
 #define	FILE_VIRTUAL_VOLUME		0x00000040
 #define	FILE_AUTOGENERATED_DEVICE_NAME	0x00000080
 #define	FILE_DEVICE_SECURE_OPEN		0x00000100
 
 /* Status codes */
 
 #define	STATUS_SUCCESS			0x00000000
 #define	STATUS_USER_APC			0x000000C0
 #define	STATUS_KERNEL_APC		0x00000100
 #define	STATUS_ALERTED			0x00000101
 #define	STATUS_TIMEOUT			0x00000102
 #define	STATUS_PENDING			0x00000103
 #define	STATUS_FAILURE			0xC0000001
 #define	STATUS_NOT_IMPLEMENTED		0xC0000002
+#define	STATUS_ACCESS_VIOLATION		0xC0000005
 #define	STATUS_INVALID_PARAMETER	0xC000000D
 #define	STATUS_INVALID_DEVICE_REQUEST	0xC0000010
 #define	STATUS_MORE_PROCESSING_REQUIRED	0xC0000016
 #define	STATUS_NO_MEMORY		0xC0000017
 #define	STATUS_BUFFER_TOO_SMALL		0xC0000023
 #define	STATUS_MUTANT_NOT_OWNED		0xC0000046
 #define	STATUS_NOT_SUPPORTED		0xC00000BB
 #define	STATUS_INVALID_PARAMETER_2	0xC00000F0
 #define	STATUS_INSUFFICIENT_RESOURCES	0xC000009A
 #define	STATUS_DEVICE_NOT_CONNECTED	0xC000009D
 #define	STATUS_CANCELLED		0xC0000120
 #define	STATUS_NOT_FOUND		0xC0000225
 #define	STATUS_DEVICE_REMOVED		0xC00002B6
 
 #define	STATUS_WAIT_0			0x00000000
 
 /* Memory pool types, for ExAllocatePoolWithTag() */
 
 #define	NonPagedPool			0x00000000
 #define	PagedPool			0x00000001
 #define	NonPagedPoolMustSucceed		0x00000002
 #define	DontUseThisType			0x00000003
 #define	NonPagedPoolCacheAligned	0x00000004
 #define	PagedPoolCacheAligned		0x00000005
 #define	NonPagedPoolCacheAlignedMustS	0x00000006
 #define	MaxPoolType			0x00000007
 
 /*
  * IO_WORKITEM is an opaque structure that must be allocated
  * via IoAllocateWorkItem() and released via IoFreeWorkItem().
  * Consequently, we can define it any way we want.
  */
 typedef void (*io_workitem_func)(device_object *, void *);
 
 struct io_workitem {
 	io_workitem_func	iw_func;
 	void			*iw_ctx;
 	list_entry		iw_listentry;
 	device_object		*iw_dobj;
 	int			iw_idx;
 };
 
 typedef struct io_workitem io_workitem;
 
 #define	WORKQUEUE_CRITICAL	0
 #define	WORKQUEUE_DELAYED	1
 #define	WORKQUEUE_HYPERCRITICAL	2
 
 #define	WORKITEM_THREADS	4
 #define	WORKITEM_LEGACY_THREAD	3
 /*
  * Advance the index x by one, wrapping back to 0 once it reaches
  * WORKITEM_LEGACY_THREAD. Argument and expansion are parenthesized so
  * the macro behaves as a single expression; x is evaluated twice and
  * must be a side-effect-free lvalue.
  */
 #define	WORKIDX_INC(x)		((x) = ((x) + 1) % WORKITEM_LEGACY_THREAD)
 
 /*
  * Older, deprecated work item API, needed to support NdisQueueWorkItem().
  */
 
 struct work_queue_item;
 
 typedef void (*work_item_func)(struct work_queue_item *, void *);
 
 struct work_queue_item {
 	list_entry		wqi_entry;
 	work_item_func		wqi_func;
 	void			*wqi_ctx;
 };
 
 typedef struct work_queue_item work_queue_item;
 
 #define	ExInitializeWorkItem(w, func, ctx)		\
 	do {						\
 		(w)->wqi_func = (func);			\
 		(w)->wqi_ctx = (ctx);			\
 		InitializeListHead(&((w)->wqi_entry));	\
 	} while (0)
 
 /*
  * FreeBSD's kernel stack is 2 pages in size by default. The
  * Windows stack is larger, so we need to give our threads more
  * stack pages. 4 should be enough, we use 8 just to be extra safe.
  */
 #define	NDIS_KSTACK_PAGES	8
 
 /*
  * Different kinds of function wrapping we can do.
  */
 
 #define	WINDRV_WRAP_STDCALL	1
 #define	WINDRV_WRAP_FASTCALL	2
 #define	WINDRV_WRAP_REGPARM	3
 #define	WINDRV_WRAP_CDECL	4
 #define	WINDRV_WRAP_AMD64	5
 
 struct drvdb_ent {
 	driver_object		*windrv_object;
 	void			*windrv_devlist;
 	ndis_cfg		*windrv_regvals;
 	interface_type		windrv_bustype;
 	STAILQ_ENTRY(drvdb_ent) link;
 };
 
 extern image_patch_table ntoskrnl_functbl[];
 #ifdef __amd64__
 extern struct kuser_shared_data kuser_shared_data;
 #endif
 typedef void (*funcptr)(void);
 typedef int (*matchfuncptr)(interface_type, void *, void *);
 
 __BEGIN_DECLS
 extern int windrv_libinit(void);
 extern int windrv_libfini(void);
 extern driver_object *windrv_lookup(vm_offset_t, char *);
 extern struct drvdb_ent *windrv_match(matchfuncptr, void *);
 extern int windrv_load(module_t, vm_offset_t, int, interface_type,
 	void *, ndis_cfg *);
 extern int windrv_unload(module_t, vm_offset_t, int);
 extern int windrv_create_pdo(driver_object *, device_t);
 extern void windrv_destroy_pdo(driver_object *, device_t);
 extern device_object *windrv_find_pdo(driver_object *, device_t);
 extern int windrv_bus_attach(driver_object *, char *);
 extern int windrv_wrap(funcptr, funcptr *, int, int);
 extern int windrv_unwrap(funcptr);
 extern void ctxsw_utow(void);
 extern void ctxsw_wtou(void);
 
 extern int ntoskrnl_libinit(void);
 extern int ntoskrnl_libfini(void);
 
 extern void ntoskrnl_intr(void *);
 extern void ntoskrnl_time(uint64_t *);
 
 extern uint16_t ExQueryDepthSList(slist_header *);
 extern slist_entry
 	*InterlockedPushEntrySList(slist_header *, slist_entry *);
 extern slist_entry *InterlockedPopEntrySList(slist_header *);
 extern uint32_t RtlUnicodeStringToAnsiString(ansi_string *,
 	unicode_string *, uint8_t);
 extern uint32_t RtlAnsiStringToUnicodeString(unicode_string *,
 	ansi_string *, uint8_t);
 extern void RtlInitAnsiString(ansi_string *, char *);
 extern void RtlInitUnicodeString(unicode_string *,
 	uint16_t *);
 extern void RtlFreeUnicodeString(unicode_string *);
 extern void RtlFreeAnsiString(ansi_string *);
 extern void KeInitializeDpc(kdpc *, void *, void *);
 extern uint8_t KeInsertQueueDpc(kdpc *, void *, void *);
 extern uint8_t KeRemoveQueueDpc(kdpc *);
 extern void KeSetImportanceDpc(kdpc *, uint32_t);
 extern void KeSetTargetProcessorDpc(kdpc *, uint8_t);
 extern void KeFlushQueuedDpcs(void);
 extern uint32_t KeGetCurrentProcessorNumber(void);
 extern void KeInitializeTimer(ktimer *);
 extern void KeInitializeTimerEx(ktimer *, uint32_t);
 extern uint8_t KeSetTimer(ktimer *, int64_t, kdpc *);
 extern uint8_t KeSetTimerEx(ktimer *, int64_t, uint32_t, kdpc *);
 extern uint8_t KeCancelTimer(ktimer *);
 extern uint8_t KeReadStateTimer(ktimer *);
 extern uint32_t KeWaitForSingleObject(void *, uint32_t,
 	uint32_t, uint8_t, int64_t *);
 extern void KeInitializeEvent(nt_kevent *, uint32_t, uint8_t);
 extern void KeClearEvent(nt_kevent *);
 extern uint32_t KeReadStateEvent(nt_kevent *);
 extern uint32_t KeSetEvent(nt_kevent *, uint32_t, uint8_t);
 extern uint32_t KeResetEvent(nt_kevent *);
 #ifdef __i386__
 extern void KefAcquireSpinLockAtDpcLevel(kspin_lock *);
 extern void KefReleaseSpinLockFromDpcLevel(kspin_lock *);
 extern uint8_t KeAcquireSpinLockRaiseToDpc(kspin_lock *);
 #else
 extern void KeAcquireSpinLockAtDpcLevel(kspin_lock *);
 extern void KeReleaseSpinLockFromDpcLevel(kspin_lock *);
 #endif
 extern void KeInitializeSpinLock(kspin_lock *);
 extern uint8_t KeAcquireInterruptSpinLock(kinterrupt *);
 extern void KeReleaseInterruptSpinLock(kinterrupt *, uint8_t);
 extern uint8_t KeSynchronizeExecution(kinterrupt *, void *, void *);
 extern uintptr_t InterlockedExchange(volatile uint32_t *,
 	uintptr_t);
 extern void *ExAllocatePoolWithTag(uint32_t, size_t, uint32_t);
 extern void ExFreePool(void *);
 extern uint32_t IoConnectInterrupt(kinterrupt **, void *, void *,
 	kspin_lock *, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
 	uint32_t, uint8_t);
 extern uint8_t MmIsAddressValid(void *);
 extern void *MmMapIoSpace(uint64_t, uint32_t, uint32_t);
 extern void MmUnmapIoSpace(void *, size_t);
 extern void MmBuildMdlForNonPagedPool(mdl *);
 extern void IoDisconnectInterrupt(kinterrupt *);
 extern uint32_t IoAllocateDriverObjectExtension(driver_object *,
 	void *, uint32_t, void **);
 extern void *IoGetDriverObjectExtension(driver_object *, void *);
 extern uint32_t IoCreateDevice(driver_object *, uint32_t,
 	unicode_string *, uint32_t, uint32_t, uint8_t, device_object **);
 extern void IoDeleteDevice(device_object *);
 extern device_object *IoGetAttachedDevice(device_object *);
 extern uint32_t IofCallDriver(device_object *, irp *);
 extern void IofCompleteRequest(irp *, uint8_t);
 extern void IoAcquireCancelSpinLock(uint8_t *);
 extern void IoReleaseCancelSpinLock(uint8_t);
 extern uint8_t IoCancelIrp(irp *);
 extern void IoDetachDevice(device_object *);
 extern device_object *IoAttachDeviceToDeviceStack(device_object *,
 	device_object *);
 extern mdl *IoAllocateMdl(void *, uint32_t, uint8_t, uint8_t, irp *);
 extern void IoFreeMdl(mdl *);
 extern io_workitem *IoAllocateWorkItem(device_object *);
 extern void ExQueueWorkItem(work_queue_item *, u_int32_t);
 extern void IoFreeWorkItem(io_workitem *);
 extern void IoQueueWorkItem(io_workitem *, io_workitem_func,
 	uint32_t, void *);
 
 #define	IoCallDriver(a, b)		IofCallDriver(a, b)
 #define	IoCompleteRequest(a, b)		IofCompleteRequest(a, b)
 
 /*
  * On the Windows x86 arch, KeAcquireSpinLock() and KeReleaseSpinLock()
  * routines live in the HAL. We try to imitate this behavior.
  */
 #ifdef __i386__
 #define	KI_USER_SHARED_DATA 0xffdf0000
 #define	KeAcquireSpinLock(a, b)	*(b) = KfAcquireSpinLock(a)
 #define	KeReleaseSpinLock(a, b)	KfReleaseSpinLock(a, b)
 #define	KeRaiseIrql(a, b)	*(b) = KfRaiseIrql(a)
 #define	KeLowerIrql(a)		KfLowerIrql(a)
 #define	KeAcquireSpinLockAtDpcLevel(a)	KefAcquireSpinLockAtDpcLevel(a)
 #define	KeReleaseSpinLockFromDpcLevel(a)  KefReleaseSpinLockFromDpcLevel(a)
 #endif /* __i386__ */
 
 #ifdef __amd64__
 #define	KI_USER_SHARED_DATA 0xfffff78000000000UL
 #define	KeAcquireSpinLock(a, b)	*(b) = KfAcquireSpinLock(a)
 #define	KeReleaseSpinLock(a, b)	KfReleaseSpinLock(a, b)
 
 /*
  * These may need to be redefined later;
  * not sure where they live on amd64 yet.
  */
 #define	KeRaiseIrql(a, b)	*(b) = KfRaiseIrql(a)
 #define	KeLowerIrql(a)		KfLowerIrql(a)
 #endif /* __amd64__ */
 
 __END_DECLS
 
 #endif /* _NTOSKRNL_VAR_H_ */
Index: projects/binutils-2.17/sys/compat/ndis/subr_ndis.c
===================================================================
--- projects/binutils-2.17/sys/compat/ndis/subr_ndis.c	(revision 215829)
+++ projects/binutils-2.17/sys/compat/ndis/subr_ndis.c	(revision 215830)
@@ -1,3340 +1,3364 @@
 /*-
  * Copyright (c) 2003
  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Bill Paul.
  * 4. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * This file implements a translation layer between the BSD networking
  * infrastructure and Windows(R) NDIS network driver modules. A Windows
  * NDIS driver calls into several functions in the NDIS.SYS Windows
  * kernel module and exports a table of functions designed to be called
  * by the NDIS subsystem. Using the PE loader, we can patch our own
  * versions of the NDIS routines into a given Windows driver module and
  * convince the driver that it is in fact running on Windows.
  *
  * We provide a table of all our implemented NDIS routines which is patched
  * into the driver object code. All our exported routines must use the
  * _stdcall calling convention, since that's what the Windows object code
  * expects.
  */
 
 
 #include <sys/ctype.h>
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/errno.h>
 
 #include <sys/callout.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/timespec.h>
 #include <sys/smp.h>
 #include <sys/queue.h>
 #include <sys/proc.h>
 #include <sys/filedesc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
 #include <sys/vnode.h>
 #include <sys/kthread.h>
 #include <sys/linker.h>
 #include <sys/mount.h>
 #include <sys/sysproto.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 #include <sys/bus.h>
 #include <sys/rman.h>
 
 #include <machine/stdarg.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_ioctl.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 
 #include <compat/ndis/pe_var.h>
 #include <compat/ndis/cfg_var.h>
 #include <compat/ndis/resource_var.h>
 #include <compat/ndis/ntoskrnl_var.h>
 #include <compat/ndis/hal_var.h>
 #include <compat/ndis/ndis_var.h>
 #include <dev/if_ndis/if_ndisvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/uma.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 
 static char ndis_filepath[MAXPATHLEN];
 
 SYSCTL_STRING(_hw, OID_AUTO, ndis_filepath, CTLFLAG_RW, ndis_filepath,
     MAXPATHLEN, "Path used by NdisOpenFile() to search for files");
 
 static void NdisInitializeWrapper(ndis_handle *,
 	driver_object *, void *, void *);
 static ndis_status NdisMRegisterMiniport(ndis_handle,
 	ndis_miniport_characteristics *, int);
 static ndis_status NdisAllocateMemoryWithTag(void **,
 	uint32_t, uint32_t);
 static ndis_status NdisAllocateMemory(void **,
 	uint32_t, uint32_t, ndis_physaddr);
 static void NdisFreeMemory(void *, uint32_t, uint32_t);
 static ndis_status NdisMSetAttributesEx(ndis_handle, ndis_handle,
 	uint32_t, uint32_t, ndis_interface_type);
 static void NdisOpenConfiguration(ndis_status *,
 	ndis_handle *, ndis_handle);
 static void NdisOpenConfigurationKeyByIndex(ndis_status *,
 	ndis_handle, uint32_t, unicode_string *, ndis_handle *);
 static void NdisOpenConfigurationKeyByName(ndis_status *,
 	ndis_handle, unicode_string *, ndis_handle *);
 static ndis_status ndis_encode_parm(ndis_miniport_block *,
 	struct sysctl_oid *, ndis_parm_type, ndis_config_parm **);
 static ndis_status ndis_decode_parm(ndis_miniport_block *,
 	ndis_config_parm *, char *);
 static void NdisReadConfiguration(ndis_status *, ndis_config_parm **,
 	ndis_handle, unicode_string *, ndis_parm_type);
 static void NdisWriteConfiguration(ndis_status *, ndis_handle,
 	unicode_string *, ndis_config_parm *);
 static void NdisCloseConfiguration(ndis_handle);
 static void NdisAllocateSpinLock(ndis_spin_lock *);
 static void NdisFreeSpinLock(ndis_spin_lock *);
 static void NdisAcquireSpinLock(ndis_spin_lock *);
 static void NdisReleaseSpinLock(ndis_spin_lock *);
 static void NdisDprAcquireSpinLock(ndis_spin_lock *);
 static void NdisDprReleaseSpinLock(ndis_spin_lock *);
 static void NdisInitializeReadWriteLock(ndis_rw_lock *);
 static void NdisAcquireReadWriteLock(ndis_rw_lock *,
 	uint8_t, ndis_lock_state *);
 static void NdisReleaseReadWriteLock(ndis_rw_lock *, ndis_lock_state *);
 static uint32_t NdisReadPciSlotInformation(ndis_handle, uint32_t,
 	uint32_t, void *, uint32_t);
 static uint32_t NdisWritePciSlotInformation(ndis_handle, uint32_t,
 	uint32_t, void *, uint32_t);
 static void NdisWriteErrorLogEntry(ndis_handle, ndis_error_code, uint32_t, ...);
 static void ndis_map_cb(void *, bus_dma_segment_t *, int, int);
 static void NdisMStartBufferPhysicalMapping(ndis_handle,
 	ndis_buffer *, uint32_t, uint8_t, ndis_paddr_unit *, uint32_t *);
 static void NdisMCompleteBufferPhysicalMapping(ndis_handle,
 	ndis_buffer *, uint32_t);
 static void NdisMInitializeTimer(ndis_miniport_timer *, ndis_handle,
 	ndis_timer_function, void *);
 static void NdisInitializeTimer(ndis_timer *,
 	ndis_timer_function, void *);
 static void NdisSetTimer(ndis_timer *, uint32_t);
 static void NdisMSetPeriodicTimer(ndis_miniport_timer *, uint32_t);
 static void NdisMCancelTimer(ndis_timer *, uint8_t *);
 static void ndis_timercall(kdpc *, ndis_miniport_timer *,
 	void *, void *);
 static void NdisMQueryAdapterResources(ndis_status *, ndis_handle,
 	ndis_resource_list *, uint32_t *);
 static ndis_status NdisMRegisterIoPortRange(void **,
 	ndis_handle, uint32_t, uint32_t);
 static void NdisMDeregisterIoPortRange(ndis_handle,
 	uint32_t, uint32_t, void *);
 static void NdisReadNetworkAddress(ndis_status *, void **,
 	uint32_t *, ndis_handle);
 static ndis_status NdisQueryMapRegisterCount(uint32_t, uint32_t *);
 static ndis_status NdisMAllocateMapRegisters(ndis_handle,
 	uint32_t, uint8_t, uint32_t, uint32_t);
 static void NdisMFreeMapRegisters(ndis_handle);
 static void ndis_mapshared_cb(void *, bus_dma_segment_t *, int, int);
 static void NdisMAllocateSharedMemory(ndis_handle, uint32_t,
 	uint8_t, void **, ndis_physaddr *);
 static void ndis_asyncmem_complete(device_object *, void *);
 static ndis_status NdisMAllocateSharedMemoryAsync(ndis_handle,
 	uint32_t, uint8_t, void *);
 static void NdisMFreeSharedMemory(ndis_handle, uint32_t,
 	uint8_t, void *, ndis_physaddr);
 static ndis_status NdisMMapIoSpace(void **, ndis_handle,
 	ndis_physaddr, uint32_t);
 static void NdisMUnmapIoSpace(ndis_handle, void *, uint32_t);
 static uint32_t NdisGetCacheFillSize(void);
 static uint32_t NdisMGetDmaAlignment(ndis_handle);
 static ndis_status NdisMInitializeScatterGatherDma(ndis_handle,
 	uint8_t, uint32_t);
 static void NdisUnchainBufferAtFront(ndis_packet *, ndis_buffer **);
 static void NdisUnchainBufferAtBack(ndis_packet *, ndis_buffer **);
 static void NdisAllocateBufferPool(ndis_status *,
 	ndis_handle *, uint32_t);
 static void NdisFreeBufferPool(ndis_handle);
 static void NdisAllocateBuffer(ndis_status *, ndis_buffer **,
 	ndis_handle, void *, uint32_t);
 static void NdisFreeBuffer(ndis_buffer *);
 static uint32_t NdisBufferLength(ndis_buffer *);
 static void NdisQueryBuffer(ndis_buffer *, void **, uint32_t *);
 static void NdisQueryBufferSafe(ndis_buffer *, void **,
 	uint32_t *, uint32_t);
 static void *NdisBufferVirtualAddress(ndis_buffer *);
 static void *NdisBufferVirtualAddressSafe(ndis_buffer *, uint32_t);
 static void NdisAdjustBufferLength(ndis_buffer *, int);
 static uint32_t NdisInterlockedIncrement(uint32_t *);
 static uint32_t NdisInterlockedDecrement(uint32_t *);
 static void NdisInitializeEvent(ndis_event *);
 static void NdisSetEvent(ndis_event *);
 static void NdisResetEvent(ndis_event *);
 static uint8_t NdisWaitEvent(ndis_event *, uint32_t);
 static ndis_status NdisUnicodeStringToAnsiString(ansi_string *,
 	unicode_string *);
 static ndis_status
 	NdisAnsiStringToUnicodeString(unicode_string *, ansi_string *);
 static ndis_status NdisMPciAssignResources(ndis_handle,
 	uint32_t, ndis_resource_list **);
 static ndis_status NdisMRegisterInterrupt(ndis_miniport_interrupt *,
 	ndis_handle, uint32_t, uint32_t, uint8_t,
 	uint8_t, ndis_interrupt_mode);
 static void NdisMDeregisterInterrupt(ndis_miniport_interrupt *);
 static void NdisMRegisterAdapterShutdownHandler(ndis_handle, void *,
 	ndis_shutdown_handler);
 static void NdisMDeregisterAdapterShutdownHandler(ndis_handle);
 static uint32_t NDIS_BUFFER_TO_SPAN_PAGES(ndis_buffer *);
 static void NdisGetBufferPhysicalArraySize(ndis_buffer *,
 	uint32_t *);
 static void NdisQueryBufferOffset(ndis_buffer *,
 	uint32_t *, uint32_t *);
 static uint32_t NdisReadPcmciaAttributeMemory(ndis_handle,
 	uint32_t, void *, uint32_t);
 static uint32_t NdisWritePcmciaAttributeMemory(ndis_handle,
 	uint32_t, void *, uint32_t);
 static list_entry *NdisInterlockedInsertHeadList(list_entry *,
 	list_entry *, ndis_spin_lock *);
 static list_entry *NdisInterlockedRemoveHeadList(list_entry *,
 	ndis_spin_lock *);
 static list_entry *NdisInterlockedInsertTailList(list_entry *,
 	list_entry *, ndis_spin_lock *);
 static uint8_t
 	NdisMSynchronizeWithInterrupt(ndis_miniport_interrupt *,
 	void *, void *);
 static void NdisGetCurrentSystemTime(uint64_t *);
 static void NdisGetSystemUpTime(uint32_t *);
+static uint32_t NdisGetVersion(void);
 static void NdisInitializeString(unicode_string *, char *);
 static void NdisInitAnsiString(ansi_string *, char *);
 static void NdisInitUnicodeString(unicode_string *, uint16_t *);
 static void NdisFreeString(unicode_string *);
 static ndis_status NdisMRemoveMiniport(ndis_handle *);
 static void NdisTerminateWrapper(ndis_handle, void *);
 static void NdisMGetDeviceProperty(ndis_handle, device_object **,
 	device_object **, device_object **, cm_resource_list *,
 	cm_resource_list *);
 static void NdisGetFirstBufferFromPacket(ndis_packet *,
 	ndis_buffer **, void **, uint32_t *, uint32_t *);
 static void NdisGetFirstBufferFromPacketSafe(ndis_packet *,
 	ndis_buffer **, void **, uint32_t *, uint32_t *, uint32_t);
 static int ndis_find_sym(linker_file_t, char *, char *, caddr_t *);
 static void NdisOpenFile(ndis_status *, ndis_handle *, uint32_t *,
 	unicode_string *, ndis_physaddr);
 static void NdisMapFile(ndis_status *, void **, ndis_handle);
 static void NdisUnmapFile(ndis_handle);
 static void NdisCloseFile(ndis_handle);
 static uint8_t NdisSystemProcessorCount(void);
+static void NdisGetCurrentProcessorCounts(uint32_t *, uint32_t *, uint32_t *);
 static void NdisMIndicateStatusComplete(ndis_handle);
 static void NdisMIndicateStatus(ndis_handle, ndis_status,
     void *, uint32_t);
 static uint8_t ndis_intr(kinterrupt *, void *);
 static void ndis_intrhand(kdpc *, ndis_miniport_interrupt *, void *, void *);
 static funcptr ndis_findwrap(funcptr);
 static void NdisCopyFromPacketToPacket(ndis_packet *,
 	uint32_t, uint32_t, ndis_packet *, uint32_t, uint32_t *);
 static void NdisCopyFromPacketToPacketSafe(ndis_packet *,
 	uint32_t, uint32_t, ndis_packet *, uint32_t, uint32_t *, uint32_t);
 static void NdisIMCopySendPerPacketInfo(ndis_packet *, ndis_packet *);
 static ndis_status NdisMRegisterDevice(ndis_handle,
 	unicode_string *, unicode_string *, driver_dispatch **,
 	void **, ndis_handle *);
 static ndis_status NdisMDeregisterDevice(ndis_handle);
 static ndis_status
 	NdisMQueryAdapterInstanceName(unicode_string *, ndis_handle);
 static void NdisMRegisterUnloadHandler(ndis_handle, void *);
 static void dummy(void);
 
 /*
  * Some really old drivers do not properly check the return value
  * from NdisAllocatePacket() and NdisAllocateBuffer() and will
  * sometimes allocate a few more buffers/packets than they originally
  * requested when they created the pool. To prevent this from being
  * a problem, we allocate a few extra buffers/packets beyond what
  * the driver asks for. This #define controls how many.
  */
 #define NDIS_POOL_EXTRA		16
 
 int
 ndis_libinit()
 {
 	image_patch_table	*patch;
 
 	strcpy(ndis_filepath, "/compat/ndis");
 
 	patch = ndis_functbl;
 	while (patch->ipt_func != NULL) {
 		windrv_wrap((funcptr)patch->ipt_func,
 		    (funcptr *)&patch->ipt_wrap,
 		    patch->ipt_argcnt, patch->ipt_ftype);
 		patch++;
 	}
 
 	return (0);
 }
 
 /*
  * Library shutdown. Passes the ipt_wrap member of every ndis_functbl
  * entry to windrv_unwrap(), stopping at the first entry whose
  * ipt_func is NULL. Always returns 0.
  */
 int
 ndis_libfini()
 {
 	image_patch_table	*ent;
 
 	for (ent = ndis_functbl; ent->ipt_func != NULL; ent++)
 		windrv_unwrap(ent->ipt_wrap);
 
 	return (0);
 }
 
 static funcptr
 ndis_findwrap(func)
 	funcptr			func;
 {
 	image_patch_table	*patch;
 
 	patch = ndis_functbl;
 	while (patch->ipt_func != NULL) {
 		if ((funcptr)patch->ipt_func == func)
 			return ((funcptr)patch->ipt_wrap);
 		patch++;
 	}
 
 	return (NULL);
 }
 
 /*
  * This routine does the messy Windows Driver Model device attachment
  * stuff on behalf of NDIS drivers. We register our own AddDevice
  * routine here
  */
 static void
 NdisInitializeWrapper(wrapper, drv, path, unused)
 	ndis_handle		*wrapper;
 	driver_object		*drv;
 	void			*path;
 	void			*unused;
 {
 	/*
 	 * As of yet, I haven't come up with a compelling
 	 * reason to define a private NDIS wrapper structure,
 	 * so we use a pointer to the driver object as the
 	 * wrapper handle. The driver object has the miniport
 	 * characteristics struct for this driver hung off it
 	 * via IoAllocateDriverObjectExtension(), and that's
 	 * really all the private data we need.
 	 */
 
 	*wrapper = drv;
 
 	/*
 	 * If this was really Windows, we'd be registering dispatch
 	 * routines for the NDIS miniport module here, but we're
 	 * not Windows so all we really need to do is set up an
 	 * AddDevice function that'll be invoked when a new device
 	 * instance appears.
 	 */
 
 	drv->dro_driverext->dre_adddevicefunc = NdisAddDevice;
 }
 
 /*
  * Counterpart of NdisInitializeWrapper(). It is intentionally a
  * no-op: both arguments are ignored and nothing is released here.
  */
 static void
 NdisTerminateWrapper(handle, syspec)
 	ndis_handle		handle;
 	void			*syspec;
 {
 	/* Nothing to see here, move along. */
 }
 
 static ndis_status
 NdisMRegisterMiniport(handle, characteristics, len)
 	ndis_handle		handle;
 	ndis_miniport_characteristics *characteristics;
 	int			len;
 {
 	ndis_miniport_characteristics	*ch = NULL;
 	driver_object		*drv;
 
 	drv = (driver_object *)handle;
 
 	/*
 	 * We need to save the NDIS miniport characteristics
 	 * somewhere. This data is per-driver, not per-device
 	 * (all devices handled by the same driver have the
 	 * same characteristics) so we hook it onto the driver
 	 * object using IoAllocateDriverObjectExtension().
 	 * The extra extension info is automagically deleted when
 	 * the driver is unloaded (see windrv_unload()).
 	 */
 
 	if (IoAllocateDriverObjectExtension(drv, (void *)1,
 	    sizeof(ndis_miniport_characteristics), (void **)&ch) !=
 	    STATUS_SUCCESS) {
 		return (NDIS_STATUS_RESOURCES);
 	}
 
 	bzero((char *)ch, sizeof(ndis_miniport_characteristics));
 
 	bcopy((char *)characteristics, (char *)ch, len);
 
 	if (ch->nmc_version_major < 5 || ch->nmc_version_minor < 1) {
 		ch->nmc_shutdown_handler = NULL;
 		ch->nmc_canceltxpkts_handler = NULL;
 		ch->nmc_pnpevent_handler = NULL;
 	}
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Allocate `len` bytes from NonPagedPool with the given pool tag and
  * store the address in *vaddr. Returns NDIS_STATUS_SUCCESS, or
  * NDIS_STATUS_RESOURCES (leaving *vaddr untouched) when the
  * allocation fails.
  */
 static ndis_status
 NdisAllocateMemoryWithTag(vaddr, len, tag)
 	void			**vaddr;
 	uint32_t		len;
 	uint32_t		tag;
 {
 	void			*buf;
 
 	buf = ExAllocatePoolWithTag(NonPagedPool, len, tag);
 	if (buf != NULL) {
 		*vaddr = buf;
 		return (NDIS_STATUS_SUCCESS);
 	}
 
 	return (NDIS_STATUS_RESOURCES);
 }
 
 /*
  * Allocate len bytes of nonpaged pool with a tag of 0.
  *
  * flags and highaddr are accepted but not consulted. Stores the buffer
  * in *vaddr and returns NDIS_STATUS_SUCCESS, or returns
  * NDIS_STATUS_RESOURCES (leaving *vaddr alone) when the allocation
  * fails.
  */
 static ndis_status
 NdisAllocateMemory(vaddr, len, flags, highaddr)
 	void			**vaddr;
 	uint32_t		len;
 	uint32_t		flags;
 	ndis_physaddr		highaddr;
 {
 	void			*chunk = ExAllocatePoolWithTag(NonPagedPool, len, 0);
 
 	if (chunk == NULL) {
 		return (NDIS_STATUS_RESOURCES);
 	}
 
 	*vaddr = chunk;
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Return a buffer to the pool. A length of zero is treated as
  * "nothing to free" and the pointer is not touched; flags is ignored.
  */
 static void
 NdisFreeMemory(vaddr, len, flags)
 	void			*vaddr;
 	uint32_t		len;
 	uint32_t		flags;
 {
 	if (len != 0)
 		ExFreePool(vaddr);
 }
 
 /*
  * Record per-adapter attributes in the miniport block.
  *
  * adapter_handle is the miniport block itself. The driver's adapter
  * context (needed later when calling into the driver), the
  * check-for-hang interval and the attribute flags are stored; iftype
  * is not used. Always returns NDIS_STATUS_SUCCESS.
  */
 static ndis_status
 NdisMSetAttributesEx(adapter_handle, adapter_ctx, hangsecs,
 			flags, iftype)
 	ndis_handle			adapter_handle;
 	ndis_handle			adapter_ctx;
 	uint32_t			hangsecs;
 	uint32_t			flags;
 	ndis_interface_type		iftype;
 {
 	ndis_miniport_block		*blk = (ndis_miniport_block *)adapter_handle;
 
 	blk->nmb_flags = flags;
 	blk->nmb_checkforhangsecs = hangsecs;
 	blk->nmb_miniportadapterctx = adapter_ctx;
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Open a configuration handle. The wrapper context is handed back
  * unchanged as the configuration handle and the call always reports
  * NDIS_STATUS_SUCCESS.
  */
 static void
 NdisOpenConfiguration(status, cfg, wrapctx)
 	ndis_status		*status;
 	ndis_handle		*cfg;
 	ndis_handle		wrapctx;
 {
 	*cfg = wrapctx;
 	*status = NDIS_STATUS_SUCCESS;
 }
 
 /*
  * Open a named subkey. The subkey name is ignored: the parent handle
  * is returned as the subkey handle and the call always reports
  * NDIS_STATUS_SUCCESS.
  */
 static void
 NdisOpenConfigurationKeyByName(status, cfg, subkey, subhandle)
 	ndis_status		*status;
 	ndis_handle		cfg;
 	unicode_string		*subkey;
 	ndis_handle		*subhandle;
 {
 	*subhandle = cfg;
 	*status = NDIS_STATUS_SUCCESS;
 }
 
 /*
  * Open a subkey by index. Not supported here: the call always reports
  * NDIS_STATUS_FAILURE and writes nothing to subkey or subhandle.
  */
 static void
 NdisOpenConfigurationKeyByIndex(status, cfg, idx, subkey, subhandle)
 	ndis_status		*status;
 	ndis_handle		cfg;
 	uint32_t		idx;
 	unicode_string		*subkey;
 	ndis_handle		*subhandle;
 {
 	*status = NDIS_STATUS_FAILURE;
 }
 
 static ndis_status
 ndis_encode_parm(block, oid, type, parm)
 	ndis_miniport_block	*block;
 	struct sysctl_oid	*oid;
 	ndis_parm_type		type;
 	ndis_config_parm	**parm;
 {
 	ndis_config_parm	*p;
 	ndis_parmlist_entry	*np;
 	unicode_string		*us;
 	ansi_string		as;
 	int			base = 0;
 	uint32_t		val;
 	char			tmp[32];
 
 	np = ExAllocatePoolWithTag(NonPagedPool,
 	    sizeof(ndis_parmlist_entry), 0);
 	if (np == NULL)
 		return (NDIS_STATUS_RESOURCES);
 	InsertHeadList((&block->nmb_parmlist), (&np->np_list));
 	*parm = p = &np->np_parm;
 
 	switch(type) {
 	case ndis_parm_string:
 		/* See if this might be a number. */
 		val = strtoul((char *)oid->oid_arg1, NULL, 10);
 		us = &p->ncp_parmdata.ncp_stringdata;
 		p->ncp_type = ndis_parm_string;
 		if (val) {
 			snprintf(tmp, 32, "%x", val);
 			RtlInitAnsiString(&as, tmp);
 		} else {
 			RtlInitAnsiString(&as, (char *)oid->oid_arg1);
 		}
 
 		if (RtlAnsiStringToUnicodeString(us, &as, TRUE)) {
 			ExFreePool(np);
 			return (NDIS_STATUS_RESOURCES);
 		}
 		break;
 	case ndis_parm_int:
 		if (strncmp((char *)oid->oid_arg1, "0x", 2) == 0)
 			base = 16;
 		else
 			base = 10;
 		p->ncp_type = ndis_parm_int;
 		p->ncp_parmdata.ncp_intdata =
 		    strtol((char *)oid->oid_arg1, NULL, base);
 		break;
 	case ndis_parm_hexint:
 #ifdef notdef
 		if (strncmp((char *)oid->oid_arg1, "0x", 2) == 0)
 			base = 16;
 		else
 			base = 10;
 #endif
 		base = 16;
 		p->ncp_type = ndis_parm_hexint;
 		p->ncp_parmdata.ncp_intdata =
 		    strtoul((char *)oid->oid_arg1, NULL, base);
 		break;
 	default:
 		return (NDIS_STATUS_FAILURE);
 		break;
 	}
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Look up a configuration ("registry") key for a driver.
  *
  * cfg is the miniport block; key is the name wanted and type the form
  * the driver wants the value in. Keys are backed by the device's
  * sysctl context and matched case-insensitively by name.
  *
  * *status is set to:
  *   NDIS_STATUS_FAILURE    for an empty or NULL key, a key whose stored
  *                          value is the literal "UNSET", or a key that
  *                          did not exist (see below);
  *   NDIS_STATUS_RESOURCES  if the key name cannot be converted;
  *   otherwise whatever ndis_encode_parm() returns, with *parm filled
  *   in by it.
  */
 static void
 NdisReadConfiguration(status, parm, cfg, key, type)
 	ndis_status		*status;
 	ndis_config_parm	**parm;
 	ndis_handle		cfg;
 	unicode_string		*key;
 	ndis_parm_type		type;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)cfg;
 	struct ndis_softc	*softc;
 	struct sysctl_ctx_entry	*ent;
 	struct sysctl_oid	*node;
 	ansi_string		akey;
 	char			*name = NULL;
 
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	if (key->us_len == 0 || key->us_buf == NULL) {
 		*status = NDIS_STATUS_FAILURE;
 		return;
 	}
 
 	if (RtlUnicodeStringToAnsiString(&akey, key, TRUE)) {
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	name = akey.as_buf;
 
 	/* First choice: a key that is already registered for the device. */
 	TAILQ_FOREACH(ent, device_get_sysctl_ctx(softc->ndis_dev), link) {
 		node = ent->entry;
 		if (strcasecmp(node->oid_name, name) != 0)
 			continue;
 
 		/* "UNSET" is the placeholder for "known, but no value". */
 		if (strcmp((char *)node->oid_arg1, "UNSET") == 0)
 			*status = NDIS_STATUS_FAILURE;
 		else
 			*status = ndis_encode_parm(blk, node, type, parm);
 
 		RtlFreeAnsiString(&akey);
 		return;
 	}
 
 	/*
 	 * No such key. Drivers sometimes ask for keys that are not in
 	 * their .INF file and would normally be created by a vendor
 	 * utility, so register the key dynamically to make it visible
 	 * and settable later. It is created with the value "UNSET"
 	 * whatever type was requested (only the description differs),
 	 * and this lookup still fails.
 	 */
 	ndis_add_sysctl(softc, name,
 	    (type == ndis_parm_int || type == ndis_parm_hexint) ?
 	    "(dynamic integer key)" : "(dynamic string key)",
 	    "UNSET", CTLFLAG_RW);
 
 	RtlFreeAnsiString(&akey);
 	*status = NDIS_STATUS_FAILURE;
 }
 
 /*
  * Render an ndis_config_parm as text into val.
  *
  * Strings are converted from Unicode and copied without a terminating
  * NUL, so the caller must hand in a zero-filled buffer. Integers are
  * printed in decimal, hex integers as bare hex digits. block is not
  * used.
  *
  * NOTE(review): val carries no length, so the string copy and both
  * sprintf() calls trust the caller's buffer to be large enough. The
  * caller visible in this file passes a zeroed 256-byte array; confirm
  * that bound holds for every value a driver can supply.
  *
  * Returns NDIS_STATUS_SUCCESS, NDIS_STATUS_RESOURCES if the string
  * conversion fails, or NDIS_STATUS_FAILURE for an unknown type.
  */
 static ndis_status
 ndis_decode_parm(block, parm, val)
 	ndis_miniport_block	*block;
 	ndis_config_parm	*parm;
 	char			*val;
 {
 	unicode_string		*ustr;
 	ansi_string		as;
 
 	switch(parm->ncp_type) {
 	case ndis_parm_string:
 		ustr = &parm->ncp_parmdata.ncp_stringdata;
 		if (RtlUnicodeStringToAnsiString(&as, ustr, TRUE))
 			return (NDIS_STATUS_RESOURCES);
 		bcopy(as.as_buf, val, as.as_len);
 		RtlFreeAnsiString(&as);
 		break;
 	case ndis_parm_int:
 		sprintf(val, "%d", parm->ncp_parmdata.ncp_intdata);
 		break;
 	case ndis_parm_hexint:
 		/* Plain "%x": "%xu" would append a literal 'u'. */
 		sprintf(val, "%x", parm->ncp_parmdata.ncp_intdata);
 		break;
 	default:
 		return (NDIS_STATUS_FAILURE);
 	}
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Store a configuration value supplied by a driver.
  *
  * cfg is the miniport block. The parameter is rendered to text with
  * ndis_decode_parm(); if a sysctl node with the same name
  * (case-insensitive) already exists its value buffer is overwritten,
  * otherwise a new read/write node is created with that value.
  *
  * *status is NDIS_STATUS_RESOURCES if the key name cannot be
  * converted, the ndis_decode_parm() error if decoding fails, and
  * NDIS_STATUS_SUCCESS otherwise.
  */
 static void
 NdisWriteConfiguration(status, cfg, key, parm)
 	ndis_status		*status;
 	ndis_handle		cfg;
 	unicode_string		*key;
 	ndis_config_parm	*parm;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)cfg;
 	struct ndis_softc	*softc;
 	struct sysctl_ctx_entry	*ent;
 	struct sysctl_oid	*node;
 	ansi_string		akey;
 	char			*name = NULL;
 	char			text[256];
 
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	if (RtlUnicodeStringToAnsiString(&akey, key, TRUE)) {
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	name = akey.as_buf;
 
 	/* Render the value; the buffer is zeroed first. */
 	bzero(text, sizeof(text));
 	*status = ndis_decode_parm(blk, parm, text);
 	if (*status != NDIS_STATUS_SUCCESS)
 		goto done;
 
 	/* Overwrite in place when the key is already known. */
 	TAILQ_FOREACH(ent, device_get_sysctl_ctx(softc->ndis_dev), link) {
 		node = ent->entry;
 		if (strcasecmp(node->oid_name, name) != 0)
 			continue;
 		strcpy((char *)node->oid_arg1, text);
 		goto done;
 	}
 
 	/* Otherwise create the key with this value. */
 	ndis_add_sysctl(softc, name, "(dynamically set key)",
 		    text, CTLFLAG_RW);
 	*status = NDIS_STATUS_SUCCESS;
 
 done:
 	RtlFreeAnsiString(&akey);
 }
 
 /*
  * Close a configuration handle.
  *
  * cfg is the miniport block. Every parameter entry queued on
  * nmb_parmlist by ndis_encode_parm() is unlinked and released; string
  * parameters also have their Unicode buffer freed.
  */
 static void
 NdisCloseConfiguration(cfg)
 	ndis_handle		cfg;
 {
 	list_entry		*e;
 	ndis_parmlist_entry	*pe;
 	ndis_miniport_block	*block;
 	ndis_config_parm	*p;
 
 	block = (ndis_miniport_block *)cfg;
 
 	while (!IsListEmpty(&block->nmb_parmlist)) {
 		e = RemoveHeadList(&block->nmb_parmlist);
 		pe = CONTAINING_RECORD(e, ndis_parmlist_entry, np_list);
 		p = &pe->np_parm;
 		if (p->ncp_type == ndis_parm_string)
 			RtlFreeUnicodeString(&p->ncp_parmdata.ncp_stringdata);
 		/*
 		 * Free the allocation itself, not the embedded list
 		 * node, so this stays correct wherever np_list sits
 		 * inside the entry.
 		 */
 		ExFreePool(pe);
 	}
 }
 
 /*
  * Initialize a Windows spinlock.
  *
  * Sets up the embedded kernel spinlock and clears nsl_kirql, the field
  * NdisAcquireSpinLock() later uses to save the previous IRQL.
  */
 static void
 NdisAllocateSpinLock(lock)
 	ndis_spin_lock		*lock;
 {
 	KeInitializeSpinLock(&lock->nsl_spinlock);
 	lock->nsl_kirql = 0;
 }
 
 /*
  * Destroy a Windows spinlock. This is deliberately a no-op, for two
  * reasons. One is that it's sort of superfluous: we don't have to do
  * anything special to deallocate the spinlock. The other is that there
  * are some buggy drivers which call NdisFreeSpinLock() _after_ calling
  * NdisFreeMemory() on the block of memory in which the spinlock
  * resides (yes, ADMtek, I'm talking to you), so touching *lock here
  * would write to freed memory. The disabled code below is kept only to
  * show what a reset would look like.
  */
 static void
 NdisFreeSpinLock(lock)
 	ndis_spin_lock		*lock;
 {
 #ifdef notdef
 	KeInitializeSpinLock(&lock->nsl_spinlock);
 	lock->nsl_kirql = 0;
 #endif
 }
 
 /*
  * Acquire a spinlock from IRQL <= DISPATCH_LEVEL.
  *
  * The previous IRQL reported by KeAcquireSpinLock() is saved in the
  * lock's nsl_kirql field for NdisReleaseSpinLock() to hand back.
  */
 
 static void
 NdisAcquireSpinLock(lock)
 	ndis_spin_lock		*lock;
 {
 	KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql);
 }
 
 /*
  * Release a spinlock from IRQL == DISPATCH_LEVEL.
  *
  * The IRQL value saved in nsl_kirql by NdisAcquireSpinLock() is passed
  * back to KeReleaseSpinLock().
  */
 
 static void
 NdisReleaseSpinLock(lock)
 	ndis_spin_lock		*lock;
 {
 	KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql);
 }
 
 /*
  * Acquire a spinlock when already running at IRQL == DISPATCH_LEVEL.
  * No IRQL is saved: nsl_kirql is not touched on this path.
  */
 static void
 NdisDprAcquireSpinLock(lock)
 	ndis_spin_lock		*lock;
 {
 	KeAcquireSpinLockAtDpcLevel(&lock->nsl_spinlock);
 }
 
 /*
  * Release a spinlock without leaving IRQL == DISPATCH_LEVEL.
  * Pairs with NdisDprAcquireSpinLock(); nsl_kirql is not consulted.
  */
 static void
 NdisDprReleaseSpinLock(lock)
 	ndis_spin_lock		*lock;
 {
 	KeReleaseSpinLockFromDpcLevel(&lock->nsl_spinlock);
 }
 
 /*
  * Initialize a read/write lock: set up its spinlock and zero the
  * reserved area. The acquire/release routines below use nrl_rsvd[0]
  * and nrl_rsvd[1] as writer and reader counters respectively.
  */
 static void
 NdisInitializeReadWriteLock(lock)
 	ndis_rw_lock		*lock;
 {
 	KeInitializeSpinLock(&lock->nrl_spinlock);
 	bzero((char *)&lock->nrl_rsvd, sizeof(lock->nrl_rsvd));
 }
 
 /*
  * Acquire a read/write lock.
  *
  * Only a request with writeacc == TRUE takes the spinlock (saving the
  * old IRQL in state->nls_oldirql) and bumps the writer count in
  * nrl_rsvd[0]. Any other request just bumps the reader count in
  * nrl_rsvd[1] without taking the spinlock.
  */
 static void
 NdisAcquireReadWriteLock(ndis_rw_lock *lock, uint8_t writeacc,
     ndis_lock_state *state)
 {
 	if (writeacc != TRUE) {
 		lock->nrl_rsvd[1]++;
 		return;
 	}
 
 	KeAcquireSpinLock(&lock->nrl_spinlock, &state->nls_oldirql);
 	lock->nrl_rsvd[0]++;
 }
 
 /*
  * Release a read/write lock.
  *
  * If the writer count (nrl_rsvd[0]) is non-zero the release is taken
  * to be a writer's: the count is dropped and the spinlock released
  * with the IRQL saved in state. Otherwise the reader count
  * (nrl_rsvd[1]) is dropped.
  */
 static void
 NdisReleaseReadWriteLock(lock, state)
 	ndis_rw_lock		*lock;
 	ndis_lock_state		*state;
 {
 	if (lock->nrl_rsvd[0] == 0) {
 		lock->nrl_rsvd[1]--;
 		return;
 	}
 
 	lock->nrl_rsvd[0]--;
 	KeReleaseSpinLock(&lock->nrl_spinlock, state->nls_oldirql);
 }
 
 /*
  * Read len bytes of PCI configuration space, starting at offset, into
  * buf one byte at a time. slot is ignored; the device is the one
  * behind the adapter's physical device object.
  *
  * Returns len, or 0 if adapter is NULL.
  */
 static uint32_t
 NdisReadPciSlotInformation(adapter, slot, offset, buf, len)
 	ndis_handle		adapter;
 	uint32_t		slot;
 	uint32_t		offset;
 	void			*buf;
 	uint32_t		len;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)adapter;
 	char			*out = buf;
 	device_t		pcidev;
 	int			n;
 
 	if (blk == NULL)
 		return (0);
 
 	pcidev = blk->nmb_physdeviceobj->do_devext;
 
 	/*
 	 * The 1us pause before every access is deliberate. On an SMP
 	 * test system (Sun w2100z, dual 2.4GHz Opteron, Atheros
 	 * 802.11a/b/g "Aries" miniPCI NIC behind a miniPCI-to-PCI
 	 * adapter) a long run of back-to-back calls to this routine
 	 * produced sudden resets or freezes, suspected to be a fatal
 	 * PCI bus error escalating to a machine check. The delay seems
 	 * to prevent the problem.
 	 */
 	for (n = 0; n < len; n++) {
 		DELAY(1);
 		out[n] = pci_read_config(pcidev, n + offset, 1);
 	}
 
 	return (len);
 }
 
 /*
  * Write len bytes from buf into PCI configuration space starting at
  * offset, one byte at a time with a 1us pause before each access.
  * slot is ignored.
  *
  * Returns len, or 0 if adapter is NULL.
  */
 static uint32_t
 NdisWritePciSlotInformation(adapter, slot, offset, buf, len)
 	ndis_handle		adapter;
 	uint32_t		slot;
 	uint32_t		offset;
 	void			*buf;
 	uint32_t		len;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)adapter;
 	char			*in = buf;
 	device_t		pcidev;
 	int			n;
 
 	if (blk == NULL)
 		return (0);
 
 	pcidev = blk->nmb_physdeviceobj->do_devext;
 
 	for (n = 0; n < len; n++) {
 		DELAY(1);
 		pci_write_config(pcidev, n + offset, in[n], 1);
 	}
 
 	return (len);
 }
 
 /*
  * The errorlog routine uses a variable argument list, so we
  * have to declare it this way.
  *
  * Reports an NDIS error code on the console with device_printf().
  * When the interface exists and has IFF_DEBUG set, the code is first
  * looked up with pe_get_message() against the driver image to obtain
  * a message string, and afterwards the numerrors variadic arguments
  * are printed as pointers. Without IFF_DEBUG only the numeric code is
  * printed, with the text "unknown error".
  */
 
 #define ERRMSGLEN 512
 static void
 NdisWriteErrorLogEntry(ndis_handle adapter, ndis_error_code code,
 	uint32_t numerrors, ...)
 {
 	ndis_miniport_block	*block;
 	va_list			ap;
 	int			i, error;
 	char			*str = NULL;
 	uint16_t		flags;
 	device_t		dev;
 	driver_object		*drv;
 	struct ndis_softc	*sc;
 	struct ifnet		*ifp;
 	unicode_string		us;
 	/* Zero length doubles as the "nothing to free" marker below. */
 	ansi_string		as = { 0, 0, NULL };
 
 	block = (ndis_miniport_block *)adapter;
 	dev = block->nmb_physdeviceobj->do_devext;
 	drv = block->nmb_deviceobj->do_drvobj;
 	sc = device_get_softc(dev);
 	ifp = sc->ifp;
 
 	if (ifp != NULL && ifp->if_flags & IFF_DEBUG) {
 		error = pe_get_message((vm_offset_t)drv->dro_driverstart,
 		    code, &str, &i, &flags);
 		if (error == 0) {
 			/*
 			 * A Unicode message has to be converted before
 			 * it can be printed with %s; if the conversion
 			 * fails, fall back to "unknown error".
 			 */
 			if (flags & MESSAGE_RESOURCE_UNICODE) {
 				RtlInitUnicodeString(&us, (uint16_t *)str);
 				if (RtlUnicodeStringToAnsiString(&as,
 				    &us, TRUE) == STATUS_SUCCESS)
 					str = as.as_buf;
 				else
 					str = NULL;
 			}
 		}
 	}
 
 	device_printf(dev, "NDIS ERROR: %x (%s)\n", code,
 	    str == NULL ? "unknown error" : str);
 
 	if (ifp != NULL && ifp->if_flags & IFF_DEBUG) {
 		device_printf(dev, "NDIS NUMERRORS: %x\n", numerrors);
 		va_start(ap, numerrors);
 		for (i = 0; i < numerrors; i++)
 			device_printf(dev, "argptr: %p\n",
 			    va_arg(ap, void *));
 		va_end(ap);
 	}
 
 	/* Only release the converted string if one was produced. */
 	if (as.as_len)
 		RtlFreeAnsiString(&as);
 }
 
 /*
  * bus_dmamap_load() callback for NdisMStartBufferPhysicalMapping().
  *
  * arg is a struct ndis_map_arg. On success every DMA segment's
  * address and length is copied into the caller's fragment list and
  * the segment count is recorded in nma_cnt. On error nothing is
  * written.
  */
 static void
 ndis_map_cb(arg, segs, nseg, error)
 	void			*arg;
 	bus_dma_segment_t	*segs;
 	int			nseg;
 	int			error;
 {
 	struct ndis_map_arg	*ma = arg;
 	int			seg;
 
 	if (error != 0)
 		return;
 
 	for (seg = 0; seg < nseg; seg++) {
 		ma->nma_fraglist[seg].npu_physaddr.np_quad =
 		    segs[seg].ds_addr;
 		ma->nma_fraglist[seg].npu_len = segs[seg].ds_len;
 	}
 
 	ma->nma_cnt = nseg;
 }
 
 /*
  * Map a buffer for DMA using one of the adapter's map registers.
  *
  * mapreg selects the DMA map (allocated by
  * NdisMAllocateMapRegisters()), addrarray receives one entry per DMA
  * segment and *arraysize the number of entries written. writedev
  * selects a PREWRITE (non-zero) or PREREAD sync.
  *
  * The function returns silently, leaving *arraysize untouched, if
  * adapter is NULL, mapreg is out of range, or the load fails.
  */
 static void
 NdisMStartBufferPhysicalMapping(ndis_handle adapter, ndis_buffer *buf,
     uint32_t mapreg, uint8_t writedev, ndis_paddr_unit *addrarray,
     uint32_t *arraysize)
 {
 	ndis_miniport_block	*block;
 	struct ndis_softc	*sc;
 	struct ndis_map_arg	nma;
 	bus_dmamap_t		map;
 	int			error;
 
 	if (adapter == NULL)
 		return;
 
 	block = (ndis_miniport_block *)adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	/*
 	 * ndis_mmaps holds ndis_mmapcnt entries, so the valid indices
 	 * are 0 .. ndis_mmapcnt - 1; an index equal to the count is
 	 * already out of bounds.
 	 */
 	if (mapreg >= sc->ndis_mmapcnt)
 		return;
 
 	map = sc->ndis_mmaps[mapreg];
 	nma.nma_fraglist = addrarray;
 	/* The callback leaves nma_cnt alone on error; start from zero. */
 	nma.nma_cnt = 0;
 
 	error = bus_dmamap_load(sc->ndis_mtag, map,
 	    MmGetMdlVirtualAddress(buf), MmGetMdlByteCount(buf), ndis_map_cb,
 	    (void *)&nma, BUS_DMA_NOWAIT);
 
 	if (error)
 		return;
 
 	bus_dmamap_sync(sc->ndis_mtag, map,
 	    writedev ? BUS_DMASYNC_PREWRITE : BUS_DMASYNC_PREREAD);
 
 	*arraysize = nma.nma_cnt;
 }
 
 /*
  * Finish a DMA mapping started with NdisMStartBufferPhysicalMapping():
  * sync the map for both POSTREAD and POSTWRITE and unload it.
  *
  * Does nothing if adapter is NULL or mapreg is out of range. buf is
  * not used.
  */
 static void
 NdisMCompleteBufferPhysicalMapping(adapter, buf, mapreg)
 	ndis_handle		adapter;
 	ndis_buffer		*buf;
 	uint32_t		mapreg;
 {
 	ndis_miniport_block	*block;
 	struct ndis_softc	*sc;
 	bus_dmamap_t		map;
 
 	if (adapter == NULL)
 		return;
 
 	block = (ndis_miniport_block *)adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	/* Valid indices are 0 .. ndis_mmapcnt - 1. */
 	if (mapreg >= sc->ndis_mmapcnt)
 		return;
 
 	map = sc->ndis_mmaps[mapreg];
 
 	bus_dmamap_sync(sc->ndis_mtag, map,
 	    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 
 	bus_dmamap_unload(sc->ndis_mtag, map);
 }
 
 /*
  * This is an older (?) timer init routine which doesn't
  * accept a miniport context handle. Serialized miniports should
  * never call this function.
  *
  * The driver's function and context are installed directly as the
  * timer's DPC (there is no intermediate callout as in
  * NdisMInitializeTimer()), and the DPC is given low importance.
  */
 
 static void
 NdisInitializeTimer(timer, func, ctx)
 	ndis_timer		*timer;
 	ndis_timer_function	func;
 	void			*ctx;
 {
 	KeInitializeTimer(&timer->nt_ktimer);
 	KeInitializeDpc(&timer->nt_kdpc, func, ctx);
 	KeSetImportanceDpc(&timer->nt_kdpc, KDPC_IMPORTANCE_LOW);
 }
 
 /*
  * Intermediate DPC installed by NdisMInitializeTimer().
  *
  * Forwards the timer expiry to the driver's own timer function
  * (nmt_timerfunc, called with nmt_timerctx). When NDIS_SERIALIZED()
  * is true for the miniport, the call is made with the block's
  * nmb_lock held.
  */
 static void
 ndis_timercall(dpc, timer, sysarg1, sysarg2)
 	kdpc			*dpc;
 	ndis_miniport_timer	*timer;
 	void			*sysarg1;
 	void			*sysarg2;
 {
 	/*
 	 * Since we're called as a DPC, we should be running
 	 * at DISPATCH_LEVEL here. This means to acquire the
 	 * spinlock, we can use KeAcquireSpinLockAtDpcLevel()
 	 * rather than KeAcquireSpinLock().
 	 */
 	if (NDIS_SERIALIZED(timer->nmt_block))
 		KeAcquireSpinLockAtDpcLevel(&timer->nmt_block->nmb_lock);
 
 	/* The driver routine uses the Microsoft calling convention. */
 	MSCALL4(timer->nmt_timerfunc, dpc, timer->nmt_timerctx,
 	    sysarg1, sysarg2);
 
 	if (NDIS_SERIALIZED(timer->nmt_block))
 		KeReleaseSpinLockFromDpcLevel(&timer->nmt_block->nmb_lock);
 }
 
 /*
  * For a long time I wondered why there were two NDIS timer initialization
  * routines, and why this one needed an NDIS_MINIPORT_TIMER and the
  * MiniportAdapterHandle. The NDIS_MINIPORT_TIMER has its own callout
  * function and context pointers separate from those in the DPC, which
  * allows for another level of indirection: when the timer fires, we
  * can have our own timer function invoked, and from there we can call
  * the driver's function. But why go to all that trouble? Then it hit
  * me: for serialized miniports, the timer callouts are not re-entrant.
  * By trapping the callouts and having access to the MiniportAdapterHandle,
  * we can protect the driver callouts by acquiring the NDIS serialization
  * lock. This is essential for allowing serialized miniports to work
  * correctly on SMP systems. On UP hosts, setting IRQL to DISPATCH_LEVEL
  * is enough to prevent other threads from pre-empting you, but with
  * SMP, you must acquire a lock as well, otherwise the other CPU is
  * free to clobber you.
  *
  * timer is the miniport timer to set up, handle the miniport block it
  * belongs to, and func/ctx the driver's callout and its argument.
  */
 static void
 NdisMInitializeTimer(timer, handle, func, ctx)
 	ndis_miniport_timer	*timer;
 	ndis_handle		handle;
 	ndis_timer_function	func;
 	void			*ctx;
 {
 	/* Save the driver's funcptr and context */
 
 	timer->nmt_timerfunc = func;
 	timer->nmt_timerctx = ctx;
 	timer->nmt_block = handle;
 
 	/*
 	 * Set up the timer so it will call our intermediate DPC.
 	 * Be sure to use the wrapped entry point, since
 	 * ntoskrnl_run_dpc() expects to invoke a function with
 	 * Microsoft calling conventions.
 	 */
 	KeInitializeTimer(&timer->nmt_ktimer);
 	KeInitializeDpc(&timer->nmt_kdpc,
 	    ndis_findwrap((funcptr)ndis_timercall), timer);
 	timer->nmt_ktimer.k_dpc = &timer->nmt_kdpc;
 }
 
 /*
  * In Windows, there's both an NdisMSetTimer() and an NdisSetTimer(),
  * but the former is just a macro wrapper around the latter.
  *
  * Arms the timer to fire once, msecs milliseconds from now, running
  * the DPC stored in the timer.
  */
 static void
 NdisSetTimer(timer, msecs)
 	ndis_timer		*timer;
 	uint32_t		msecs;
 {
 	/*
 	 * KeSetTimer() wants its time in hundred nanosecond
 	 * intervals (1ms == 10000 of them). The multiplication is
 	 * done in 64 bits and the result is passed negated.
 	 */
 	KeSetTimer(&timer->nt_ktimer,
 	    ((int64_t)msecs * -10000), &timer->nt_kdpc);
 }
 
 /*
  * Arm a miniport timer via KeSetTimerEx(). The first argument after
  * the timer is msecs converted to (negated) hundred nanosecond units,
  * as in NdisSetTimer(); msecs itself is passed unconverted as the
  * following argument.
  */
 static void
 NdisMSetPeriodicTimer(timer, msecs)
 	ndis_miniport_timer	*timer;
 	uint32_t		msecs;
 {
 	KeSetTimerEx(&timer->nmt_ktimer,
 	    ((int64_t)msecs * -10000), msecs, &timer->nmt_kdpc);
 }
 
 /*
  * Technically, this is really NdisCancelTimer(), but we also
  * (ab)use it for NdisMCancelTimer(), since in our implementation
  * we don't need the extra info in the ndis_miniport_timer
  * structure just to cancel a timer.
  *
  * The result of KeCancelTimer() is stored in *cancelled.
  */
 
 static void
 NdisMCancelTimer(timer, cancelled)
 	ndis_timer		*timer;
 	uint8_t			*cancelled;
 {
 
 	*cancelled = KeCancelTimer(&timer->nt_ktimer);
 }
 
 /*
  * Copy the adapter's resource list into a driver-supplied buffer.
  *
  * The size needed is one ndis_resource_list plus one partial resource
  * descriptor for each resource beyond the first. If *buflen is too
  * small, *buflen is updated to the size needed and *status is set to
  * NDIS_STATUS_INVALID_LENGTH; otherwise the list is copied and
  * *status is NDIS_STATUS_SUCCESS.
  */
 static void
 NdisMQueryAdapterResources(status, adapter, list, buflen)
 	ndis_status		*status;
 	ndis_handle		adapter;
 	ndis_resource_list	*list;
 	uint32_t		*buflen;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)adapter;
 	struct ndis_softc	*softc;
 	int			needed;
 
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	needed = sizeof(ndis_resource_list) +
 	    (sizeof(cm_partial_resource_desc) * (softc->ndis_rescnt - 1));
 
 	if (*buflen >= needed) {
 		bcopy((char *)blk->nmb_rlist, (char *)list, needed);
 		*status = NDIS_STATUS_SUCCESS;
 		return;
 	}
 
 	/* Too small: tell the caller how much room is required. */
 	*buflen = needed;
 	*status = NDIS_STATUS_INVALID_LENGTH;
 }
 
 /*
  * Hand a driver the base of the adapter's I/O port resource.
  *
  * port is not consulted. Returns NDIS_STATUS_FAILURE if adapter is
  * NULL or the device has no I/O resource, NDIS_STATUS_INVALID_LENGTH
  * if the driver asks for more ports than the resource covers, and
  * otherwise stores the resource's start in *offset and returns
  * NDIS_STATUS_SUCCESS.
  */
 static ndis_status
 NdisMRegisterIoPortRange(offset, adapter, port, numports)
 	void			**offset;
 	ndis_handle		adapter;
 	uint32_t		port;
 	uint32_t		numports;
 {
 	struct ndis_miniport_block	*blk;
 	struct ndis_softc	*softc;
 
 	if (adapter == NULL)
 		return (NDIS_STATUS_FAILURE);
 
 	blk = (ndis_miniport_block *)adapter;
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	if (softc->ndis_res_io != NULL) {
 		/* Don't let the device map more ports than we have. */
 		if (rman_get_size(softc->ndis_res_io) < numports)
 			return (NDIS_STATUS_INVALID_LENGTH);
 
 		*offset = (void *)rman_get_start(softc->ndis_res_io);
 		return (NDIS_STATUS_SUCCESS);
 	}
 
 	return (NDIS_STATUS_FAILURE);
 }
 
 /*
  * Counterpart of NdisMRegisterIoPortRange(). Nothing is done here:
  * all arguments are ignored.
  */
 static void
 NdisMDeregisterIoPortRange(adapter, port, numports, offset)
 	ndis_handle		adapter;
 	uint32_t		port;
 	uint32_t		numports;
 	void			*offset;
 {
 }
 
 /*
  * Report the interface's link-level address to the driver.
  *
  * *status is NDIS_STATUS_FAILURE when there is no ifnet yet, when it
  * has no address attached, or when the address is all zeroes.
  * Otherwise *addr points at the interface's link-level address,
  * *addrlen is ETHER_ADDR_LEN and *status is NDIS_STATUS_SUCCESS.
  */
 static void
 NdisReadNetworkAddress(status, addr, addrlen, adapter)
 	ndis_status		*status;
 	void			**addr;
 	uint32_t		*addrlen;
 	ndis_handle		adapter;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)adapter;
 	struct ndis_softc	*softc;
 	uint8_t			zeromac[] = { 0, 0, 0, 0, 0, 0 };
 
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	if (softc->ifp != NULL && softc->ifp->if_addr != NULL &&
 	    bcmp(IF_LLADDR(softc->ifp), zeromac, ETHER_ADDR_LEN) != 0) {
 		*addr = IF_LLADDR(softc->ifp);
 		*addrlen = ETHER_ADDR_LEN;
 		*status = NDIS_STATUS_SUCCESS;
 		return;
 	}
 
 	*status = NDIS_STATUS_FAILURE;
 }
 
 /*
  * Report how many map registers are available. bustype is ignored:
  * a fixed count of 8192 is always reported, with
  * NDIS_STATUS_SUCCESS.
  */
 static ndis_status
 NdisQueryMapRegisterCount(bustype, cnt)
 	uint32_t		bustype;
 	uint32_t		*cnt;
 {
 	*cnt = 8192;
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Allocate the adapter's "map registers": an array of physmapneeded
  * DMA maps plus the DMA tag they are created from. maxmap bounds the
  * size of a single segment; dmachannel and dmasize are not used.
  *
  * Returns NDIS_STATUS_SUCCESS with sc->ndis_mmaps, sc->ndis_mtag and
  * sc->ndis_mmapcnt set up, or NDIS_STATUS_RESOURCES if anything could
  * not be allocated. On failure everything obtained so far is released
  * and the softc pointers are cleared, so a later
  * NdisMFreeMapRegisters() cannot free the same memory twice.
  */
 static ndis_status
 NdisMAllocateMapRegisters(ndis_handle adapter, uint32_t dmachannel,
     uint8_t dmasize, uint32_t physmapneeded, uint32_t maxmap)
 {
 	struct ndis_softc	*sc;
 	ndis_miniport_block	*block;
 	int			error, i, nseg = NDIS_MAXSEG;
 
 	block = (ndis_miniport_block *)adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	sc->ndis_mmaps = malloc(sizeof(bus_dmamap_t) * physmapneeded,
 	    M_DEVBUF, M_NOWAIT|M_ZERO);
 
 	if (sc->ndis_mmaps == NULL)
 		return (NDIS_STATUS_RESOURCES);
 
 	error = bus_dma_tag_create(sc->ndis_parent_tag, ETHER_ALIGN, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
 	    NULL, maxmap * nseg, nseg, maxmap, BUS_DMA_ALLOCNOW,
 	    NULL, NULL, &sc->ndis_mtag);
 
 	if (error)
 		goto fail_array;
 
 	for (i = 0; i < physmapneeded; i++) {
 		error = bus_dmamap_create(sc->ndis_mtag, 0,
 		    &sc->ndis_mmaps[i]);
 		if (error)
 			goto fail_maps;
 	}
 
 	sc->ndis_mmapcnt = physmapneeded;
 
 	return (NDIS_STATUS_SUCCESS);
 
 fail_maps:
 	/* Undo only the maps that were actually created. */
 	while (--i >= 0)
 		bus_dmamap_destroy(sc->ndis_mtag, sc->ndis_mmaps[i]);
 	bus_dma_tag_destroy(sc->ndis_mtag);
 	sc->ndis_mtag = NULL;
 fail_array:
 	free(sc->ndis_mmaps, M_DEVBUF);
 	sc->ndis_mmaps = NULL;
 
 	return (NDIS_STATUS_RESOURCES);
 }
 
 /*
  * Release what NdisMAllocateMapRegisters() set up: destroy each of
  * the sc->ndis_mmapcnt DMA maps, free the map array, then destroy
  * the DMA tag.
  */
 static void
 NdisMFreeMapRegisters(adapter)
 	ndis_handle		adapter;
 {
 	ndis_miniport_block	*blk = (ndis_miniport_block *)adapter;
 	struct ndis_softc	*softc;
 	int			n;
 
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	n = 0;
 	while (n < softc->ndis_mmapcnt) {
 		bus_dmamap_destroy(softc->ndis_mtag, softc->ndis_mmaps[n]);
 		n++;
 	}
 
 	free(softc->ndis_mmaps, M_DEVBUF);
 
 	bus_dma_tag_destroy(softc->ndis_mtag);
 }
 
 /*
  * bus_dmamap_load() callback for NdisMAllocateSharedMemory().
  *
  * arg is the ndis_physaddr to fill in. The address of the first
  * segment is stored only when the load succeeded and produced no
  * more than one segment; otherwise *arg is left untouched.
  */
 static void
 ndis_mapshared_cb(arg, segs, nseg, error)
 	void			*arg;
 	bus_dma_segment_t	*segs;
 	int			nseg;
 	int			error;
 {
 	ndis_physaddr		*pa = arg;
 
 	if (!error && nseg <= 1)
 		pa->np_quad = segs[0].ds_addr;
 }
 
 /*
  * This maps to bus_dmamem_alloc().
  *
  * Allocates len bytes of DMA-able memory for the adapter. On success
  * the kernel virtual address is stored in *vaddr, the bus address in
  * *paddr, and a tracking record is queued on sc->ndis_shlist so that
  * NdisMFreeSharedMemory() can find the region again. cached is not
  * used.
  *
  * There is no status return. If adapter is NULL or the tracking
  * record or DMA tag cannot be allocated, *vaddr and *paddr are not
  * written by this function. NOTE(review): after a later failure
  * *vaddr may have been written by bus_dmamem_alloc(); callers appear
  * to be expected to pre-initialise both outputs (as
  * ndis_asyncmem_complete() does) -- confirm for other callers.
  */
 
 static void
 NdisMAllocateSharedMemory(ndis_handle adapter, uint32_t len, uint8_t cached,
     void **vaddr, ndis_physaddr *paddr)
 {
 	ndis_miniport_block	*block;
 	struct ndis_softc	*sc;
 	struct ndis_shmem	*sh;
 	int			error;
 
 	if (adapter == NULL)
 		return;
 
 	block = (ndis_miniport_block *)adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	/* Tracking record for this region; owns the tag and the map. */
 	sh = malloc(sizeof(struct ndis_shmem), M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (sh == NULL)
 		return;
 
 	InitializeListHead(&sh->ndis_list);
 
 	/*
 	 * When performing shared memory allocations, create a tag
 	 * with a lowaddr limit that restricts physical memory mappings
 	 * so that they all fall within the first 1GB of memory.
 	 * At least one device/driver combination (Linksys Instant
 	 * Wireless PCI Card V2.7, Broadcom 802.11b) seems to have
 	 * problems with performing DMA operations with physical
 	 * addresses that lie above the 1GB mark. I don't know if this
 	 * is a hardware limitation or if the addresses are being
 	 * truncated within the driver, but this seems to be the only
 	 * way to make these cards work reliably in systems with more
 	 * than 1GB of physical memory.
 	 */
 
 	error = bus_dma_tag_create(sc->ndis_parent_tag, 64,
 	    0, NDIS_BUS_SPACE_SHARED_MAXADDR, BUS_SPACE_MAXADDR, NULL,
 	    NULL, len, 1, len, BUS_DMA_ALLOCNOW, NULL, NULL,
 	    &sh->ndis_stag);
 
 	if (error) {
 		free(sh, M_DEVBUF);
 		return;
 	}
 
 	error = bus_dmamem_alloc(sh->ndis_stag, vaddr,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sh->ndis_smap);
 
 	if (error) {
 		bus_dma_tag_destroy(sh->ndis_stag);
 		free(sh, M_DEVBUF);
 		return;
 	}
 
 	/* ndis_mapshared_cb() stores the bus address in *paddr. */
 	error = bus_dmamap_load(sh->ndis_stag, sh->ndis_smap, *vaddr,
 	    len, ndis_mapshared_cb, (void *)paddr, BUS_DMA_NOWAIT);
 
 	if (error) {
 		bus_dmamem_free(sh->ndis_stag, *vaddr, sh->ndis_smap);
 		bus_dma_tag_destroy(sh->ndis_stag);
 		free(sh, M_DEVBUF);
 		return;
 	}
 
 	/*
 	 * Save the physical address along with the source address.
 	 * The AirGo MIMO driver will call NdisMFreeSharedMemory()
 	 * with a bogus virtual address sometimes, but with a valid
 	 * physical address. To keep this from causing trouble, we
 	 * use the physical address as a sanity check in case
 	 * searching based on the virtual address fails.
 	 */
 
 	NDIS_LOCK(sc);
 	sh->ndis_paddr.np_quad = paddr->np_quad;
 	sh->ndis_saddr = *vaddr;
 	InsertHeadList((&sc->ndis_shlist), (&sh->ndis_list));
 	NDIS_UNLOCK(sc);
 }
 
 /*
  * Request record for a deferred shared memory allocation. Filled in
  * by NdisMAllocateSharedMemoryAsync() and consumed (then freed) by
  * ndis_asyncmem_complete().
  */
 struct ndis_allocwork {
 	uint32_t		na_len;		/* bytes to allocate */
 	uint8_t			na_cached;	/* caller's "cached" flag */
 	void			*na_ctx;	/* passed to the done handler */
 	io_workitem		*na_iw;		/* work item carrying this */
 };
 
 /*
  * Work item body for NdisMAllocateSharedMemoryAsync().
  *
  * Performs the allocation described by the ndis_allocwork in arg and
  * then calls the driver's allocate-complete handler with the result.
  * The virtual address starts out NULL and the physical address 0, so
  * that is what the driver is shown if the allocation fails before
  * writing them. The work item and the request record are released
  * afterwards.
  */
 static void
 ndis_asyncmem_complete(dobj, arg)
 	device_object		*dobj;
 	void			*arg;
 {
 	struct ndis_allocwork	*work = arg;
 	ndis_miniport_block	*blk;
 	struct ndis_softc	*softc;
 	ndis_allocdone_handler	done;
 	ndis_physaddr		pa;
 	void			*va = NULL;
 
 	blk = (ndis_miniport_block *)dobj->do_devext;
 	softc = device_get_softc(blk->nmb_physdeviceobj->do_devext);
 
 	pa.np_quad = 0;
 	done = softc->ndis_chars->nmc_allocate_complete_func;
 
 	NdisMAllocateSharedMemory(blk, work->na_len,
 	    work->na_cached, &va, &pa);
 	MSCALL5(done, blk, va, &pa, work->na_len, work->na_ctx);
 
 	IoFreeWorkItem(work->na_iw);
 	free(work, M_DEVBUF);
 }
 
 /*
  * Queue a shared memory allocation to be carried out later from a
  * work item; the driver is told the result through its
  * allocate-complete handler (see ndis_asyncmem_complete()).
  *
  * Returns NDIS_STATUS_PENDING once the request is queued, or
  * NDIS_STATUS_FAILURE if adapter is NULL or the work item or request
  * record cannot be allocated.
  */
 static ndis_status
 NdisMAllocateSharedMemoryAsync(ndis_handle adapter, uint32_t len,
     uint8_t cached, void *ctx)
 {
 	ndis_miniport_block	*block;
 	struct ndis_allocwork	*w;
 	io_workitem		*iw;
 	io_workitem_func	ifw;
 
 	if (adapter == NULL)
 		return (NDIS_STATUS_FAILURE);
 
 	block = adapter;
 
 	iw = IoAllocateWorkItem(block->nmb_deviceobj);
 	if (iw == NULL)
 		return (NDIS_STATUS_FAILURE);
 
 	/*
 	 * ndis_asyncmem_complete() releases this record with
 	 * free(w, M_DEVBUF), so it has to be allocated from M_DEVBUF
 	 * too.
 	 */
 	w = malloc(sizeof(struct ndis_allocwork), M_DEVBUF, M_NOWAIT);
 
 	if (w == NULL) {
 		/* Don't leak the work item we just got. */
 		IoFreeWorkItem(iw);
 		return (NDIS_STATUS_FAILURE);
 	}
 
 	w->na_cached = cached;
 	w->na_len = len;
 	w->na_ctx = ctx;
 	w->na_iw = iw;
 
 	/* Use the wrapped entry point for the work item callback. */
 	ifw = (io_workitem_func)ndis_findwrap((funcptr)ndis_asyncmem_complete);
 	IoQueueWorkItem(iw, ifw, WORKQUEUE_DELAYED, w);
 
 	return (NDIS_STATUS_PENDING);
 }
 
 /*
  * Release a region obtained from NdisMAllocateSharedMemory().
  *
  * The tracking list is searched for a record whose virtual address
  * matches vaddr or, failing that, whose physical address matches
  * paddr (some drivers pass a bogus virtual address). Only a record
  * that actually matched is unlinked and torn down; if none matches, a
  * diagnostic is printed and nothing is freed. len and cached are not
  * used.
  */
 static void
 NdisMFreeSharedMemory(ndis_handle adapter, uint32_t len, uint8_t cached,
     void *vaddr, ndis_physaddr paddr)
 {
 	ndis_miniport_block	*block;
 	struct ndis_softc	*sc;
 	struct ndis_shmem	*sh = NULL;
 	struct ndis_shmem	*cur;
 	list_entry		*l;
 
 	if (vaddr == NULL || adapter == NULL)
 		return;
 
 	block = (ndis_miniport_block *)adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	/* Sanity check: is list empty? */
 
 	if (IsListEmpty(&sc->ndis_shlist))
 		return;
 
 	NDIS_LOCK(sc);
 	for (l = sc->ndis_shlist.nle_flink; l != &sc->ndis_shlist;
 	    l = l->nle_flink) {
 		cur = CONTAINING_RECORD(l, struct ndis_shmem, ndis_list);
 		/*
 		 * Check the physaddr too, just in case the driver lied
 		 * about the virtual address.
 		 */
 		if (cur->ndis_saddr == vaddr ||
 		    cur->ndis_paddr.np_quad == paddr.np_quad) {
 			sh = cur;
 			break;
 		}
 	}
 
 	/*
 	 * sh is set only on a real match, so running off the end of
 	 * the list can no longer leave it pointing at the last record
 	 * examined.
 	 */
 	if (sh == NULL) {
 		NDIS_UNLOCK(sc);
 		printf("NDIS: buggy driver tried to free "
 		    "invalid shared memory: vaddr: %p paddr: 0x%jx\n",
 		    vaddr, (uintmax_t)paddr.np_quad);
 		return;
 	}
 
 	RemoveEntryList(&sh->ndis_list);
 
 	NDIS_UNLOCK(sc);
 
 	bus_dmamap_unload(sh->ndis_stag, sh->ndis_smap);
 	bus_dmamem_free(sh->ndis_stag, sh->ndis_saddr, sh->ndis_smap);
 	bus_dma_tag_destroy(sh->ndis_stag);
 
 	free(sh, M_DEVBUF);
 }
 
 /*
  * Map len bytes of device memory at physical address paddr and store
  * the resulting virtual address in *vaddr.
  *
  * Returns NDIS_STATUS_FAILURE if adapter is NULL (leaving *vaddr
  * alone) or if the mapping comes back NULL, NDIS_STATUS_SUCCESS
  * otherwise.
  */
 static ndis_status
 NdisMMapIoSpace(vaddr, adapter, paddr, len)
 	void			**vaddr;
 	ndis_handle		adapter;
 	ndis_physaddr		paddr;
 	uint32_t		len;
 {
 	if (adapter == NULL)
 		return (NDIS_STATUS_FAILURE);
 
 	*vaddr = MmMapIoSpace(paddr.np_quad, len, 0);
 
 	return (*vaddr != NULL ? NDIS_STATUS_SUCCESS : NDIS_STATUS_FAILURE);
 }
 
 /*
  * Undo NdisMMapIoSpace(): hand the mapping straight to
  * MmUnmapIoSpace(). adapter is ignored.
  */
 static void
 NdisMUnmapIoSpace(adapter, vaddr, len)
 	ndis_handle		adapter;
 	void			*vaddr;
 	uint32_t		len;
 {
 	MmUnmapIoSpace(vaddr, len);
 }
 
 /*
  * Report the cache fill size to drivers. A fixed value of 128 is
  * always returned.
  */
 static uint32_t
 NdisGetCacheFillSize(void)
 {
 	const uint32_t		fillsize = 128;
 
 	return (fillsize);
 }
 
 /*
  * Report the DMA alignment to drivers. handle is ignored and a fixed
  * value of 16 is always returned.
  */
 static uint32_t
 NdisMGetDmaAlignment(handle)
 	ndis_handle		handle;
 {
 	return (16);
 }
 
 /*
  * NDIS has two methods for dealing with NICs that support DMA.
  * One is to just pass packets to the driver and let it call
  * NdisMStartBufferPhysicalMapping() to map each buffer in the packet
  * all by itself, and the other is to let the NDIS library handle the
  * buffer mapping internally, and hand the driver an already populated
  * scatter/gather fragment list. If the driver calls
  * NdisMInitializeScatterGatherDma(), it wants to use the latter
  * method.
  *
  * Creates the transmit DMA tag (sc->ndis_ttag) and marks the softc as
  * using scatter/gather (sc->ndis_sc = 1). is64 and maxphysmap are not
  * used. Calling it again after a success is a no-op.
  *
  * Returns NDIS_STATUS_FAILURE if adapter is NULL,
  * NDIS_STATUS_RESOURCES if the tag cannot be created (in which case
  * scatter/gather is NOT marked as enabled), NDIS_STATUS_SUCCESS
  * otherwise.
  */
 
 static ndis_status
 NdisMInitializeScatterGatherDma(ndis_handle adapter, uint8_t is64,
     uint32_t maxphysmap)
 {
 	struct ndis_softc	*sc;
 	ndis_miniport_block	*block;
 	int			error;
 
 	if (adapter == NULL)
 		return (NDIS_STATUS_FAILURE);
 	block = (ndis_miniport_block *)adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	/* Don't do this twice. */
 	if (sc->ndis_sc == 1)
 		return (NDIS_STATUS_SUCCESS);
 
 	error = bus_dma_tag_create(sc->ndis_parent_tag, ETHER_ALIGN, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    MCLBYTES * NDIS_MAXSEG, NDIS_MAXSEG, MCLBYTES, BUS_DMA_ALLOCNOW,
 	    NULL, NULL, &sc->ndis_ttag);
 
 	/*
 	 * Without a tag there is nothing to map transmit buffers
 	 * with, so don't claim that scatter/gather is available.
 	 */
 	if (error)
 		return (NDIS_STATUS_RESOURCES);
 
 	sc->ndis_sc = 1;
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Create a packet pool holding descnum + NDIS_POOL_EXTRA packet
  * descriptors.
  *
  * The pool header and one block of packet memory (np_cnt * np_len
  * bytes, where np_len is sizeof(ndis_packet) + protrsvdlen) are
  * allocated from nonpaged pool, and every descriptor is pushed onto
  * the pool's free list. On success *pool receives the pool and
  * *status is NDIS_STATUS_SUCCESS; if either allocation fails, *status
  * is NDIS_STATUS_RESOURCES and *pool is not written.
  *
  * NOTE(review): the block is sized with np_len per packet, but the
  * descriptors are addressed as an ndis_packet array, i.e. with a
  * stride of sizeof(ndis_packet). Confirm this is intended when
  * protrsvdlen is non-zero.
  */
 void
 NdisAllocatePacketPool(status, pool, descnum, protrsvdlen)
 	ndis_status		*status;
 	ndis_handle		*pool;
 	uint32_t		descnum;
 	uint32_t		protrsvdlen;
 {
 	ndis_packet_pool	*newpool;
 	ndis_packet		*pktmem;
 	int			idx;
 
 	newpool = ExAllocatePoolWithTag(NonPagedPool,
 	    sizeof(ndis_packet_pool), 0);
 	if (newpool == NULL) {
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	newpool->np_cnt = descnum + NDIS_POOL_EXTRA;
 	newpool->np_protrsvd = protrsvdlen;
 	newpool->np_len = sizeof(ndis_packet) + protrsvdlen;
 
 	pktmem = ExAllocatePoolWithTag(NonPagedPool,
 	    newpool->np_cnt * newpool->np_len, 0);
 	if (pktmem == NULL) {
 		ExFreePool(newpool);
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	newpool->np_pktmem = pktmem;
 
 	/* Every descriptor starts out on the free list. */
 	idx = 0;
 	while (idx < newpool->np_cnt) {
 		InterlockedPushEntrySList(&newpool->np_head,
 		    (struct slist_entry *)&pktmem[idx]);
 		idx++;
 	}
 
 #ifdef NDIS_DEBUG_PACKETS
 	newpool->np_dead = 0;
 	KeInitializeSpinLock(&newpool->np_lock);
 	KeInitializeEvent(&newpool->np_event, EVENT_TYPE_NOTIFY, TRUE);
 #endif
 
 	*pool = newpool;
 	*status = NDIS_STATUS_SUCCESS;
 }
 
 /*
  * Extended packet pool allocation. The overflow descriptor count is
  * simply added to the base count and the request is passed on to
  * NdisAllocatePacketPool(), which reports the result through *status
  * and *pool.
  */
 void
 NdisAllocatePacketPoolEx(status, pool, descnum, oflowdescnum, protrsvdlen)
 	ndis_status		*status;
 	ndis_handle		*pool;
 	uint32_t		descnum;
 	uint32_t		oflowdescnum;
 	uint32_t		protrsvdlen;
 {
 	/*
 	 * Plain call: a void function may not "return" an expression,
 	 * even a void one.
 	 */
 	NdisAllocatePacketPool(status, pool,
 	    descnum + oflowdescnum, protrsvdlen);
 }
 
 /*
  * Return the number of packets currently taken from the pool: the
  * pool's total descriptor count minus the depth of its free list.
  */
 uint32_t
 NdisPacketPoolUsage(pool)
 	ndis_handle		pool;
 {
 	ndis_packet_pool	*p;
 
 	p = (ndis_packet_pool *)pool;
 	return (p->np_cnt - ExQueryDepthSList(&p->np_head));
 }
 
 /*
  * Release a packet pool: free the packet backing store (np_pktmem)
  * and then the pool descriptor itself.
  *
  * When NDIS_DEBUG_PACKETS is defined and packets are still
  * outstanding, the pool is first marked dead and this routine waits
  * on np_event, which NdisFreePacket() sets once the free list is
  * full again. Without NDIS_DEBUG_PACKETS the usage count is computed
  * but not acted upon.
  */
 void
 NdisFreePacketPool(pool)
 	ndis_handle		pool;
 {
 	ndis_packet_pool	*p;
 	int			usage;
 #ifdef NDIS_DEBUG_PACKETS
 	uint8_t			irql;
 #endif
 
 	p = (ndis_packet_pool *)pool;
 
 #ifdef NDIS_DEBUG_PACKETS
 	KeAcquireSpinLock(&p->np_lock, &irql);
 #endif
 
 	usage = NdisPacketPoolUsage(pool);
 
 #ifdef NDIS_DEBUG_PACKETS
 	if (usage) {
 		/* Refuse new allocations and wait for stragglers. */
 		p->np_dead = 1;
 		KeResetEvent(&p->np_event);
 		KeReleaseSpinLock(&p->np_lock, irql);
 		KeWaitForSingleObject(&p->np_event, 0, 0, FALSE, NULL);
 	} else
 		KeReleaseSpinLock(&p->np_lock, irql);
 #endif
 
 	ExFreePool(p->np_pktmem);
 	ExFreePool(p);
 }
 
 /*
  * Take one packet descriptor off a pool's free list and initialize it.
  *
  * status	receives NDIS_STATUS_SUCCESS, or NDIS_STATUS_RESOURCES
  *		when the free list is empty (or, with
  *		NDIS_DEBUG_PACKETS, when the pool is marked dead)
  * packet	receives the descriptor on success; untouched on failure
  * pool	handle of the pool to allocate from
  */
 void
 NdisAllocatePacket(status, packet, pool)
 	ndis_status		*status;
 	ndis_packet		**packet;
 	ndis_handle		pool;
 {
 	ndis_packet_pool	*pktpool;
 	ndis_packet		*newpkt;
 #ifdef NDIS_DEBUG_PACKETS
 	uint8_t			irql;
 #endif
 
 	pktpool = (ndis_packet_pool *)pool;
 
 #ifdef NDIS_DEBUG_PACKETS
 	KeAcquireSpinLock(&pktpool->np_lock, &irql);
 	if (pktpool->np_dead) {
 		KeReleaseSpinLock(&pktpool->np_lock, irql);
 		printf("NDIS: tried to allocate packet from dead pool %p\n",
 		    pool);
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 #endif
 
 	newpkt = (ndis_packet *)
 	    InterlockedPopEntrySList(&pktpool->np_head);
 
 #ifdef NDIS_DEBUG_PACKETS
 	KeReleaseSpinLock(&pktpool->np_lock, irql);
 #endif
 
 	if (newpkt != NULL) {
 		/* Only the fixed-size descriptor is cleared. */
 		bzero((char *)newpkt, sizeof(ndis_packet));
 
 		/* Remember which pool this packet belongs to. */
 		newpkt->np_private.npp_pool = pool;
 
 		/* Lots of things expect the oob offset to be set. */
 		newpkt->np_private.npp_packetooboffset =
 		    offsetof(ndis_packet, np_oob);
 
 		/*
 		 * The flags must be set up correctly for the
 		 * NDIS_SET_PACKET_MEDIA_SPECIFIC_INFO() and
 		 * NDIS_GET_PACKET_MEDIA_SPECIFIC_INFO() macros
 		 * to work.
 		 */
 		newpkt->np_private.npp_ndispktflags =
 		    NDIS_PACKET_ALLOCATED_BY_NDIS;
 		newpkt->np_private.npp_validcounts = FALSE;
 
 		*packet = newpkt;
 		*status = NDIS_STATUS_SUCCESS;
 	} else
 		*status = NDIS_STATUS_RESOURCES;
 }
 
 /*
  * Return a packet descriptor to the free list of the pool recorded
  * in its npp_pool field.
  *
  * With NDIS_DEBUG_PACKETS, if the pool has been marked dead and this
  * was the last outstanding packet (free list depth equals np_cnt),
  * np_event is set so that NdisFreePacketPool() can proceed.
  */
 void
 NdisFreePacket(packet)
 	ndis_packet		*packet;
 {
 	ndis_packet_pool	*p;
 #ifdef NDIS_DEBUG_PACKETS
 	uint8_t			irql;
 #endif
 
 	p = (ndis_packet_pool *)packet->np_private.npp_pool;
 
 #ifdef NDIS_DEBUG_PACKETS
 	KeAcquireSpinLock(&p->np_lock, &irql);
 #endif
 
 	InterlockedPushEntrySList(&p->np_head, (slist_entry *)packet);
 
 #ifdef NDIS_DEBUG_PACKETS
 	if (p->np_dead) {
 		if (ExQueryDepthSList(&p->np_head) == p->np_cnt)
 			KeSetEvent(&p->np_event, IO_NO_INCREMENT, FALSE);
 	}
 	KeReleaseSpinLock(&p->np_lock, irql);
 #endif
 }
 
 /*
  * Detach the first buffer of a packet's buffer chain and hand it
  * back through *buf. Does nothing if either argument is NULL.
  * The packet's cached counts are invalidated. The detached buffer's
  * mdl_next link is left as it was.
  */
 static void
 NdisUnchainBufferAtFront(packet, buf)
 	ndis_packet		*packet;
 	ndis_buffer		**buf;
 {
 	ndis_packet_private	*pp;
 	ndis_buffer		*first;
 
 	if (packet == NULL || buf == NULL)
 		return;
 
 	pp = &packet->np_private;
 	pp->npp_validcounts = FALSE;
 
 	first = pp->npp_head;
 	*buf = first;
 
 	/* Zero or one buffer: the chain becomes empty. */
 	if (first == pp->npp_tail)
 		pp->npp_head = pp->npp_tail = NULL;
 	else
 		pp->npp_head = first->mdl_next;
 }
 
 /*
  * Detach the last buffer of a packet's buffer chain and hand it
  * back through *buf. Does nothing if either argument is NULL.
  * The packet's cached counts are invalidated. With more than one
  * buffer, the chain is walked from the head to find the new tail.
  */
 static void
 NdisUnchainBufferAtBack(packet, buf)
 	ndis_packet		*packet;
 	ndis_buffer		**buf;
 {
 	ndis_packet_private	*pp;
 	ndis_buffer		*prev;
 
 	if (packet == NULL || buf == NULL)
 		return;
 
 	pp = &packet->np_private;
 	pp->npp_validcounts = FALSE;
 
 	/* Zero or one buffer: the chain becomes empty. */
 	if (pp->npp_head == pp->npp_tail) {
 		*buf = pp->npp_head;
 		pp->npp_head = pp->npp_tail = NULL;
 		return;
 	}
 
 	*buf = pp->npp_tail;
 
 	/* Find the buffer just before the current tail. */
 	for (prev = pp->npp_head; prev->mdl_next != pp->npp_tail;
 	    prev = prev->mdl_next)
 		continue;
 
 	pp->npp_tail = prev;
 	prev->mdl_next = NULL;
 }
 
 /*
  * The NDIS "buffer" is really an MDL (memory descriptor list)
  * which is used to describe a buffer in a way that allows it
  * to be mapped into different contexts. We have to be careful how
  * we handle them: in some versions of Windows, the NdisFreeBuffer()
  * routine is an actual function in the NDIS API, but in others
  * it's just a macro wrapper around IoFreeMdl(). There's really
  * no way to use the 'descnum' parameter to count how many
  * "buffers" are allocated since, in order to use IoFreeMdl() to
  * dispose of a buffer, we have to use IoAllocateMdl() to allocate
  * them, and IoAllocateMdl() just grabs them out of the heap.
  */
 
 /*
  * "Allocate" a buffer pool. No pool is actually created: *pool is
  * set to the NonPagedPool constant as a placeholder handle and
  * *status is always NDIS_STATUS_SUCCESS. descnum is ignored.
  */
 static void
 NdisAllocateBufferPool(status, pool, descnum)
 	ndis_status		*status;
 	ndis_handle		*pool;
 	uint32_t		descnum;
 {
 
 	/*
 	 * The only thing we can really do here is verify that descnum
 	 * is a reasonable value, but I really don't know what to check
 	 * it against.
 	 */
 
 	*pool = NonPagedPool;
 	*status = NDIS_STATUS_SUCCESS;
 }
 
 /*
  * Counterpart of NdisAllocateBufferPool(). Since that routine
  * allocates nothing, there is nothing to release here.
  */
 static void
 NdisFreeBufferPool(pool)
 	ndis_handle		pool;
 {
 }
 
 /*
  * Allocate an NDIS buffer (an MDL) describing the region of 'len'
  * bytes at 'vaddr'.
  *
  * status	receives NDIS_STATUS_SUCCESS, or NDIS_STATUS_RESOURCES
  *		if IoAllocateMdl() fails
  * buffer	receives the new buffer on success; untouched on failure
  * pool	unused
  */
 static void
 NdisAllocateBuffer(status, buffer, pool, vaddr, len)
 	ndis_status		*status;
 	ndis_buffer		**buffer;
 	ndis_handle		pool;
 	void			*vaddr;
 	uint32_t		len;
 {
 	ndis_buffer		*mdl;
 
 	mdl = IoAllocateMdl(vaddr, len, FALSE, FALSE, NULL);
 	if (mdl != NULL) {
 		MmBuildMdlForNonPagedPool(mdl);
 		*buffer = mdl;
 		*status = NDIS_STATUS_SUCCESS;
 	} else
 		*status = NDIS_STATUS_RESOURCES;
 }
 
 /* Release an NDIS buffer by passing it to IoFreeMdl(). */
 static void
 NdisFreeBuffer(buf)
 	ndis_buffer		*buf;
 {
 	IoFreeMdl(buf);
 }
 
 /* Aw c'mon. */
 
 /* Return the byte count recorded in a buffer's MDL. */
 static uint32_t
 NdisBufferLength(buf)
 	ndis_buffer		*buf;
 {
 	return (MmGetMdlByteCount(buf));
 }
 
 /*
  * Get the virtual address and length of a buffer.
  * Note: the vaddr argument is optional.
  */
 
 /*
  * buf	buffer (MDL) to query
  * vaddr	if non-NULL, receives the buffer's virtual address
  * len	receives the buffer's byte count; must not be NULL
  */
 static void
 NdisQueryBuffer(buf, vaddr, len)
 	ndis_buffer		*buf;
 	void			**vaddr;
 	uint32_t		*len;
 {
 	if (vaddr != NULL)
 		*vaddr = MmGetMdlVirtualAddress(buf);
 	*len = MmGetMdlByteCount(buf);
 }
 
 /* Same as above -- we don't care about the priority. */
 
 /*
  * buf	buffer (MDL) to query
  * vaddr	if non-NULL, receives the buffer's virtual address
  * len	receives the buffer's byte count; must not be NULL
  * prio	ignored
  */
 static void
 NdisQueryBufferSafe(buf, vaddr, len, prio)
 	ndis_buffer		*buf;
 	void			**vaddr;
 	uint32_t		*len;
 	uint32_t		prio;
 {
 	if (vaddr != NULL)
 		*vaddr = MmGetMdlVirtualAddress(buf);
 	*len = MmGetMdlByteCount(buf);
 }
 
 /* Damnit Microsoft!! How many ways can you do the same thing?! */
 
 /* Return the virtual address described by a buffer's MDL. */
 static void *
 NdisBufferVirtualAddress(buf)
 	ndis_buffer		*buf;
 {
 	return (MmGetMdlVirtualAddress(buf));
 }
 
 /*
  * Return the virtual address described by a buffer's MDL.
  * The prio argument is ignored.
  */
 static void *
 NdisBufferVirtualAddressSafe(buf, prio)
 	ndis_buffer		*buf;
 	uint32_t		prio;
 {
 	return (MmGetMdlVirtualAddress(buf));
 }
 
 /*
  * Overwrite the byte count recorded in a buffer's MDL with 'len'.
  * MmGetMdlByteCount() is used as an lvalue here, so it must expand
  * to the MDL field itself.
  */
 static void
 NdisAdjustBufferLength(buf, len)
 	ndis_buffer		*buf;
 	int			len;
 {
 	MmGetMdlByteCount(buf) = len;
 }
 
 static uint32_t
 NdisInterlockedIncrement(addend)
 	uint32_t		*addend;
 {
 	atomic_add_long((u_long *)addend, 1);
 	return (*addend);
 }
 
 static uint32_t
 NdisInterlockedDecrement(addend)
 	uint32_t		*addend;
 {
 	atomic_subtract_long((u_long *)addend, 1);
 	return (*addend);
 }
 
+/*
+ * Report the NDIS version implemented by this layer as a fixed
+ * constant. NOTE(review): 0x00050001 presumably encodes major
+ * version 5 in the high word and minor version 1 in the low word
+ * (NDIS 5.1) -- confirm.
+ */
+static uint32_t
+NdisGetVersion(void)
+{
+	return (0x00050001);
+}
+
 /* Initialize the kernel event embedded in an NDIS event. */
 static void
 NdisInitializeEvent(event)
 	ndis_event		*event;
 {
 	/*
 	 * NDIS events are always notification
 	 * events, and should be initialized to the
 	 * not signaled state.
 	 */
 	KeInitializeEvent(&event->ne_event, EVENT_TYPE_NOTIFY, FALSE);
 }
 
 /* Signal an NDIS event by calling KeSetEvent() on its kernel event. */
 static void
 NdisSetEvent(event)
 	ndis_event		*event;
 {
 	KeSetEvent(&event->ne_event, IO_NO_INCREMENT, FALSE);
 }
 
 /* Reset an NDIS event by calling KeResetEvent() on its kernel event. */
 static void
 NdisResetEvent(event)
 	ndis_event		*event;
 {
 	KeResetEvent(&event->ne_event);
 }
 
 static uint8_t
 NdisWaitEvent(event, msecs)
 	ndis_event		*event;
 	uint32_t		msecs;
 {
 	int64_t			duetime;
 	uint32_t		rval;
 
 	duetime = ((int64_t)msecs * -10000);
 	rval = KeWaitForSingleObject(event,
 	    0, 0, TRUE, msecs ? & duetime : NULL);
 
 	if (rval == STATUS_TIMEOUT)
 		return (FALSE);
 
 	return (TRUE);
 }
 
 /*
  * Convert a unicode string to an ansi string using
  * RtlUnicodeStringToAnsiString() without allocating the destination
  * (the allocate flag is FALSE), and map the result to an NDIS status:
  * zero becomes NDIS_STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES
  * becomes NDIS_STATUS_RESOURCES, anything else NDIS_STATUS_FAILURE.
  */
 static ndis_status
 NdisUnicodeStringToAnsiString(dstr, sstr)
 	ansi_string		*dstr;
 	unicode_string		*sstr;
 {
 	uint32_t		ntstatus;
 
 	ntstatus = RtlUnicodeStringToAnsiString(dstr, sstr, FALSE);
 	if (ntstatus == 0)
 		return (NDIS_STATUS_SUCCESS);
 
 	return (ntstatus == STATUS_INSUFFICIENT_RESOURCES ?
 	    NDIS_STATUS_RESOURCES : NDIS_STATUS_FAILURE);
 }
 
 /*
  * Convert an ansi string to a unicode string using
  * RtlAnsiStringToUnicodeString() without allocating the destination
  * (the allocate flag is FALSE), and map the result to an NDIS status:
  * zero becomes NDIS_STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES
  * becomes NDIS_STATUS_RESOURCES, anything else NDIS_STATUS_FAILURE.
  */
 static ndis_status
 NdisAnsiStringToUnicodeString(dstr, sstr)
 	unicode_string		*dstr;
 	ansi_string		*sstr;
 {
 	uint32_t		ntstatus;
 
 	ntstatus = RtlAnsiStringToUnicodeString(dstr, sstr, FALSE);
 	if (ntstatus == 0)
 		return (NDIS_STATUS_SUCCESS);
 
 	return (ntstatus == STATUS_INSUFFICIENT_RESOURCES ?
 	    NDIS_STATUS_RESOURCES : NDIS_STATUS_FAILURE);
 }
 
 /*
  * Hand the caller the resource list stored in the miniport block
  * (nmb_rlist). The slot argument is not used.
  *
  * Returns NDIS_STATUS_FAILURE if adapter or list is NULL,
  * NDIS_STATUS_SUCCESS otherwise.
  */
 static ndis_status
 NdisMPciAssignResources(adapter, slot, list)
 	ndis_handle		adapter;
 	uint32_t		slot;
 	ndis_resource_list	**list;
 {
 	if (adapter != NULL && list != NULL) {
 		*list = ((ndis_miniport_block *)adapter)->nmb_rlist;
 		return (NDIS_STATUS_SUCCESS);
 	}
 
 	return (NDIS_STATUS_FAILURE);
 }
 
 static uint8_t
 ndis_intr(iobj, arg)
 	kinterrupt		*iobj;
 	void			*arg;
 {
 	struct ndis_softc	*sc;
 	uint8_t			is_our_intr = FALSE;
 	int			call_isr = 0;
 	ndis_miniport_interrupt	*intr;
 
 	sc = arg;
 	intr = sc->ndis_block->nmb_interrupt;
 
 	if (intr == NULL || sc->ndis_block->nmb_miniportadapterctx == NULL)
 		return (FALSE);
 
 	if (sc->ndis_block->nmb_interrupt->ni_isrreq == TRUE)
 		MSCALL3(intr->ni_isrfunc, &is_our_intr, &call_isr,
 		    sc->ndis_block->nmb_miniportadapterctx);
 	else {
 		MSCALL1(sc->ndis_chars->nmc_disable_interrupts_func,
 		    sc->ndis_block->nmb_miniportadapterctx);
 		call_isr = 1;
 	}
  
 	if (call_isr)
 		IoRequestDpc(sc->ndis_block->nmb_deviceobj, NULL, sc);
 
 	return (is_our_intr);
 }
 
 /*
  * DPC routine installed by NdisMRegisterInterrupt() via
  * KeInitializeDpc(); 'intr' is the context pointer given there.
  *
  * Calls the miniport's interrupt handler (ni_dpcfunc) and, if the
  * miniport provides one, its enable-interrupts routine. For
  * serialized miniports both calls are made with the miniport block
  * lock held. Finally decrements ni_dpccnt under ni_dpccountlock and
  * sets ni_dpcevt when the count reaches zero.
  *
  * dpc, sysarg1 and sysarg2 are not used.
  */
 static void
 ndis_intrhand(dpc, intr, sysarg1, sysarg2)
 	kdpc			*dpc;
 	ndis_miniport_interrupt	*intr;
 	void			*sysarg1;
 	void			*sysarg2;
 {
 	struct ndis_softc	*sc;
 	ndis_miniport_block	*block;
 	ndis_handle             adapter;
 
 	block = intr->ni_block;
 	adapter = block->nmb_miniportadapterctx;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 
 	if (NDIS_SERIALIZED(sc->ndis_block))
 		KeAcquireSpinLockAtDpcLevel(&block->nmb_lock);
 
 	MSCALL1(intr->ni_dpcfunc, adapter);
 
 	/* If there's a MiniportEnableInterrupt() routine, call it. */
 
 	if (sc->ndis_chars->nmc_enable_interrupts_func != NULL)
 		MSCALL1(sc->ndis_chars->nmc_enable_interrupts_func, adapter);
 
 	if (NDIS_SERIALIZED(sc->ndis_block))
 		KeReleaseSpinLockFromDpcLevel(&block->nmb_lock);
 
 	/*
 	 * Set the completion event if we've drained all
 	 * pending interrupts.
 	 */
 
 	KeAcquireSpinLockAtDpcLevel(&intr->ni_dpccountlock);
 	intr->ni_dpccnt--;
 	if (intr->ni_dpccnt == 0)
 		KeSetEvent(&intr->ni_dpcevt, IO_NO_INCREMENT, FALSE);
 	KeReleaseSpinLockFromDpcLevel(&intr->ni_dpccountlock);
 }
 
 /*
  * Set up a miniport interrupt object and connect the interrupt.
  *
  * intr	interrupt object to initialize
  * adapter	miniport block handle
  * ivec, ilevel, imode, shared
  *		passed through to IoConnectInterrupt()
  * reqisr	stored in ni_isrreq; ndis_intr() uses it to decide
  *		whether to call the miniport's ISR
  *
  * The ISR and DPC entry points are taken from the characteristics
  * structure attached to the driver object. ndis_intr() is installed
  * as the interrupt routine and ndis_intrhand() as the DPC.
  *
  * Returns NDIS_STATUS_RESOURCES if the ni_rsvd allocation fails,
  * NDIS_STATUS_FAILURE if the interrupt cannot be connected, and
  * NDIS_STATUS_SUCCESS otherwise. On success the interrupt object is
  * published in the miniport block (nmb_interrupt).
  */
 static ndis_status
 NdisMRegisterInterrupt(ndis_miniport_interrupt *intr, ndis_handle adapter,
     uint32_t ivec, uint32_t ilevel, uint8_t reqisr, uint8_t shared,
     ndis_interrupt_mode imode)
 {
 	ndis_miniport_block	*block;
 	ndis_miniport_characteristics *ch;
 	struct ndis_softc	*sc;
 	int			error;
 
 	block = adapter;
 	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
 	ch = IoGetDriverObjectExtension(block->nmb_deviceobj->do_drvobj,
 	    (void *)1);
 
 	intr->ni_rsvd = ExAllocatePoolWithTag(NonPagedPool,
 	    sizeof(struct mtx), 0);
 	if (intr->ni_rsvd == NULL)
 		return (NDIS_STATUS_RESOURCES);
 
 	intr->ni_block = adapter;
 	intr->ni_isrreq = reqisr;
 	intr->ni_shared = shared;
 	intr->ni_dpccnt = 0;
 	intr->ni_isrfunc = ch->nmc_isr_func;
 	intr->ni_dpcfunc = ch->nmc_interrupt_func;
 
 	KeInitializeEvent(&intr->ni_dpcevt, EVENT_TYPE_NOTIFY, TRUE);
 	KeInitializeDpc(&intr->ni_dpc,
 	    ndis_findwrap((funcptr)ndis_intrhand), intr);
 	KeSetImportanceDpc(&intr->ni_dpc, KDPC_IMPORTANCE_LOW);
 
 	error = IoConnectInterrupt(&intr->ni_introbj,
 	    ndis_findwrap((funcptr)ndis_intr), sc, NULL,
 	    ivec, ilevel, 0, imode, shared, 0, FALSE);
 
 	if (error != STATUS_SUCCESS) {
 		/* Don't leak the reserved area when registration fails. */
 		ExFreePool(intr->ni_rsvd);
 		intr->ni_rsvd = NULL;
 		return (NDIS_STATUS_FAILURE);
 	}
 
 	block->nmb_interrupt = intr;
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Tear down an interrupt set up by NdisMRegisterInterrupt():
  * clear the miniport block's nmb_interrupt pointer while holding the
  * interrupt object's lock, disconnect the ISR, then wait on and
  * reset ni_dpcevt (which ndis_intrhand() sets when its DPC count
  * drops to zero).
  */
 static void
 NdisMDeregisterInterrupt(intr)
 	ndis_miniport_interrupt	*intr;
 {
 	ndis_miniport_block	*block;
 	uint8_t			irql;
 
 	block = intr->ni_block;
 
 	/* Should really be KeSynchronizeExecution() */
 
 	KeAcquireSpinLock(intr->ni_introbj->ki_lock, &irql);
 	block->nmb_interrupt = NULL;
 	KeReleaseSpinLock(intr->ni_introbj->ki_lock, irql);
 /*
 	KeFlushQueuedDpcs();
 */
 	/* Disconnect our ISR */
 
 	IoDisconnectInterrupt(intr->ni_introbj);
 
 	KeWaitForSingleObject(&intr->ni_dpcevt, 0, 0, FALSE, NULL);
 	KeResetEvent(&intr->ni_dpcevt);
 }
 
 /*
  * Record a miniport's shutdown handler and its context argument in
  * the driver's characteristics structure (nmc_shutdown_handler and
  * nmc_rsvd0). Does nothing if adapter is NULL.
  */
 static void
 NdisMRegisterAdapterShutdownHandler(adapter, shutdownctx, shutdownfunc)
 	ndis_handle		adapter;
 	void			*shutdownctx;
 	ndis_shutdown_handler	shutdownfunc;
 {
 	ndis_miniport_block	*nmb;
 	struct ndis_softc	*softc;
 
 	if (adapter == NULL)
 		return;
 
 	nmb = (ndis_miniport_block *)adapter;
 	softc = device_get_softc(nmb->nmb_physdeviceobj->do_devext);
 
 	softc->ndis_chars->nmc_shutdown_handler = shutdownfunc;
 	softc->ndis_chars->nmc_rsvd0 = shutdownctx;
 }
 
 /*
  * Forget a previously registered shutdown handler: both the handler
  * and its context in the driver's characteristics structure are set
  * to NULL. Does nothing if adapter is NULL.
  */
 static void
 NdisMDeregisterAdapterShutdownHandler(adapter)
 	ndis_handle		adapter;
 {
 	ndis_miniport_block	*nmb;
 	struct ndis_softc	*softc;
 
 	if (adapter == NULL)
 		return;
 
 	nmb = (ndis_miniport_block *)adapter;
 	softc = device_get_softc(nmb->nmb_physdeviceobj->do_devext);
 
 	softc->ndis_chars->nmc_shutdown_handler = NULL;
 	softc->ndis_chars->nmc_rsvd0 = NULL;
 }
 
 /*
  * Return the number of pages spanned by a buffer, as computed by
  * SPAN_PAGES() from its virtual address and byte count. A NULL
  * buffer yields 0 and a zero-length buffer yields 1.
  */
 static uint32_t
 NDIS_BUFFER_TO_SPAN_PAGES(buf)
 	ndis_buffer		*buf;
 {
 	uint32_t		npages;
 
 	if (buf == NULL)
 		npages = 0;
 	else if (MmGetMdlByteCount(buf) == 0)
 		npages = 1;
 	else
 		npages = SPAN_PAGES(MmGetMdlVirtualAddress(buf),
 		    MmGetMdlByteCount(buf));
 
 	return (npages);
 }
 
 /*
  * Store the number of pages spanned by 'buf' in *pages.
  * If buf is NULL, *pages is left untouched.
  */
 static void
 NdisGetBufferPhysicalArraySize(buf, pages)
 	ndis_buffer		*buf;
 	uint32_t		*pages;
 {
 	if (buf != NULL)
 		*pages = NDIS_BUFFER_TO_SPAN_PAGES(buf);
 }
 
 /*
  * Report a buffer's MDL byte offset and byte count through *off and
  * *len. If buf is NULL, neither output is written.
  */
 static void
 NdisQueryBufferOffset(buf, off, len)
 	ndis_buffer		*buf;
 	uint32_t		*off;
 	uint32_t		*len;
 {
 	if (buf != NULL) {
 		*off = MmGetMdlByteOffset(buf);
 		*len = MmGetMdlByteCount(buf);
 	}
 }
 
 /*
  * Pause for 'usecs' microseconds.
  *
  * While the system is still bootstrapping (cold != 0) sleeping is
  * not allowed, so a busy-wait DELAY() is used. Otherwise a timer
  * is armed with a due time of usecs * -10 and waited on.
  */
 void
 NdisMSleep(usecs)
 	uint32_t		usecs;
 {
 	ktimer			sleeptimer;
 
 	if (cold) {
 		DELAY(usecs);
 		return;
 	}
 
 	KeInitializeTimer(&sleeptimer);
 	KeSetTimer(&sleeptimer, ((int64_t)usecs * -10), NULL);
 	KeWaitForSingleObject(&sleeptimer, 0, 0, FALSE, NULL);
 }
 
 /*
  * Copy 'len' bytes out of the device's attribute memory resource
  * (ndis_res_am) into 'buf', starting at logical offset 'offset'.
  * Logical byte k is read from bus offset k * 2.
  *
  * Returns the number of bytes read, or 0 if handle is NULL.
  */
 static uint32_t
 NdisReadPcmciaAttributeMemory(handle, offset, buf, len)
 	ndis_handle		handle;
 	uint32_t		offset;
 	void			*buf;
 	uint32_t		len;
 {
 	struct ndis_softc	*softc;
 	ndis_miniport_block	*nmb;
 	bus_space_tag_t		tag;
 	bus_space_handle_t	hnd;
 	char			*out;
 	int			idx;
 
 	if (handle == NULL)
 		return (0);
 
 	nmb = (ndis_miniport_block *)handle;
 	softc = device_get_softc(nmb->nmb_physdeviceobj->do_devext);
 
 	tag = rman_get_bustag(softc->ndis_res_am);
 	hnd = rman_get_bushandle(softc->ndis_res_am);
 	out = buf;
 
 	idx = 0;
 	while (idx < len) {
 		out[idx] = bus_space_read_1(tag, hnd, (offset + idx) * 2);
 		idx++;
 	}
 
 	return (idx);
 }
 
 /*
  * Copy 'len' bytes from 'buf' into the device's attribute memory
  * resource (ndis_res_am), starting at logical offset 'offset'.
  * Logical byte k is written to bus offset k * 2.
  *
  * Returns the number of bytes written, or 0 if handle is NULL.
  */
 static uint32_t
 NdisWritePcmciaAttributeMemory(handle, offset, buf, len)
 	ndis_handle		handle;
 	uint32_t		offset;
 	void			*buf;
 	uint32_t		len;
 {
 	struct ndis_softc	*softc;
 	ndis_miniport_block	*nmb;
 	bus_space_tag_t		tag;
 	bus_space_handle_t	hnd;
 	char			*in;
 	int			idx;
 
 	if (handle == NULL)
 		return (0);
 
 	nmb = (ndis_miniport_block *)handle;
 	softc = device_get_softc(nmb->nmb_physdeviceobj->do_devext);
 
 	tag = rman_get_bustag(softc->ndis_res_am);
 	hnd = rman_get_bushandle(softc->ndis_res_am);
 	in = buf;
 
 	idx = 0;
 	while (idx < len) {
 		bus_space_write_1(tag, hnd, (offset + idx) * 2, in[idx]);
 		idx++;
 	}
 
 	return (idx);
 }
 
 static list_entry *
 NdisInterlockedInsertHeadList(head, entry, lock)
 	list_entry		*head;
 	list_entry		*entry;
 	ndis_spin_lock		*lock;
 {
 	list_entry		*flink;
 
 	KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql);
 	flink = head->nle_flink;
 	entry->nle_flink = flink;
 	entry->nle_blink = head;
 	flink->nle_blink = entry;
 	head->nle_flink = entry;
 	KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql);
 
 	return (flink);
 }
 
 static list_entry *
 NdisInterlockedRemoveHeadList(head, lock)
 	list_entry		*head;
 	ndis_spin_lock		*lock;
 {
 	list_entry		*flink;
 	list_entry		*entry;
 
 	KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql);
 	entry = head->nle_flink;
 	flink = entry->nle_flink;
 	head->nle_flink = flink;
 	flink->nle_blink = head;
 	KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql);
 
 	return (entry);
 }
 
 static list_entry *
 NdisInterlockedInsertTailList(head, entry, lock)
 	list_entry		*head;
 	list_entry		*entry;
 	ndis_spin_lock		*lock;
 {
 	list_entry		*blink;
 
 	KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql);
 	blink = head->nle_blink;
 	entry->nle_flink = head;
 	entry->nle_blink = blink;
 	blink->nle_flink = entry;
 	head->nle_blink = entry;
 	KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql);
 
 	return (blink);
 }
 
 /*
  * Run 'syncfunc' with argument 'syncctx' through
  * KeSynchronizeExecution() on the interrupt object belonging to
  * 'intr', and return whatever that call returns.
  */
 static uint8_t
 NdisMSynchronizeWithInterrupt(intr, syncfunc, syncctx)
 	ndis_miniport_interrupt	*intr;
 	void			*syncfunc;
 	void			*syncctx;
 {
 	return (KeSynchronizeExecution(intr->ni_introbj, syncfunc, syncctx));
 }
 
 /* Store the current system time, as produced by ntoskrnl_time(). */
 static void
 NdisGetCurrentSystemTime(tval)
 	uint64_t		*tval;
 {
 	ntoskrnl_time(tval);
 }
 
 /*
  * Return the number of milliseconds since the system booted.
  */
 /*
  * The uptime from nanouptime() is converted to milliseconds and
  * stored in a 32-bit value, so the result is truncated to 32 bits.
  */
 static void
 NdisGetSystemUpTime(tval)
 	uint32_t		*tval;
 {
 	struct timespec		ts;
 
 	nanouptime(&ts);
 	*tval = ts.tv_nsec / 1000000 + ts.tv_sec * 1000;
 }
 
 /*
  * Build a unicode string in *dst from the C string 'src'. The
  * conversion is asked to allocate the destination buffer (the third
  * argument is TRUE).
  *
  * NOTE(review): the return value of RtlAnsiStringToUnicodeString()
  * is ignored, so a failed conversion is not reported to the caller.
  */
 static void
 NdisInitializeString(dst, src)
 	unicode_string		*dst;
 	char			*src;
 {
 	ansi_string		as;
 	RtlInitAnsiString(&as, src);
 	RtlAnsiStringToUnicodeString(dst, &as, TRUE);
 }
 
 /* Release a unicode string by passing it to RtlFreeUnicodeString(). */
 static void
 NdisFreeString(str)
 	unicode_string		*str;
 {
 	RtlFreeUnicodeString(str);
 }
 
 /*
  * Stub: performs no work and always reports NDIS_STATUS_SUCCESS.
  * The adapter argument is not used.
  */
 static ndis_status
 NdisMRemoveMiniport(adapter)
 	ndis_handle		*adapter;
 {
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /* Initialize an ansi string from 'src' via RtlInitAnsiString(). */
 static void
 NdisInitAnsiString(dst, src)
 	ansi_string		*dst;
 	char			*src;
 {
 	RtlInitAnsiString(dst, src);
 }
 
 /* Initialize a unicode string from 'src' via RtlInitUnicodeString(). */
 static void
 NdisInitUnicodeString(dst, src)
 	unicode_string		*dst;
 	uint16_t		*src;
 {
 	RtlInitUnicodeString(dst, src);
 }
 
 /*
  * Report the device objects associated with a miniport block.
  * Each of phydevobj, funcdevobj and nextdevobj is optional; only
  * non-NULL pointers are written. The resources and transresources
  * arguments are not used.
  */
 static void NdisMGetDeviceProperty(adapter, phydevobj,
 	funcdevobj, nextdevobj, resources, transresources)
 	ndis_handle		adapter;
 	device_object		**phydevobj;
 	device_object		**funcdevobj;
 	device_object		**nextdevobj;
 	cm_resource_list	*resources;
 	cm_resource_list	*transresources;
 {
 	ndis_miniport_block	*nmb = (ndis_miniport_block *)adapter;
 
 	if (phydevobj != NULL) {
 		*phydevobj = nmb->nmb_physdeviceobj;
 	}
 	if (funcdevobj != NULL) {
 		*funcdevobj = nmb->nmb_deviceobj;
 	}
 	if (nextdevobj != NULL) {
 		*nextdevobj = nmb->nmb_nextdeviceobj;
 	}
 }
 
 /*
  * Describe the first buffer of a packet and the packet's total
  * length.
  *
  * buf		receives the first buffer in the chain (may be NULL)
  * firstva	receives that buffer's virtual address, or NULL
  * firstlen	receives that buffer's byte count, or 0
  * totlen	receives the sum of the byte counts of every buffer
  *		in the chain, or 0 for an empty chain
  */
 static void
 NdisGetFirstBufferFromPacket(packet, buf, firstva, firstlen, totlen)
 	ndis_packet		*packet;
 	ndis_buffer		**buf;
 	void			**firstva;
 	uint32_t		*firstlen;
 	uint32_t		*totlen;
 {
 	ndis_buffer		*cur;
 
 	cur = packet->np_private.npp_head;
 	*buf = cur;
 
 	if (cur == NULL) {
 		*firstva = NULL;
 		*firstlen = *totlen = 0;
 		return;
 	}
 
 	*firstva = MmGetMdlVirtualAddress(cur);
 	*firstlen = *totlen = MmGetMdlByteCount(cur);
 
 	/* Add up the remaining buffers. */
 	cur = cur->mdl_next;
 	while (cur != NULL) {
 		*totlen += MmGetMdlByteCount(cur);
 		cur = cur->mdl_next;
 	}
 }
 
 /*
  * Same as NdisGetFirstBufferFromPacket(); the extra prio argument
  * is ignored.
  */
 static void
 NdisGetFirstBufferFromPacketSafe(packet, buf, firstva, firstlen, totlen, prio)
 	ndis_packet		*packet;
 	ndis_buffer		**buf;
 	void			**firstva;
 	uint32_t		*firstlen;
 	uint32_t		*totlen;
 	uint32_t		prio;
 {
 	NdisGetFirstBufferFromPacket(packet, buf, firstva, firstlen, totlen);
 }
 
 static int
 ndis_find_sym(lf, filename, suffix, sym)
 	linker_file_t		lf;
 	char			*filename;
 	char			*suffix;
 	caddr_t			*sym;
 {
 	char			*fullsym;
 	char			*suf;
 	int			i;
 
 	fullsym = ExAllocatePoolWithTag(NonPagedPool, MAXPATHLEN, 0);
 	if (fullsym == NULL)
 		return (ENOMEM);
 
 	bzero(fullsym, MAXPATHLEN);
 	strncpy(fullsym, filename, MAXPATHLEN);
 	if (strlen(filename) < 4) {
 		ExFreePool(fullsym);
 		return (EINVAL);
 	}
 
 	/* If the filename has a .ko suffix, strip if off. */
 	suf = fullsym + (strlen(filename) - 3);
 	if (strcmp(suf, ".ko") == 0)
 		*suf = '\0';
 
 	for (i = 0; i < strlen(fullsym); i++) {
 		if (fullsym[i] == '.')
 			fullsym[i] = '_';
 		else
 			fullsym[i] = tolower(fullsym[i]);
 	}
 	strcat(fullsym, suffix);
 	*sym = linker_file_lookup_symbol(lf, fullsym, 0);
 	ExFreePool(fullsym);
 	if (*sym == 0)
 		return (ENOENT);
 
 	return (0);
 }
 
 /*
  * Context handed to NdisCheckModule() through linker_file_foreach().
  */
 struct ndis_checkmodule {
 	char	*afilename;	/* file name whose symbols are looked up */
 	ndis_fh	*fh;		/* file handle filled in on a match */
 };
 
 /*
  * See if a single module contains the symbols for a specified file.
  */
 /*
  * linker_file_foreach() callback. 'context' is a struct
  * ndis_checkmodule. If the linker file holds both the "_start" and
  * "_end" symbols for the requested file name, the file handle is
  * filled in as a module-backed file whose length is the distance
  * between the two symbols, and 1 is returned. Otherwise the handle
  * is left alone and 0 is returned.
  */
 static int
 NdisCheckModule(linker_file_t lf, void *context)
 {
 	struct ndis_checkmodule *chk = (struct ndis_checkmodule *)context;
 	caddr_t			first, last;
 
 	if (ndis_find_sym(lf, chk->afilename, "_start", &first) != 0 ||
 	    ndis_find_sym(lf, chk->afilename, "_end", &last) != 0)
 		return (0);
 
 	chk->fh->nf_vp = lf;
 	chk->fh->nf_map = NULL;
 	chk->fh->nf_type = NDIS_FH_TYPE_MODULE;
 	chk->fh->nf_maplen = (last - first) & 0xFFFFFFFF;
 
 	return (1);
 }
 
 /* can also return NDIS_STATUS_RESOURCES/NDIS_STATUS_ERROR_READING_FILE */
 /*
  * Open a file on behalf of a driver.
  *
  * status	receives the result: NDIS_STATUS_SUCCESS,
  *		NDIS_STATUS_RESOURCES or NDIS_STATUS_FILE_NOT_FOUND
  * filehandle	receives the new file handle on success
  * filelength	receives the file length (low 32 bits) on success
  * filename	unicode name of the file to open
  * highestaddr	not used
  *
  * The loaded linker files are searched first for a module carrying
  * the file's data; only if none matches is the file opened through
  * the VFS, relative to ndis_filepath.
  */
 static void
 NdisOpenFile(status, filehandle, filelength, filename, highestaddr)
 	ndis_status		*status;
 	ndis_handle		*filehandle;
 	uint32_t		*filelength;
 	unicode_string		*filename;
 	ndis_physaddr		highestaddr;
 {
 	ansi_string		as;
 	char			*afilename = NULL;
 	struct thread		*td = curthread;
 	struct nameidata	nd;
 	int			flags, error, vfslocked;
 	struct vattr		vat;
 	struct vattr		*vap = &vat;
 	ndis_fh			*fh;
 	char			*path;
 	struct ndis_checkmodule	nc;
 
 	if (RtlUnicodeStringToAnsiString(&as, filename, TRUE)) {
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	/* Keep a private copy of the name; the handle owns it from here. */
 	afilename = strdup(as.as_buf, M_DEVBUF);
 	RtlFreeAnsiString(&as);
 
 	fh = ExAllocatePoolWithTag(NonPagedPool, sizeof(ndis_fh), 0);
 	if (fh == NULL) {
 		free(afilename, M_DEVBUF);
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	fh->nf_name = afilename;
 
 	/*
 	 * During system bootstrap, it's impossible to load files
 	 * from the rootfs since it's not mounted yet. We therefore
 	 * offer the possibility of opening files that have been
 	 * preloaded as modules instead. Both choices will work
 	 * when kldloading a module from multiuser, but only the
 	 * module option will work during bootstrap. The module
 	 * loading option works by using the ndiscvt(8) utility
 	 * to convert the arbitrary file into a .ko using objcopy(1).
 	 * This file will contain two special symbols: filename_start
 	 * and filename_end. All we have to do is traverse the KLD
 	 * list in search of those symbols and we've found the file
 	 * data. As an added bonus, ndiscvt(8) will also generate
 	 * a normal .o file which can be linked statically with
 	 * the kernel. This means that the symbols will actually reside
 	 * in the kernel's symbol table, but that doesn't matter to
 	 * us since the kernel appears to us as just another module.
 	 */
 
 	nc.afilename = afilename;
 	nc.fh = fh;
 	if (linker_file_foreach(NdisCheckModule, &nc)) {
 		/* NdisCheckModule() has filled in the handle. */
 		*filelength = fh->nf_maplen;
 		*filehandle = fh;
 		*status = NDIS_STATUS_SUCCESS;
 		return;
 	}
 
 	if (TAILQ_EMPTY(&mountlist)) {
 		ExFreePool(fh);
 		*status = NDIS_STATUS_FILE_NOT_FOUND;
 		printf("NDIS: could not find file %s in linker list\n",
 		    afilename);
 		printf("NDIS: and no filesystems mounted yet, "
 		    "aborting NdisOpenFile()\n");
 		free(afilename, M_DEVBUF);
 		return;
 	}
 
 	path = ExAllocatePoolWithTag(NonPagedPool, MAXPATHLEN, 0);
 	if (path == NULL) {
 		ExFreePool(fh);
 		free(afilename, M_DEVBUF);
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	snprintf(path, MAXPATHLEN, "%s/%s", ndis_filepath, afilename);
 
 	/* Some threads don't have a current working directory. */
 
 	if (td->td_proc->p_fd->fd_rdir == NULL)
 		td->td_proc->p_fd->fd_rdir = rootvnode;
 	if (td->td_proc->p_fd->fd_cdir == NULL)
 		td->td_proc->p_fd->fd_cdir = rootvnode;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, path, td);
 
 	flags = FREAD;
 	error = vn_open(&nd, &flags, 0, NULL);
 	if (error) {
 		*status = NDIS_STATUS_FILE_NOT_FOUND;
 		ExFreePool(fh);
 		printf("NDIS: open file %s failed: %d\n", path, error);
 		ExFreePool(path);
 		free(afilename, M_DEVBUF);
 		return;
 	}
 	vfslocked = NDHASGIANT(&nd);
 
 	ExFreePool(path);
 
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	/*
 	 * Get the file size.
 	 * NOTE(review): the return value of VOP_GETATTR() is not
 	 * checked before va_size is used below.
 	 */
 	VOP_GETATTR(nd.ni_vp, vap, td->td_ucred);
 	VOP_UNLOCK(nd.ni_vp, 0);
 	VFS_UNLOCK_GIANT(vfslocked);
 
 	fh->nf_vp = nd.ni_vp;
 	fh->nf_map = NULL;
 	fh->nf_type = NDIS_FH_TYPE_VFS;
 	*filehandle = fh;
 	*filelength = fh->nf_maplen = vap->va_size & 0xFFFFFFFF;
 	*status = NDIS_STATUS_SUCCESS;
 }
 
 /*
  * Make the contents of an open file available in memory.
  *
  * status		receives the result (see below)
  * mappedbuffer	receives the address of the data on success
  * filehandle		handle returned by NdisOpenFile()
  *
  * For module-backed files the data is already resident, so the
  * address of the "_start" symbol is returned. For VFS-backed files a
  * buffer of nf_maplen bytes is allocated and the file is read into
  * it.
  *
  * Status values: NDIS_STATUS_FAILURE for a NULL handle, a handle
  * with no backing object, a missing module symbol or a read error;
  * NDIS_STATUS_ALREADY_MAPPED if the file is currently mapped;
  * NDIS_STATUS_RESOURCES if the buffer cannot be allocated;
  * NDIS_STATUS_SUCCESS otherwise.
  */
 static void
 NdisMapFile(status, mappedbuffer, filehandle)
 	ndis_status		*status;
 	void			**mappedbuffer;
 	ndis_handle		filehandle;
 {
 	ndis_fh			*fh;
 	struct thread		*td = curthread;
 	linker_file_t		lf;
 	caddr_t			kldstart;
 	int			error, resid, vfslocked;
 	struct vnode		*vp;
 
 	if (filehandle == NULL) {
 		*status = NDIS_STATUS_FAILURE;
 		return;
 	}
 
 	fh = (ndis_fh *)filehandle;
 
 	if (fh->nf_vp == NULL) {
 		*status = NDIS_STATUS_FAILURE;
 		return;
 	}
 
 	if (fh->nf_map != NULL) {
 		*status = NDIS_STATUS_ALREADY_MAPPED;
 		return;
 	}
 
 	if (fh->nf_type == NDIS_FH_TYPE_MODULE) {
 		lf = fh->nf_vp;
 		if (ndis_find_sym(lf, fh->nf_name, "_start", &kldstart)) {
 			*status = NDIS_STATUS_FAILURE;
 			return;
 		}
 		fh->nf_map = kldstart;
 		*status = NDIS_STATUS_SUCCESS;
 		*mappedbuffer = fh->nf_map;
 		return;
 	}
 
 	fh->nf_map = ExAllocatePoolWithTag(NonPagedPool, fh->nf_maplen, 0);
 
 	if (fh->nf_map == NULL) {
 		*status = NDIS_STATUS_RESOURCES;
 		return;
 	}
 
 	vp = fh->nf_vp;
 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 	error = vn_rdwr(UIO_READ, vp, fh->nf_map, fh->nf_maplen, 0,
 	    UIO_SYSSPACE, 0, td->td_ucred, NOCRED, &resid, td);
 	VFS_UNLOCK_GIANT(vfslocked);
 
 	if (error) {
 		/*
 		 * Drop the buffer again so the handle does not look
 		 * mapped after a failed read; otherwise a retry would
 		 * be rejected with NDIS_STATUS_ALREADY_MAPPED.
 		 */
 		ExFreePool(fh->nf_map);
 		fh->nf_map = NULL;
 		*status = NDIS_STATUS_FAILURE;
 		return;
 	}
 
 	*status = NDIS_STATUS_SUCCESS;
 	*mappedbuffer = fh->nf_map;
 }
 
 /*
  * Undo NdisMapFile(). For VFS-backed files the read buffer is
  * freed; for module-backed files the mapping points at resident
  * module data and is merely forgotten. A NULL handle or a handle
  * that is not currently mapped is ignored.
  */
 static void
 NdisUnmapFile(filehandle)
 	ndis_handle		filehandle;
 {
 	ndis_fh			*fh;
 
 	/* Tolerate a NULL handle, as NdisMapFile()/NdisCloseFile() do. */
 	if (filehandle == NULL)
 		return;
 
 	fh = (ndis_fh *)filehandle;
 
 	if (fh->nf_map == NULL)
 		return;
 
 	if (fh->nf_type == NDIS_FH_TYPE_VFS)
 		ExFreePool(fh->nf_map);
 	fh->nf_map = NULL;
 }
 
 /*
  * Close a file handle obtained from NdisOpenFile(): drop any mapping
  * that is still in place, close the vnode for VFS-backed files, and
  * free the saved file name and the handle itself. A NULL handle is
  * ignored.
  *
  * NOTE(review): if nf_vp is NULL the routine returns before the
  * name and the handle are freed -- confirm whether that path is
  * reachable and intended.
  */
 static void
 NdisCloseFile(filehandle)
 	ndis_handle		filehandle;
 {
 	struct thread		*curtd = curthread;
 	ndis_fh			*handle;
 	struct vnode		*vnode;
 	int			giant;
 
 	if (filehandle == NULL)
 		return;
 
 	handle = (ndis_fh *)filehandle;
 
 	/* Only VFS-backed mappings own their buffer. */
 	if (handle->nf_map != NULL && handle->nf_type == NDIS_FH_TYPE_VFS)
 		ExFreePool(handle->nf_map);
 	handle->nf_map = NULL;
 
 	if (handle->nf_vp == NULL)
 		return;
 
 	if (handle->nf_type == NDIS_FH_TYPE_VFS) {
 		vnode = handle->nf_vp;
 		giant = VFS_LOCK_GIANT(vnode->v_mount);
 		vn_close(vnode, FREAD, curtd->td_ucred, curtd);
 		VFS_UNLOCK_GIANT(giant);
 	}
 
 	handle->nf_vp = NULL;
 	free(handle->nf_name, M_DEVBUF);
 	ExFreePool(handle);
 }
 
 /*
  * Return the value of mp_ncpus, narrowed to the 8-bit return type.
  */
 static uint8_t
 NdisSystemProcessorCount()
 {
 	return (mp_ncpus);
 }
 
+/*
+ * Report time counters for the CPU the calling thread is recorded
+ * as running on (curthread->td_oncpu).
+ *
+ * idle_count		receives that CPU's pc_cp_time[CP_IDLE]
+ * kernel_and_user	receives that CPU's pc_cp_time[CP_INTR]
+ * index		receives the CPU id
+ *
+ * NOTE(review): the parameter name suggests combined kernel and user
+ * time, but only the CP_INTR counter is reported -- confirm that
+ * this is intended. The counters are also narrowed to 32 bits.
+ */
+static void
+NdisGetCurrentProcessorCounts(idle_count, kernel_and_user, index)
+	uint32_t		*idle_count;
+	uint32_t		*kernel_and_user;
+	uint32_t		*index;
+{
+	struct pcpu		*pcpu;
+
+	pcpu = pcpu_find(curthread->td_oncpu);
+	*index = pcpu->pc_cpuid;
+	*idle_count = pcpu->pc_cp_time[CP_IDLE];
+	*kernel_and_user = pcpu->pc_cp_time[CP_INTR];
+}
+
 /*
  * Signatures of the status callbacks stored in the miniport block
  * and invoked by NdisMIndicateStatusComplete() and
  * NdisMIndicateStatus() respectively.
  */
 typedef void (*ndis_statusdone_handler)(ndis_handle);
 typedef void (*ndis_status_handler)(ndis_handle, ndis_status,
     void *, uint32_t);
 
 /*
  * Invoke the "status indication complete" callback stored in the
  * miniport block (nmb_statusdone_func), passing the adapter handle.
  */
 static void
 NdisMIndicateStatusComplete(adapter)
 	ndis_handle		adapter;
 {
 	ndis_statusdone_handler	donefn;
 
 	donefn = ((ndis_miniport_block *)adapter)->nmb_statusdone_func;
 	MSCALL1(donefn, adapter);
 }
 
 /*
  * Invoke the status callback stored in the miniport block
  * (nmb_status_func).
  *
  * adapter	miniport block handle; also passed to the callback
  * status	status code being indicated
  * sbuf	status buffer, passed through unchanged
  * slen	length of sbuf, passed through unchanged
  */
 static void
 NdisMIndicateStatus(adapter, status, sbuf, slen)
 	ndis_handle		adapter;
 	ndis_status		status;
 	void			*sbuf;
 	uint32_t		slen;
 {
 	ndis_status_handler	notifyfn;
 
 	notifyfn = ((ndis_miniport_block *)adapter)->nmb_status_func;
 	MSCALL4(notifyfn, adapter, status, sbuf, slen);
 }
 
 /*
  * The DDK documentation says that you should use IoQueueWorkItem()
  * instead of ExQueueWorkItem(). The problem is, IoQueueWorkItem()
  * is fundamentally incompatible with NdisScheduleWorkItem(), which
  * depends on the API semantics of ExQueueWorkItem(). In our world,
  * ExQueueWorkItem() is implemented on top of IoAllocateQueueItem()
  * anyway.
  *
  * There are actually three distinct APIs here. NdisScheduleWorkItem()
  * takes a pointer to an NDIS_WORK_ITEM. ExQueueWorkItem() takes a pointer
  * to a WORK_QUEUE_ITEM. And finally, IoQueueWorkItem() takes a pointer
  * to an opaque work item thingie which you get from IoAllocateWorkItem().
  * An NDIS_WORK_ITEM is not the same as a WORK_QUEUE_ITEM. However,
  * the NDIS_WORK_ITEM has some opaque storage at the end of it, and we
  * (ab)use this storage as a WORK_QUEUE_ITEM, which is what we submit
  * to ExQueueWorkItem().
  *
  * Got all that? (Sheesh.)
  */
 
 /*
  * Queue an NDIS work item for deferred execution.
  *
  * The wrapper-reserved area of the NDIS work item (nwi_wraprsvd) is
  * used as a work_queue_item, initialized with the item's function
  * and context, and queued with ExQueueWorkItem(). Always returns
  * NDIS_STATUS_SUCCESS.
  */
 ndis_status
 NdisScheduleWorkItem(work)
 	ndis_work_item		*work;
 {
 	work_queue_item		*wqi;
 
 	wqi = (work_queue_item *)work->nwi_wraprsvd;
 	ExInitializeWorkItem(wqi,
 	    (work_item_func)work->nwi_func, work->nwi_ctx);
 	ExQueueWorkItem(wqi, WORKQUEUE_DELAYED);
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Copy up to 'reqlen' bytes from one packet's buffer chain into
  * another's.
  *
  * dpkt	destination packet
  * doff	byte offset into the destination chain to start writing
  * reqlen	number of bytes requested
  * spkt	source packet
  * soff	byte offset into the source chain to start reading
  * cpylen	receives the number of bytes actually copied
  *
  * Copying stops early when either chain runs out of buffers. If an
  * offset lies at or beyond the end of its chain, or either chain is
  * empty, nothing is copied and *cpylen is 0.
  *
  * The read and write cursors are advanced after every partial copy,
  * and the space remaining in the current buffer is recomputed after
  * the offsets have been skipped, so that buffers of differing sizes
  * on the two sides are handled correctly. Lengths are kept unsigned
  * so that a large reqlen cannot turn into a negative copy length.
  */
 static void
 NdisCopyFromPacketToPacket(dpkt, doff, reqlen, spkt, soff, cpylen)
 	ndis_packet		*dpkt;
 	uint32_t		doff;
 	uint32_t		reqlen;
 	ndis_packet		*spkt;
 	uint32_t		soff;
 	uint32_t		*cpylen;
 {
 	ndis_buffer		*src, *dst;
 	char			*sptr, *dptr;
 	uint32_t		resid, copied, len, scnt, dcnt;
 
 	*cpylen = 0;
 
 	src = spkt->np_private.npp_head;
 	dst = dpkt->np_private.npp_head;
 
 	/* Nothing can be copied from or into an empty chain. */
 	if (src == NULL || dst == NULL)
 		return;
 
 	/* Skip whole source buffers covered by the source offset. */
 	while (soff != 0 && soff >= MmGetMdlByteCount(src)) {
 		soff -= MmGetMdlByteCount(src);
 		src = src->mdl_next;
 		if (src == NULL)
 			return;
 	}
 	sptr = (char *)MmGetMdlVirtualAddress(src) + soff;
 	scnt = MmGetMdlByteCount(src) - soff;
 
 	/* Likewise for the destination offset. */
 	while (doff != 0 && doff >= MmGetMdlByteCount(dst)) {
 		doff -= MmGetMdlByteCount(dst);
 		dst = dst->mdl_next;
 		if (dst == NULL)
 			return;
 	}
 	dptr = (char *)MmGetMdlVirtualAddress(dst) + doff;
 	dcnt = MmGetMdlByteCount(dst) - doff;
 
 	resid = reqlen;
 	copied = 0;
 
 	while (resid != 0) {
 		/* Copy as much as both current buffers allow. */
 		len = resid;
 		if (scnt < len)
 			len = scnt;
 		if (dcnt < len)
 			len = dcnt;
 
 		bcopy(sptr, dptr, len);
 
 		copied += len;
 		resid -= len;
 		if (resid == 0)
 			break;
 
 		/* Step both cursors past what was just copied. */
 		sptr += len;
 		scnt -= len;
 		dptr += len;
 		dcnt -= len;
 
 		if (dcnt == 0) {
 			dst = dst->mdl_next;
 			if (dst == NULL)
 				break;
 			dptr = MmGetMdlVirtualAddress(dst);
 			dcnt = MmGetMdlByteCount(dst);
 		}
 
 		if (scnt == 0) {
 			src = src->mdl_next;
 			if (src == NULL)
 				break;
 			sptr = MmGetMdlVirtualAddress(src);
 			scnt = MmGetMdlByteCount(src);
 		}
 	}
 
 	*cpylen = copied;
 }
 
 /*
  * Same as NdisCopyFromPacketToPacket(); the extra prio argument is
  * ignored.
  */
 static void
 NdisCopyFromPacketToPacketSafe(dpkt, doff, reqlen, spkt, soff, cpylen, prio)
 	ndis_packet		*dpkt;
 	uint32_t		doff;
 	uint32_t		reqlen;
 	ndis_packet		*spkt;
 	uint32_t		soff;
 	uint32_t		*cpylen;
 	uint32_t		prio;
 {
 	NdisCopyFromPacketToPacket(dpkt, doff, reqlen, spkt, soff, cpylen);
 }
 
 /*
  * Copy the packet extension area (np_ext) of the source packet into
  * the destination packet.
  */
 static void
 NdisIMCopySendPerPacketInfo(dpkt, spkt)
 	ndis_packet		*dpkt;
 	ndis_packet		*spkt;
 {
 	memcpy(&dpkt->np_ext, &spkt->np_ext, sizeof(ndis_packet_extension));
 }
 
 /*
  * Create a device object named 'devname' with IoCreateDevice().
  *
  * handle	passed to IoCreateDevice() as its first argument
  * devname	name for the new device
  * symname	not used
  * majorfuncs	not used
  * devobj	receives the new device object on success
  * devhandle	receives the same device object on success
  *
  * Returns the status reported by IoCreateDevice().
  */
 static ndis_status
 NdisMRegisterDevice(handle, devname, symname, majorfuncs, devobj, devhandle)
 	ndis_handle		handle;
 	unicode_string		*devname;
 	unicode_string		*symname;
 	driver_dispatch		*majorfuncs[];
 	void			**devobj;
 	ndis_handle		*devhandle;
 {
 	device_object		*newdev;
 	uint32_t		rc;
 
 	rc = IoCreateDevice(handle, 0, devname,
 	    FILE_DEVICE_UNKNOWN, 0, FALSE, &newdev);
 	if (rc != STATUS_SUCCESS)
 		return (rc);
 
 	*devobj = newdev;
 	*devhandle = newdev;
 
 	return (rc);
 }
 
 /*
  * Destroy a device previously handed out as a handle: the handle is
  * passed straight to IoDeleteDevice(). Always reports success.
  */
 static ndis_status
 NdisMDeregisterDevice(handle)
 	ndis_handle		handle;
 {
 	IoDeleteDevice(handle);
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Return the adapter's instance name as a freshly allocated Unicode
  * string. The name is the nameunit of the device_t stored in the
  * device extension of the miniport block's physical device object.
  *
  * Returns NDIS_STATUS_RESOURCES if the string conversion fails,
  * NDIS_STATUS_SUCCESS otherwise.
  */
 static ndis_status
 NdisMQueryAdapterInstanceName(unicode_string *name, ndis_handle handle)
 {
 	ndis_miniport_block	*nmb = (ndis_miniport_block *)handle;
 	device_t		nicdev = nmb->nmb_physdeviceobj->do_devext;
 	ansi_string		ansiname;
 
 	RtlInitAnsiString(&ansiname,
 	    __DECONST(char *, device_get_nameunit(nicdev)));
 
 	if (RtlAnsiStringToUnicodeString(name, &ansiname, TRUE) != 0)
 		return (NDIS_STATUS_RESOURCES);
 
 	return (NDIS_STATUS_SUCCESS);
 }
 
 /*
  * Stub: accepts an unload handler registration and does nothing
  * with it. Neither argument is recorded.
  */
 static void
 NdisMRegisterUnloadHandler(handle, func)
 	ndis_handle		handle;
 	void			*func;
 {
 }
 
 /*
  * Catch-all target used by the last real entry of ndis_functbl for
  * imports that have no implementation; it only logs that it was hit.
  */
 static void
 dummy()
 {
 	printf("NDIS dummy called...\n");
 }
 
 /*
  * Note: a couple of entries in this table specify the
  * number of arguments as "foo + 1". These are routines
  * that accept a 64-bit argument, passed by value. On
  * x86, these arguments consume two longwords on the stack,
  * so we lie and say there's one additional argument so
  * that the wrapping routines will do the right thing.
  */
 
 image_patch_table ndis_functbl[] = {
 	IMPORT_SFUNC(NdisCopyFromPacketToPacket, 6),
 	IMPORT_SFUNC(NdisCopyFromPacketToPacketSafe, 7),
 	IMPORT_SFUNC(NdisIMCopySendPerPacketInfo, 2),
 	IMPORT_SFUNC(NdisScheduleWorkItem, 1),
 	IMPORT_SFUNC(NdisMIndicateStatusComplete, 1),
 	IMPORT_SFUNC(NdisMIndicateStatus, 4),
 	IMPORT_SFUNC(NdisSystemProcessorCount, 0),
+	IMPORT_SFUNC(NdisGetCurrentProcessorCounts, 3),
 	IMPORT_SFUNC(NdisUnchainBufferAtBack, 2),
 	IMPORT_SFUNC(NdisGetFirstBufferFromPacket, 5),
 	IMPORT_SFUNC(NdisGetFirstBufferFromPacketSafe, 6),
 	IMPORT_SFUNC(NdisGetBufferPhysicalArraySize, 2),
 	IMPORT_SFUNC(NdisMGetDeviceProperty, 6),
 	IMPORT_SFUNC(NdisInitAnsiString, 2),
 	IMPORT_SFUNC(NdisInitUnicodeString, 2),
 	IMPORT_SFUNC(NdisWriteConfiguration, 4),
 	IMPORT_SFUNC(NdisAnsiStringToUnicodeString, 2),
 	IMPORT_SFUNC(NdisTerminateWrapper, 2),
 	IMPORT_SFUNC(NdisOpenConfigurationKeyByName, 4),
 	IMPORT_SFUNC(NdisOpenConfigurationKeyByIndex, 5),
 	IMPORT_SFUNC(NdisMRemoveMiniport, 1),
 	IMPORT_SFUNC(NdisInitializeString, 2),
 	IMPORT_SFUNC(NdisFreeString, 1),
 	IMPORT_SFUNC(NdisGetCurrentSystemTime, 1),
 	IMPORT_SFUNC(NdisGetSystemUpTime, 1),
+	IMPORT_SFUNC(NdisGetVersion, 0),
 	IMPORT_SFUNC(NdisMSynchronizeWithInterrupt, 3),
 	IMPORT_SFUNC(NdisMAllocateSharedMemoryAsync, 4),
 	IMPORT_SFUNC(NdisInterlockedInsertHeadList, 3),
 	IMPORT_SFUNC(NdisInterlockedInsertTailList, 3),
 	IMPORT_SFUNC(NdisInterlockedRemoveHeadList, 2),
 	IMPORT_SFUNC(NdisInitializeWrapper, 4),
 	IMPORT_SFUNC(NdisMRegisterMiniport, 3),
 	IMPORT_SFUNC(NdisAllocateMemoryWithTag, 3),
 	IMPORT_SFUNC(NdisAllocateMemory, 4 + 1),
 	IMPORT_SFUNC(NdisMSetAttributesEx, 5),
 	IMPORT_SFUNC(NdisCloseConfiguration, 1),
 	IMPORT_SFUNC(NdisReadConfiguration, 5),
 	IMPORT_SFUNC(NdisOpenConfiguration, 3),
 	IMPORT_SFUNC(NdisAcquireSpinLock, 1),
 	IMPORT_SFUNC(NdisReleaseSpinLock, 1),
 	IMPORT_SFUNC(NdisDprAcquireSpinLock, 1),
 	IMPORT_SFUNC(NdisDprReleaseSpinLock, 1),
 	IMPORT_SFUNC(NdisAllocateSpinLock, 1),
 	IMPORT_SFUNC(NdisInitializeReadWriteLock, 1),
 	IMPORT_SFUNC(NdisAcquireReadWriteLock, 3),
 	IMPORT_SFUNC(NdisReleaseReadWriteLock, 2),
 	IMPORT_SFUNC(NdisFreeSpinLock, 1),
 	IMPORT_SFUNC(NdisFreeMemory, 3),
 	IMPORT_SFUNC(NdisReadPciSlotInformation, 5),
 	IMPORT_SFUNC(NdisWritePciSlotInformation, 5),
 	IMPORT_SFUNC_MAP(NdisImmediateReadPciSlotInformation,
 	    NdisReadPciSlotInformation, 5),
 	IMPORT_SFUNC_MAP(NdisImmediateWritePciSlotInformation,
 	    NdisWritePciSlotInformation, 5),
 	IMPORT_CFUNC(NdisWriteErrorLogEntry, 0),
 	IMPORT_SFUNC(NdisMStartBufferPhysicalMapping, 6),
 	IMPORT_SFUNC(NdisMCompleteBufferPhysicalMapping, 3),
 	IMPORT_SFUNC(NdisMInitializeTimer, 4),
 	IMPORT_SFUNC(NdisInitializeTimer, 3),
 	IMPORT_SFUNC(NdisSetTimer, 2),
 	IMPORT_SFUNC(NdisMCancelTimer, 2),
 	IMPORT_SFUNC_MAP(NdisCancelTimer, NdisMCancelTimer, 2),
 	IMPORT_SFUNC(NdisMSetPeriodicTimer, 2),
 	IMPORT_SFUNC(NdisMQueryAdapterResources, 4),
 	IMPORT_SFUNC(NdisMRegisterIoPortRange, 4),
 	IMPORT_SFUNC(NdisMDeregisterIoPortRange, 4),
 	IMPORT_SFUNC(NdisReadNetworkAddress, 4),
 	IMPORT_SFUNC(NdisQueryMapRegisterCount, 2),
 	IMPORT_SFUNC(NdisMAllocateMapRegisters, 5),
 	IMPORT_SFUNC(NdisMFreeMapRegisters, 1),
 	IMPORT_SFUNC(NdisMAllocateSharedMemory, 5),
 	IMPORT_SFUNC(NdisMMapIoSpace, 4 + 1),
 	IMPORT_SFUNC(NdisMUnmapIoSpace, 3),
 	IMPORT_SFUNC(NdisGetCacheFillSize, 0),
 	IMPORT_SFUNC(NdisMGetDmaAlignment, 1),
 	IMPORT_SFUNC(NdisMInitializeScatterGatherDma, 3),
 	IMPORT_SFUNC(NdisAllocatePacketPool, 4),
 	IMPORT_SFUNC(NdisAllocatePacketPoolEx, 5),
 	IMPORT_SFUNC(NdisAllocatePacket, 3),
 	IMPORT_SFUNC(NdisFreePacket, 1),
 	IMPORT_SFUNC(NdisFreePacketPool, 1),
 	IMPORT_SFUNC_MAP(NdisDprAllocatePacket, NdisAllocatePacket, 3),
 	IMPORT_SFUNC_MAP(NdisDprFreePacket, NdisFreePacket, 1),
 	IMPORT_SFUNC(NdisAllocateBufferPool, 3),
 	IMPORT_SFUNC(NdisAllocateBuffer, 5),
 	IMPORT_SFUNC(NdisQueryBuffer, 3),
 	IMPORT_SFUNC(NdisQueryBufferSafe, 4),
 	IMPORT_SFUNC(NdisBufferVirtualAddress, 1),
 	IMPORT_SFUNC(NdisBufferVirtualAddressSafe, 2),
 	IMPORT_SFUNC(NdisBufferLength, 1),
 	IMPORT_SFUNC(NdisFreeBuffer, 1),
 	IMPORT_SFUNC(NdisFreeBufferPool, 1),
 	IMPORT_SFUNC(NdisInterlockedIncrement, 1),
 	IMPORT_SFUNC(NdisInterlockedDecrement, 1),
 	IMPORT_SFUNC(NdisInitializeEvent, 1),
 	IMPORT_SFUNC(NdisSetEvent, 1),
 	IMPORT_SFUNC(NdisResetEvent, 1),
 	IMPORT_SFUNC(NdisWaitEvent, 2),
 	IMPORT_SFUNC(NdisUnicodeStringToAnsiString, 2),
 	IMPORT_SFUNC(NdisMPciAssignResources, 3),
 	IMPORT_SFUNC(NdisMFreeSharedMemory, 5 + 1),
 	IMPORT_SFUNC(NdisMRegisterInterrupt, 7),
 	IMPORT_SFUNC(NdisMDeregisterInterrupt, 1),
 	IMPORT_SFUNC(NdisMRegisterAdapterShutdownHandler, 3),
 	IMPORT_SFUNC(NdisMDeregisterAdapterShutdownHandler, 1),
 	IMPORT_SFUNC(NDIS_BUFFER_TO_SPAN_PAGES, 1),
 	IMPORT_SFUNC(NdisQueryBufferOffset, 3),
 	IMPORT_SFUNC(NdisAdjustBufferLength, 2),
 	IMPORT_SFUNC(NdisPacketPoolUsage, 1),
 	IMPORT_SFUNC(NdisMSleep, 1),
 	IMPORT_SFUNC(NdisUnchainBufferAtFront, 2),
 	IMPORT_SFUNC(NdisReadPcmciaAttributeMemory, 4),
 	IMPORT_SFUNC(NdisWritePcmciaAttributeMemory, 4),
 	IMPORT_SFUNC(NdisOpenFile, 5 + 1),
 	IMPORT_SFUNC(NdisMapFile, 3),
 	IMPORT_SFUNC(NdisUnmapFile, 1),
 	IMPORT_SFUNC(NdisCloseFile, 1),
 	IMPORT_SFUNC(NdisMRegisterDevice, 6),
 	IMPORT_SFUNC(NdisMDeregisterDevice, 1),
 	IMPORT_SFUNC(NdisMQueryAdapterInstanceName, 2),
 	IMPORT_SFUNC(NdisMRegisterUnloadHandler, 2),
 	IMPORT_SFUNC(ndis_timercall, 4),
 	IMPORT_SFUNC(ndis_asyncmem_complete, 2),
 	IMPORT_SFUNC(ndis_intr, 2),
 	IMPORT_SFUNC(ndis_intrhand, 4),
 
 	/*
 	 * This last entry is a catch-all for any function we haven't
 	 * implemented yet. The PE import list patching routine will
 	 * use it for any function that doesn't have an explicit match
 	 * in this table.
 	 */
 
 	{ NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
 
 	/* End of list. */
 
 	{ NULL, NULL, NULL }
 };
Index: projects/binutils-2.17/sys/compat/ndis/subr_ntoskrnl.c
===================================================================
--- projects/binutils-2.17/sys/compat/ndis/subr_ntoskrnl.c	(revision 215829)
+++ projects/binutils-2.17/sys/compat/ndis/subr_ntoskrnl.c	(revision 215830)
@@ -1,4317 +1,4436 @@
 /*-
  * Copyright (c) 2003
  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Bill Paul.
  * 4. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/ctype.h>
 #include <sys/unistd.h>
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/errno.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <sys/callout.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/condvar.h>
 #include <sys/kthread.h>
 #include <sys/module.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/stdarg.h>
 #include <machine/resource.h>
 
 #include <sys/bus.h>
 #include <sys/rman.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/uma.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <compat/ndis/pe_var.h>
 #include <compat/ndis/cfg_var.h>
 #include <compat/ndis/resource_var.h>
 #include <compat/ndis/ntoskrnl_var.h>
 #include <compat/ndis/hal_var.h>
 #include <compat/ndis/ndis_var.h>
 
 #ifdef NTOSKRNL_DEBUG_TIMERS
 static int sysctl_show_timers(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_PROC(_debug, OID_AUTO, ntoskrnl_timers, CTLFLAG_RW, 0, 0,
 	sysctl_show_timers, "I", "Show ntoskrnl timer stats");
 #endif
 
 /*
  * Per-thread queue state. ntoskrnl_libinit() allocates arrays of
  * these (kq_queues for the DPC threads, wq_queues for the workitem
  * threads) and passes one element to each thread it creates.
  */
 struct kdpc_queue {
 	list_entry		kq_disp;	/* presumably the pending-item list -- TODO confirm */
 	struct thread		*kq_td;
 	int			kq_cpu;		/* DPC thread index, set in ntoskrnl_libinit() */
 	int			kq_exit;
 	int			kq_running;
 	kspin_lock		kq_lock;
 	nt_kevent		kq_proc;
 	nt_kevent		kq_done;
 };
 
 typedef struct kdpc_queue kdpc_queue;
 
 /*
  * Pairs a condition variable with a thread pointer.
  * NOTE(review): presumably the per-wait-block extension used by the
  * wait routines -- confirm against the KeWaitFor* code.
  */
 struct wb_ext {
 	struct cv		we_cv;
 	struct thread		*we_td;
 };
 
 typedef struct wb_ext wb_ext;
 
 #define NTOSKRNL_TIMEOUTS	256
 #ifdef NTOSKRNL_DEBUG_TIMERS
 static uint64_t ntoskrnl_timer_fires;
 static uint64_t ntoskrnl_timer_sets;
 static uint64_t ntoskrnl_timer_reloads;
 static uint64_t ntoskrnl_timer_cancels;
 #endif
 
 /*
  * A callout plus the list linkage that lets it sit on
  * ntoskrnl_calllist. ntoskrnl_libinit() preallocates
  * NTOSKRNL_TIMEOUTS of these and ntoskrnl_libfini() frees them.
  */
 struct callout_entry {
 	struct callout		ce_callout;
 	list_entry		ce_list;	/* linkage on ntoskrnl_calllist */
 };
 
 typedef struct callout_entry callout_entry;
 
 static struct list_entry ntoskrnl_calllist;
 static struct mtx ntoskrnl_calllock;
 struct kuser_shared_data kuser_shared_data;
 
 static struct list_entry ntoskrnl_intlist;
 static kspin_lock ntoskrnl_intlock;
 
 static uint8_t RtlEqualUnicodeString(unicode_string *,
 	unicode_string *, uint8_t);
+static void RtlCopyString(ansi_string *, const ansi_string *);
 static void RtlCopyUnicodeString(unicode_string *,
 	unicode_string *);
 static irp *IoBuildSynchronousFsdRequest(uint32_t, device_object *,
 	 void *, uint32_t, uint64_t *, nt_kevent *, io_status_block *);
 static irp *IoBuildAsynchronousFsdRequest(uint32_t,
 	device_object *, void *, uint32_t, uint64_t *, io_status_block *);
 static irp *IoBuildDeviceIoControlRequest(uint32_t,
 	device_object *, void *, uint32_t, void *, uint32_t,
 	uint8_t, nt_kevent *, io_status_block *);
 static irp *IoAllocateIrp(uint8_t, uint8_t);
 static void IoReuseIrp(irp *, uint32_t);
 static void IoFreeIrp(irp *);
 static void IoInitializeIrp(irp *, uint16_t, uint8_t);
 static irp *IoMakeAssociatedIrp(irp *, uint8_t);
 static uint32_t KeWaitForMultipleObjects(uint32_t,
 	nt_dispatch_header **, uint32_t, uint32_t, uint32_t, uint8_t,
 	int64_t *, wait_block *);
 static void ntoskrnl_waittest(nt_dispatch_header *, uint32_t);
 static void ntoskrnl_satisfy_wait(nt_dispatch_header *, struct thread *);
 static void ntoskrnl_satisfy_multiple_waits(wait_block *);
 static int ntoskrnl_is_signalled(nt_dispatch_header *, struct thread *);
 static void ntoskrnl_insert_timer(ktimer *, int);
 static void ntoskrnl_remove_timer(ktimer *);
 #ifdef NTOSKRNL_DEBUG_TIMERS
 static void ntoskrnl_show_timers(void);
 #endif
 static void ntoskrnl_timercall(void *);
 static void ntoskrnl_dpc_thread(void *);
 static void ntoskrnl_destroy_dpc_threads(void);
 static void ntoskrnl_destroy_workitem_threads(void);
 static void ntoskrnl_workitem_thread(void *);
 static void ntoskrnl_workitem(device_object *, void *);
 static void ntoskrnl_unicode_to_ascii(uint16_t *, char *, int);
 static void ntoskrnl_ascii_to_unicode(char *, uint16_t *, int);
 static uint8_t ntoskrnl_insert_dpc(list_entry *, kdpc *);
 static void WRITE_REGISTER_USHORT(uint16_t *, uint16_t);
 static uint16_t READ_REGISTER_USHORT(uint16_t *);
 static void WRITE_REGISTER_ULONG(uint32_t *, uint32_t);
 static uint32_t READ_REGISTER_ULONG(uint32_t *);
 static void WRITE_REGISTER_UCHAR(uint8_t *, uint8_t);
 static uint8_t READ_REGISTER_UCHAR(uint8_t *);
 static int64_t _allmul(int64_t, int64_t);
 static int64_t _alldiv(int64_t, int64_t);
 static int64_t _allrem(int64_t, int64_t);
 static int64_t _allshr(int64_t, uint8_t);
 static int64_t _allshl(int64_t, uint8_t);
 static uint64_t _aullmul(uint64_t, uint64_t);
 static uint64_t _aulldiv(uint64_t, uint64_t);
 static uint64_t _aullrem(uint64_t, uint64_t);
 static uint64_t _aullshr(uint64_t, uint8_t);
 static uint64_t _aullshl(uint64_t, uint8_t);
 static slist_entry *ntoskrnl_pushsl(slist_header *, slist_entry *);
+static void InitializeSListHead(slist_header *);
 static slist_entry *ntoskrnl_popsl(slist_header *);
+static void ExFreePoolWithTag(void *, uint32_t);
 static void ExInitializePagedLookasideList(paged_lookaside_list *,
 	lookaside_alloc_func *, lookaside_free_func *,
 	uint32_t, size_t, uint32_t, uint16_t);
 static void ExDeletePagedLookasideList(paged_lookaside_list *);
 static void ExInitializeNPagedLookasideList(npaged_lookaside_list *,
 	lookaside_alloc_func *, lookaside_free_func *,
 	uint32_t, size_t, uint32_t, uint16_t);
 static void ExDeleteNPagedLookasideList(npaged_lookaside_list *);
 static slist_entry
 	*ExInterlockedPushEntrySList(slist_header *,
 	slist_entry *, kspin_lock *);
 static slist_entry
 	*ExInterlockedPopEntrySList(slist_header *, kspin_lock *);
 static uint32_t InterlockedIncrement(volatile uint32_t *);
 static uint32_t InterlockedDecrement(volatile uint32_t *);
 static void ExInterlockedAddLargeStatistic(uint64_t *, uint32_t);
 static void *MmAllocateContiguousMemory(uint32_t, uint64_t);
 static void *MmAllocateContiguousMemorySpecifyCache(uint32_t,
 	uint64_t, uint64_t, uint64_t, enum nt_caching_type);
 static void MmFreeContiguousMemory(void *);
 static void MmFreeContiguousMemorySpecifyCache(void *, uint32_t,
 	enum nt_caching_type);
 static uint32_t MmSizeOfMdl(void *, size_t);
 static void *MmMapLockedPages(mdl *, uint8_t);
 static void *MmMapLockedPagesSpecifyCache(mdl *,
 	uint8_t, uint32_t, void *, uint32_t, uint32_t);
 static void MmUnmapLockedPages(void *, mdl *);
 static device_t ntoskrnl_finddev(device_t, uint64_t, struct resource **);
 static void RtlZeroMemory(void *, size_t);
+static void RtlSecureZeroMemory(void *, size_t);
+static void RtlFillMemory(void *, size_t, uint8_t);
+static void RtlMoveMemory(void *, const void *, size_t);
+static ndis_status RtlCharToInteger(const char *, uint32_t, uint32_t *);
 static void RtlCopyMemory(void *, const void *, size_t);
 static size_t RtlCompareMemory(const void *, const void *, size_t);
 static ndis_status RtlUnicodeStringToInteger(unicode_string *,
 	uint32_t, uint32_t *);
 static int atoi (const char *);
 static long atol (const char *);
 static int rand(void);
 static void srand(unsigned int);
 static void KeQuerySystemTime(uint64_t *);
 static uint32_t KeTickCount(void);
 static uint8_t IoIsWdmVersionAvailable(uint8_t, uint8_t);
 static void ntoskrnl_thrfunc(void *);
 static ndis_status PsCreateSystemThread(ndis_handle *,
 	uint32_t, void *, ndis_handle, void *, void *, void *);
 static ndis_status PsTerminateSystemThread(ndis_status);
 static ndis_status IoGetDeviceObjectPointer(unicode_string *,
 	uint32_t, void *, device_object *);
 static ndis_status IoGetDeviceProperty(device_object *, uint32_t,
 	uint32_t, void *, uint32_t *);
 static void KeInitializeMutex(kmutant *, uint32_t);
 static uint32_t KeReleaseMutex(kmutant *, uint8_t);
 static uint32_t KeReadStateMutex(kmutant *);
 static ndis_status ObReferenceObjectByHandle(ndis_handle,
 	uint32_t, void *, uint8_t, void **, void **);
 static void ObfDereferenceObject(void *);
 static uint32_t ZwClose(ndis_handle);
 static uint32_t WmiQueryTraceInformation(uint32_t, void *, uint32_t,
 	uint32_t, void *);
 static uint32_t WmiTraceMessage(uint64_t, uint32_t, void *, uint16_t, ...);
 static uint32_t IoWMIRegistrationControl(device_object *, uint32_t);
 static void *ntoskrnl_memset(void *, int, size_t);
 static void *ntoskrnl_memmove(void *, void *, size_t);
 static void *ntoskrnl_memchr(void *, unsigned char, size_t);
 static char *ntoskrnl_strstr(char *, char *);
 static char *ntoskrnl_strncat(char *, char *, size_t);
 static int ntoskrnl_toupper(int);
 static int ntoskrnl_tolower(int);
 static funcptr ntoskrnl_findwrap(funcptr);
 static uint32_t DbgPrint(char *, ...);
 static void DbgBreakPoint(void);
 static void KeBugCheckEx(uint32_t, u_long, u_long, u_long, u_long);
 static int32_t KeDelayExecutionThread(uint8_t, uint8_t, int64_t *);
 static int32_t KeSetPriorityThread(struct thread *, int32_t);
 static void dummy(void);
 
 static struct mtx ntoskrnl_dispatchlock;
 static struct mtx ntoskrnl_interlock;
 static kspin_lock ntoskrnl_cancellock;
 static int ntoskrnl_kth = 0;
 static struct nt_objref_head ntoskrnl_reflist;
 static uma_zone_t mdl_zone;
 static uma_zone_t iw_zone;
 static struct kdpc_queue *kq_queues;
 static struct kdpc_queue *wq_queues;
 static int wq_idx = 0;
 
 /*
  * Bring up the ntoskrnl emulation layer: initialize the global locks
  * and lists, allocate the DPC and workitem queue arrays, start the
  * DPC and workitem threads, wrap every routine in ntoskrnl_functbl,
  * preallocate the callout entries and create the MDL and workitem
  * UMA zones.
  *
  * Returns 0 on success or ENOMEM if a queue array cannot be
  * allocated. Failure to start a thread or to allocate a callout
  * entry panics.
  */
 int
 ntoskrnl_libinit()
 {
 	image_patch_table	*patch;
 	int			error;
 	struct proc		*p;
 	kdpc_queue		*kq;
 	callout_entry		*e;
 	int			i;
 
 	mtx_init(&ntoskrnl_dispatchlock,
 	    "ntoskrnl dispatch lock", MTX_NDIS_LOCK, MTX_DEF|MTX_RECURSE);
 	mtx_init(&ntoskrnl_interlock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN);
 	KeInitializeSpinLock(&ntoskrnl_cancellock);
 	KeInitializeSpinLock(&ntoskrnl_intlock);
 	TAILQ_INIT(&ntoskrnl_reflist);
 
 	InitializeListHead(&ntoskrnl_calllist);
 	InitializeListHead(&ntoskrnl_intlist);
 	mtx_init(&ntoskrnl_calllock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN);
 
 	kq_queues = ExAllocatePoolWithTag(NonPagedPool,
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	    sizeof(kdpc_queue) * mp_ncpus, 0);
 #else
 	    sizeof(kdpc_queue), 0);
 #endif
 
 	if (kq_queues == NULL)
 		return (ENOMEM);
 
 	wq_queues = ExAllocatePoolWithTag(NonPagedPool,
 	    sizeof(kdpc_queue) * WORKITEM_THREADS, 0);
 
 	if (wq_queues == NULL) {
 		/*
 		 * Don't leak the DPC queue array on this error path.
 		 * Clear the pointer so a later ntoskrnl_libfini()
 		 * does not free it a second time.
 		 */
 		ExFreePool(kq_queues);
 		kq_queues = NULL;
 		return (ENOMEM);
 	}
 
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	bzero((char *)kq_queues, sizeof(kdpc_queue) * mp_ncpus);
 #else
 	bzero((char *)kq_queues, sizeof(kdpc_queue));
 #endif
 	bzero((char *)wq_queues, sizeof(kdpc_queue) * WORKITEM_THREADS);
 
 	/*
 	 * Launch the DPC threads.
 	 */
 
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	for (i = 0; i < mp_ncpus; i++) {
 #else
 	for (i = 0; i < 1; i++) {
 #endif
 		kq = kq_queues + i;
 		kq->kq_cpu = i;
 		error = kproc_create(ntoskrnl_dpc_thread, kq, &p,
 		    RFHIGHPID, NDIS_KSTACK_PAGES, "Windows DPC %d", i);
 		if (error)
 			panic("failed to launch DPC thread");
 	}
 
 	/*
 	 * Launch the workitem threads.
 	 */
 
 	for (i = 0; i < WORKITEM_THREADS; i++) {
 		kq = wq_queues + i;
 		error = kproc_create(ntoskrnl_workitem_thread, kq, &p,
 		    RFHIGHPID, NDIS_KSTACK_PAGES, "Windows Workitem %d", i);
 		if (error)
 			panic("failed to launch workitem thread");
 	}
 
 	/* Create a wrapper for every routine in the patch table. */
 	patch = ntoskrnl_functbl;
 	while (patch->ipt_func != NULL) {
 		windrv_wrap((funcptr)patch->ipt_func,
 		    (funcptr *)&patch->ipt_wrap,
 		    patch->ipt_argcnt, patch->ipt_ftype);
 		patch++;
 	}
 
 	/* Preallocate the pool of callout entries. */
 	for (i = 0; i < NTOSKRNL_TIMEOUTS; i++) {
 		e = ExAllocatePoolWithTag(NonPagedPool,
 		    sizeof(callout_entry), 0);
 		if (e == NULL)
 			panic("failed to allocate timeouts");
 		mtx_lock_spin(&ntoskrnl_calllock);
 		InsertHeadList((&ntoskrnl_calllist), (&e->ce_list));
 		mtx_unlock_spin(&ntoskrnl_calllock);
 	}
 
 	/*
 	 * MDLs are supposed to be variable size (they describe
 	 * buffers containing some number of pages, but we don't
 	 * know ahead of time how many pages that will be). But
 	 * always allocating them off the heap is very slow. As
 	 * a compromise, we create an MDL UMA zone big enough to
 	 * handle any buffer requiring up to 16 pages, and we
 	 * use those for any MDLs for buffers of 16 pages or less
 	 * in size. For buffers larger than that (which we assume
 	 * will be few and far between), we allocate the MDLs off
 	 * the heap.
 	 */
 
 	mdl_zone = uma_zcreate("Windows MDL", MDL_ZONE_SIZE,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 	iw_zone = uma_zcreate("Windows WorkItem", sizeof(io_workitem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 	return (0);
 }
 
 /*
  * Tear down what ntoskrnl_libinit() set up: remove the function
  * wrappers, stop the workitem and DPC threads, release the queue
  * arrays and UMA zones, free every preallocated callout entry and
  * destroy the global mutexes. Always returns 0.
  */
 int
 ntoskrnl_libfini()
 {
 	image_patch_table	*ipt;
 	list_entry		*link;
 	callout_entry		*ce;
 
 	for (ipt = ntoskrnl_functbl; ipt->ipt_func != NULL; ipt++)
 		windrv_unwrap(ipt->ipt_wrap);
 
 	/* Stop the workitem queues first, then the DPC queues. */
 	ntoskrnl_destroy_workitem_threads();
 	ntoskrnl_destroy_dpc_threads();
 
 	ExFreePool(kq_queues);
 	ExFreePool(wq_queues);
 
 	uma_zdestroy(mdl_zone);
 	uma_zdestroy(iw_zone);
 
 	/*
 	 * Drain the callout list. The spin lock is only held while
 	 * the list is inspected and unlinked, never across the free.
 	 */
 	for (;;) {
 		mtx_lock_spin(&ntoskrnl_calllock);
 		if (IsListEmpty(&ntoskrnl_calllist)) {
 			mtx_unlock_spin(&ntoskrnl_calllock);
 			break;
 		}
 		link = RemoveHeadList(&ntoskrnl_calllist);
 		ce = CONTAINING_RECORD(link, callout_entry, ce_list);
 		mtx_unlock_spin(&ntoskrnl_calllock);
 		ExFreePool(ce);
 	}
 
 	mtx_destroy(&ntoskrnl_dispatchlock);
 	mtx_destroy(&ntoskrnl_interlock);
 	mtx_destroy(&ntoskrnl_calllock);
 
 	return (0);
 }
 
 /*
  * memset() front end. GCC only emits a local implementation of
  * memset, so this named function exists to give the wrapper code
  * something it can reference externally.
  *
  * Fills 'size' bytes at 'buf' with 'ch' and returns 'buf'.
  */
 static void *
 ntoskrnl_memset(void *buf, int ch, size_t size)
 {
 	void			*filled;
 
 	filled = memset(buf, ch, size);
 	return (filled);
 }
 
 /*
  * memmove() replacement built on bcopy(): copies 'size' bytes from
  * 'src' to 'dst' and returns 'dst'.
  */
 static void *
 ntoskrnl_memmove(void *dst, void *src, size_t size)
 {
 	void			*result = dst;
 
 	bcopy(src, dst, size);
 
 	return (result);
 }
 
 /*
  * Locate the first byte equal to 'ch' within the first 'len' bytes
  * of 'buf'. Returns a pointer to that byte, or NULL if it does not
  * occur (including when 'len' is zero).
  */
 static void *
 ntoskrnl_memchr(void *buf, unsigned char ch, size_t len)
 {
 	unsigned char		*cur;
 
 	for (cur = buf; len != 0; len--, cur++) {
 		if (*cur == ch)
 			return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Find the first occurrence of the string 'find' inside 's'.
  * Returns a pointer to the start of the match, 's' itself when
  * 'find' is empty, or NULL when there is no match.
  */
 static char *
 ntoskrnl_strstr(char *s, char *find)
 {
 	char			first;
 	size_t			restlen;
 
 	first = *find;
 	if (first == 0)
 		return ((char *)s);
 
 	/* Match the first character by hand, the remainder by strncmp. */
 	restlen = strlen(find + 1);
 	for (; *s != 0; s++) {
 		if (*s == first && strncmp(s + 1, find + 1, restlen) == 0)
 			return ((char *)s);
 	}
 
 	return (NULL);
 }
 
 /*
  * Append at most 'n' characters of 'src' to the NUL-terminated
  * string 'dst' and terminate the result. When 'n' is zero 'dst' is
  * left untouched. Returns 'dst'. (Derived from the libc routine.)
  */
 static char *
 ntoskrnl_strncat(char *dst, char *src, size_t n)
 {
 	char			*tail;
 	const char		*from;
 
 	if (n == 0)
 		return (dst);
 
 	/* Seek to the terminator of the destination. */
 	for (tail = dst; *tail != 0; tail++)
 		continue;
 
 	for (from = src; n != 0 && *from != 0; n--)
 		*tail++ = *from++;
 	*tail = 0;
 
 	return (dst);
 }
 
 /* Callable front end for toupper(). */
 static int
 ntoskrnl_toupper(int c)
 {
 	int			upper;
 
 	upper = toupper(c);
 	return (upper);
 }
 
 /* Callable front end for tolower(). */
 static int
 ntoskrnl_tolower(int c)
 {
 	int			lower;
 
 	lower = tolower(c);
 	return (lower);
 }
 
 /*
  * Compare two counted Unicode strings.
  *
  * Returns TRUE when both strings have the same length and every
  * character matches, FALSE otherwise. When 'caseinsensitive' is
  * TRUE only the low byte of each character is compared, after
  * folding it with toupper().
  */
 static uint8_t
 RtlEqualUnicodeString(unicode_string *str1, unicode_string *str2,
 	uint8_t caseinsensitive)
 {
 	int			i, nchars;
 
 	if (str1->us_len != str2->us_len)
 		return (FALSE);
 
 	/*
 	 * us_len is a byte count while us_buf is indexed by
 	 * character, so convert before walking the buffer; using
 	 * us_len directly would read past the end of both strings.
 	 */
 	nchars = str1->us_len / sizeof(str1->us_buf[0]);
 
 	for (i = 0; i < nchars; i++) {
 		if (caseinsensitive == TRUE) {
 			if (toupper((char)(str1->us_buf[i] & 0xFF)) !=
 			    toupper((char)(str2->us_buf[i] & 0xFF)))
 				return (FALSE);
 		} else {
 			if (str1->us_buf[i] != str2->us_buf[i])
 				return (FALSE);
 		}
 	}
 
 	return (TRUE);
 }
 
 static void
+RtlCopyString(dst, src)
+	ansi_string		*dst;
+	const ansi_string	*src;
+{
+	/*
+	 * Copy a counted ANSI string, truncating to the capacity of
+	 * the destination. With no source, or with either buffer
+	 * missing, the destination simply becomes empty.
+	 */
+	if (src == NULL || src->as_buf == NULL || dst->as_buf == NULL) {
+		dst->as_len = 0;
+		return;
+	}
+
+	dst->as_len = min(src->as_len, dst->as_maxlen);
+	memcpy(dst->as_buf, src->as_buf, dst->as_len);
+
+	/* Terminate only when there is room left for the NUL. */
+	if (dst->as_len < dst->as_maxlen)
+		dst->as_buf[dst->as_len] = 0;
+}
+
+static void
 RtlCopyUnicodeString(dest, src)
 	unicode_string		*dest;
 	unicode_string		*src;
 {
 	/*
 	 * Copy a counted Unicode string: the destination length is
 	 * the source length clamped to the destination capacity, and
 	 * that many bytes are copied.
 	 */
 	dest->us_len = (src->us_len <= dest->us_maxlen) ?
 	    src->us_len : dest->us_maxlen;
 
 	memcpy(dest->us_buf, src->us_buf, dest->us_len);
 }
 
 /*
  * Widen 'len' single-byte characters from 'ascii' into 16-bit
  * characters in 'unicode'. No terminator is written.
  *
  * Each byte is converted through unsigned char first: where plain
  * char is signed, a direct cast would sign-extend bytes >= 0x80
  * into 0xFFxx instead of 0x00xx.
  */
 static void
 ntoskrnl_ascii_to_unicode(ascii, unicode, len)
 	char			*ascii;
 	uint16_t		*unicode;
 	int			len;
 {
 	int			i;
 
 	for (i = 0; i < len; i++)
 		unicode[i] = (uint16_t)(unsigned char)ascii[i];
 }
 
 /*
  * Narrow 16-bit characters from 'unicode' into single bytes in
  * 'ascii', keeping only the low 8 bits of each character. 'len' is
  * the size of the Unicode data in bytes, so len / 2 characters are
  * converted. No terminator is written.
  */
 static void
 ntoskrnl_unicode_to_ascii(unicode, ascii, len)
 	uint16_t		*unicode;
 	char			*ascii;
 	int			len;
 {
 	int			idx;
 	int			nchars;
 
 	nchars = len / 2;
 	for (idx = 0; idx < nchars; idx++)
 		((uint8_t *)ascii)[idx] = (uint8_t)unicode[idx];
 }
 
 /*
  * Convert a counted Unicode string to a counted ANSI string by
  * keeping the low byte of every character.
  *
  * dest     - receives the converted string.
  * src      - string to convert; us_len is a byte count.
  * allocate - when TRUE a new buffer of (us_len / 2) + 1 bytes is
  *            allocated for dest; otherwise dest's existing buffer is
  *            used and the result is truncated to as_maxlen.
  *
  * Returns STATUS_INVALID_PARAMETER if either string pointer is NULL,
  * STATUS_INSUFFICIENT_RESOURCES if the allocation fails, and
  * STATUS_SUCCESS otherwise.
  */
 uint32_t
 RtlUnicodeStringToAnsiString(ansi_string *dest, unicode_string *src, uint8_t allocate)
 {
 	if (dest == NULL || src == NULL)
 		return (STATUS_INVALID_PARAMETER);
 
 	/* Preliminary length; both branches below recompute it. */
 	dest->as_len = src->us_len / 2;
 	if (dest->as_maxlen < dest->as_len)
 		dest->as_len = dest->as_maxlen;
 
 	if (allocate == TRUE) {
 		/*
 		 * One extra byte is requested beyond the characters;
 		 * ExAllocatePoolWithTag() returns zeroed memory, so
 		 * that byte acts as a NUL terminator.
 		 */
 		dest->as_buf = ExAllocatePoolWithTag(NonPagedPool,
 		    (src->us_len / 2) + 1, 0);
 		if (dest->as_buf == NULL)
 			return (STATUS_INSUFFICIENT_RESOURCES);
 		dest->as_len = dest->as_maxlen = src->us_len / 2;
 	} else {
 		/* Caller-supplied buffer: no terminator is written here. */
 		dest->as_len = src->us_len / 2; /* XXX */
 		if (dest->as_maxlen < dest->as_len)
 			dest->as_len = dest->as_maxlen;
 	}
 
 	/* The helper takes the Unicode length in bytes. */
 	ntoskrnl_unicode_to_ascii(src->us_buf, dest->as_buf,
 	    dest->as_len * 2);
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Convert a counted ANSI string to a counted Unicode string by
  * widening every byte to 16 bits.
  *
  * dest     - receives the converted string; us_len is a byte count.
  * src      - string to convert.
  * allocate - when TRUE a new buffer of as_len * 2 bytes is
  *            allocated for dest; otherwise dest's existing buffer is
  *            used and the result is truncated to us_maxlen.
  *
  * Returns STATUS_INVALID_PARAMETER if either string pointer is NULL,
  * STATUS_INSUFFICIENT_RESOURCES if the allocation fails, and
  * STATUS_SUCCESS otherwise.
  */
 uint32_t
 RtlAnsiStringToUnicodeString(unicode_string *dest, ansi_string *src,
 	uint8_t allocate)
 {
 	if (dest == NULL || src == NULL)
 		return (STATUS_INVALID_PARAMETER);
 
 	if (allocate == TRUE) {
 		dest->us_buf = ExAllocatePoolWithTag(NonPagedPool,
 		    src->as_len * 2, 0);
 		if (dest->us_buf == NULL)
 			return (STATUS_INSUFFICIENT_RESOURCES);
 		/*
 		 * Derive the length from the same counted length the
 		 * buffer was sized with. Measuring the source with
 		 * strlen() could exceed the allocation, and a counted
 		 * string is not guaranteed to be NUL-terminated.
 		 */
 		dest->us_len = dest->us_maxlen = src->as_len * 2;
 	} else {
 		dest->us_len = src->as_len * 2; /* XXX */
 		if (dest->us_maxlen < dest->us_len)
 			dest->us_len = dest->us_maxlen;
 	}
 
 	/* The helper takes a character count, not a byte count. */
 	ntoskrnl_ascii_to_unicode(src->as_buf, dest->us_buf,
 	    dest->us_len / 2);
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Allocate 'len' bytes of zeroed memory without sleeping. The pool
  * type and tag arguments are ignored. Returns NULL on failure.
  */
 void *
 ExAllocatePoolWithTag(uint32_t pooltype, size_t len, uint32_t tag)
 {
 	return (malloc(len, M_DEVBUF, M_NOWAIT|M_ZERO));
 }
 
+/*
+ * Tagged variant of ExFreePool(); the tag is ignored and the buffer
+ * is released exactly as ExFreePool() would.
+ */
+static void
+ExFreePoolWithTag(void *buf, uint32_t tag)
+{
+	ExFreePool(buf);
+}
+
 /*
  * Release a buffer obtained from ExAllocatePoolWithTag(); both use
  * the M_DEVBUF malloc type.
  */
 void
 ExFreePool(buf)
 	void			*buf;
 {
 	free(buf, M_DEVBUF);
 }
 
 /*
  * Allocate a client extension of 'extlen' bytes for a driver object.
  * The extension is preceded by a custom_extension header that
  * records the client id and links it onto the driver's extension
  * list; '*ext' receives the address just past that header.
  *
  * Returns STATUS_INSUFFICIENT_RESOURCES if the allocation fails,
  * STATUS_SUCCESS otherwise.
  */
 uint32_t
 IoAllocateDriverObjectExtension(driver_object *drv, void *clid,
 	uint32_t extlen, void **ext)
 {
 	custom_extension	*hdr;
 
 	hdr = ExAllocatePoolWithTag(NonPagedPool,
 	    sizeof(custom_extension) + extlen, 0);
 	if (hdr == NULL)
 		return (STATUS_INSUFFICIENT_RESOURCES);
 
 	hdr->ce_clid = clid;
 	InsertTailList((&drv->dro_driverext->dre_usrext), (&hdr->ce_list));
 
 	/* The caller's area starts right after the header. */
 	*ext = (void *)(hdr + 1);
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Look up the client extension registered under 'clid' for a driver
  * object. Returns a pointer to the extension area (just past its
  * custom_extension header) or NULL if none is found.
  */
 void *
 IoGetDriverObjectExtension(driver_object *drv, void *clid)
 {
 	list_entry		*head, *cur;
 	custom_extension	*hdr;
 
 	/*
 	 * Sanity check. Our dummy bus drivers don't have
 	 * any driver extensions.
 	 */
 	if (drv->dro_driverext == NULL)
 		return (NULL);
 
 	head = &drv->dro_driverext->dre_usrext;
 	for (cur = head->nle_flink; cur != head; cur = cur->nle_flink) {
 		/* Each list node is treated as its enclosing header. */
 		hdr = (custom_extension *)cur;
 		if (hdr->ce_clid == clid)
 			return ((void *)(hdr + 1));
 	}
 
 	return (NULL);
 }
 
 
 /*
  * Create a device object owned by driver object 'drv'.
  *
  * drv       - owning driver; the new device is pushed onto the head
  *             of its device list.
  * devextlen - size of the optional device extension (0 for none).
  * devname   - not used by this implementation.
  * devtype   - stored in do_type and do_devtype.
  * devchars  - stored in do_characteristics.
  * exclusive - not used by this implementation.
  * newdev    - receives the new device object on success.
  *
  * Returns STATUS_INSUFFICIENT_RESOURCES if any allocation fails (in
  * which case everything allocated so far is released) and
  * STATUS_SUCCESS otherwise.
  */
 uint32_t
 IoCreateDevice(driver_object *drv, uint32_t devextlen, unicode_string *devname,
 	uint32_t devtype, uint32_t devchars, uint8_t exclusive,
 	device_object **newdev)
 {
 	device_object		*dev;
 
 	dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device_object), 0);
 	if (dev == NULL)
 		return (STATUS_INSUFFICIENT_RESOURCES);
 
 	dev->do_type = devtype;
 	dev->do_drvobj = drv;
 	dev->do_currirp = NULL;
 	dev->do_flags = 0;
 
 	if (devextlen) {
 		dev->do_devext = ExAllocatePoolWithTag(NonPagedPool,
 		    devextlen, 0);
 
 		if (dev->do_devext == NULL) {
 			ExFreePool(dev);
 			return (STATUS_INSUFFICIENT_RESOURCES);
 		}
 
 		bzero(dev->do_devext, devextlen);
 	} else
 		dev->do_devext = NULL;
 
 	dev->do_size = sizeof(device_object) + devextlen;
 	dev->do_refcnt = 1;
 	dev->do_attacheddev = NULL;
 	dev->do_nextdev = NULL;
 	dev->do_devtype = devtype;
 	dev->do_stacksize = 1;
 	dev->do_alignreq = 1;
 	dev->do_characteristics = devchars;
 	dev->do_iotimer = NULL;
 	KeInitializeEvent(&dev->do_devlock, EVENT_TYPE_SYNC, TRUE);
 
 	/*
 	 * Vpb is used for disk/tape devices,
 	 * but we don't support those. (Yet.)
 	 */
 	dev->do_vpb = NULL;
 
 	dev->do_devobj_ext = ExAllocatePoolWithTag(NonPagedPool,
 	    sizeof(devobj_extension), 0);
 
 	if (dev->do_devobj_ext == NULL) {
 		/* Unwind the allocations made above. */
 		if (dev->do_devext != NULL)
 			ExFreePool(dev->do_devext);
 		ExFreePool(dev);
 		return (STATUS_INSUFFICIENT_RESOURCES);
 	}
 
 	dev->do_devobj_ext->dve_type = 0;
 	dev->do_devobj_ext->dve_size = sizeof(devobj_extension);
 	dev->do_devobj_ext->dve_devobj = dev;
 
 	/*
 	 * Attach this device to the driver object's list
 	 * of devices. Note: this is not the same as attaching
 	 * the device to the device stack. The driver's AddDevice
 	 * routine must explicitly call IoAddDeviceToDeviceStack()
 	 * to do that.
 	 */
 
 	if (drv->dro_devobj == NULL) {
 		drv->dro_devobj = dev;
 		dev->do_nextdev = NULL;
 	} else {
 		dev->do_nextdev = drv->dro_devobj;
 		drv->dro_devobj = dev;
 	}
 
 	*newdev = dev;
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Destroy a device object: release its extensions, unlink it from
  * its driver's device list and free it. A NULL device is ignored.
  * The device is expected to be on its driver's list.
  */
 void
 IoDeleteDevice(device_object *dev)
 {
 	device_object		**link;
 
 	if (dev == NULL)
 		return;
 
 	if (dev->do_devobj_ext != NULL)
 		ExFreePool(dev->do_devobj_ext);
 
 	if (dev->do_devext != NULL)
 		ExFreePool(dev->do_devext);
 
 	/*
 	 * Unlink the device from the driver's device list by walking
 	 * the chain of 'next' pointers until one refers to it.
 	 */
 	link = &dev->do_drvobj->dro_devobj;
 	while (*link != dev)
 		link = &(*link)->do_nextdev;
 	*link = dev->do_nextdev;
 
 	ExFreePool(dev);
 }
 
 /*
  * Follow the do_attacheddev chain from 'dev' and return the last
  * device object on it (or 'dev' itself when nothing is attached).
  * Returns NULL for a NULL argument.
  */
 device_object *
 IoGetAttachedDevice(device_object *dev)
 {
 	device_object		*top;
 
 	if (dev == NULL)
 		return (NULL);
 
 	for (top = dev; top->do_attacheddev != NULL;
 	    top = top->do_attacheddev)
 		continue;
 
 	return (top);
 }
 
 /*
  * Build an IRP exactly as IoBuildAsynchronousFsdRequest() does and
  * additionally record 'event' as the IRP's user event. Returns NULL
  * if the IRP could not be built.
  */
 static irp *
 IoBuildSynchronousFsdRequest(uint32_t func, device_object *dobj, void *buf,
 	uint32_t len, uint64_t *off, nt_kevent *event, io_status_block *status)
 {
 	irp			*req;
 
 	req = IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status);
 	if (req != NULL)
 		req->irp_usrevent = event;
 
 	return (req);
 }
 
 /*
  * Allocate and fill in an IRP for major function 'func' aimed at
  * device 'dobj'.
  *
  * buf/len - caller's data buffer and its length.
  * off     - optional byte offset for read/write requests (NULL
  *           means offset 0).
  * status  - I/O status block recorded in the IRP.
  *
  * Depending on the device flags the buffer is copied into a newly
  * allocated system buffer (DO_BUFFERED_IO) or described by an MDL
  * (DO_DIRECT_IO). Returns the IRP, or NULL if any allocation fails.
  */
 static irp *
 IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status)
 	uint32_t		func;
 	device_object		*dobj;
 	void			*buf;
 	uint32_t		len;
 	uint64_t		*off;
 	io_status_block		*status;
 {
 	irp			*ip;
 	io_stack_location	*sl;
 
 	ip = IoAllocateIrp(dobj->do_stacksize, TRUE);
 	if (ip == NULL)
 		return (NULL);
 
 	ip->irp_usriostat = status;
 	ip->irp_tail.irp_overlay.irp_thread = NULL;
 
 	/* Describe the request in the next stack location. */
 	sl = IoGetNextIrpStackLocation(ip);
 	sl->isl_major = func;
 	sl->isl_minor = 0;
 	sl->isl_flags = 0;
 	sl->isl_ctl = 0;
 	sl->isl_devobj = dobj;
 	sl->isl_fileobj = NULL;
 	sl->isl_completionfunc = NULL;
 
 	ip->irp_userbuf = buf;
 
 	/* Buffered I/O: hand the driver a private copy of the data. */
 	if (dobj->do_flags & DO_BUFFERED_IO) {
 		ip->irp_assoc.irp_sysbuf =
 		    ExAllocatePoolWithTag(NonPagedPool, len, 0);
 		if (ip->irp_assoc.irp_sysbuf == NULL) {
 			IoFreeIrp(ip);
 			return (NULL);
 		}
 		bcopy(buf, ip->irp_assoc.irp_sysbuf, len);
 	}
 
 	/* Direct I/O: describe the caller's buffer with an MDL. */
 	if (dobj->do_flags & DO_DIRECT_IO) {
 		ip->irp_mdl = IoAllocateMdl(buf, len, FALSE, FALSE, ip);
 		if (ip->irp_mdl == NULL) {
 			if (ip->irp_assoc.irp_sysbuf != NULL)
 				ExFreePool(ip->irp_assoc.irp_sysbuf);
 			IoFreeIrp(ip);
 			return (NULL);
 		}
 		/*
 		 * NOTE(review): if DO_BUFFERED_IO was also set, the
 		 * system buffer allocated above is dropped here
 		 * without being freed -- confirm the two flags are
 		 * never combined.
 		 */
 		ip->irp_userbuf = NULL;
 		ip->irp_assoc.irp_sysbuf = NULL;
 	}
 
 	if (func == IRP_MJ_READ) {
 		sl->isl_parameters.isl_read.isl_len = len;
 		if (off != NULL)
 			sl->isl_parameters.isl_read.isl_byteoff = *off;
 		else
 			sl->isl_parameters.isl_read.isl_byteoff = 0;
 	}
 
 	if (func == IRP_MJ_WRITE) {
 		sl->isl_parameters.isl_write.isl_len = len;
 		if (off != NULL)
 			sl->isl_parameters.isl_write.isl_byteoff = *off;
 		else
 			sl->isl_parameters.isl_write.isl_byteoff = 0;
 	}
 
 	return (ip);
 }
 
 /*
  * Allocate and set up an IRP carrying a device I/O control request.
  *
  * iocode      control code; its transfer method selects how the
  *             buffers are attached to the IRP
  * dobj        target device (supplies the stack size)
  * ibuf/ilen   input buffer and length
  * obuf/olen   output buffer and length
  * isinternal  TRUE selects IRP_MJ_INTERNAL_DEVICE_CONTROL, anything
  *             else IRP_MJ_DEVICE_CONTROL
  * event       recorded in irp_usrevent
  * status      recorded in irp_usriostat
  *
  * Returns the new IRP, or NULL if an allocation fails (everything
  * allocated so far is released).
  */
 static irp *
 IoBuildDeviceIoControlRequest(uint32_t iocode, device_object *dobj, void *ibuf,
 	uint32_t ilen, void *obuf, uint32_t olen, uint8_t isinternal,
 	nt_kevent *event, io_status_block *status)
 {
 	irp			*ip;
 	io_stack_location	*sl;
 	uint32_t		buflen;
 
 	ip = IoAllocateIrp(dobj->do_stacksize, TRUE);
 	if (ip == NULL)
 		return (NULL);
 	ip->irp_usrevent = event;
 	ip->irp_usriostat = status;
 	ip->irp_tail.irp_overlay.irp_thread = NULL;
 
 	sl = IoGetNextIrpStackLocation(ip);
 	sl->isl_major = isinternal == TRUE ?
 	    IRP_MJ_INTERNAL_DEVICE_CONTROL : IRP_MJ_DEVICE_CONTROL;
 	sl->isl_minor = 0;
 	sl->isl_flags = 0;
 	sl->isl_ctl = 0;
 	sl->isl_devobj = dobj;
 	sl->isl_fileobj = NULL;
 	sl->isl_completionfunc = NULL;
 	sl->isl_parameters.isl_ioctl.isl_iocode = iocode;
 	sl->isl_parameters.isl_ioctl.isl_ibuflen = ilen;
 	sl->isl_parameters.isl_ioctl.isl_obuflen = olen;
 
 	switch(IO_METHOD(iocode)) {
 	case METHOD_BUFFERED:
 		/* One system buffer serves as both input and output. */
 		if (ilen > olen)
 			buflen = ilen;
 		else
 			buflen = olen;
 		if (buflen) {
 			ip->irp_assoc.irp_sysbuf =
 			    ExAllocatePoolWithTag(NonPagedPool, buflen, 0);
 			if (ip->irp_assoc.irp_sysbuf == NULL) {
 				IoFreeIrp(ip);
 				return (NULL);
 			}
 			/*
 			 * Copy in the caller's input and zero whatever
 			 * is left, so that no byte of the buffer is
 			 * handed to the driver uninitialized.
 			 */
 			if (ilen && ibuf != NULL) {
 				bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen);
 				bzero((char *)ip->irp_assoc.irp_sysbuf + ilen,
 				    buflen - ilen);
 			} else
 				bzero(ip->irp_assoc.irp_sysbuf, buflen);
 		}
 		ip->irp_userbuf = obuf;
 		break;
 	case METHOD_IN_DIRECT:
 	case METHOD_OUT_DIRECT:
 		if (ilen && ibuf != NULL) {
 			ip->irp_assoc.irp_sysbuf =
 			    ExAllocatePoolWithTag(NonPagedPool, ilen, 0);
 			if (ip->irp_assoc.irp_sysbuf == NULL) {
 				IoFreeIrp(ip);
 				return (NULL);
 			}
 			bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen);
 		}
 		if (olen && obuf != NULL) {
 			ip->irp_mdl = IoAllocateMdl(obuf, olen,
 			    FALSE, FALSE, ip);
 			/* Fail the request if the MDL is unavailable. */
 			if (ip->irp_mdl == NULL) {
 				if (ip->irp_assoc.irp_sysbuf != NULL)
 					ExFreePool(ip->irp_assoc.irp_sysbuf);
 				IoFreeIrp(ip);
 				return (NULL);
 			}
 			/*
 			 * Normally we would MmProbeAndLockPages()
 			 * here, but we don't have to in our
 			 * implementation.
 			 */
 		}
 		break;
 	case METHOD_NEITHER:
 		/* Both buffers are passed through untouched. */
 		ip->irp_userbuf = obuf;
 		sl->isl_parameters.isl_ioctl.isl_type3ibuf = ibuf;
 		break;
 	default:
 		break;
 	}
 
 	/*
 	 * Ideally, we should associate this IRP with the calling
 	 * thread here.
 	 */
 
 	return (ip);
 }
 
 /*
  * Allocate an IRP with room for 'stsize' stack locations from the
  * non-paged pool and initialize it. The chargequota argument is not
  * used. Returns NULL if the allocation fails.
  */
 static irp *
 IoAllocateIrp(uint8_t stsize, uint8_t chargequota)
 {
 	irp			*newirp;
 
 	newirp = ExAllocatePoolWithTag(NonPagedPool, IoSizeOfIrp(stsize), 0);
 	if (newirp != NULL)
 		IoInitializeIrp(newirp, IoSizeOfIrp(stsize), stsize);
 
 	return (newirp);
 }
 
 /*
  * Allocate a new IRP with 'stsize' stack locations and tie it to the
  * master IRP 'ip': the new IRP is flagged IRP_ASSOCIATED_IRP, inherits
  * the master's thread pointer and records the master in irp_master.
  * The linkage is set up under the dispatcher lock.
  * Returns NULL if the allocation fails.
  */
 static irp *
 IoMakeAssociatedIrp(irp *ip, uint8_t stsize)
 {
 	irp			*child;
 
 	child = IoAllocateIrp(stsize, FALSE);
 	if (child != NULL) {
 		mtx_lock(&ntoskrnl_dispatchlock);
 		child->irp_flags |= IRP_ASSOCIATED_IRP;
 		child->irp_tail.irp_overlay.irp_thread =
 		    ip->irp_tail.irp_overlay.irp_thread;
 		child->irp_assoc.irp_master = ip;
 		mtx_unlock(&ntoskrnl_dispatchlock);
 	}
 
 	return (child);
 }
 
 /* Release an IRP by handing it back to ExFreePool(). */
 static void
 IoFreeIrp(irp *ip)
 {
 	ExFreePool(ip);
 }
 
 /*
  * Reset an IRP to its pristine state.
  *
  * io     IRP to initialize
  * psize  value recorded in irp_size
  * ssize  number of stack locations
  *
  * The IRP is zeroed over IoSizeOfIrp(ssize) bytes. The stack
  * locations are taken to start right after the irp structure, and the
  * current stack location pointer is set 'ssize' entries past that
  * start.
  */
 static void
 IoInitializeIrp(irp *io, uint16_t psize, uint8_t ssize)
 {
 	io_stack_location	*stackbase;
 
 	bzero((char *)io, IoSizeOfIrp(ssize));
 
 	stackbase = (io_stack_location *)(io + 1);
 
 	io->irp_size = psize;
 	io->irp_stackcnt = ssize;
 	io->irp_currentstackloc = ssize;
 	InitializeListHead(&io->irp_thlist);
 	io->irp_tail.irp_overlay.irp_csl = stackbase + ssize;
 }
 
 /*
  * Re-initialize an existing IRP for another use, keeping its size,
  * stack count and allocation flags, and preset its I/O status to
  * 'status'.
  */
 static void
 IoReuseIrp(irp *ip, uint32_t status)
 {
 	uint8_t			savedflags;
 
 	/* IoInitializeIrp() zeroes the IRP, so save what must survive. */
 	savedflags = ip->irp_allocflags;
 
 	IoInitializeIrp(ip, ip->irp_size, ip->irp_stackcnt);
 
 	ip->irp_allocflags = savedflags;
 	ip->irp_iostat.isb_status = status;
 }
 
 /*
  * Acquire the global cancel spin lock (ntoskrnl_cancellock); irql is
  * passed on to KeAcquireSpinLock() as its second argument.
  */
 void
 IoAcquireCancelSpinLock(uint8_t *irql)
 {
 	KeAcquireSpinLock(&ntoskrnl_cancellock, irql);
 }
 
 /*
  * Release the global cancel spin lock (ntoskrnl_cancellock), passing
  * irql on to KeReleaseSpinLock().
  */
 void
 IoReleaseCancelSpinLock(uint8_t irql)
 {
 	KeReleaseSpinLock(&ntoskrnl_cancellock, irql);
 }
 
 /*
  * Mark an IRP as cancelled and run its cancel routine, if it has one.
  *
  * Returns FALSE when the IRP has no cancel routine; otherwise returns
  * the result of IoSetCancelValue(ip, TRUE) after the routine has run.
  */
 uint8_t
 IoCancelIrp(irp *ip)
 {
 	cancel_func		cfunc;
 	uint8_t			cancelirql;
 
 	IoAcquireCancelSpinLock(&cancelirql);
 	/* Fetch the current cancel routine and clear it in one step. */
 	cfunc = IoSetCancelRoutine(ip, NULL);
 	ip->irp_cancel = TRUE;
 	if (cfunc == NULL) {
 		IoReleaseCancelSpinLock(cancelirql);
 		return (FALSE);
 	}
 	/*
 	 * The cancel lock is not released here on this path.
 	 * NOTE(review): presumably the cancel routine releases it using
 	 * the IRQL saved in irp_cancelirql -- confirm against callers.
 	 */
 	ip->irp_cancelirql = cancelirql;
 	MSCALL2(cfunc, IoGetCurrentIrpStackLocation(ip)->isl_devobj, ip);
 	return (uint8_t)IoSetCancelValue(ip, TRUE);
 }
 
 /*
  * Hand an IRP to the driver that owns 'dobj'.
  *
  * Advances the IRP to its next stack location, records 'dobj' in that
  * location, and invokes the driver's dispatch routine for the
  * location's major function code. Panics if the IRP has no stack
  * locations left. Returns whatever the dispatch routine returns.
  */
 uint32_t
 IofCallDriver(device_object *dobj, irp *ip)
 {
 	driver_object		*owner;
 	io_stack_location	*cursl;
 	driver_dispatch		handler;
 	uint32_t		rval;
 
 	owner = dobj->do_drvobj;
 
 	if (ip->irp_currentstackloc <= 0)
 		panic("IoCallDriver(): out of stack locations");
 
 	IoSetNextIrpStackLocation(ip);
 	cursl = IoGetCurrentIrpStackLocation(ip);
 	cursl->isl_devobj = dobj;
 
 	handler = owner->dro_dispatch[cursl->isl_major];
 	rval = MSCALL2(handler, dobj, ip);
 
 	return (rval);
 }
 
 /*
  * Complete an IRP.
  *
  * Starting at the current stack location, walk the stack locations
  * upward and call each location's completion routine when the
  * location's control flags ask for it given the IRP's final status
  * (success, error or cancelled). The walk stops immediately if a
  * completion routine returns STATUS_MORE_PROCESSING_REQUIRED.
  *
  * Once the walk finishes, the final status block is copied to
  * irp_usriostat and irp_usrevent is signalled (when set). Associated
  * IRPs then drop the master's outstanding count, are freed along with
  * their MDLs, and complete the master when the count reaches zero.
  * IRPs flagged IRP_PAGING_IO are freed here as well.
  *
  * prioboost is only used as the increment passed to KeSetEvent().
  */
 void
 IofCompleteRequest(irp *ip, uint8_t prioboost)
 {
 	uint32_t		status;
 	device_object		*dobj;
 	io_stack_location	*sl;
 	completion_func		cf;
 
 	KASSERT(ip->irp_iostat.isb_status != STATUS_PENDING,
 	    ("incorrect IRP(%p) status (STATUS_PENDING)", ip));
 
 	/*
 	 * 'sl' keeps pointing at the location being completed while
 	 * the IRP's own notion of the current location is moved on.
 	 */
 	sl = IoGetCurrentIrpStackLocation(ip);
 	IoSkipCurrentIrpStackLocation(ip);
 
 	do {
 		if (sl->isl_ctl & SL_PENDING_RETURNED)
 			ip->irp_pendingreturned = TRUE;
 
 		/* No device object once we are past the last location. */
 		if (ip->irp_currentstackloc != (ip->irp_stackcnt + 1))
 			dobj = IoGetCurrentIrpStackLocation(ip)->isl_devobj;
 		else
 			dobj = NULL;
 
 		if (sl->isl_completionfunc != NULL &&
 		    ((ip->irp_iostat.isb_status == STATUS_SUCCESS &&
 		    sl->isl_ctl & SL_INVOKE_ON_SUCCESS) ||
 		    (ip->irp_iostat.isb_status != STATUS_SUCCESS &&
 		    sl->isl_ctl & SL_INVOKE_ON_ERROR) ||
 		    (ip->irp_cancel == TRUE &&
 		    sl->isl_ctl & SL_INVOKE_ON_CANCEL))) {
 			cf = sl->isl_completionfunc;
 			status = MSCALL3(cf, dobj, ip, sl->isl_completionctx);
 			/* The routine asked us to stop completion here. */
 			if (status == STATUS_MORE_PROCESSING_REQUIRED)
 				return;
 		} else {
 			if ((ip->irp_currentstackloc <= ip->irp_stackcnt) &&
 			    (ip->irp_pendingreturned == TRUE))
 				IoMarkIrpPending(ip);
 		}
 
 		/* move to the next.  */
 		IoSkipCurrentIrpStackLocation(ip);
 		sl++;
 	} while (ip->irp_currentstackloc <= (ip->irp_stackcnt + 1));
 
 	/* Publish the final status and wake whoever waits on the event. */
 	if (ip->irp_usriostat != NULL)
 		*ip->irp_usriostat = ip->irp_iostat;
 	if (ip->irp_usrevent != NULL)
 		KeSetEvent(ip->irp_usrevent, prioboost, FALSE);
 
 	/* Handle any associated IRPs. */
 
 	if (ip->irp_flags & IRP_ASSOCIATED_IRP) {
 		uint32_t		masterirpcnt;
 		irp			*masterirp;
 		mdl			*m;
 
 		masterirp = ip->irp_assoc.irp_master;
 		masterirpcnt =
 		    InterlockedDecrement(&masterirp->irp_assoc.irp_irpcnt);
 
 		/* Free the whole MDL chain before the IRP itself. */
 		while ((m = ip->irp_mdl) != NULL) {
 			ip->irp_mdl = m->mdl_next;
 			IoFreeMdl(m);
 		}
 		IoFreeIrp(ip);
 		/* Last associated IRP done: complete the master too. */
 		if (masterirpcnt == 0)
 			IoCompleteRequest(masterirp, IO_NO_INCREMENT);
 		return;
 	}
 
 	/* With any luck, these conditions will never arise. */
 
 	if (ip->irp_flags & IRP_PAGING_IO) {
 		if (ip->irp_mdl != NULL)
 			IoFreeMdl(ip->irp_mdl);
 		IoFreeIrp(ip);
 	}
 }
 
 /*
  * Common interrupt entry point.
  *
  * With the interrupt lock held, walk the list of registered interrupt
  * objects and call each service routine in turn, stopping at the
  * first one that returns TRUE. 'arg' is not used.
  */
 void
 ntoskrnl_intr(void *arg)
 {
 	list_entry		*ent;
 	kinterrupt		*intobj;
 	uint8_t			handled;
 	uint8_t			oldirql;
 
 	KeAcquireSpinLock(&ntoskrnl_intlock, &oldirql);
 	for (ent = ntoskrnl_intlist.nle_flink; ent != &ntoskrnl_intlist;
 	    ent = ent->nle_flink) {
 		intobj = CONTAINING_RECORD(ent, kinterrupt, ki_list);
 		handled = MSCALL2(intobj->ki_svcfunc, intobj,
 		    intobj->ki_svcctx);
 		if (handled == TRUE)
 			break;
 	}
 	KeReleaseSpinLock(&ntoskrnl_intlock, oldirql);
 }
 
 /*
  * Acquire the global interrupt spin lock (ntoskrnl_intlock) and return
  * the IRQL value produced by KeAcquireSpinLock(). The interrupt
  * object argument is not consulted.
  */
 uint8_t
 KeAcquireInterruptSpinLock(kinterrupt *iobj)
 {
 	uint8_t			previrql;
 
 	KeAcquireSpinLock(&ntoskrnl_intlock, &previrql);
 
 	return (previrql);
 }
 
 /*
  * Release the global interrupt spin lock (ntoskrnl_intlock), passing
  * irql on to KeReleaseSpinLock(). The interrupt object argument is
  * not consulted.
  */
 void
 KeReleaseInterruptSpinLock(kinterrupt *iobj, uint8_t irql)
 {
 	KeReleaseSpinLock(&ntoskrnl_intlock, irql);
 }
 
 /*
  * Run syncfunc(syncctx) while holding the global interrupt spin lock
  * (ntoskrnl_intlock). The interrupt object argument is not consulted
  * and the callback's return value is discarded; this routine always
  * returns TRUE.
  */
 uint8_t
 KeSynchronizeExecution(kinterrupt *iobj, void *syncfunc, void *syncctx)
 {
 	uint8_t			previrql;
 
 	KeAcquireSpinLock(&ntoskrnl_intlock, &previrql);
 	MSCALL1(syncfunc, syncctx);
 	KeReleaseSpinLock(&ntoskrnl_intlock, previrql);
 
 	return (TRUE);
 }
 
 /*
  * IoConnectInterrupt() is passed only the interrupt vector and
  * irql that a device wants to use, but no device-specific tag
  * of any kind. This conflicts rather badly with FreeBSD's
  * bus_setup_intr(), which needs the device_t for the device
  * requesting interrupt delivery. In order to bypass this
  * inconsistency, we implement a second level of interrupt
  * dispatching on top of bus_setup_intr(). All devices use
  * ntoskrnl_intr() as their ISR, and any device requesting
  * interrupts will be registered with ntoskrnl_intr()'s interrupt
  * dispatch list. When an interrupt arrives, we walk the list
  * and invoke all the registered ISRs. This effectively makes all
  * interrupts shared, but it's the only way to duplicate the
  * semantics of IoConnectInterrupt() and IoDisconnectInterrupt() properly.
  */
 
 /*
  * Create an interrupt object for the given service routine/context
  * and add it to the head of the global interrupt dispatch list.
  *
  * The new object is returned through *iobj (NULL is stored there when
  * the allocation fails). When 'lock' is NULL the object's private
  * spin lock is initialized and used; otherwise the caller's lock is
  * recorded. The vector, irql, syncirql, imode, shared, affinity and
  * savefloat arguments are not used.
  *
  * Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES when the
  * interrupt object cannot be allocated.
  */
 uint32_t
 IoConnectInterrupt(kinterrupt **iobj, void *svcfunc, void *svcctx,
 	kspin_lock *lock, uint32_t vector, uint8_t irql, uint8_t syncirql,
 	uint8_t imode, uint8_t shared, uint32_t affinity, uint8_t savefloat)
 {
 	kinterrupt		*intobj;
 	uint8_t			curirql;
 
 	intobj = ExAllocatePoolWithTag(NonPagedPool, sizeof(kinterrupt), 0);
 	*iobj = intobj;
 	if (intobj == NULL)
 		return (STATUS_INSUFFICIENT_RESOURCES);
 
 	intobj->ki_svcfunc = svcfunc;
 	intobj->ki_svcctx = svcctx;
 
 	if (lock != NULL)
 		intobj->ki_lock = lock;
 	else {
 		KeInitializeSpinLock(&intobj->ki_lock_priv);
 		intobj->ki_lock = &intobj->ki_lock_priv;
 	}
 
 	KeAcquireSpinLock(&ntoskrnl_intlock, &curirql);
 	InsertHeadList((&ntoskrnl_intlist), (&intobj->ki_list));
 	KeReleaseSpinLock(&ntoskrnl_intlock, curirql);
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Remove an interrupt object from the global interrupt dispatch list
  * (under the interrupt lock) and free it. A NULL object is ignored.
  */
 void
 IoDisconnectInterrupt(kinterrupt *iobj)
 {
 	uint8_t			previrql;
 
 	if (iobj == NULL)
 		return;
 
 	KeAcquireSpinLock(&ntoskrnl_intlock, &previrql);
 	RemoveEntryList((&iobj->ki_list));
 	KeReleaseSpinLock(&ntoskrnl_intlock, previrql);
 
 	ExFreePool(iobj);
 }
 
 /*
  * Attach 'src' to the end of the attachment chain that starts at
  * 'dst'. Under the dispatcher lock, the last device in dst's chain is
  * linked to src, src's own attachment pointer is cleared, and src's
  * stack size becomes one more than that of the device it was
  * attached to. Returns the device src was attached to.
  */
 device_object *
 IoAttachDeviceToDeviceStack(device_object *src, device_object *dst)
 {
 	device_object		*last;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	last = IoGetAttachedDevice(dst);
 	last->do_attacheddev = src;
 	src->do_attacheddev = NULL;
 	src->do_stacksize = last->do_stacksize + 1;
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (last);
 }
 
 /*
  * Unlink the device attached directly to 'topdev' from the attachment
  * chain. Does nothing when no device is attached. All work is done
  * under the dispatcher lock.
  */
 void
 IoDetachDevice(device_object *topdev)
 {
 	device_object		*cur;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	cur = topdev->do_attacheddev;
 	if (cur != NULL) {
 		/* Splice the attached device out of the chain. */
 		topdev->do_attacheddev = cur->do_attacheddev;
 		topdev->do_refcnt--;
 
 		/* Every device left after topdev is now one level lower. */
 		for (cur = topdev->do_attacheddev; cur != NULL;
 		    cur = cur->do_attacheddev)
 			cur->do_stacksize--;
 	}
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 }
 
 /*
  * For the most part, an object is considered signalled if
  * dh_sigstate == TRUE. The exception is for mutant objects
  * (mutexes), where the logic works like this:
  *
  * - If the thread already owns the object and sigstate is
  *   less than or equal to 0, then the object is considered
  *   signalled (recursive acquisition).
  * - If dh_sigstate == 1, the object is also considered
  *   signalled.
  */
 
 /*
  * Report whether dispatcher object 'obj' counts as signalled for
  * thread 'td'. Returns TRUE or FALSE.
  */
 static int
 ntoskrnl_is_signalled(nt_dispatch_header *obj, struct thread *td)
 {
 	kmutant			*mutant;
 
 	/* Everything except a mutant is signalled while sigstate > 0. */
 	if (obj->dh_type != DISP_TYPE_MUTANT)
 		return (obj->dh_sigstate > 0 ? TRUE : FALSE);
 
 	/* A free mutant (sigstate 1) can be taken by anyone. */
 	if (obj->dh_sigstate == 1)
 		return (TRUE);
 
 	/* An owned mutant is signalled only for its owner. */
 	mutant = (kmutant *)obj;
 	if (obj->dh_sigstate <= 0 && mutant->km_ownerthread == td)
 		return (TRUE);
 
 	return (FALSE);
 }
 
 /*
  * Apply the side effects of a successful wait on 'obj' by thread 'td'.
  * Object types not listed below are left untouched.
  */
 static void
 ntoskrnl_satisfy_wait(nt_dispatch_header *obj, struct thread *td)
 {
 	kmutant			*mutant;
 
 	switch (obj->dh_type) {
 	case DISP_TYPE_SEMAPHORE:
 		/* One unit of the semaphore is consumed. */
 		obj->dh_sigstate--;
 		break;
 	case DISP_TYPE_SYNCHRONIZATION_EVENT:
 	case DISP_TYPE_SYNCHRONIZATION_TIMER:
 		/* Synchronization objects get reset to unsignalled. */
 		obj->dh_sigstate = 0;
 		break;
 	case DISP_TYPE_MUTANT:
 		mutant = (struct kmutant *)obj;
 		obj->dh_sigstate--;
 		/*
 		 * Dropping to 0 means the mutex just changed hands:
 		 * record the new owner and clear any abandoned mark.
 		 */
 		if (obj->dh_sigstate == 0) {
 			mutant->km_ownerthread = td;
 			if (mutant->km_abandoned == TRUE)
 				mutant->km_abandoned = FALSE;
 		}
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Satisfy every wait in the circular list of wait blocks that 'wb'
  * belongs to, and mark each block as awakened.
  *
  * Each wait block refers to its own object, so the object to satisfy
  * has to be taken from the block currently being visited rather than
  * from the block the walk started at.
  */
 static void
 ntoskrnl_satisfy_multiple_waits(wb)
 	wait_block		*wb;
 {
 	wait_block		*cur;
 	struct thread		*td;
 
 	cur = wb;
 	td = wb->wb_kthread;
 
 	do {
 		ntoskrnl_satisfy_wait(cur->wb_object, td);
 		cur->wb_awakened = TRUE;
 		cur = cur->wb_next;
 	} while (cur != wb);
 }
 
 /* Always called with dispatcher lock held. */
 static void
 ntoskrnl_waittest(obj, increment)
 	nt_dispatch_header	*obj;
 	uint32_t		increment;
 {
 	wait_block		*w, *next;
 	list_entry		*e;
 	struct thread		*td;
 	wb_ext			*we;
 	int			satisfied;
 
 	/*
 	 * Once an object has been signalled, we walk its list of
 	 * wait blocks. If a wait block can be awakened, then satisfy
 	 * waits as necessary and wake the thread.
 	 *
 	 * The rules work like this:
 	 *
 	 * If a wait block is marked as WAITTYPE_ANY, then
 	 * we can satisfy the wait conditions on the current
 	 * object and wake the thread right away. Satisfying
 	 * the wait also has the effect of breaking us out
 	 * of the search loop.
 	 *
 	 * If the object is marked as WAITTYLE_ALL, then the
 	 * wait block will be part of a circularly linked
 	 * list of wait blocks belonging to a waiting thread
 	 * that's sleeping in KeWaitForMultipleObjects(). In
 	 * order to wake the thread, all the objects in the
 	 * wait list must be in the signalled state. If they
 	 * are, we then satisfy all of them and wake the
 	 * thread.
 	 *
 	 */
 
 	e = obj->dh_waitlisthead.nle_flink;
 
 	while (e != &obj->dh_waitlisthead && obj->dh_sigstate > 0) {
 		w = CONTAINING_RECORD(e, wait_block, wb_waitlist);
 		we = w->wb_ext;
 		td = we->we_td;
 		satisfied = FALSE;
 		if (w->wb_waittype == WAITTYPE_ANY) {
 			/*
 			 * Thread can be awakened if
 			 * any wait is satisfied.
 			 */
 			ntoskrnl_satisfy_wait(obj, td);
 			satisfied = TRUE;
 			w->wb_awakened = TRUE;
 		} else {
 			/*
 			 * Thread can only be woken up
 			 * if all waits are satisfied.
 			 * If the thread is waiting on multiple
 			 * objects, they should all be linked
 			 * through the wb_next pointers in the
 			 * wait blocks.
 			 */
 			satisfied = TRUE;
 			next = w->wb_next;
 			while (next != w) {
 				if (ntoskrnl_is_signalled(obj, td) == FALSE) {
 					satisfied = FALSE;
 					break;
 				}
 				next = next->wb_next;
 			}
 			ntoskrnl_satisfy_multiple_waits(w);
 		}
 
 		if (satisfied == TRUE)
 			cv_broadcastpri(&we->we_cv,
 			    (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ?
 			    w->wb_oldpri - (increment * 4) : PRI_MIN_KERN);
 
 		e = e->nle_flink;
 	}
 }
 
 /*
  * Store in *tval the current time expressed as the number of
  * 100 nanosecond intervals since January 1, 1601. (?!?!)
  * The current time is obtained from nanotime().
  */
 void
 ntoskrnl_time(uint64_t *tval)
 {
 	struct timespec		now;
 	uint64_t		ticks;
 
 	nanotime(&now);
 
 	/* 100ns ticks from 1601 to 1970 */
 	ticks = 11644473600 * 10000000;
 	ticks += (uint64_t)now.tv_sec * 10000000;
 	ticks += (uint64_t)now.tv_nsec / 100;
 
 	*tval = ticks;
 }
 
 /* Store the current system time, as computed by ntoskrnl_time(). */
 static void
 KeQuerySystemTime(uint64_t *current_time)
 {
 	ntoskrnl_time(current_time);
 }
 
 static uint32_t
 KeTickCount(void)
 {
 	struct timeval tv;
 	getmicrouptime(&tv);
 	return tvtohz(&tv);
 }
 
 
 /*
  * KeWaitForSingleObject() is a tricky beast, because it can be used
  * with several different object types: semaphores, timers, events,
  * mutexes and threads. Semaphores don't appear very often, but the
  * other object types are quite common. KeWaitForSingleObject() is
  * what's normally used to acquire a mutex, and it can be used to
  * wait for a thread termination.
  *
  * The Windows NDIS API is implemented in terms of Windows kernel
  * primitives, and some of the object manipulation is duplicated in
  * NDIS. For example, NDIS has timers and events, which are actually
  * Windows kevents and ktimers. Now, you're supposed to only use the
  * NDIS variants of these objects within the confines of the NDIS API,
  * but there are some naughty developers out there who will use
  * KeWaitForSingleObject() on NDIS timer and event objects, so we
  * have to support that as well. Consequently, our NDIS timer and event
  * code has to be closely tied into our ntoskrnl timer and event code,
  * just as it is in Windows.
  *
  * KeWaitForSingleObject() may do different things for different kinds
  * of objects:
  *
  * - For events, we check if the event has been signalled. If the
  *   event is already in the signalled state, we just return immediately,
  *   otherwise we wait for it to be set to the signalled state by someone
  *   else calling KeSetEvent(). Events can be either synchronization or
  *   notification events.
  *
  * - For timers, if the timer has already fired and the timer is in
  *   the signalled state, we just return, otherwise we wait on the
  *   timer. Unlike an event, timers get signalled automatically when
  *   they expire rather than someone having to trip them manually.
  *   Timers initialized with KeInitializeTimer() are always notification
  *   events: KeInitializeTimerEx() lets you initialize a timer as
  *   either a notification or synchronization event.
  *
  * - For mutexes, we try to acquire the mutex and if we can't, we wait
  *   on the mutex until it's available and then grab it. When a mutex is
  *   released, it enters the signalled state, which wakes up one of the
  *   threads waiting to acquire it. Mutexes are always synchronization
  *   events.
  *
  * - For threads, the only thing we do is wait until the thread object
  *   enters a signalled state, which occurs when the thread terminates.
  *   Threads are always notification events.
  *
  * A notification event wakes up all threads waiting on an object. A
  * synchronization event wakes up just one. Also, a synchronization event
  * is auto-clearing, which means we automatically set the event back to
  * the non-signalled state once the wakeup is done.
  */
 
 /*
  * Wait for a single dispatcher object to become signalled.
  *
  * arg      the object to wait on (treated as an nt_dispatch_header)
  * duetime  optional timeout in 100ns units; negative values are
  *          relative, non-negative values are absolute times. NULL
  *          waits without a timeout.
  * reason, mode and alertable are accepted but not used.
  *
  * Returns STATUS_INVALID_PARAMETER for a NULL object, STATUS_TIMEOUT
  * when the timed wait expires, and STATUS_SUCCESS otherwise.
  */
 uint32_t
 KeWaitForSingleObject(void *arg, uint32_t reason, uint32_t mode,
     uint8_t alertable, int64_t *duetime)
 {
 	wait_block		w;
 	struct thread		*td = curthread;
 	struct timeval		tv;
 	int			error = 0;
 	uint64_t		curtime;
 	wb_ext			we;
 	nt_dispatch_header	*obj;
 
 	obj = arg;
 
 	if (obj == NULL)
 		return (STATUS_INVALID_PARAMETER);
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	cv_init(&we.we_cv, "KeWFS");
 	we.we_td = td;
 
 	/*
 	 * Check to see if this object is already signalled,
 	 * and just return without waiting if it is.
 	 */
 	if (ntoskrnl_is_signalled(obj, td) == TRUE) {
 		/* Sanity check the signal state value. */
 		if (obj->dh_sigstate != INT32_MIN) {
 			ntoskrnl_satisfy_wait(obj, curthread);
 			/* Pair the cv_init() above on this path too. */
 			cv_destroy(&we.we_cv);
 			mtx_unlock(&ntoskrnl_dispatchlock);
 			return (STATUS_SUCCESS);
 		} else {
 			/*
 			 * There's a limit to how many times we can
 			 * recursively acquire a mutant. If we hit
 			 * the limit, something is very wrong.
 			 */
 			if (obj->dh_type == DISP_TYPE_MUTANT) {
 				mtx_unlock(&ntoskrnl_dispatchlock);
 				panic("mutant limit exceeded");
 			}
 		}
 	}
 
 	/* Queue a wait block for this thread on the object. */
 	bzero((char *)&w, sizeof(wait_block));
 	w.wb_object = obj;
 	w.wb_ext = &we;
 	w.wb_waittype = WAITTYPE_ANY;
 	w.wb_next = &w;
 	w.wb_waitkey = 0;
 	w.wb_awakened = FALSE;
 	w.wb_oldpri = td->td_priority;
 
 	InsertTailList((&obj->dh_waitlisthead), (&w.wb_waitlist));
 
 	/*
 	 * The timeout value is specified in 100 nanosecond units
 	 * and can be a positive or negative number. If it's positive,
 	 * then the duetime is absolute, and we need to convert it
 	 * to an absolute offset relative to now in order to use it.
 	 * If it's negative, then the duetime is relative and we
 	 * just have to convert the units.
 	 */
 
 	if (duetime != NULL) {
 		if (*duetime < 0) {
 			tv.tv_sec = - (*duetime) / 10000000;
 			tv.tv_usec = (- (*duetime) / 10) -
 			    (tv.tv_sec * 1000000);
 		} else {
 			ntoskrnl_time(&curtime);
 			if (*duetime < curtime)
 				tv.tv_sec = tv.tv_usec = 0;
 			else {
 				tv.tv_sec = ((*duetime) - curtime) / 10000000;
 				tv.tv_usec = ((*duetime) - curtime) / 10 -
 				    (tv.tv_sec * 1000000);
 			}
 		}
 	}
 
 	if (duetime == NULL)
 		cv_wait(&we.we_cv, &ntoskrnl_dispatchlock);
 	else
 		error = cv_timedwait(&we.we_cv,
 		    &ntoskrnl_dispatchlock, tvtohz(&tv));
 
 	RemoveEntryList(&w.wb_waitlist);
 
 	cv_destroy(&we.we_cv);
 
 	/* We timed out. Leave the object alone and return status. */
 
 	if (error == EWOULDBLOCK) {
 		mtx_unlock(&ntoskrnl_dispatchlock);
 		return (STATUS_TIMEOUT);
 	}
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (STATUS_SUCCESS);
 /*
 	return (KeWaitForMultipleObjects(1, &obj, WAITTYPE_ALL, reason,
 	    mode, alertable, duetime, &w));
 */
 }
 
 /*
  * Wait on several dispatcher objects at once.
  *
  * cnt       number of entries in obj[]
  * obj       the objects to wait on
  * wtype     WAITTYPE_ANY to return as soon as one object is
  *           signalled, WAITTYPE_ALL to wait for all of them
  * duetime   optional timeout in 100ns units; negative values are
  *           relative, non-negative values are absolute times. NULL
  *           waits without a timeout.
  * wb_array  caller-supplied wait blocks; when NULL an on-stack array
  *           is used
  * reason, mode and alertable are accepted but not used.
  *
  * Returns STATUS_INVALID_PARAMETER when cnt exceeds MAX_WAIT_OBJECTS,
  * or exceeds THREAD_WAIT_OBJECTS without a caller-supplied wb_array.
  * For WAITTYPE_ANY, returns STATUS_WAIT_0 plus the index of the object
  * that satisfied the wait. Returns STATUS_SUCCESS once a WAITTYPE_ALL
  * wait is satisfied, and STATUS_TIMEOUT when the wait call reports an
  * error.
  */
 static uint32_t
 KeWaitForMultipleObjects(uint32_t cnt, nt_dispatch_header *obj[], uint32_t wtype,
 	uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime,
 	wait_block *wb_array)
 {
 	struct thread		*td = curthread;
 	wait_block		*whead, *w;
 	wait_block		_wb_array[MAX_WAIT_OBJECTS];
 	nt_dispatch_header	*cur;
 	struct timeval		tv;
 	int			i, wcnt = 0, error = 0;
 	uint64_t		curtime;
 	struct timespec		t1, t2;
 	uint32_t		status = STATUS_SUCCESS;
 	wb_ext			we;
 
 	if (cnt > MAX_WAIT_OBJECTS)
 		return (STATUS_INVALID_PARAMETER);
 	if (cnt > THREAD_WAIT_OBJECTS && wb_array == NULL)
 		return (STATUS_INVALID_PARAMETER);
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	cv_init(&we.we_cv, "KeWFM");
 	we.we_td = td;
 
 	if (wb_array == NULL)
 		whead = _wb_array;
 	else
 		whead = wb_array;
 
 	bzero((char *)whead, sizeof(wait_block) * cnt);
 
 	/* First pass: see if we can satisfy any waits immediately. */
 
 	wcnt = 0;
 	w = whead;
 
 	for (i = 0; i < cnt; i++) {
 		InsertTailList((&obj[i]->dh_waitlisthead),
 		    (&w->wb_waitlist));
 		w->wb_ext = &we;
 		w->wb_object = obj[i];
 		w->wb_waittype = wtype;
 		w->wb_waitkey = i;
 		w->wb_awakened = FALSE;
 		w->wb_oldpri = td->td_priority;
 		w->wb_next = w + 1;
 		w++;
 		wcnt++;
 		if (ntoskrnl_is_signalled(obj[i], td)) {
 			/*
 			 * There's a limit to how many times
 			 * we can recursively acquire a mutant.
 			 * If we hit the limit, something
 			 * is very wrong.
 			 */
 			if (obj[i]->dh_sigstate == INT32_MIN &&
 			    obj[i]->dh_type == DISP_TYPE_MUTANT) {
 				mtx_unlock(&ntoskrnl_dispatchlock);
 				panic("mutant limit exceeded");
 			}
 
 			/*
 			 * If this is a WAITTYPE_ANY wait, then
 			 * satisfy the waited object and exit
 			 * right now.
 			 */
 
 			if (wtype == WAITTYPE_ANY) {
 				ntoskrnl_satisfy_wait(obj[i], td);
 				status = STATUS_WAIT_0 + i;
 				goto wait_done;
 			} else {
 				/*
 				 * Already signalled: take the block
 				 * back off the object's list and
 				 * reuse its slot for the next object.
 				 */
 				w--;
 				wcnt--;
 				w->wb_object = NULL;
 				RemoveEntryList(&w->wb_waitlist);
 			}
 		}
 	}
 
 	/*
 	 * If this is a WAITTYPE_ALL wait and all objects are
 	 * already signalled, satisfy the waits and exit now.
 	 */
 
 	if (wtype == WAITTYPE_ALL && wcnt == 0) {
 		for (i = 0; i < cnt; i++)
 			ntoskrnl_satisfy_wait(obj[i], td);
 		status = STATUS_SUCCESS;
 		goto wait_done;
 	}
 
 	/*
 	 * Create a circular waitblock list. The waitcount
 	 * must always be non-zero when we get here.
 	 */
 
 	(w - 1)->wb_next = whead;
 
 	/* Wait on any objects that aren't yet signalled. */
 
 	/* Calculate timeout, if any. */
 
 	if (duetime != NULL) {
 		if (*duetime < 0) {
 			tv.tv_sec = - (*duetime) / 10000000;
 			tv.tv_usec = (- (*duetime) / 10) -
 			    (tv.tv_sec * 1000000);
 		} else {
 			ntoskrnl_time(&curtime);
 			if (*duetime < curtime)
 				tv.tv_sec = tv.tv_usec = 0;
 			else {
 				tv.tv_sec = ((*duetime) - curtime) / 10000000;
 				tv.tv_usec = ((*duetime) - curtime) / 10 -
 				    (tv.tv_sec * 1000000);
 			}
 		}
 	}
 
 	while (wcnt) {
 		nanotime(&t1);
 
 		if (duetime == NULL)
 			cv_wait(&we.we_cv, &ntoskrnl_dispatchlock);
 		else
 			error = cv_timedwait(&we.we_cv,
 			    &ntoskrnl_dispatchlock, tvtohz(&tv));
 
 		/* Wait with timeout expired. */
 
 		if (error) {
 			status = STATUS_TIMEOUT;
 			goto wait_done;
 		}
 
 		nanotime(&t2);
 
 		/* See what's been signalled. */
 
 		w = whead;
 		do {
 			cur = w->wb_object;
 			if (ntoskrnl_is_signalled(cur, td) == TRUE ||
 			    w->wb_awakened == TRUE) {
 				/* Sanity check the signal state value. */
 				if (cur->dh_sigstate == INT32_MIN &&
 				    cur->dh_type == DISP_TYPE_MUTANT) {
 					mtx_unlock(&ntoskrnl_dispatchlock);
 					panic("mutant limit exceeded");
 				}
 				wcnt--;
 				if (wtype == WAITTYPE_ANY) {
 					/*
 					 * Report which object woke us,
 					 * the same way the immediate
 					 * path above does.
 					 */
 					status = STATUS_WAIT_0 +
 					    w->wb_waitkey;
 					goto wait_done;
 				}
 			}
 			w = w->wb_next;
 		} while (w != whead);
 
 		/*
 		 * If all objects have been signalled, or if this
 		 * is a WAITTYPE_ANY wait and we were woke up by
 		 * someone, we can bail.
 		 */
 
 		if (wcnt == 0) {
 			status = STATUS_SUCCESS;
 			goto wait_done;
 		}
 
 		/*
 		 * If this is WAITTYPE_ALL wait, and there's still
 		 * objects that haven't been signalled, deduct the
 		 * time that's elapsed so far from the timeout and
 		 * wait again (or continue waiting indefinitely if
 		 * there's no timeout).
 		 */
 
 		if (duetime != NULL) {
 			tv.tv_sec -= (t2.tv_sec - t1.tv_sec);
 			tv.tv_usec -= (t2.tv_nsec - t1.tv_nsec) / 1000;
 		}
 	}
 
 
 wait_done:
 
 	cv_destroy(&we.we_cv);
 
 	/* Take every wait block still queued off its object's list. */
 	for (i = 0; i < cnt; i++) {
 		if (whead[i].wb_object != NULL)
 			RemoveEntryList(&whead[i].wb_waitlist);
 
 	}
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (status);
 }
 
 /*
  * Write the 16-bit value 'val' with bus_space_write_2(), using the
  * NDIS_BUS_SPACE_MEM tag, a zero handle, and 'reg' as the offset.
  */
 static void
 WRITE_REGISTER_USHORT(uint16_t *reg, uint16_t val)
 {
 	bus_space_write_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
 }
 
 /*
  * Read a 16-bit value with bus_space_read_2(), using the
  * NDIS_BUS_SPACE_MEM tag, a zero handle, and 'reg' as the offset.
  */
 static uint16_t
 READ_REGISTER_USHORT(uint16_t *reg)
 {
 	return (bus_space_read_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
 }
 
 /*
  * Write the 32-bit value 'val' with bus_space_write_4(), using the
  * NDIS_BUS_SPACE_MEM tag, a zero handle, and 'reg' as the offset.
  */
 static void
 WRITE_REGISTER_ULONG(uint32_t *reg, uint32_t val)
 {
 	bus_space_write_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
 }
 
 /*
  * Read a 32-bit value with bus_space_read_4(), using the
  * NDIS_BUS_SPACE_MEM tag, a zero handle, and 'reg' as the offset.
  */
 static uint32_t
 READ_REGISTER_ULONG(uint32_t *reg)
 {
 	return (bus_space_read_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
 }
 
 /*
  * Read an 8-bit value with bus_space_read_1(), using the
  * NDIS_BUS_SPACE_MEM tag, a zero handle, and 'reg' as the offset.
  */
 static uint8_t
 READ_REGISTER_UCHAR(uint8_t *reg)
 {
 	return (bus_space_read_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
 }
 
 /*
  * Write the 8-bit value 'val' with bus_space_write_1(), using the
  * NDIS_BUS_SPACE_MEM tag, a zero handle, and 'reg' as the offset.
  */
 static void
 WRITE_REGISTER_UCHAR(uint8_t *reg, uint8_t val)
 {
 	bus_space_write_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
 }
 
 /* Signed 64-bit multiply: returns a * b. */
 static int64_t
 _allmul(int64_t a, int64_t b)
 {
 	int64_t			product;
 
 	product = a * b;
 
 	return (product);
 }
 
 /* Signed 64-bit divide: returns a / b. */
 static int64_t
 _alldiv(int64_t a, int64_t b)
 {
 	int64_t			quotient;
 
 	quotient = a / b;
 
 	return (quotient);
 }
 
 /* Signed 64-bit remainder: returns a % b. */
 static int64_t
 _allrem(int64_t a, int64_t b)
 {
 	int64_t			remainder;
 
 	remainder = a % b;
 
 	return (remainder);
 }
 
 /* Unsigned 64-bit multiply: returns a * b. */
 static uint64_t
 _aullmul(uint64_t a, uint64_t b)
 {
 	uint64_t		product;
 
 	product = a * b;
 
 	return (product);
 }
 
 /* Unsigned 64-bit divide: returns a / b. */
 static uint64_t
 _aulldiv(uint64_t a, uint64_t b)
 {
 	uint64_t		quotient;
 
 	quotient = a / b;
 
 	return (quotient);
 }
 
 /* Unsigned 64-bit remainder: returns a % b. */
 static uint64_t
 _aullrem(uint64_t a, uint64_t b)
 {
 	uint64_t		remainder;
 
 	remainder = a % b;
 
 	return (remainder);
 }
 
 /* Signed 64-bit shift left: returns a << b. */
 static int64_t
 _allshl(int64_t a, uint8_t b)
 {
 	int64_t			shifted;
 
 	shifted = a << b;
 
 	return (shifted);
 }
 
 /* Unsigned 64-bit shift left: returns a << b. */
 static uint64_t
 _aullshl(uint64_t a, uint8_t b)
 {
 	uint64_t		shifted;
 
 	shifted = a << b;
 
 	return (shifted);
 }
 
 /* Signed 64-bit shift right: returns a >> b. */
 static int64_t
 _allshr(int64_t a, uint8_t b)
 {
 	int64_t			shifted;
 
 	shifted = a >> b;
 
 	return (shifted);
 }
 
 /* Unsigned 64-bit shift right: returns a >> b. */
 static uint64_t
 _aullshr(uint64_t a, uint8_t b)
 {
 	uint64_t		shifted;
 
 	shifted = a >> b;
 
 	return (shifted);
 }
 
 /*
  * Push 'entry' onto the front of the singly-linked list 'head',
  * bumping the list's depth and sequence counters. No locking is done
  * here. Returns the entry that was first before the push (NULL if
  * the list was empty).
  */
 static slist_entry *
 ntoskrnl_pushsl(slist_header *head, slist_entry *entry)
 {
 	slist_entry		*prevfirst;
 
 	prevfirst = head->slh_list.slh_next;
 
 	entry->sl_next = prevfirst;
 	head->slh_list.slh_next = entry;
 
 	head->slh_list.slh_depth++;
 	head->slh_list.slh_seq++;
 
 	return (prevfirst);
 }
 
+/* Reset an slist header to the empty state by clearing all of it. */
+static void
+InitializeSListHead(slist_header *head)
+{
+	bzero((char *)head, sizeof(slist_header));
+}
+
 /*
  * Pop the first entry off the singly-linked list 'head', lowering the
  * depth counter and bumping the sequence counter. No locking is done
  * here. Returns the removed entry, or NULL (leaving the header
  * untouched) when the list is empty.
  */
 static slist_entry *
 ntoskrnl_popsl(slist_header *head)
 {
 	slist_entry		*top;
 
 	top = head->slh_list.slh_next;
 	if (top == NULL)
 		return (NULL);
 
 	head->slh_list.slh_next = top->sl_next;
 	head->slh_list.slh_depth--;
 	head->slh_list.slh_seq++;
 
 	return (top);
 }
 
 /*
  * We need this to make lookaside lists work for amd64.
  * We pass a pointer to ExAllocatePoolWithTag() to the lookaside
  * list structure. For amd64 to work right, this has to be a
  * pointer to the wrapped version of the routine, not the
  * original. Letting the Windows driver invoke the original
  * function directly will result in a calling convention
  * mismatch and a pretty crash. On x86, this effectively
  * becomes a no-op since ipt_func and ipt_wrap are the same.
  */
 
 /*
  * Search ntoskrnl_functbl for the entry whose ipt_func equals 'func'
  * and return that entry's ipt_wrap. The table is scanned up to its
  * terminating entry (ipt_func == NULL); NULL is returned when no
  * entry matches.
  */
 static funcptr
 ntoskrnl_findwrap(funcptr func)
 {
 	image_patch_table	*ent;
 
 	for (ent = ntoskrnl_functbl; ent->ipt_func != NULL; ent++) {
 		if ((funcptr)ent->ipt_func == func)
 			return ((funcptr)ent->ipt_wrap);
 	}
 
 	return (NULL);
 }
 
 /*
  * Initialize a paged lookaside list.
  *
  * lookaside  list to initialize (zeroed first)
  * allocfunc  allocation routine; when NULL the wrapped
  *            ExAllocatePoolWithTag() is looked up and used
  * freefunc   release routine; when NULL the wrapped ExFreePool() is
  *            looked up and used
  * flags      not used
  * size       entry size; raised to sizeof(slist_entry) if smaller
  * tag        pool tag recorded in the list
  * depth      value recorded as the list depth
  *
  * The pool type is recorded as NonPagedPool and the maximum depth as
  * LOOKASIDE_DEPTH.
  */
 static void
 ExInitializePagedLookasideList(paged_lookaside_list *lookaside,
 	lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc,
 	uint32_t flags, size_t size, uint32_t tag, uint16_t depth)
 {
 	bzero((char *)lookaside, sizeof(paged_lookaside_list));
 
 	/* Entries must be able to hold the list linkage. */
 	if (size >= sizeof(slist_entry))
 		lookaside->nll_l.gl_size = size;
 	else
 		lookaside->nll_l.gl_size = sizeof(slist_entry);
 	lookaside->nll_l.gl_tag = tag;
 
 	if (allocfunc != NULL)
 		lookaside->nll_l.gl_allocfunc = allocfunc;
 	else
 		lookaside->nll_l.gl_allocfunc =
 		    ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag);
 
 	if (freefunc != NULL)
 		lookaside->nll_l.gl_freefunc = freefunc;
 	else
 		lookaside->nll_l.gl_freefunc =
 		    ntoskrnl_findwrap((funcptr)ExFreePool);
 
 #ifdef __i386__
 	KeInitializeSpinLock(&lookaside->nll_obsoletelock);
 #endif
 
 	lookaside->nll_l.gl_type = NonPagedPool;
 	lookaside->nll_l.gl_depth = depth;
 	lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH;
 }
 
 /*
  * Drain a paged lookaside list: pop every cached entry and hand it to
  * the list's free routine.
  */
 static void
 ExDeletePagedLookasideList(paged_lookaside_list *lookaside)
 {
 	void		(*freefunc)(void *);
 	void			*item;
 
 	freefunc = lookaside->nll_l.gl_freefunc;
 	for (;;) {
 		item = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead);
 		if (item == NULL)
 			break;
 		MSCALL1(freefunc, item);
 	}
 }
 
 /*
  * Initialize a non-paged lookaside list.
  *
  * lookaside  list to initialize (zeroed first)
  * allocfunc  allocation routine; when NULL the wrapped
  *            ExAllocatePoolWithTag() is looked up and used
  * freefunc   release routine; when NULL the wrapped ExFreePool() is
  *            looked up and used
  * flags      not used
  * size       entry size; raised to sizeof(slist_entry) if smaller
  * tag        pool tag recorded in the list
  * depth      value recorded as the list depth
  *
  * The pool type is recorded as NonPagedPool and the maximum depth as
  * LOOKASIDE_DEPTH.
  */
 static void
 ExInitializeNPagedLookasideList(npaged_lookaside_list *lookaside,
 	lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc,
 	uint32_t flags, size_t size, uint32_t tag, uint16_t depth)
 {
 	bzero((char *)lookaside, sizeof(npaged_lookaside_list));
 
 	/* Entries must be able to hold the list linkage. */
 	if (size >= sizeof(slist_entry))
 		lookaside->nll_l.gl_size = size;
 	else
 		lookaside->nll_l.gl_size = sizeof(slist_entry);
 	lookaside->nll_l.gl_tag = tag;
 
 	if (allocfunc != NULL)
 		lookaside->nll_l.gl_allocfunc = allocfunc;
 	else
 		lookaside->nll_l.gl_allocfunc =
 		    ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag);
 
 	if (freefunc != NULL)
 		lookaside->nll_l.gl_freefunc = freefunc;
 	else
 		lookaside->nll_l.gl_freefunc =
 		    ntoskrnl_findwrap((funcptr)ExFreePool);
 
 #ifdef __i386__
 	KeInitializeSpinLock(&lookaside->nll_obsoletelock);
 #endif
 
 	lookaside->nll_l.gl_type = NonPagedPool;
 	lookaside->nll_l.gl_depth = depth;
 	lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH;
 }
 
 /*
  * Drain a non-paged lookaside list: every buffer still cached on the
  * list is popped and handed to the list's registered free routine.
  */
 static void
 ExDeleteNPagedLookasideList(npaged_lookaside_list *lookaside)
 {
 	void		(*release)(void *);
 	void			*item;
 
 	release = lookaside->nll_l.gl_freefunc;
 	for (;;) {
 		item = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead);
 		if (item == NULL)
 			break;
 		MSCALL1(release, item);
 	}
 }
 
 /*
  * Push 'entry' onto the singly-linked list 'head' while holding the
  * ntoskrnl_interlock spin mutex.  Returns whatever ntoskrnl_pushsl()
  * reports for the push.
  */
 slist_entry *
 InterlockedPushEntrySList(slist_header *head, slist_entry *entry)
 {
 	slist_entry		*prev;
 
 	mtx_lock_spin(&ntoskrnl_interlock);
 	prev = ntoskrnl_pushsl(head, entry);
 	mtx_unlock_spin(&ntoskrnl_interlock);
 
 	return (prev);
 }
 
 /*
  * Pop an entry from the singly-linked list 'head' while holding the
  * ntoskrnl_interlock spin mutex.  Returns whatever ntoskrnl_popsl()
  * yields for the pop.
  */
 slist_entry *
 InterlockedPopEntrySList(slist_header *head)
 {
 	slist_entry		*popped;
 
 	mtx_lock_spin(&ntoskrnl_interlock);
 	popped = ntoskrnl_popsl(head);
 	mtx_unlock_spin(&ntoskrnl_interlock);
 
 	return (popped);
 }
 
 /*
  * Thin front end for InterlockedPushEntrySList().  The caller's
  * spin lock argument is accepted but not used.
  */
 static slist_entry *
 ExInterlockedPushEntrySList(slist_header *head, slist_entry *entry,
 	kspin_lock *lock)
 {
 	return (InterlockedPushEntrySList(head, entry));
 }
 
 /*
  * Thin front end for InterlockedPopEntrySList().  The caller's
  * spin lock argument is accepted but not used.
  */
 static slist_entry *
 ExInterlockedPopEntrySList(slist_header *head, kspin_lock *lock)
 {
 	return (InterlockedPopEntrySList(head));
 }
 
 /*
  * Read the depth field recorded in an slist header while holding
  * the ntoskrnl_interlock spin mutex, and return it.
  */
 uint16_t
 ExQueryDepthSList(slist_header *head)
 {
 	uint16_t		cnt;
 
 	mtx_lock_spin(&ntoskrnl_interlock);
 	cnt = head->slh_list.slh_depth;
 	mtx_unlock_spin(&ntoskrnl_interlock);
 
 	return (cnt);
 }
 
 /*
  * Put a spin lock into its released state (the lock word is zero).
  */
 void
 KeInitializeSpinLock(kspin_lock *lock)
 {
 	*lock = 0;
 }
 
 #ifdef __i386__
 /*
  * Busy-wait until the lock word can be atomically switched from 0
  * to 1.  With NTOSKRNL_DEBUG_SPINLOCKS defined, the loop panics once
  * it has spun more than 200000000 times.
  */
 void
 KefAcquireSpinLockAtDpcLevel(kspin_lock *lock)
 {
 #ifdef NTOSKRNL_DEBUG_SPINLOCKS
 	int			spins = 0;
 #endif
 
 	while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) {
 		/* Lock is held by someone else; try again. */
 #ifdef NTOSKRNL_DEBUG_SPINLOCKS
 		spins++;
 		if (spins > 200000000)
 			panic("DEADLOCK!");
 #endif
 	}
 }
 
 /*
  * Release a spin lock by storing zero into the lock word with
  * release semantics.
  */
 void
 KefReleaseSpinLockFromDpcLevel(kspin_lock *lock)
 {
 	atomic_store_rel_int((volatile u_int *)lock, 0);
 }
 
 /*
  * Raise the IRQL to DISPATCH_LEVEL, take the spin lock, and return
  * the IRQL that was in effect beforehand.  Panics when called with
  * the IRQL already above DISPATCH_LEVEL.
  */
 uint8_t
 KeAcquireSpinLockRaiseToDpc(kspin_lock *lock)
 {
 	uint8_t			previrql;
 
 	if (KeGetCurrentIrql() > DISPATCH_LEVEL)
 		panic("IRQL_NOT_LESS_THAN_OR_EQUAL");
 
 	KeRaiseIrql(DISPATCH_LEVEL, &previrql);
 	KeAcquireSpinLockAtDpcLevel(lock);
 
 	return (previrql);
 }
 #else
 /*
  * Busy-wait until the lock word can be atomically switched from 0
  * to 1.
  */
 void
 KeAcquireSpinLockAtDpcLevel(kspin_lock *lock)
 {
 	for (;;) {
 		if (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) != 0)
 			break;
 		/* Lock is held by someone else; try again. */
 	}
 }
 
 /*
  * Release a spin lock by storing zero into the lock word with
  * release semantics.
  */
 void
 KeReleaseSpinLockFromDpcLevel(kspin_lock *lock)
 {
 	atomic_store_rel_int((volatile u_int *)lock, 0);
 }
 #endif /* __i386__ */
 
 /*
  * Store 'val' at '*dst' and return the value that was there before.
  * Both steps are done while holding the ntoskrnl_interlock spin
  * mutex.
  */
 uintptr_t
 InterlockedExchange(volatile uint32_t *dst, uintptr_t val)
 {
 	uintptr_t		old;
 
 	mtx_lock_spin(&ntoskrnl_interlock);
 	old = *dst;
 	*dst = val;
 	mtx_unlock_spin(&ntoskrnl_interlock);
 
 	return (old);
 }
 
 static uint32_t
 InterlockedIncrement(addend)
 	volatile uint32_t	*addend;
 {
 	atomic_add_long((volatile u_long *)addend, 1);
 	return (*addend);
 }
 
 static uint32_t
 InterlockedDecrement(addend)
 	volatile uint32_t	*addend;
 {
 	atomic_subtract_long((volatile u_long *)addend, 1);
 	return (*addend);
 }
 
 /*
  * Add the 32-bit increment 'inc' to the 64-bit statistic at
  * '*addend' while holding the ntoskrnl_interlock spin mutex.
  */
 static void
 ExInterlockedAddLargeStatistic(uint64_t *addend, uint32_t inc)
 {
 	mtx_lock_spin(&ntoskrnl_interlock);
 	*addend += inc;
 	mtx_unlock_spin(&ntoskrnl_interlock);
 }
 
 /*
  * Allocate and initialize an MDL describing 'len' bytes at 'vaddr'.
  * MDLs whose size fits in MDL_ZONE_SIZE come from the MDL UMA zone;
  * larger ones come from ExAllocatePoolWithTag().  If 'iopkt' is
  * given, the new MDL is attached to the IRP: appended to the end of
  * the IRP's MDL chain when 'secondarybuf' is TRUE, otherwise
  * installed as the IRP's (sole) MDL.  'chargequota' is not used.
  *
  * Returns the new MDL, or NULL if the allocation failed.
  */
 mdl *
 IoAllocateMdl(void *vaddr, uint32_t len, uint8_t secondarybuf,
 	uint8_t chargequota, irp *iopkt)
 {
 	mdl			*m;
 	mdl			*last;
 	uint32_t		mdlsize;
 	int			zone = 0;
 
 	mdlsize = MmSizeOfMdl(vaddr, len);
 	if (mdlsize > MDL_ZONE_SIZE)
 		m = ExAllocatePoolWithTag(NonPagedPool, mdlsize, 0);
 	else {
 		m = uma_zalloc(mdl_zone, M_NOWAIT | M_ZERO);
 		zone++;
 	}
 
 	if (m == NULL)
 		return (NULL);
 
 	MmInitializeMdl(m, vaddr, len);
 
 	/*
 	 * MmInitializeMdl() clears the flags field, so we
 	 * have to set this here. If the MDL came from the
 	 * MDL UMA zone, tag it so we can release it to
 	 * the right place later.
 	 */
 	if (zone)
 		m->mdl_flags = MDL_ZONE_ALLOCED;
 
 	if (iopkt != NULL) {
 		if (iopkt->irp_mdl == NULL) {
 			/*
 			 * Nothing attached yet: the new MDL becomes the
 			 * head of the chain.  This also covers a
 			 * secondary-buffer request on an IRP without a
 			 * primary MDL, which must not walk a NULL chain.
 			 */
 			iopkt->irp_mdl = m;
 		} else if (secondarybuf == TRUE) {
 			/* Append to the tail of the existing chain. */
 			last = iopkt->irp_mdl;
 			while (last->mdl_next != NULL)
 				last = last->mdl_next;
 			last->mdl_next = m;
 		} else
 			panic("leaking an MDL in IoAllocateMdl()");
 	}
 
 	return (m);
 }
 
 /*
  * Release an MDL.  MDLs tagged MDL_ZONE_ALLOCED go back to the MDL
  * UMA zone; all others are released with ExFreePool().  A NULL
  * pointer is ignored.
  */
 void
 IoFreeMdl(mdl *m)
 {
 	if (m == NULL)
 		return;
 
 	if ((m->mdl_flags & MDL_ZONE_ALLOCED) == 0) {
 		ExFreePool(m);
 		return;
 	}
 
 	uma_zfree(mdl_zone, m);
 }
 
 /*
  * Allocate 'size' bytes, rounded up to a whole number of pages, from
  * ExAllocatePoolWithTag().  The 'highest' address limit is not
  * consulted.
  */
 static void *
 MmAllocateContiguousMemory(uint32_t size, uint64_t highest)
 {
 	size_t			rounded;
 
 	rounded = roundup(size, PAGE_SIZE);
 
 	return (ExAllocatePoolWithTag(NonPagedPool, rounded, 0));
 }
 
 /*
  * Allocate zeroed, physically contiguous memory between 'lowest' and
  * 'highest' with kmem_alloc_contig(), page aligned and honouring
  * 'boundary'.  The Windows caching type is translated to a VM memory
  * attribute: non-cached variants map to VM_MEMATTR_UNCACHEABLE,
  * write-combined to VM_MEMATTR_WRITE_COMBINING, and everything else
  * to VM_MEMATTR_DEFAULT.  Successful allocations are accounted to
  * M_DEVBUF.  Returns NULL on failure.
  */
 static void *
 MmAllocateContiguousMemorySpecifyCache(uint32_t size, uint64_t lowest,
     uint64_t highest, uint64_t boundary, enum nt_caching_type cachetype)
 {
 	vm_memattr_t		attr;
 	void			*mem;
 
 	if (cachetype == MmNonCached || cachetype == MmNonCachedUnordered)
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else if (cachetype == MmWriteCombined)
 		attr = VM_MEMATTR_WRITE_COMBINING;
 	else
 		attr = VM_MEMATTR_DEFAULT;
 
 	mem = (void *)kmem_alloc_contig(kernel_map, size, M_ZERO | M_NOWAIT,
 	    lowest, highest, PAGE_SIZE, boundary, attr);
 	if (mem != NULL)
 		malloc_type_allocated(M_DEVBUF, round_page(size));
 
 	return (mem);
 }
 
 /*
  * Release memory by passing 'base' to ExFreePool().
  */
 static void
 MmFreeContiguousMemory(void *base)
 {
 	ExFreePool(base);
 }
 
 /*
  * Release 'size' bytes at 'base' with contigfree(), charged against
  * M_DEVBUF.  The caching type is not consulted.
  */
 static void
 MmFreeContiguousMemorySpecifyCache(void *base, uint32_t size,
     enum nt_caching_type cachetype)
 {
 	contigfree(base, size, M_DEVBUF);
 }
 
 /*
  * Compute the number of bytes needed for an MDL describing 'len'
  * bytes at 'vaddr': the fixed header plus one pointer-sized slot per
  * page spanned by the buffer.
  */
 static uint32_t
 MmSizeOfMdl(void *vaddr, size_t len)
 {
 	return (sizeof(struct mdl) +
 	    (sizeof(vm_offset_t *) * SPAN_PAGES(vaddr, len)));
 }
 
 /*
  * The Microsoft documentation says this routine fills in the
  * page array of an MDL with the _physical_ page addresses that
  * comprise the buffer, but we don't really want to do that here.
  * Instead, we just fill in the page array with the kernel virtual
  * addresses of the buffers: slot i receives the start address plus
  * i pages.  Panics if the MDL is too small to hold one slot per
  * page spanned by the buffer.
  */
 void
 MmBuildMdlForNonPagedPool(mdl *m)
 {
 	vm_offset_t		*mdl_pages;
 	int			pagecnt, i;
 
 	pagecnt = SPAN_PAGES(m->mdl_byteoffset, m->mdl_bytecount);
 
 	if (pagecnt > (m->mdl_size - sizeof(mdl)) / sizeof(vm_offset_t *))
 		panic("not enough pages in MDL to describe buffer");
 
 	mdl_pages = MmGetMdlPfnArray(m);
 
 	/*
 	 * Index the array so that each page gets its own slot; storing
 	 * through the bare pointer would overwrite slot 0 every time.
 	 */
 	for (i = 0; i < pagecnt; i++)
 		mdl_pages[i] = (vm_offset_t)m->mdl_startva + (i * PAGE_SIZE);
 
 	m->mdl_flags |= MDL_SOURCE_IS_NONPAGED_POOL;
 	m->mdl_mappedsystemva = MmGetMdlVirtualAddress(m);
 }
 
 /*
  * Flag the MDL as mapped into system address space and return its
  * virtual address.  'accessmode' is not consulted.
  */
 static void *
 MmMapLockedPages(mdl *buf, uint8_t accessmode)
 {
 	void			*va;
 
 	buf->mdl_flags |= MDL_MAPPED_TO_SYSTEM_VA;
 	va = MmGetMdlVirtualAddress(buf);
 
 	return (va);
 }
 
 /*
  * Same as MmMapLockedPages(); the cache type, requested address,
  * bugcheck flag and priority arguments are all ignored.
  */
 static void *
 MmMapLockedPagesSpecifyCache(mdl *buf, uint8_t accessmode, uint32_t cachetype,
 	void *vaddr, uint32_t bugcheck, uint32_t prio)
 {
 	return (MmMapLockedPages(buf, accessmode));
 }
 
 /*
  * Clear the "mapped to system VA" flag on the MDL.  'vaddr' is not
  * consulted.
  */
 static void
 MmUnmapLockedPages(void *vaddr, mdl *buf)
 {
 	buf->mdl_flags &= ~MDL_MAPPED_TO_SYSTEM_VA;
 }
 
 /*
  * This function has a problem in that it will break if you
  * compile this module without PAE and try to use it on a PAE
  * kernel. Unfortunately, there's no way around this at the
  * moment. It's slightly less broken than using pmap_kextract().
  * You'd think the virtual memory subsystem would help us out
  * here, but it doesn't.
  */
 
 static uint64_t
 MmGetPhysicalAddress(void *base)
 {
 	return (pmap_extract(kernel_map->pmap, (vm_offset_t)base));
 }
 
 /*
  * Report whether the kernel pmap yields a non-zero translation for
  * 'vaddr': TRUE if it does, FALSE otherwise.
  */
 uint8_t
 MmIsAddressValid(void *vaddr)
 {
 	return (pmap_extract(kernel_map->pmap, (vm_offset_t)vaddr) != 0 ?
 	    TRUE : FALSE);
 }
 
 /*
  * Translate the physical address 'paddr' into a kernel virtual
  * address by searching the device tree below each nexus for a memory
  * resource that covers it (see ntoskrnl_finddev()).  The result is
  * the resource's virtual base plus the offset of 'paddr' within the
  * resource.  'len' and 'cachetype' are not consulted.
  *
  * Returns NULL if no matching resource is found or the nexus device
  * list cannot be obtained.
  */
 void *
 MmMapIoSpace(uint64_t paddr, uint32_t len, uint32_t cachetype)
 {
 	devclass_t		nexus_class;
 	device_t		*nexus_devs = NULL;
 	int			nexus_count = 0;
 	device_t		matching_dev = NULL;
 	struct resource		*res;
 	int			i;
 	vm_offset_t		v;
 
 	/* There will always be at least one nexus. */
 
 	nexus_class = devclass_find("nexus");
 	if (nexus_class == NULL)
 		return (NULL);
 
 	/*
 	 * On failure the device array is not filled in, so bail out
 	 * rather than walk (and later free) an invalid pointer.
 	 */
 	if (devclass_get_devices(nexus_class, &nexus_devs, &nexus_count) != 0)
 		return (NULL);
 
 	for (i = 0; i < nexus_count; i++) {
 		matching_dev = ntoskrnl_finddev(nexus_devs[i], paddr, &res);
 		if (matching_dev)
 			break;
 	}
 
 	free(nexus_devs, M_TEMP);
 
 	if (matching_dev == NULL)
 		return (NULL);
 
 	v = (vm_offset_t)rman_get_virtual(res);
 	if (paddr > rman_get_start(res))
 		v += paddr - rman_get_start(res);
 
 	return ((void *)v);
 }
 
 /*
  * Counterpart of MmMapIoSpace().  This is a no-op: neither argument
  * is used and nothing is torn down.
  */
 void
 MmUnmapIoSpace(void *vaddr, size_t len)
 {
 }
 
 
 /*
  * Depth-first search of the device tree rooted at 'dev' for a device
  * owning a SYS_RES_MEMORY resource whose range contains 'paddr'.
  * On a match the resource is activated if it was not already active,
  * stored in '*res', and the owning device is returned.  Returns NULL
  * when nothing in the subtree matches, when 'dev' is not alive, or
  * when the list of children cannot be obtained.
  */
 static device_t
 ntoskrnl_finddev(device_t dev, uint64_t paddr, struct resource **res)
 {
 	device_t		*children = NULL;
 	device_t		matching_dev;
 	int			childcnt = 0;
 	struct resource		*r;
 	struct resource_list	*rl;
 	struct resource_list_entry	*rle;
 	uint32_t		flags;
 	int			i;
 
 	/* We only want devices that have been successfully probed. */
 
 	if (device_is_alive(dev) == FALSE)
 		return (NULL);
 
 	rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev);
 	if (rl != NULL) {
 		STAILQ_FOREACH(rle, rl, link) {
 			r = rle->res;
 
 			if (r == NULL)
 				continue;
 
 			flags = rman_get_flags(r);
 
 			if (rle->type == SYS_RES_MEMORY &&
 			    paddr >= rman_get_start(r) &&
 			    paddr <= rman_get_end(r)) {
 				if (!(flags & RF_ACTIVE))
 					bus_activate_resource(dev,
 					    SYS_RES_MEMORY, 0, r);
 				*res = r;
 				return (dev);
 			}
 		}
 	}
 
 	/*
 	 * If this device has children, do another
 	 * level of recursion to inspect them.  If the child list
 	 * cannot be obtained the outputs are not valid, so give up
 	 * on this subtree instead of iterating over garbage.
 	 */
 
 	if (device_get_children(dev, &children, &childcnt) != 0)
 		return (NULL);
 
 	matching_dev = NULL;
 	for (i = 0; i < childcnt; i++) {
 		matching_dev = ntoskrnl_finddev(children[i], paddr, res);
 		if (matching_dev != NULL)
 			break;
 	}
 
 	/* Won't somebody please think of the children! */
 
 	if (children != NULL)
 		free(children, M_TEMP);
 
 	return (matching_dev);
 }
 
 /*
  * Workitems are unlike DPCs, in that they run in a user-mode thread
  * context rather than at DISPATCH_LEVEL in kernel context. In our
  * case we run them in kernel context anyway.
  *
  * This is the body of one workitem thread. 'arg' is the kdpc_queue
  * the thread services. The thread initializes the queue, then loops:
  * it waits on the queue's kq_proc event, and each time it wakes up
  * it either exits (if kq_exit has been set) or drains the kq_disp
  * list, calling each workitem's function.
  */
 static void
 ntoskrnl_workitem_thread(arg)
 	void			*arg;
 {
 	kdpc_queue		*kq;
 	list_entry		*l;
 	io_workitem		*iw;
 	uint8_t			irql;
 
 	kq = arg;
 
 	/* One-time queue setup, done by the servicing thread itself. */
 	InitializeListHead(&kq->kq_disp);
 	kq->kq_td = curthread;
 	kq->kq_exit = 0;
 	KeInitializeSpinLock(&kq->kq_lock);
 	KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE);
 
 	while (1) {
 		KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL);
 
 		KeAcquireSpinLock(&kq->kq_lock, &irql);
 
 		/*
 		 * Exit request: clear the flag before leaving the loop.
 		 * ntoskrnl_destroy_workitem_threads() polls kq_exit and
 		 * treats the cleared flag as the acknowledgement.
 		 */
 		if (kq->kq_exit) {
 			kq->kq_exit = 0;
 			KeReleaseSpinLock(&kq->kq_lock, irql);
 			break;
 		}
 
 		while (!IsListEmpty(&kq->kq_disp)) {
 			l = RemoveHeadList(&kq->kq_disp);
 			iw = CONTAINING_RECORD(l,
 			    io_workitem, iw_listentry);
 			/* Reset the linkage so the item reads as unqueued. */
 			InitializeListHead((&iw->iw_listentry));
 			if (iw->iw_func == NULL)
 				continue;
 			/* Run the callback with the queue lock dropped. */
 			KeReleaseSpinLock(&kq->kq_lock, irql);
 			MSCALL2(iw->iw_func, iw->iw_dobj, iw->iw_ctx);
 			KeAcquireSpinLock(&kq->kq_lock, &irql);
 		}
 
 		KeReleaseSpinLock(&kq->kq_lock, irql);
 	}
 
 	kproc_exit(0);
 	return; /* notreached */
 }
 
+/*
+ * Parse the NUL-terminated string 'src' as an unsigned 32-bit number
+ * and store it in '*val'.  Leading characters that compare <= ' '
+ * are skipped and one optional '+' or '-' is consumed ('-' negates
+ * the result).  'base' must be 2, 8, 10 or 16; a base of 0 selects
+ * base 10 unless the digits start with "0b", "0o" or "0x", which
+ * select base 2, 8 or 16.
+ *
+ * Returns STATUS_ACCESS_VIOLATION if 'src' or 'val' is NULL,
+ * STATUS_INVALID_PARAMETER for an unsupported base or a character
+ * that is not a digit of the base, and STATUS_SUCCESS otherwise.
+ */
+static ndis_status
+RtlCharToInteger(const char *src, uint32_t base, uint32_t *val)
+{
+	uint32_t		acc = 0;
+	int			minus = 0;
+	int			digit;
+
+	if (src == NULL || val == NULL)
+		return (STATUS_ACCESS_VIOLATION);
+
+	for (; *src != '\0' && *src <= ' '; src++)
+		continue;
+
+	switch (*src) {
+	case '-':
+		minus = 1;
+		/* FALLTHROUGH */
+	case '+':
+		src++;
+		break;
+	default:
+		break;
+	}
+
+	if (base == 0) {
+		/* Auto-detect: decimal unless a 0b/0o/0x prefix says else. */
+		base = 10;
+		if (*src == '0') {
+			src++;
+			switch (*src) {
+			case 'b':
+				base = 2;
+				src++;
+				break;
+			case 'o':
+				base = 8;
+				src++;
+				break;
+			case 'x':
+				base = 16;
+				src++;
+				break;
+			default:
+				break;
+			}
+		}
+	} else if (base != 2 && base != 8 && base != 10 && base != 16)
+		return (STATUS_INVALID_PARAMETER);
+
+	while (*src != '\0') {
+		if (isdigit(*src))
+			digit = *src - '0';
+		else if (isxdigit(*src))
+			digit = tolower(*src) - 'a' + 10;
+		else
+			return (STATUS_INVALID_PARAMETER);
+		if (digit >= base)
+			return (STATUS_INVALID_PARAMETER);
+		acc = acc * base + digit;
+		src++;
+	}
+
+	*val = minus ? -acc : acc;
+	return (STATUS_SUCCESS);
+}
+
 /*
  * Ask every workitem thread to exit: set the queue's kq_exit flag,
  * signal its kq_proc event, then sleep in hz/10 steps until the
  * flag has been cleared again.
  */
 static void
 ntoskrnl_destroy_workitem_threads(void)
 {
 	kdpc_queue		*q;
 	int			n;
 
 	for (n = 0; n < WORKITEM_THREADS; n++) {
 		q = wq_queues + n;
 		q->kq_exit = 1;
 		KeSetEvent(&q->kq_proc, IO_NO_INCREMENT, FALSE);
 		while (q->kq_exit)
 			tsleep(q->kq_td->td_proc, PWAIT, "waitiw", hz/10);
 	}
 }
 
 /*
  * Allocate a workitem from the workitem zone and bind it to 'dobj'.
  * The item is assigned the work queue currently selected by wq_idx,
  * which is then advanced with WORKIDX_INC(), both under the
  * dispatcher lock.  Returns NULL if the zone allocation fails.
  */
 io_workitem *
 IoAllocateWorkItem(device_object *dobj)
 {
 	io_workitem		*item;
 
 	item = uma_zalloc(iw_zone, M_NOWAIT);
 	if (item == NULL)
 		return (NULL);
 
 	InitializeListHead(&item->iw_listentry);
 	item->iw_dobj = dobj;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 	item->iw_idx = wq_idx;
 	WORKIDX_INC(wq_idx);
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (item);
 }
 
 /*
  * Return a workitem to the workitem zone.
  */
 void
 IoFreeWorkItem(io_workitem *iw)
 {
 	uma_zfree(iw_zone, iw);
 }
 
 /*
  * Queue workitem 'iw' on the work queue it was assigned to, to run
  * 'iw_func' with context 'ctx', and signal the queue's event.  If
  * the item is already on the queue, nothing is changed.  'qtype' is
  * not consulted.
  */
 void
 IoQueueWorkItem(io_workitem *iw, io_workitem_func iw_func, uint32_t qtype,
 	void *ctx)
 {
 	kdpc_queue		*q;
 	list_entry		*node;
 	io_workitem		*queued;
 	uint8_t			irql;
 
 	q = wq_queues + iw->iw_idx;
 
 	KeAcquireSpinLock(&q->kq_lock, &irql);
 
 	/*
 	 * Walk the pending list first: inserting the same workitem
 	 * a second time would corrupt the list.
 	 */
 	for (node = q->kq_disp.nle_flink; node != &q->kq_disp;
 	    node = node->nle_flink) {
 		queued = CONTAINING_RECORD(node, io_workitem, iw_listentry);
 		if (queued == iw) {
 			/* Already queued -- do nothing. */
 			KeReleaseSpinLock(&q->kq_lock, irql);
 			return;
 		}
 	}
 
 	iw->iw_func = iw_func;
 	iw->iw_ctx = ctx;
 
 	InsertTailList((&q->kq_disp), (&iw->iw_listentry));
 	KeReleaseSpinLock(&q->kq_lock, irql);
 
 	KeSetEvent(&q->kq_proc, IO_NO_INCREMENT, FALSE);
 }
 
 /*
  * Workitem callback used for ExQueueWorkItem() jobs.  'dobj' really
  * points at the legacy work_queue_item and 'arg' at the io_workitem
  * carrying it.  The io_workitem is released to its zone first, then
  * the legacy item's function is called with the item and its
  * context.
  */
 static void
 ntoskrnl_workitem(device_object *dobj, void *arg)
 {
 	io_workitem		*carrier;
 	work_queue_item		*legacy;
 	work_item_func		fn;
 
 	carrier = arg;
 	legacy = (work_queue_item *)dobj;
 	fn = (work_item_func)legacy->wqi_func;
 
 	uma_zfree(iw_zone, carrier);
 	MSCALL2(fn, legacy, legacy->wqi_ctx);
 }
 
 /*
  * The ExQueueWorkItem() API is deprecated in Windows XP. Microsoft
  * warns that it's unsafe and to use IoQueueWorkItem() instead. The
  * problem with ExQueueWorkItem() is that it can't guard against
  * the condition where a driver submits a job to the work queue and
  * is then unloaded before the job is able to run. IoQueueWorkItem()
  * acquires a reference to the device's device_object via the
  * object manager and retains it until after the job has completed,
  * which prevents the driver from being unloaded before the job
  * runs. (We don't currently support this behavior, though hopefully
  * that will change once the object manager API is fleshed out a bit.)
  *
  * Having said all that, the ExQueueWorkItem() API remains, because
  * there are still other parts of Windows that use it, including
  * NDIS itself: NdisScheduleWorkItem() calls ExQueueWorkItem().
  * We fake up the ExQueueWorkItem() API on top of our implementation
  * of IoQueueWorkItem(). Workitem thread #3 is reserved exclusively
  * for ExQueueWorkItem() jobs, and we pass a pointer to the work
  * queue item (provided by the caller) in to IoAllocateWorkItem()
  * instead of the device_object. We need to save this pointer so
  * we can apply a sanity check: as with the DPC queue and other
  * workitem queues, we can't allow the same work queue item to
  * be queued twice. If it's already pending, we silently return.
  */
 
 /*
  * Queue the legacy work queue item 'w' on the workitem thread
  * reserved for ExQueueWorkItem() jobs (WORKITEM_LEGACY_THREAD).
  * An io_workitem is allocated to carry 'w'; it is released again by
  * ntoskrnl_workitem() when the job runs. Returns silently if 'w' is
  * already pending or if no io_workitem can be allocated. 'qtype' is
  * only passed through to IoQueueWorkItem().
  */
 void
 ExQueueWorkItem(w, qtype)
 	work_queue_item		*w;
 	uint32_t		qtype;
 {
 	io_workitem		*iw;
 	io_workitem_func	iwf;
 	kdpc_queue		*kq;
 	list_entry		*l;
 	io_workitem		*cur;
 	uint8_t			irql;
 
 
 	/*
 	 * We need to do a special sanity test to make sure
 	 * the ExQueueWorkItem() API isn't used to queue
 	 * the same workitem twice. Rather than checking the
 	 * io_workitem pointer itself, we test the attached
 	 * device object, which is really a pointer to the
 	 * legacy work queue item structure.
 	 */
 
 	kq = wq_queues + WORKITEM_LEGACY_THREAD;
 	KeAcquireSpinLock(&kq->kq_lock, &irql);
 	l = kq->kq_disp.nle_flink;
 	while (l != &kq->kq_disp) {
 		cur = CONTAINING_RECORD(l, io_workitem, iw_listentry);
 		if (cur->iw_dobj == (device_object *)w) {
 			/* Already queued -- do nothing. */
 			KeReleaseSpinLock(&kq->kq_lock, irql);
 			return;
 		}
 		l = l->nle_flink;
 	}
 	KeReleaseSpinLock(&kq->kq_lock, irql);
 
 	/* The legacy item rides in the device_object slot. */
 	iw = IoAllocateWorkItem((device_object *)w);
 	if (iw == NULL)
 		return;
 
 	/* Override the queue IoAllocateWorkItem() picked. */
 	iw->iw_idx = WORKITEM_LEGACY_THREAD;
 	iwf = (io_workitem_func)ntoskrnl_findwrap((funcptr)ntoskrnl_workitem);
 	IoQueueWorkItem(iw, iwf, qtype, iw);
 }
 
 /*
  * Clear 'len' bytes starting at 'dst'.
  */
 static void
 RtlZeroMemory(void *dst, size_t len)
 {
 	bzero(dst, len);
 }
 
 static void
+RtlSecureZeroMemory(void *dst, size_t len)
+{
+	/* Overwrite 'len' bytes at 'dst' with zeroes. */
+	memset(dst, 0, len);
+}
+
+/*
+ * Set 'len' bytes starting at 'dst' to the byte value 'c'.
+ */
+static void
+RtlFillMemory(void *dst, size_t len, uint8_t c)
+{
+	memset(dst, c, len);
+}
+
+/*
+ * Copy 'len' bytes from 'src' to 'dst' using memmove().
+ */
+static void
+RtlMoveMemory(void *dst, const void *src, size_t len)
+{
+	memmove(dst, src, len);
+}
+
+static void
 RtlCopyMemory(void *dst, const void *src, size_t len)
 {
 	/* bcopy() takes the source first, the destination second. */
 	bcopy(src, dst, len);
 }
 
 /*
  * Compare two buffers of 'len' bytes. This hunk changes the result:
  * the removed ('-') lines counted every position at which the two
  * buffers hold the same byte, while the added ('+') lines stop at
  * the first mismatch and return the length of the common prefix
  * ('len' when the buffers are identical).
  */
 static size_t
 RtlCompareMemory(s1, s2, len)
 	const void		*s1;
 	const void		*s2;
 	size_t			len;
 {
-	size_t			i, total = 0;
+	size_t			i;
 	uint8_t			*m1, *m2;
 
 	/* Byte-wise views of the two buffers. */
 	m1 = __DECONST(char *, s1);
 	m2 = __DECONST(char *, s2);
 
-	for (i = 0; i < len; i++) {
-		if (m1[i] == m2[i])
-			total++;
-	}
-	return (total);
+	for (i = 0; i < len && m1[i] == m2[i]; i++);
+	return (i);
 }
 
 /*
  * Point the counted string 'dst' at the C string 'src', without
  * copying it.  Both the length and the maximum length are set to
  * strlen(src).  A NULL 'src' yields an empty string with a NULL
  * buffer; a NULL 'dst' is ignored.
  */
 void
 RtlInitAnsiString(ansi_string *dst, char *src)
 {
 	if (dst == NULL)
 		return;
 
 	if (src != NULL) {
 		dst->as_buf = src;
 		dst->as_len = dst->as_maxlen = strlen(src);
 		return;
 	}
 
 	dst->as_len = dst->as_maxlen = 0;
 	dst->as_buf = NULL;
 }
 
 /*
  * Point the counted string 'dst' at the zero-terminated 16-bit
  * string 'src', without copying it.  Both the length and the maximum
  * length are set to twice the number of characters before the
  * terminator.  A NULL 'src' yields an empty string with a NULL
  * buffer; a NULL 'dst' is ignored.
  */
 void
 RtlInitUnicodeString(unicode_string *dst, uint16_t *src)
 {
 	int			nchars;
 
 	if (dst == NULL)
 		return;
 
 	if (src == NULL) {
 		dst->us_len = dst->us_maxlen = 0;
 		dst->us_buf = NULL;
 		return;
 	}
 
 	for (nchars = 0; src[nchars] != 0; nchars++)
 		continue;
 
 	dst->us_buf = src;
 	dst->us_len = dst->us_maxlen = nchars * 2;
 }
 
 /*
  * Convert the number held in the counted 16-bit string 'ustr' to a
  * uint32_t stored in '*val'. One optional leading '+' or '-' is
  * consumed. When 'base' is 0, a leading 'b', 'o' or 'x' selects base
  * 2, 8 or 16; otherwise base 10 is used. The remaining characters
  * are narrowed into a local buffer with ntoskrnl_unicode_to_ascii()
  * and parsed with strtoul(). Always returns STATUS_SUCCESS.
  */
 ndis_status
 RtlUnicodeStringToInteger(ustr, base, val)
 	unicode_string		*ustr;
 	uint32_t		base;
 	uint32_t		*val;
 {
 	uint16_t		*uchr;
 	int			len, neg = 0;
 	char			abuf[64];
 	char			*astr;
 
 	uchr = ustr->us_buf;
 	len = ustr->us_len;
 	bzero(abuf, sizeof(abuf));
 
 	/* Only the low byte of each 16-bit character is examined. */
 	if ((char)((*uchr) & 0xFF) == '-') {
 		neg = 1;
 		uchr++;
 		len -= 2;
 	} else if ((char)((*uchr) & 0xFF) == '+') {
 		neg = 0;
 		uchr++;
 		len -= 2;
 	}
 
 	if (base == 0) {
 		if ((char)((*uchr) & 0xFF) == 'b') {
 			base = 2;
 			uchr++;
 			len -= 2;
 		} else if ((char)((*uchr) & 0xFF) == 'o') {
 			base = 8;
 			uchr++;
 			len -= 2;
 		} else if ((char)((*uchr) & 0xFF) == 'x') {
 			base = 16;
 			uchr++;
 			len -= 2;
 		} else
 			base = 10;
 	}
 
 	/* Re-emit the sign in front of the digits for strtoul(). */
 	astr = abuf;
 	if (neg) {
 		strcpy(astr, "-");
 		astr++;
 	}
 
 	/*
 	 * NOTE(review): 'len' comes straight from us_len and is not
 	 * checked against sizeof(abuf) here; unless the helper bounds
 	 * its output, a long string could overrun abuf -- confirm.
 	 */
 	ntoskrnl_unicode_to_ascii(uchr, astr, len);
 	*val = strtoul(abuf, NULL, base);
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Release the buffer of a unicode string with ExFreePool() and clear
  * the buffer pointer.  Does nothing if the buffer is already NULL.
  */
 void
 RtlFreeUnicodeString(unicode_string *ustr)
 {
 	if (ustr->us_buf != NULL) {
 		ExFreePool(ustr->us_buf);
 		ustr->us_buf = NULL;
 	}
 }
 
 /*
  * Release the buffer of an ANSI string with ExFreePool() and clear
  * the buffer pointer.  Does nothing if the buffer is already NULL.
  */
 void
 RtlFreeAnsiString(ansi_string *astr)
 {
 	if (astr->as_buf != NULL) {
 		ExFreePool(astr->as_buf);
 		astr->as_buf = NULL;
 	}
 }
 
 /*
  * Decimal string to int, implemented on top of strtol().
  */
 static int
 atoi(const char *str)
 {
 	long			parsed;
 
 	parsed = strtol(str, (char **)NULL, 10);
 
 	return ((int)parsed);
 }
 
 /*
  * Decimal string to long, implemented on top of strtol().
  */
 static long
 atol(const char *str)
 {
 	long			parsed;
 
 	parsed = strtol(str, (char **)NULL, 10);
 
 	return (parsed);
 }
 
 static int
 rand(void)
 {
 	struct timeval		tv;
 
 	microtime(&tv);
 	srandom(tv.tv_usec);
 	return ((int)random());
 }
 
 /*
  * Seed the generator by passing 'seed' to srandom().
  */
 static void
 srand(unsigned int seed)
 {
 	srandom(seed);
 }
 
 /*
  * Report TRUE only for the exact version WDM_MAJOR.WDM_MINOR_WINXP;
  * any other major/minor pair yields FALSE.
  */
 static uint8_t
 IoIsWdmVersionAvailable(uint8_t major, uint8_t minor)
 {
 	return ((major == WDM_MAJOR && minor == WDM_MINOR_WINXP) ?
 	    TRUE : FALSE);
 }
 
 /*
  * Stub: reports STATUS_SUCCESS without touching any argument.
  */
 static ndis_status
 IoGetDeviceObjectPointer(unicode_string *name, uint32_t reqaccess,
 	void *fileobj, device_object *devobj)
 {
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Look up a property of a device object.  Only
  * DEVPROP_DRIVER_KEYNAME is supported: a pointer to the driver name
  * buffer is stored through 'prop' and the name's length through
  * 'reslen'.  Any other property yields STATUS_INVALID_PARAMETER_2.
  * 'buflen' is not consulted.
  */
 static ndis_status
 IoGetDeviceProperty(device_object *devobj, uint32_t regprop, uint32_t buflen,
 	void *prop, uint32_t *reslen)
 {
 	driver_object		*drv;
 	uint16_t		**name;
 
 	drv = devobj->do_drvobj;
 
 	if (regprop != DEVPROP_DRIVER_KEYNAME)
 		return (STATUS_INVALID_PARAMETER_2);
 
 	name = prop;
 	*name = drv->dro_drivername.us_buf;
 	*reslen = drv->dro_drivername.us_len;
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Initialize a mutant object: empty wait list, signal state 1, no
  * owner thread, not abandoned.  'level' is not consulted.
  */
 static void
 KeInitializeMutex(kmutant *kmutex, uint32_t level)
 {
 	InitializeListHead((&kmutex->km_header.dh_waitlisthead));
 
 	kmutex->km_abandoned = FALSE;
 	kmutex->km_apcdisable = 1;
 	kmutex->km_ownerthread = NULL;
 
 	kmutex->km_header.dh_sigstate = 1;
 	kmutex->km_header.dh_type = DISP_TYPE_MUTANT;
 	kmutex->km_header.dh_size = sizeof(kmutant) / sizeof(uint32_t);
 }
 
 /*
  * Release one level of ownership of a mutant.  Only the owning
  * thread may do so; anyone else gets STATUS_MUTANT_NOT_OWNED.  When
  * the signal state climbs back to 1 the owner is cleared and waiters
  * are tested.  On success the previous signal state is returned.
  * 'kwait' is not consulted.
  */
 static uint32_t
 KeReleaseMutex(kmutant *kmutex, uint8_t kwait)
 {
 	uint32_t		result;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 	result = kmutex->km_header.dh_sigstate;
 
 	if (kmutex->km_ownerthread == curthread) {
 		kmutex->km_header.dh_sigstate++;
 		kmutex->km_abandoned = FALSE;
 
 		if (kmutex->km_header.dh_sigstate == 1) {
 			kmutex->km_ownerthread = NULL;
 			ntoskrnl_waittest(&kmutex->km_header,
 			    IO_NO_INCREMENT);
 		}
 	} else
 		result = STATUS_MUTANT_NOT_OWNED;
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (result);
 }
 
 /*
  * Return the mutant's current signal state.
  */
 static uint32_t
 KeReadStateMutex(kmutant *kmutex)
 {
 	return (kmutex->km_header.dh_sigstate);
 }
 
 /*
  * Initialize an event object with an empty wait list and the given
  * initial signal state.  EVENT_TYPE_NOTIFY creates a notification
  * event; any other type creates a synchronization event.
  */
 void
 KeInitializeEvent(nt_kevent *kevent, uint32_t type, uint8_t state)
 {
 	InitializeListHead((&kevent->k_header.dh_waitlisthead));
 	kevent->k_header.dh_sigstate = state;
 	kevent->k_header.dh_type = (type == EVENT_TYPE_NOTIFY) ?
 	    DISP_TYPE_NOTIFICATION_EVENT : DISP_TYPE_SYNCHRONIZATION_EVENT;
 	kevent->k_header.dh_size = sizeof(nt_kevent) / sizeof(uint32_t);
 }
 
 /*
  * Put the event into the non-signalled state under the dispatcher
  * lock and return the signal state it had before.
  */
 uint32_t
 KeResetEvent(nt_kevent *kevent)
 {
 	uint32_t		oldstate;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 	oldstate = kevent->k_header.dh_sigstate;
 	kevent->k_header.dh_sigstate = FALSE;
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (oldstate);
 }
 
 /*
  * Signal an event and return the signal state it had before the
  * call. All work is done under the dispatcher lock. 'increment' is
  * used to adjust the priority handed to the wake-up; 'kwait' is not
  * consulted.
  */
 uint32_t
 KeSetEvent(nt_kevent *kevent, uint32_t increment, uint8_t kwait)
 {
 	uint32_t		prevstate;
 	wait_block		*w;
 	nt_dispatch_header	*dh;
 	struct thread		*td;
 	wb_ext			*we;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 	prevstate = kevent->k_header.dh_sigstate;
 	dh = &kevent->k_header;
 
 	if (IsListEmpty(&dh->dh_waitlisthead))
 		/*
 		 * If there's nobody in the waitlist, just set
 		 * the state to signalled.
 		 */
 		dh->dh_sigstate = 1;
 	else {
 		/*
 		 * Get the first waiter. If this is a synchronization
 		 * event, just wake up that one thread (don't bother
 		 * setting the state to signalled since we're supposed
 		 * to automatically clear synchronization events anyway).
 		 *
 		 * If it's a notification event, or the first
 		 * waiter is doing a WAITTYPE_ALL wait, go through
 		 * the full wait satisfaction process.
 		 */
 		w = CONTAINING_RECORD(dh->dh_waitlisthead.nle_flink,
 		    wait_block, wb_waitlist);
 		we = w->wb_ext;
 		td = we->we_td;
 		if (kevent->k_header.dh_type == DISP_TYPE_NOTIFICATION_EVENT ||
 		    w->wb_waittype == WAITTYPE_ALL) {
 			/* Only a 0 -> 1 transition triggers the wait test. */
 			if (prevstate == 0) {
 				dh->dh_sigstate = 1;
 				ntoskrnl_waittest(dh, increment);
 			}
 		} else {
 			w->wb_awakened |= TRUE;
 			/*
 			 * The priority passed on is the waiter's old
 			 * priority minus four times the increment,
 			 * clamped so it never drops to PRI_MIN_KERN
 			 * or below.
 			 */
 			cv_broadcastpri(&we->we_cv,
 			    (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ?
 			    w->wb_oldpri - (increment * 4) : PRI_MIN_KERN);
 		}
 	}
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (prevstate);
 }
 
 /*
  * Put the event into the non-signalled state.  No lock is taken and
  * the previous state is not reported.
  */
 void
 KeClearEvent(nt_kevent *kevent)
 {
 	kevent->k_header.dh_sigstate = FALSE;
 }
 
 /*
  * Return the event's current signal state.
  */
 uint32_t
 KeReadStateEvent(nt_kevent *kevent)
 {
 	return (kevent->k_header.dh_sigstate);
 }
 
 /*
  * The object manager in Windows is responsible for managing
  * references and access to various types of objects, including
  * device_objects, events, threads, timers and so on. However,
  * there's a difference in the way objects are handled in user
  * mode versus kernel mode.
  *
  * In user mode (i.e. Win32 applications), all objects are
  * managed by the object manager. For example, when you create
  * a timer or event object, you actually end up with an 
  * object_header (for the object manager's bookkeeping
  * purposes) and an object body (which contains the actual object
  * structure, e.g. ktimer, kevent, etc...). This allows Windows
  * to manage resource quotas and to enforce access restrictions
  * on basically every kind of system object handled by the kernel.
  *
  * However, in kernel mode, you only end up using the object
  * manager some of the time. For example, in a driver, you create
  * a timer object by simply allocating the memory for a ktimer
  * structure and initializing it with KeInitializeTimer(). Hence,
  * the timer has no object_header and no reference counting or
  * security/resource checks are done on it. The assumption in
  * this case is that if you're running in kernel mode, you know
  * what you're doing, and you're already at an elevated privilege
  * anyway.
  *
  * There are some exceptions to this. The two most important ones
  * for our purposes are device_objects and threads. We need to use
  * the object manager to do reference counting on device_objects,
  * and for threads, you can only get a pointer to a thread's
  * dispatch header by using ObReferenceObjectByHandle() on the
  * handle returned by PsCreateSystemThread().
  */
 
 /*
  * Create a reference object for 'handle'.  A zeroed nt_objref is
  * allocated, set up as a non-signalled DISP_TYPE_THREAD dispatch
  * header that remembers the handle, appended to ntoskrnl_reflist and
  * returned through '*object'.  Returns
  * STATUS_INSUFFICIENT_RESOURCES if the allocation fails.  The
  * access, type, mode and handleinfo arguments are not consulted.
  */
 static ndis_status
 ObReferenceObjectByHandle(ndis_handle handle, uint32_t reqaccess, void *otype,
 	uint8_t accessmode, void **object, void **handleinfo)
 {
 	nt_objref		*ref;
 
 	ref = malloc(sizeof(nt_objref), M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (ref == NULL)
 		return (STATUS_INSUFFICIENT_RESOURCES);
 
 	InitializeListHead((&ref->no_dh.dh_waitlisthead));
 	ref->no_obj = handle;
 	ref->no_dh.dh_type = DISP_TYPE_THREAD;
 	ref->no_dh.dh_sigstate = 0;
 	ref->no_dh.dh_size = (uint8_t)(sizeof(struct thread) /
 	    sizeof(uint32_t));
 
 	TAILQ_INSERT_TAIL(&ntoskrnl_reflist, ref, link);
 	*object = ref;
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Drop a reference object: unlink it from ntoskrnl_reflist and free
  * it.
  */
 static void
 ObfDereferenceObject(void *object)
 {
 	nt_objref		*ref = object;
 
 	TAILQ_REMOVE(&ntoskrnl_reflist, ref, link);
 	free(ref, M_DEVBUF);
 }
 
 /*
  * Stub: reports STATUS_SUCCESS without touching the handle.
  */
 static uint32_t
 ZwClose(ndis_handle handle)
 {
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Stub: always reports STATUS_NOT_FOUND; no argument is used.
  */
 static uint32_t
 WmiQueryTraceInformation(uint32_t traceclass, void *traceinfo,
 	uint32_t infolen, uint32_t reqlen, void *buf)
 {
 	return (STATUS_NOT_FOUND);
 }
 
 /*
  * Stub: accepts and discards a trace message, reporting
  * STATUS_SUCCESS.
  */
 static uint32_t
 WmiTraceMessage(uint64_t loghandle, uint32_t messageflags,
 	void *guid, uint16_t messagenum, ...)
 {
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Stub: reports STATUS_SUCCESS without using either argument.
  */
 static uint32_t
 IoWMIRegistrationControl(device_object *dobj, uint32_t action)
 {
 	return (STATUS_SUCCESS);
 }
 
 /*
  * This is here just in case the thread returns without calling
  * PsTerminateSystemThread().
  */
 static void
 ntoskrnl_thrfunc(arg)
 	void			*arg;
 {
 	thread_context		*thrctx;
 	uint32_t (*tfunc)(void *);
 	void			*tctx;
 	uint32_t		rval;
 
 	thrctx = arg;
 	tfunc = thrctx->tc_thrfunc;
 	tctx = thrctx->tc_thrctx;
 	free(thrctx, M_TEMP);
 
 	rval = MSCALL1(tfunc, tctx);
 
 	PsTerminateSystemThread(rval);
 	return; /* notreached */
 }
 
 /*
  * Start a kernel process that runs 'thrfunc' with argument 'thrctx'
  * by way of ntoskrnl_thrfunc().  On success the new process is
  * stored through 'handle' and the ntoskrnl_kth counter is bumped.
  * Returns STATUS_INSUFFICIENT_RESOURCES if the context cannot be
  * allocated or the process cannot be created.  The access, object
  * attribute, parent handle and client id arguments are not
  * consulted.
  */
 static ndis_status
 PsCreateSystemThread(ndis_handle *handle, uint32_t reqaccess, void *objattrs,
 	ndis_handle phandle, void *clientid, void *thrfunc, void *thrctx)
 {
 	thread_context		*ctx;
 	struct proc		*newproc;
 	int			rc;
 
 	ctx = malloc(sizeof(thread_context), M_TEMP, M_NOWAIT);
 	if (ctx == NULL)
 		return (STATUS_INSUFFICIENT_RESOURCES);
 
 	ctx->tc_thrctx = thrctx;
 	ctx->tc_thrfunc = thrfunc;
 
 	rc = kproc_create(ntoskrnl_thrfunc, ctx, &newproc,
 	    RFHIGHPID, NDIS_KSTACK_PAGES, "Windows Kthread %d", ntoskrnl_kth);
 	if (rc) {
 		free(ctx, M_TEMP);
 		return (STATUS_INSUFFICIENT_RESOURCES);
 	}
 
 	*handle = newproc;
 	ntoskrnl_kth++;
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * In Windows, the exit of a thread is an event that you're allowed
  * to wait on, assuming you've obtained a reference to the thread using
  * ObReferenceObjectByHandle(). Unfortunately, the only way we can
  * simulate this behavior is to register each thread we create in a
  * reference list, and if someone holds a reference to us, we poke
  * them.
  */
 static ndis_status
 PsTerminateSystemThread(ndis_status status)
 {
 	struct nt_objref	*ref;
 
 	/*
 	 * Signal the first reference object registered for the
 	 * current process, if there is one, and test its waiters.
 	 */
 	mtx_lock(&ntoskrnl_dispatchlock);
 	TAILQ_FOREACH(ref, &ntoskrnl_reflist, link) {
 		if (ref->no_obj == curthread->td_proc) {
 			ref->no_dh.dh_sigstate = 1;
 			ntoskrnl_waittest(&ref->no_dh, IO_NO_INCREMENT);
 			break;
 		}
 	}
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	ntoskrnl_kth--;
 
 	kproc_exit(0);
 	return (0);	/* notreached */
 }
 
 /*
  * printf-style debug output for drivers.  The message is printed
  * only when the kernel is running with bootverbose set; otherwise
  * it is discarded.  Always returns STATUS_SUCCESS.
  */
 static uint32_t
 DbgPrint(char *fmt, ...)
 {
 	va_list			ap;
 
 	if (bootverbose) {
 		va_start(ap, fmt);
 		vprintf(fmt, ap);
 		/* Every va_start() must be paired with a va_end(). */
 		va_end(ap);
 	}
 
 	return (STATUS_SUCCESS);
 }
 
 /*
  * Drop into the kernel debugger via kdb_enter().
  */
 static void
 DbgBreakPoint(void)
 {
 	kdb_enter(KDB_WHY_NDIS, "DbgBreakPoint(): breakpoint");
 }
 
 /*
  * Bring the system down with a panic that reports the stop code.
  * The four parameters are not included in the message.
  */
 static void
 KeBugCheckEx(uint32_t code, u_long param1, u_long param2, u_long param3,
 	u_long param4)
 {
 	panic("KeBugCheckEx: STOP 0x%X", code);
 }
 
 /*
  * Callout handler invoked when a ktimer expires; 'arg' is the
  * timer. Under the dispatcher lock it returns the timer's callout
  * to the free pool, marks the timer signalled and tests its waiters,
  * and re-arms the timer if it is periodic. After the lock has been
  * dropped, the timer's DPC (if any) is queued.
  */
 static void
 ntoskrnl_timercall(arg)
 	void			*arg;
 {
 	ktimer			*timer;
 	struct timeval		tv;
 	kdpc			*dpc;
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	timer = arg;
 
 #ifdef NTOSKRNL_DEBUG_TIMERS
 	ntoskrnl_timer_fires++;
 #endif
 	/* Hand the callout slot back before anything else. */
 	ntoskrnl_remove_timer(timer);
 
 	/*
 	 * This should never happen, but complain
 	 * if it does.
 	 */
 
 	if (timer->k_header.dh_inserted == FALSE) {
 		mtx_unlock(&ntoskrnl_dispatchlock);
 		printf("NTOS: timer %p fired even though "
 		    "it was canceled\n", timer);
 		return;
 	}
 
 	/* Mark the timer as no longer being on the timer queue. */
 
 	timer->k_header.dh_inserted = FALSE;
 
 	/* Now signal the object and satisfy any waits on it. */
 
 	timer->k_header.dh_sigstate = 1;
 	ntoskrnl_waittest(&timer->k_header, IO_NO_INCREMENT);
 
 	/*
 	 * If this is a periodic timer, re-arm it
 	 * so it will fire again. We do this before
 	 * calling any deferred procedure calls because
 	 * it's possible the DPC might cancel the timer,
 	 * in which case it would be wrong for us to
 	 * re-arm it again afterwards.
 	 */
 
 	if (timer->k_period) {
 		/* k_period is scaled by 1000 to get microseconds. */
 		tv.tv_sec = 0;
 		tv.tv_usec = timer->k_period * 1000;
 		timer->k_header.dh_inserted = TRUE;
 		ntoskrnl_insert_timer(timer, tvtohz(&tv));
 #ifdef NTOSKRNL_DEBUG_TIMERS
 		ntoskrnl_timer_reloads++;
 #endif
 	}
 
 	/* Sample the DPC pointer while the lock is still held. */
 	dpc = timer->k_dpc;
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	/* If there's a DPC associated with the timer, queue it up. */
 
 	if (dpc != NULL)
 		KeInsertQueueDpc(dpc, NULL, NULL);
 }
 
 #ifdef NTOSKRNL_DEBUG_TIMERS
 /*
  * Sysctl handler, compiled only with NTOSKRNL_DEBUG_TIMERS: prints the
  * timer statistics through ntoskrnl_show_timers() and then services
  * the request with a local integer that is always 0.
  */
 static int
 sysctl_show_timers(SYSCTL_HANDLER_ARGS)
 {
 	int			ret;
 
 	ret = 0;
 	ntoskrnl_show_timers();
 	return (sysctl_handle_int(oidp, &ret, 0, req));
 }
 
 /*
  * Print how many callout entries are currently on the
  * ntoskrnl_calllist free list, followed by the timer event counters
  * (sets, reloads, cancels, fires).
  */
 static void
 ntoskrnl_show_timers(void)
 {
 	list_entry		*entry;
 	int			avail;
 
 	avail = 0;
 
 	/* Walk the free list under its spin lock and count the entries. */
 	mtx_lock_spin(&ntoskrnl_calllock);
 	for (entry = ntoskrnl_calllist.nle_flink;
 	    entry != &ntoskrnl_calllist; entry = entry->nle_flink)
 		avail++;
 	mtx_unlock_spin(&ntoskrnl_calllock);
 
 	printf("\n");
 	printf("%d timers available (out of %d)\n", avail, NTOSKRNL_TIMEOUTS);
 	printf("timer sets: %qu\n", ntoskrnl_timer_sets);
 	printf("timer reloads: %qu\n", ntoskrnl_timer_reloads);
 	printf("timer cancels: %qu\n", ntoskrnl_timer_cancels);
 	printf("timer fires: %qu\n", ntoskrnl_timer_fires);
 	printf("\n");
 }
 #endif
 
 /*
  * Arm a callout for 'timer' so that ntoskrnl_timercall() is invoked
  * with the timer as its argument after 'ticks' clock ticks. The callout
  * is taken from the ntoskrnl_calllist free list; finding that list
  * empty is fatal.
  *
  * Must be called with dispatcher lock held.
  */
 
 static void
 ntoskrnl_insert_timer(ktimer *timer, int ticks)
 {
 	callout_entry		*ce;
 	list_entry		*link;
 
 	/* Take a callout entry off the free list. */
 	mtx_lock_spin(&ntoskrnl_calllock);
 	if (IsListEmpty(&ntoskrnl_calllist)) {
 		mtx_unlock_spin(&ntoskrnl_calllock);
 #ifdef NTOSKRNL_DEBUG_TIMERS
 		ntoskrnl_show_timers();
 #endif
 		panic("out of timers!");
 	}
 	link = RemoveHeadList(&ntoskrnl_calllist);
 	mtx_unlock_spin(&ntoskrnl_calllock);
 
 	ce = CONTAINING_RECORD(link, callout_entry, ce_list);
 
 	/* Record which callout backs this timer, then start it. */
 	timer->k_callout = &ce->ce_callout;
 
 	callout_init(timer->k_callout, CALLOUT_MPSAFE);
 	callout_reset(timer->k_callout, ticks, ntoskrnl_timercall, timer);
 }
 
 /*
  * Stop the callout backing 'timer' and put its callout entry back on
  * the ntoskrnl_calllist free list.
  *
  * timer - a ktimer whose k_callout was set by ntoskrnl_insert_timer().
  */
 static void
 ntoskrnl_remove_timer(ktimer *timer)
 {
 	callout_entry		*e;
 
 	/*
 	 * Recover the enclosing entry from the embedded callout with
 	 * CONTAINING_RECORD, as ntoskrnl_insert_timer() does for the
 	 * list linkage, instead of assuming that ce_callout is the
 	 * first member of callout_entry.
 	 */
 	e = CONTAINING_RECORD(timer->k_callout, callout_entry, ce_callout);
 	callout_stop(timer->k_callout);
 
 	mtx_lock_spin(&ntoskrnl_calllock);
 	InsertHeadList((&ntoskrnl_calllist), (&e->ce_list));
 	mtx_unlock_spin(&ntoskrnl_calllock);
 }
 
 /*
  * Initialize 'timer' as a notification-type timer by delegating to
  * KeInitializeTimerEx(). A NULL timer is silently ignored.
  */
 void
 KeInitializeTimer(ktimer *timer)
 {
 	if (timer != NULL)
 		KeInitializeTimerEx(timer, EVENT_TYPE_NOTIFY);
 }
 
 /*
  * Reset 'timer' to a pristine, unarmed, non-signalled state.
  *
  * timer - timer object to initialize; NULL is silently ignored
  * type  - EVENT_TYPE_NOTIFY selects a notification timer, any other
  *	  value a synchronization timer
  */
 void
 KeInitializeTimerEx(ktimer *timer, uint32_t type)
 {
 	if (timer == NULL)
 		return;
 
 	/* Start from an all-zero object, then fill in the header. */
 	bzero((char *)timer, sizeof(ktimer));
 	InitializeListHead((&timer->k_header.dh_waitlisthead));
 
 	/* The header records the object size in 32-bit words. */
 	timer->k_header.dh_size = sizeof(ktimer) / sizeof(uint32_t);
 	timer->k_header.dh_type = (type == EVENT_TYPE_NOTIFY) ?
 	    DISP_TYPE_NOTIFICATION_TIMER : DISP_TYPE_SYNCHRONIZATION_TIMER;
 	timer->k_header.dh_inserted = FALSE;
 	timer->k_header.dh_sigstate = FALSE;
 }
 
 /*
  * DPC subsystem. A Windows Deferred Procedure Call has the following
  * properties:
  * - It runs at DISPATCH_LEVEL.
  * - It can have one of 3 importance values that control when it
  *   runs relative to other DPCs in the queue.
  * - On SMP systems, it can be set to run on a specific processor.
  * In order to satisfy the last property, we create a DPC thread for
  * each CPU in the system and bind it to that CPU. Each thread
  * maintains three queues with different importance levels, which
  * will be processed in order from lowest to highest.
  *
  * In Windows, interrupt handlers run as DPCs. (Not to be confused
  * with ISRs, which run in interrupt context and can preempt DPCs.)
  * ISRs are given the highest importance so that they'll take
  * precedence over timers and other things.
  */
 
 /*
  * Body of a DPC worker thread. 'arg' is the kdpc_queue this thread
  * services. After initializing the queue it loops: wait on kq_proc,
  * run every DPC found on kq_disp, then signal kq_done. When kq_exit
  * is found set after a wakeup the flag is cleared, the loop ends and
  * the thread terminates through kproc_exit().
  */
 static void
 ntoskrnl_dpc_thread(arg)
 	void			*arg;
 {
 	kdpc_queue		*kq;
 	kdpc			*d;
 	list_entry		*l;
 	uint8_t			irql;
 
 	kq = arg;
 
 	InitializeListHead(&kq->kq_disp);
 	kq->kq_td = curthread;
 	kq->kq_exit = 0;
 	kq->kq_running = FALSE;
 	KeInitializeSpinLock(&kq->kq_lock);
 	KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE);
 	KeInitializeEvent(&kq->kq_done, EVENT_TYPE_SYNC, FALSE);
 
 	/*
 	 * Elevate our priority. DPCs are used to run interrupt
 	 * handlers, and they should trigger as soon as possible
 	 * once scheduled by an ISR.
 	 */
 
 	thread_lock(curthread);
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	sched_bind(curthread, kq->kq_cpu);
 #endif
 	sched_prio(curthread, PRI_MIN_KERN);
 	thread_unlock(curthread);
 
 	while (1) {
 		/* Sleep until somebody sets the kq_proc event. */
 		KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL);
 
 		KeAcquireSpinLock(&kq->kq_lock, &irql);
 
 		/*
 		 * Shutdown request: clearing the flag is the
 		 * acknowledgement ntoskrnl_destroy_dpc_threads()
 		 * polls for.
 		 */
 		if (kq->kq_exit) {
 			kq->kq_exit = 0;
 			KeReleaseSpinLock(&kq->kq_lock, irql);
 			break;
 		}
 
 		kq->kq_running = TRUE;
 
 		while (!IsListEmpty(&kq->kq_disp)) {
 			l = RemoveHeadList((&kq->kq_disp));
 			d = CONTAINING_RECORD(l, kdpc, k_dpclistentry);
 			/*
 			 * Leave the entry self-linked; KeRemoveQueueDpc()
 			 * treats a self-linked entry as "not queued".
 			 */
 			InitializeListHead((&d->k_dpclistentry));
 			/* The queue lock is dropped around the callback. */
 			KeReleaseSpinLockFromDpcLevel(&kq->kq_lock);
 			MSCALL4(d->k_deferedfunc, d, d->k_deferredctx,
 			    d->k_sysarg1, d->k_sysarg2);
 			KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
 		}
 
 		kq->kq_running = FALSE;
 
 		KeReleaseSpinLock(&kq->kq_lock, irql);
 
 		/* Tell KeFlushQueuedDpcs() waiters this pass is complete. */
 		KeSetEvent(&kq->kq_done, IO_NO_INCREMENT, FALSE);
 	}
 
 	kproc_exit(0);
 	return; /* notreached */
 }
 
 /*
  * Ask every DPC thread to exit and wait for each one to acknowledge.
  *
  * For each queue, kq_exit is set and a DPC with no callback is queued
  * to wake the thread; ntoskrnl_dpc_thread() clears kq_exit on its way
  * out, which ends the polling loop here.
  */
 static void
 ntoskrnl_destroy_dpc_threads(void)
 {
 	kdpc_queue		*kq;
 	kdpc			dpc;
 	int			i;
 
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	for (i = 0; i < mp_ncpus; i++) {
 #else
 	for (i = 0; i < 1; i++) {
 #endif
 		/*
 		 * Index from the base of the array on every pass.
 		 * Accumulating "kq += i" would visit queues 0, 1, 3,
 		 * 6, ... and run off the end of kq_queues.
 		 */
 		kq = kq_queues + i;
 
 		kq->kq_exit = 1;
 		KeInitializeDpc(&dpc, NULL, NULL);
 		KeSetTargetProcessorDpc(&dpc, i);
 		KeInsertQueueDpc(&dpc, NULL, NULL);
 		while (kq->kq_exit)
 			tsleep(kq->kq_td->td_proc, PWAIT, "dpcw", hz/10);
 	}
 }
 
 /*
  * Link 'dpc' into the DPC list anchored at 'head'.
  *
  * Returns FALSE, leaving the list untouched, when the DPC is already
  * on the list; TRUE otherwise. Low-importance DPCs are appended at the
  * tail, all others are pushed at the head.
  */
 static uint8_t
 ntoskrnl_insert_dpc(list_entry *head, kdpc *dpc)
 {
 	list_entry		*cur;
 
 	/* Refuse to queue a DPC that is already present. */
 	for (cur = head->nle_flink; cur != head; cur = cur->nle_flink) {
 		if (CONTAINING_RECORD(cur, kdpc, k_dpclistentry) == dpc)
 			return (FALSE);
 	}
 
 	if (dpc->k_importance != KDPC_IMPORTANCE_LOW)
 		InsertHeadList((head), (&dpc->k_dpclistentry));
 	else
 		InsertTailList((head), (&dpc->k_dpclistentry));
 
 	return (TRUE);
 }
 
 /*
  * Prepare 'dpc' for use: record the callback and its context, select
  * the default target CPU and medium importance, and leave the list
  * entry self-linked (i.e. not queued). A NULL dpc is ignored.
  *
  * dpcfunc - routine to run when the DPC is dispatched
  * dpcctx  - opaque context pointer handed to that routine
  */
 void
 KeInitializeDpc(kdpc *dpc, void *dpcfunc, void *dpcctx)
 {
 
 	if (dpc != NULL) {
 		InitializeListHead((&dpc->k_dpclistentry));
 		dpc->k_importance = KDPC_IMPORTANCE_MEDIUM;
 		dpc->k_num = KDPC_CPU_DEFAULT;
 		dpc->k_deferredctx = dpcctx;
 		dpc->k_deferedfunc = dpcfunc;
 	}
 }
 
 /*
  * Queue 'dpc' for execution by a DPC thread.
  *
  * sysarg1/sysarg2 are stored in the DPC, but only if it was actually
  * queued. Returns FALSE when dpc is NULL or is already on the target
  * queue; otherwise returns TRUE after setting the queue's kq_proc
  * event to wake the worker.
  */
 uint8_t
 KeInsertQueueDpc(dpc, sysarg1, sysarg2)
 	kdpc			*dpc;
 	void			*sysarg1;
 	void			*sysarg2;
 {
 	kdpc_queue		*kq;
 	uint8_t			r;
 	uint8_t			irql;
 
 	if (dpc == NULL)
 		return (FALSE);
 
 	kq = kq_queues;
 
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	KeRaiseIrql(DISPATCH_LEVEL, &irql);
 
 	/*
 	 * By default, the DPC is queued to run on the same CPU
 	 * that scheduled it.
 	 */
 
 	if (dpc->k_num == KDPC_CPU_DEFAULT)
 		kq += curthread->td_oncpu;
 	else
 		kq += dpc->k_num;
 	KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
 #else
 	/* Without per-CPU queues everything goes to the first queue. */
 	KeAcquireSpinLock(&kq->kq_lock, &irql);
 #endif
 
 	r = ntoskrnl_insert_dpc(&kq->kq_disp, dpc);
 	/* Leave the arguments of an already-queued DPC untouched. */
 	if (r == TRUE) {
 		dpc->k_sysarg1 = sysarg1;
 		dpc->k_sysarg2 = sysarg2;
 	}
 	KeReleaseSpinLock(&kq->kq_lock, irql);
 
 	if (r == FALSE)
 		return (r);
 
 	/* Wake the DPC thread that owns this queue. */
 	KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
 
 	return (r);
 }
 
 /*
  * Take 'dpc' off its DPC queue if it is currently queued.
  *
  * Returns TRUE when the DPC was unlinked, FALSE when dpc is NULL or
  * was not queued.
  *
  * NOTE(review): with NTOSKRNL_MULTIPLE_DPCS the queue is selected as
  * kq_queues + dpc->k_num, whereas KeInsertQueueDpc() substitutes the
  * current CPU when k_num is KDPC_CPU_DEFAULT. No such substitution is
  * made here -- confirm the index cannot be out of range for a DPC
  * that was queued with the default target.
  */
 uint8_t
 KeRemoveQueueDpc(dpc)
 	kdpc			*dpc;
 {
 	kdpc_queue		*kq;
 	uint8_t			irql;
 
 	if (dpc == NULL)
 		return (FALSE);
 
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	KeRaiseIrql(DISPATCH_LEVEL, &irql);
 
 	kq = kq_queues + dpc->k_num;
 
 	KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
 #else
 	kq = kq_queues;
 	KeAcquireSpinLock(&kq->kq_lock, &irql);
 #endif
 
 	/* A self-linked list entry means the DPC is not on any queue. */
 	if (dpc->k_dpclistentry.nle_flink == &dpc->k_dpclistentry) {
 		KeReleaseSpinLockFromDpcLevel(&kq->kq_lock);
 		KeLowerIrql(irql);
 		return (FALSE);
 	}
 
 	/* Unlink it and leave the entry self-linked again. */
 	RemoveEntryList((&dpc->k_dpclistentry));
 	InitializeListHead((&dpc->k_dpclistentry));
 
 	KeReleaseSpinLock(&kq->kq_lock, irql);
 
 	return (TRUE);
 }
 
 /*
  * Set the importance of 'dpc' to 'imp'. Values other than the three
  * KDPC_IMPORTANCE_* levels are ignored and leave the DPC unchanged.
  */
 void
 KeSetImportanceDpc(kdpc *dpc, uint32_t imp)
 {
 	if (imp == KDPC_IMPORTANCE_LOW ||
 	    imp == KDPC_IMPORTANCE_MEDIUM ||
 	    imp == KDPC_IMPORTANCE_HIGH)
 		dpc->k_importance = (uint8_t)imp;
 }
 
 /*
  * Select the CPU whose DPC queue 'dpc' should be placed on.
  *
  * The value is later used as an index into kq_queues, whose per-CPU
  * loops in this file run over 0 .. mp_ncpus - 1, so cpu == mp_ncpus
  * is already out of range. Out-of-range requests are ignored and
  * leave the DPC's current target unchanged.
  */
 void
 KeSetTargetProcessorDpc(kdpc *dpc, uint8_t cpu)
 {
 	if (cpu >= mp_ncpus)
 		return;
 
 	dpc->k_num = cpu;
 }
 
 /*
  * Wait for the DPC queues to drain: for each queue in turn, set its
  * kq_proc event so the worker makes a pass over the queue, then wait
  * for the worker to set kq_done. Only the first queue is visited
  * unless NTOSKRNL_MULTIPLE_DPCS is defined.
  */
 void
 KeFlushQueuedDpcs(void)
 {
 	kdpc_queue		*kq;
 	int			i;
 
 	/*
 	 * Poke each DPC queue and wait
 	 * for them to drain.
 	 */
 
 #ifdef NTOSKRNL_MULTIPLE_DPCS
 	for (i = 0; i < mp_ncpus; i++) {
 #else
 	for (i = 0; i < 1; i++) {
 #endif
 		kq = kq_queues + i;
 		KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
 		KeWaitForSingleObject(&kq->kq_done, 0, 0, TRUE, NULL);
 	}
 }
 
 /*
  * Return the td_oncpu value of the calling thread, i.e. the CPU it
  * is running on, as a 32-bit unsigned number.
  */
 uint32_t
 KeGetCurrentProcessorNumber(void)
 {
 	return ((uint32_t)curthread->td_oncpu);
 }
 
 /*
  * Arm (or re-arm) a timer.
  *
  * timer   - timer object; NULL makes the call return FALSE
  * duetime - expiry in 100-nanosecond units: a negative value is an
  *	    interval relative to now, a non-negative value is an
  *	    absolute time compared against ntoskrnl_time(); an
  *	    absolute time already in the past expires immediately
  * period  - stored in k_period; non-zero makes ntoskrnl_timercall()
  *	    re-arm the timer each time it fires
  * dpc     - optional DPC queued when the timer fires, may be NULL
  *
  * Returns TRUE if the timer was already armed (it is cancelled and
  * re-armed with the new settings), FALSE otherwise.
  */
 uint8_t
 KeSetTimerEx(ktimer *timer, int64_t duetime, uint32_t period, kdpc *dpc)
 {
 	struct timeval		tv;
 	uint64_t		curtime;
 	uint64_t		delta;
 	uint8_t			pending;
 
 	if (timer == NULL)
 		return (FALSE);
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	/* An armed timer is cancelled first so it can be re-armed. */
 	if (timer->k_header.dh_inserted == TRUE) {
 		ntoskrnl_remove_timer(timer);
 #ifdef NTOSKRNL_DEBUG_TIMERS
 		ntoskrnl_timer_cancels++;
 #endif
 		timer->k_header.dh_inserted = FALSE;
 		pending = TRUE;
 	} else
 		pending = FALSE;
 
 	timer->k_duetime = duetime;
 	timer->k_period = period;
 	timer->k_header.dh_sigstate = FALSE;
 	timer->k_dpc = dpc;
 
 	/*
 	 * Work out how far in the future the timer expires, in
 	 * 100ns units. The negation is done on the unsigned value so
 	 * that the most negative duetime does not overflow.
 	 */
 	if (duetime < 0) {
 		delta = -(uint64_t)duetime;
 	} else {
 		ntoskrnl_time(&curtime);
 		if ((uint64_t)duetime < curtime)
 			delta = 0;
 		else
 			delta = (uint64_t)duetime - curtime;
 	}
 
 	/*
 	 * Split into seconds and microseconds. Taking the remainder
 	 * in 64 bits avoids computing tv_sec * 1000000 at the width
 	 * of time_t, which can overflow where time_t is 32 bits wide.
 	 */
 	tv.tv_sec = delta / 10000000;
 	tv.tv_usec = (delta % 10000000) / 10;
 
 	timer->k_header.dh_inserted = TRUE;
 	ntoskrnl_insert_timer(timer, tvtohz(&tv));
 #ifdef NTOSKRNL_DEBUG_TIMERS
 	ntoskrnl_timer_sets++;
 #endif
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (pending);
 }
 
 /*
  * Arm a one-shot timer: identical to KeSetTimerEx() with a period of
  * 0. Returns whatever KeSetTimerEx() returns.
  */
 uint8_t
 KeSetTimer(timer, duetime, dpc)
 	ktimer			*timer;
 	int64_t			duetime;
 	kdpc			*dpc;
 {
 	return (KeSetTimerEx(timer, duetime, 0, dpc));
 }
 
 /*
  * Cancel a timer. Returns the timer's dh_inserted state as it was on
  * entry (i.e. whether it was armed), or FALSE for a NULL timer.
  *
  * The Windows DDK documentation seems to say that cancelling
  * a timer that has a DPC will result in the DPC also being
  * cancelled, but this isn't really the case: only the timer itself
  * is disarmed here.
  */
 
 uint8_t
 KeCancelTimer(ktimer *timer)
 {
 	uint8_t			was_pending;
 
 	if (timer == NULL)
 		return (FALSE);
 
 	mtx_lock(&ntoskrnl_dispatchlock);
 
 	was_pending = timer->k_header.dh_inserted;
 
 	/* Only an armed timer owns a callout that must be released. */
 	if (timer->k_header.dh_inserted == TRUE) {
 		timer->k_header.dh_inserted = FALSE;
 		ntoskrnl_remove_timer(timer);
 #ifdef NTOSKRNL_DEBUG_TIMERS
 		ntoskrnl_timer_cancels++;
 #endif
 	}
 
 	mtx_unlock(&ntoskrnl_dispatchlock);
 
 	return (was_pending);
 }
 
 /*
  * Return the timer's current signal state (dh_sigstate). The field is
  * read without taking the dispatcher lock, and 'timer' is not checked
  * for NULL.
  */
 uint8_t
 KeReadStateTimer(timer)
 	ktimer			*timer;
 {
 	return (timer->k_header.dh_sigstate);
 }
 
 /*
  * Block the calling thread for a while: arms a one-shot timer with
  * *interval as its due time (see KeSetTimerEx() for the encoding) and
  * waits on it. Only wait_mode 0 is supported; any other value panics.
  * Always returns STATUS_SUCCESS.
  *
  * NOTE(review): 'timer' lives in this stack frame and the result of
  * KeWaitForSingleObject() is ignored. If that wait can return before
  * the timer has fired, the armed timer would outlive the frame --
  * confirm against KeWaitForSingleObject().
  */
 static int32_t
 KeDelayExecutionThread(uint8_t wait_mode, uint8_t alertable, int64_t *interval)
 {
 	ktimer                  timer;
 
 	if (wait_mode != 0)
 		panic("invalid wait_mode %d", wait_mode);
 
 	KeInitializeTimer(&timer);
 	KeSetTimer(&timer, *interval, NULL);
 	KeWaitForSingleObject(&timer, 0, 0, alertable, NULL);
 
 	return STATUS_SUCCESS;
 }
 
 static uint64_t
 KeQueryInterruptTime(void)
 {
 	int ticks;
 	struct timeval tv;
 
 	getmicrouptime(&tv);
 
 	ticks = tvtohz(&tv);
 
 	return ticks * ((10000000 + hz - 1) / hz);
 }
 
 /*
  * Return the calling thread (curthread).
  */
 static struct thread *
 KeGetCurrentThread(void)
 {
 
 	return curthread;
 }
 
 /*
  * Change the scheduling priority of thread 'td' and report its
  * previous priority level.
  *
  * pri - HIGH_PRIORITY maps to PRI_MIN_KERN, LOW_PRIORITY to
  *	 PRI_MAX_KERN and LOW_REALTIME_PRIORITY to the midpoint between
  *	 the two; any other value leaves the priority unchanged.
  *
  * Returns the previous priority expressed as one of those three
  * levels, or LOW_REALTIME_PRIORITY when td is NULL.
  */
 static int32_t
 KeSetPriorityThread(struct thread *td, int32_t pri)
 {
 	int32_t prev;
 
 	if (td == NULL)
 		return LOW_REALTIME_PRIORITY;
 
 	/* Classify the thread's current priority into a level. */
 	prev = LOW_REALTIME_PRIORITY;
 	if (td->td_priority <= PRI_MIN_KERN)
 		prev = HIGH_PRIORITY;
 	else if (td->td_priority >= PRI_MAX_KERN)
 		prev = LOW_PRIORITY;
 
 	/* Apply the requested level under the thread lock. */
 	thread_lock(td);
 	if (pri == HIGH_PRIORITY)
 		sched_prio(td, PRI_MIN_KERN);
 	if (pri == LOW_REALTIME_PRIORITY)
 		sched_prio(td, PRI_MIN_KERN + (PRI_MAX_KERN - PRI_MIN_KERN) / 2);
 	if (pri == LOW_PRIORITY)
 		sched_prio(td, PRI_MAX_KERN);
 	thread_unlock(td);
 
 	return prev;
 }
 
 /*
  * Placeholder used as the catch-all entry of ntoskrnl_functbl: it only
  * logs that it was called.
  */
 static void
 dummy()
 {
 	printf("ntoskrnl dummy called...\n");
 }
 
 
 /*
  * Import patch table for ntoskrnl: each entry names a routine that a
  * Windows driver may import and the local function standing in for it
  * (the *_MAP forms name the local function explicitly).
  *
  * NOTE(review): the numeric argument is presumably the routine's
  * argument count, with 0 used for the IMPORT_CFUNC/IMPORT_RFUNC
  * entries -- confirm against the IMPORT_* macro definitions.
  *
  * The table ends with a catch-all entry followed by an all-NULL
  * terminator.
  */
 image_patch_table ntoskrnl_functbl[] = {
 	IMPORT_SFUNC(RtlZeroMemory, 2),
+	IMPORT_SFUNC(RtlSecureZeroMemory, 2),
+	IMPORT_SFUNC(RtlFillMemory, 3),
+	IMPORT_SFUNC(RtlMoveMemory, 3),
+	IMPORT_SFUNC(RtlCharToInteger, 3),
 	IMPORT_SFUNC(RtlCopyMemory, 3),
+	IMPORT_SFUNC(RtlCopyString, 2),
 	IMPORT_SFUNC(RtlCompareMemory, 3),
 	IMPORT_SFUNC(RtlEqualUnicodeString, 3),
 	IMPORT_SFUNC(RtlCopyUnicodeString, 2),
 	IMPORT_SFUNC(RtlUnicodeStringToAnsiString, 3),
 	IMPORT_SFUNC(RtlAnsiStringToUnicodeString, 3),
 	IMPORT_SFUNC(RtlInitAnsiString, 2),
 	IMPORT_SFUNC_MAP(RtlInitString, RtlInitAnsiString, 2),
 	IMPORT_SFUNC(RtlInitUnicodeString, 2),
 	IMPORT_SFUNC(RtlFreeAnsiString, 1),
 	IMPORT_SFUNC(RtlFreeUnicodeString, 1),
 	IMPORT_SFUNC(RtlUnicodeStringToInteger, 3),
 	IMPORT_CFUNC(sprintf, 0),
 	IMPORT_CFUNC(vsprintf, 0),
 	IMPORT_CFUNC_MAP(_snprintf, snprintf, 0),
 	IMPORT_CFUNC_MAP(_vsnprintf, vsnprintf, 0),
 	IMPORT_CFUNC(DbgPrint, 0),
 	IMPORT_SFUNC(DbgBreakPoint, 0),
 	IMPORT_SFUNC(KeBugCheckEx, 5),
 	IMPORT_CFUNC(strncmp, 0),
 	IMPORT_CFUNC(strcmp, 0),
 	IMPORT_CFUNC_MAP(stricmp, strcasecmp, 0),
 	IMPORT_CFUNC(strncpy, 0),
 	IMPORT_CFUNC(strcpy, 0),
 	IMPORT_CFUNC(strlen, 0),
 	IMPORT_CFUNC_MAP(toupper, ntoskrnl_toupper, 0),
 	IMPORT_CFUNC_MAP(tolower, ntoskrnl_tolower, 0),
 	IMPORT_CFUNC_MAP(strstr, ntoskrnl_strstr, 0),
 	IMPORT_CFUNC_MAP(strncat, ntoskrnl_strncat, 0),
 	IMPORT_CFUNC_MAP(strchr, index, 0),
 	IMPORT_CFUNC_MAP(strrchr, rindex, 0),
 	IMPORT_CFUNC(memcpy, 0),
 	IMPORT_CFUNC_MAP(memmove, ntoskrnl_memmove, 0),
 	IMPORT_CFUNC_MAP(memset, ntoskrnl_memset, 0),
 	IMPORT_CFUNC_MAP(memchr, ntoskrnl_memchr, 0),
 	IMPORT_SFUNC(IoAllocateDriverObjectExtension, 4),
 	IMPORT_SFUNC(IoGetDriverObjectExtension, 2),
 	IMPORT_FFUNC(IofCallDriver, 2),
 	IMPORT_FFUNC(IofCompleteRequest, 2),
 	IMPORT_SFUNC(IoAcquireCancelSpinLock, 1),
 	IMPORT_SFUNC(IoReleaseCancelSpinLock, 1),
 	IMPORT_SFUNC(IoCancelIrp, 1),
 	IMPORT_SFUNC(IoConnectInterrupt, 11),
 	IMPORT_SFUNC(IoDisconnectInterrupt, 1),
 	IMPORT_SFUNC(IoCreateDevice, 7),
 	IMPORT_SFUNC(IoDeleteDevice, 1),
 	IMPORT_SFUNC(IoGetAttachedDevice, 1),
 	IMPORT_SFUNC(IoAttachDeviceToDeviceStack, 2),
 	IMPORT_SFUNC(IoDetachDevice, 1),
 	IMPORT_SFUNC(IoBuildSynchronousFsdRequest, 7),
 	IMPORT_SFUNC(IoBuildAsynchronousFsdRequest, 6),
 	IMPORT_SFUNC(IoBuildDeviceIoControlRequest, 9),
 	IMPORT_SFUNC(IoAllocateIrp, 2),
 	IMPORT_SFUNC(IoReuseIrp, 2),
 	IMPORT_SFUNC(IoMakeAssociatedIrp, 2),
 	IMPORT_SFUNC(IoFreeIrp, 1),
 	IMPORT_SFUNC(IoInitializeIrp, 3),
 	IMPORT_SFUNC(KeAcquireInterruptSpinLock, 1),
 	IMPORT_SFUNC(KeReleaseInterruptSpinLock, 2),
 	IMPORT_SFUNC(KeSynchronizeExecution, 3),
 	IMPORT_SFUNC(KeWaitForSingleObject, 5),
 	IMPORT_SFUNC(KeWaitForMultipleObjects, 8),
 	IMPORT_SFUNC(_allmul, 4),
 	IMPORT_SFUNC(_alldiv, 4),
 	IMPORT_SFUNC(_allrem, 4),
 	IMPORT_RFUNC(_allshr, 0),
 	IMPORT_RFUNC(_allshl, 0),
 	IMPORT_SFUNC(_aullmul, 4),
 	IMPORT_SFUNC(_aulldiv, 4),
 	IMPORT_SFUNC(_aullrem, 4),
 	IMPORT_RFUNC(_aullshr, 0),
 	IMPORT_RFUNC(_aullshl, 0),
 	IMPORT_CFUNC(atoi, 0),
 	IMPORT_CFUNC(atol, 0),
 	IMPORT_CFUNC(rand, 0),
 	IMPORT_CFUNC(srand, 0),
 	IMPORT_SFUNC(WRITE_REGISTER_USHORT, 2),
 	IMPORT_SFUNC(READ_REGISTER_USHORT, 1),
 	IMPORT_SFUNC(WRITE_REGISTER_ULONG, 2),
 	IMPORT_SFUNC(READ_REGISTER_ULONG, 1),
 	IMPORT_SFUNC(READ_REGISTER_UCHAR, 1),
 	IMPORT_SFUNC(WRITE_REGISTER_UCHAR, 2),
 	IMPORT_SFUNC(ExInitializePagedLookasideList, 7),
 	IMPORT_SFUNC(ExDeletePagedLookasideList, 1),
 	IMPORT_SFUNC(ExInitializeNPagedLookasideList, 7),
 	IMPORT_SFUNC(ExDeleteNPagedLookasideList, 1),
 	IMPORT_FFUNC(InterlockedPopEntrySList, 1),
+	IMPORT_FFUNC(InitializeSListHead, 1),
 	IMPORT_FFUNC(InterlockedPushEntrySList, 2),
 	IMPORT_SFUNC(ExQueryDepthSList, 1),
 	IMPORT_FFUNC_MAP(ExpInterlockedPopEntrySList,
 		InterlockedPopEntrySList, 1),
 	IMPORT_FFUNC_MAP(ExpInterlockedPushEntrySList,
 		InterlockedPushEntrySList, 2),
 	IMPORT_FFUNC(ExInterlockedPopEntrySList, 2),
 	IMPORT_FFUNC(ExInterlockedPushEntrySList, 3),
 	IMPORT_SFUNC(ExAllocatePoolWithTag, 3),
+	IMPORT_SFUNC(ExFreePoolWithTag, 2),
 	IMPORT_SFUNC(ExFreePool, 1),
 #ifdef __i386__
 	IMPORT_FFUNC(KefAcquireSpinLockAtDpcLevel, 1),
 	IMPORT_FFUNC(KefReleaseSpinLockFromDpcLevel,1),
 	IMPORT_FFUNC(KeAcquireSpinLockRaiseToDpc, 1),
 #else
 	/*
 	 * For AMD64, we can get away with just mapping
 	 * KeAcquireSpinLockRaiseToDpc() directly to KfAcquireSpinLock()
 	 * because the calling conventions end up being the same.
 	 * On i386, we have to be careful because KfAcquireSpinLock()
 	 * is _fastcall but KeAcquireSpinLockRaiseToDpc() isn't.
 	 */
 	IMPORT_SFUNC(KeAcquireSpinLockAtDpcLevel, 1),
 	IMPORT_SFUNC(KeReleaseSpinLockFromDpcLevel, 1),
 	IMPORT_SFUNC_MAP(KeAcquireSpinLockRaiseToDpc, KfAcquireSpinLock, 1),
 #endif
 	IMPORT_SFUNC_MAP(KeReleaseSpinLock, KfReleaseSpinLock, 1),
 	IMPORT_FFUNC(InterlockedIncrement, 1),
 	IMPORT_FFUNC(InterlockedDecrement, 1),
 	IMPORT_FFUNC(InterlockedExchange, 2),
 	IMPORT_FFUNC(ExInterlockedAddLargeStatistic, 2),
 	IMPORT_SFUNC(IoAllocateMdl, 5),
 	IMPORT_SFUNC(IoFreeMdl, 1),
 	IMPORT_SFUNC(MmAllocateContiguousMemory, 2 + 1),
 	IMPORT_SFUNC(MmAllocateContiguousMemorySpecifyCache, 5 + 3),
 	IMPORT_SFUNC(MmFreeContiguousMemory, 1),
 	IMPORT_SFUNC(MmFreeContiguousMemorySpecifyCache, 3),
 	IMPORT_SFUNC(MmSizeOfMdl, 1),
 	IMPORT_SFUNC(MmMapLockedPages, 2),
 	IMPORT_SFUNC(MmMapLockedPagesSpecifyCache, 6),
 	IMPORT_SFUNC(MmUnmapLockedPages, 2),
 	IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1),
 	IMPORT_SFUNC(MmGetPhysicalAddress, 1),
 	IMPORT_SFUNC(MmIsAddressValid, 1),
 	IMPORT_SFUNC(MmMapIoSpace, 3 + 1),
 	IMPORT_SFUNC(MmUnmapIoSpace, 2),
 	IMPORT_SFUNC(KeInitializeSpinLock, 1),
 	IMPORT_SFUNC(IoIsWdmVersionAvailable, 2),
 	IMPORT_SFUNC(IoGetDeviceObjectPointer, 4),
 	IMPORT_SFUNC(IoGetDeviceProperty, 5),
 	IMPORT_SFUNC(IoAllocateWorkItem, 1),
 	IMPORT_SFUNC(IoFreeWorkItem, 1),
 	IMPORT_SFUNC(IoQueueWorkItem, 4),
 	IMPORT_SFUNC(ExQueueWorkItem, 2),
 	IMPORT_SFUNC(ntoskrnl_workitem, 2),
 	IMPORT_SFUNC(KeInitializeMutex, 2),
 	IMPORT_SFUNC(KeReleaseMutex, 2),
 	IMPORT_SFUNC(KeReadStateMutex, 1),
 	IMPORT_SFUNC(KeInitializeEvent, 3),
 	IMPORT_SFUNC(KeSetEvent, 3),
 	IMPORT_SFUNC(KeResetEvent, 1),
 	IMPORT_SFUNC(KeClearEvent, 1),
 	IMPORT_SFUNC(KeReadStateEvent, 1),
 	IMPORT_SFUNC(KeInitializeTimer, 1),
 	IMPORT_SFUNC(KeInitializeTimerEx, 2),
 	IMPORT_SFUNC(KeSetTimer, 3),
 	IMPORT_SFUNC(KeSetTimerEx, 4),
 	IMPORT_SFUNC(KeCancelTimer, 1),
 	IMPORT_SFUNC(KeReadStateTimer, 1),
 	IMPORT_SFUNC(KeInitializeDpc, 3),
 	IMPORT_SFUNC(KeInsertQueueDpc, 3),
 	IMPORT_SFUNC(KeRemoveQueueDpc, 1),
 	IMPORT_SFUNC(KeSetImportanceDpc, 2),
 	IMPORT_SFUNC(KeSetTargetProcessorDpc, 2),
 	IMPORT_SFUNC(KeFlushQueuedDpcs, 0),
 	IMPORT_SFUNC(KeGetCurrentProcessorNumber, 1),
 	IMPORT_SFUNC(ObReferenceObjectByHandle, 6),
 	IMPORT_FFUNC(ObfDereferenceObject, 1),
 	IMPORT_SFUNC(ZwClose, 1),
 	IMPORT_SFUNC(PsCreateSystemThread, 7),
 	IMPORT_SFUNC(PsTerminateSystemThread, 1),
 	IMPORT_SFUNC(IoWMIRegistrationControl, 2),
 	IMPORT_SFUNC(WmiQueryTraceInformation, 5),
 	IMPORT_CFUNC(WmiTraceMessage, 0),
 	IMPORT_SFUNC(KeQuerySystemTime, 1),
 	IMPORT_CFUNC(KeTickCount, 0),
 	IMPORT_SFUNC(KeDelayExecutionThread, 3),
 	IMPORT_SFUNC(KeQueryInterruptTime, 0),
 	IMPORT_SFUNC(KeGetCurrentThread, 0),
 	IMPORT_SFUNC(KeSetPriorityThread, 2),
 
 	/*
 	 * This last entry is a catch-all for any function we haven't
 	 * implemented yet. The PE import list patching routine will
 	 * use it for any function that doesn't have an explicit match
 	 * in this table.
 	 */
 
 	{ NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
 
 	/* End of list. */
 
 	{ NULL, NULL, NULL }
 };
Index: projects/binutils-2.17/sys/contrib/dev/acpica
===================================================================
--- projects/binutils-2.17/sys/contrib/dev/acpica	(revision 215829)
+++ projects/binutils-2.17/sys/contrib/dev/acpica	(revision 215830)

Property changes on: projects/binutils-2.17/sys/contrib/dev/acpica
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/contrib/dev/acpica:r215709-215824
Index: projects/binutils-2.17/sys/contrib/pf
===================================================================
--- projects/binutils-2.17/sys/contrib/pf	(revision 215829)
+++ projects/binutils-2.17/sys/contrib/pf	(revision 215830)

Property changes on: projects/binutils-2.17/sys/contrib/pf
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/contrib/pf:r215709-215824
Index: projects/binutils-2.17/sys/contrib/x86emu
===================================================================
--- projects/binutils-2.17/sys/contrib/x86emu	(revision 215829)
+++ projects/binutils-2.17/sys/contrib/x86emu	(revision 215830)

Property changes on: projects/binutils-2.17/sys/contrib/x86emu
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/contrib/x86emu:r215709-215824
Index: projects/binutils-2.17/sys/dev/ahci/ahci.c
===================================================================
--- projects/binutils-2.17/sys/dev/ahci/ahci.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/ahci/ahci.c	(revision 215830)
@@ -1,2672 +1,2672 @@
 /*-
  * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ata.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sema.h>
 #include <sys/taskqueue.h>
 #include <vm/uma.h>
 #include <machine/stdarg.h>
 #include <machine/resource.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 #include "ahci.h"
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt_sim.h>
 #include <cam/cam_debug.h>
 
 /* local prototypes */
 static int ahci_setup_interrupt(device_t dev);
 static void ahci_intr(void *data);
 static void ahci_intr_one(void *data);
 static int ahci_suspend(device_t dev);
 static int ahci_resume(device_t dev);
 static int ahci_ch_init(device_t dev);
 static int ahci_ch_deinit(device_t dev);
 static int ahci_ch_suspend(device_t dev);
 static int ahci_ch_resume(device_t dev);
 static void ahci_ch_pm(void *arg);
 static void ahci_ch_intr_locked(void *data);
 static void ahci_ch_intr(void *data);
 static int ahci_ctlr_reset(device_t dev);
 static int ahci_ctlr_setup(device_t dev);
 static void ahci_begin_transaction(device_t dev, union ccb *ccb);
 static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error);
 static void ahci_execute_transaction(struct ahci_slot *slot);
 static void ahci_timeout(struct ahci_slot *slot);
 static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et);
 static int ahci_setup_fis(device_t dev, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag);
 static void ahci_dmainit(device_t dev);
 static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error);
 static void ahci_dmafini(device_t dev);
 static void ahci_slotsalloc(device_t dev);
 static void ahci_slotsfree(device_t dev);
 static void ahci_reset(device_t dev);
 static void ahci_start(device_t dev, int fbs);
 static void ahci_stop(device_t dev);
 static void ahci_clo(device_t dev);
 static void ahci_start_fr(device_t dev);
 static void ahci_stop_fr(device_t dev);
 
 static int ahci_sata_connect(struct ahci_channel *ch);
 static int ahci_sata_phy_reset(device_t dev);
 static int ahci_wait_ready(device_t dev, int t);
 
 static void ahci_issue_read_log(device_t dev);
 static void ahci_process_read_log(device_t dev, union ccb *ccb);
 
 static void ahciaction(struct cam_sim *sim, union ccb *ccb);
 static void ahcipoll(struct cam_sim *sim);
 
 MALLOC_DEFINE(M_AHCI, "AHCI driver", "AHCI driver data buffers");
 
 /*
  * Table of known AHCI controllers, terminated by an entry whose id
  * is 0.
  *
  * The probe routines use the first entry whose id equals the device's
  * PCI id and whose rev is <= the device's revision. When one id is
  * listed more than once, the entry with the higher rev must therefore
  * come first (see 0x91231b4b).
  */
 static struct {
 	uint32_t	id;	/* compared with pci_get_devid() */
 	uint8_t		rev;	/* lowest revision the entry applies to */
 	const char	*name;	/* prefix of the device description */
 	int		quirks;	/* bitwise OR of the AHCI_Q_* flags below */
 /* NOFORCE: ahci_probe() honours the entry only if the class code says AHCI. */
 #define AHCI_Q_NOFORCE	1
 #define AHCI_Q_NOPMP	2
 #define AHCI_Q_NONCQ	4
 #define AHCI_Q_1CH	8
 #define AHCI_Q_2CH	16
 #define AHCI_Q_4CH	32
 #define AHCI_Q_EDGEIS	64
 #define AHCI_Q_SATA2	128
 #define AHCI_Q_NOBSYRES	256
 #define AHCI_Q_NOAA	512
 } ahci_ids[] = {
 	{0x43801002, 0x00, "ATI IXP600",	0},
 	{0x43901002, 0x00, "ATI IXP700",	0},
 	{0x43911002, 0x00, "ATI IXP700",	0},
 	{0x43921002, 0x00, "ATI IXP700",	0},
 	{0x43931002, 0x00, "ATI IXP700",	0},
 	{0x43941002, 0x00, "ATI IXP800",	0},
 	{0x43951002, 0x00, "ATI IXP800",	0},
 	{0x26528086, 0x00, "Intel ICH6",	AHCI_Q_NOFORCE},
 	{0x26538086, 0x00, "Intel ICH6M",	AHCI_Q_NOFORCE},
 	{0x26818086, 0x00, "Intel ESB2",	0},
 	{0x26828086, 0x00, "Intel ESB2",	0},
 	{0x26838086, 0x00, "Intel ESB2",	0},
 	{0x27c18086, 0x00, "Intel ICH7",	0},
 	{0x27c38086, 0x00, "Intel ICH7",	0},
 	{0x27c58086, 0x00, "Intel ICH7M",	0},
 	{0x27c68086, 0x00, "Intel ICH7M",	0},
 	{0x28218086, 0x00, "Intel ICH8",	0},
 	{0x28228086, 0x00, "Intel ICH8",	0},
 	{0x28248086, 0x00, "Intel ICH8",	0},
 	{0x28298086, 0x00, "Intel ICH8M",	0},
 	{0x282a8086, 0x00, "Intel ICH8M",	0},
 	{0x29228086, 0x00, "Intel ICH9",	0},
 	{0x29238086, 0x00, "Intel ICH9",	0},
 	{0x29248086, 0x00, "Intel ICH9",	0},
 	{0x29258086, 0x00, "Intel ICH9",	0},
 	{0x29278086, 0x00, "Intel ICH9",	0},
 	{0x29298086, 0x00, "Intel ICH9M",	0},
 	{0x292a8086, 0x00, "Intel ICH9M",	0},
 	{0x292b8086, 0x00, "Intel ICH9M",	0},
 	{0x292c8086, 0x00, "Intel ICH9M",	0},
 	{0x292f8086, 0x00, "Intel ICH9M",	0},
 	{0x294d8086, 0x00, "Intel ICH9",	0},
 	{0x294e8086, 0x00, "Intel ICH9M",	0},
 	{0x3a058086, 0x00, "Intel ICH10",	0},
 	{0x3a228086, 0x00, "Intel ICH10",	0},
 	{0x3a258086, 0x00, "Intel ICH10",	0},
 	{0x3b228086, 0x00, "Intel 5 Series/3400 Series",	0},
 	{0x3b238086, 0x00, "Intel 5 Series/3400 Series",	0},
 	{0x3b258086, 0x00, "Intel 5 Series/3400 Series",	0},
 	{0x3b298086, 0x00, "Intel 5 Series/3400 Series",	0},
 	{0x3b2c8086, 0x00, "Intel 5 Series/3400 Series",	0},
 	{0x3b2f8086, 0x00, "Intel 5 Series/3400 Series",	0},
 	{0x1c028086, 0x00, "Intel Cougar Point",	0},
 	{0x1c038086, 0x00, "Intel Cougar Point",	0},
 	{0x1c048086, 0x00, "Intel Cougar Point",	0},
 	{0x1c058086, 0x00, "Intel Cougar Point",	0},
 	{0x2361197b, 0x00, "JMicron JMB361",	AHCI_Q_NOFORCE},
 	{0x2363197b, 0x00, "JMicron JMB363",	AHCI_Q_NOFORCE},
 	{0x2365197b, 0x00, "JMicron JMB365",	AHCI_Q_NOFORCE},
 	{0x2366197b, 0x00, "JMicron JMB366",	AHCI_Q_NOFORCE},
 	{0x2368197b, 0x00, "JMicron JMB368",	AHCI_Q_NOFORCE},
 	{0x611111ab, 0x00, "Marvell 88SX6111",	AHCI_Q_NOFORCE|AHCI_Q_1CH|AHCI_Q_EDGEIS},
 	{0x612111ab, 0x00, "Marvell 88SX6121",	AHCI_Q_NOFORCE|AHCI_Q_2CH|AHCI_Q_EDGEIS},
 	{0x614111ab, 0x00, "Marvell 88SX6141",	AHCI_Q_NOFORCE|AHCI_Q_4CH|AHCI_Q_EDGEIS},
 	{0x614511ab, 0x00, "Marvell 88SX6145",	AHCI_Q_NOFORCE|AHCI_Q_4CH|AHCI_Q_EDGEIS},
 	{0x91231b4b, 0x11, "Marvell 88SE912x",	AHCI_Q_NOBSYRES},
 	{0x91231b4b, 0x00, "Marvell 88SE912x",	AHCI_Q_EDGEIS|AHCI_Q_SATA2|AHCI_Q_NOBSYRES},
 	{0x044c10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x044d10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x044e10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x044f10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x045c10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x045d10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x045e10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x045f10de, 0x00, "NVIDIA MCP65",	AHCI_Q_NOAA},
 	{0x055010de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055110de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055210de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055310de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055410de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055510de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055610de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055710de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055810de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055910de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055A10de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x055B10de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x058410de, 0x00, "NVIDIA MCP67",	AHCI_Q_NOAA},
 	{0x07f010de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f110de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f210de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f310de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f410de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f510de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f610de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f710de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f810de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07f910de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07fa10de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x07fb10de, 0x00, "NVIDIA MCP73",	AHCI_Q_NOAA},
 	{0x0ad010de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad110de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad210de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad310de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad410de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad510de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad610de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad710de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad810de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ad910de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ada10de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0adb10de, 0x00, "NVIDIA MCP77",	AHCI_Q_NOAA},
 	{0x0ab410de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0ab510de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0ab610de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0ab710de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0ab810de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0ab910de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0aba10de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0abb10de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0abc10de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0abd10de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0abe10de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0abf10de, 0x00, "NVIDIA MCP79",	AHCI_Q_NOAA},
 	{0x0d8410de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8510de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8610de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8710de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8810de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8910de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8a10de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8b10de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8c10de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8d10de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8e10de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x0d8f10de, 0x00, "NVIDIA MCP89",	AHCI_Q_NOAA},
 	{0x33491106, 0x00, "VIA VT8251",	AHCI_Q_NOPMP|AHCI_Q_NONCQ},
 	{0x62871106, 0x00, "VIA VT8251",	AHCI_Q_NOPMP|AHCI_Q_NONCQ},
 	{0x11841039, 0x00, "SiS 966",		0},
 	{0x11851039, 0x00, "SiS 968",		0},
 	{0x01861039, 0x00, "SiS 968",		0},
 	{0x00000000, 0x00, NULL,		0}
 };
 
 /*
  * PCI probe method of the "ahci" driver.
  *
  * The device is claimed when its ID and revision match an ahci_ids[] entry,
  * or, failing that, when its class/subclass/progif identify a SATA AHCI 1.0
  * controller.  Table entries flagged AHCI_Q_NOFORCE are honoured only for
  * devices that also pass the class check.
  *
  * Returns BUS_PROBE_VENDOR when the device is claimed, ENXIO otherwise.
  */
 static int
 ahci_probe(device_t dev)
 {
 	uint32_t devid = pci_get_devid(dev);
 	uint8_t revid = pci_get_revid(dev);
 	char desc[64];
 	int generic, n;
 
 	/* Does the class code advertise an AHCI controller? */
 	generic = (pci_get_class(dev) == PCIC_STORAGE &&
 	    pci_get_subclass(dev) == PCIS_STORAGE_SATA &&
 	    pci_get_progif(dev) == PCIP_STORAGE_SATA_AHCI_1_0);
 	/* Walk the table of known chips; the first usable match wins. */
 	for (n = 0; ahci_ids[n].id != 0; n++) {
 		if (ahci_ids[n].id != devid || ahci_ids[n].rev > revid)
 			continue;
 		if (!generic && (ahci_ids[n].quirks & AHCI_Q_NOFORCE))
 			continue;
 		/* Do not attach JMicrons with single PCI function. */
 		if (pci_get_vendor(dev) == 0x197b &&
 		    (pci_read_config(dev, 0xdf, 1) & 0x40) == 0)
 			return (ENXIO);
 		snprintf(desc, sizeof(desc), "%s AHCI SATA controller",
 		    ahci_ids[n].name);
 		device_set_desc_copy(dev, desc);
 		return (BUS_PROBE_VENDOR);
 	}
 	/* Unknown chip: accept it only on the strength of its class code. */
 	if (generic) {
 		device_set_desc_copy(dev, "AHCI SATA controller");
 		return (BUS_PROBE_VENDOR);
 	}
 	return (ENXIO);
 }
 
 /*
  * Probe method used when the controller is offered by the "atapci" bus.
  *
  * Only children whose ivars value is negative are accepted.  The description
  * comes from the matching ahci_ids[] entry when there is one, otherwise a
  * generic string is used.  Unlike ahci_probe(), an accepted device is always
  * claimed.
  *
  * Returns BUS_PROBE_VENDOR when claimed, ENXIO otherwise.
  */
 static int
 ahci_ata_probe(device_t dev)
 {
 	uint32_t devid = pci_get_devid(dev);
 	uint8_t revid = pci_get_revid(dev);
 	char desc[64];
 	int match = -1, n;
 
 	if ((intptr_t)device_get_ivars(dev) >= 0)
 		return (ENXIO);
 	/* Find the first table entry for this ID whose revision is not newer. */
 	for (n = 0; ahci_ids[n].id != 0; n++) {
 		if (ahci_ids[n].id == devid && ahci_ids[n].rev <= revid) {
 			match = n;
 			break;
 		}
 	}
 	if (match >= 0) {
 		snprintf(desc, sizeof(desc), "%s AHCI SATA controller",
 		    ahci_ids[match].name);
 		device_set_desc_copy(dev, desc);
 	} else {
 		device_set_desc_copy(dev, "AHCI SATA controller");
 	}
 	return (BUS_PROBE_VENDOR);
 }
 
 /*
  * Attach method of the controller (used by both method tables in this file).
  *
  * Looks up the quirks for this chip, maps BAR 5, puts that range under a
  * private resource manager so channels can sub-allocate from it, resets the
  * controller, reads the capability registers (adjusted by quirks), sets up
  * interrupts, announces the capabilities and finally adds one "ahcich" child
  * for every implemented port.
  *
  * Returns 0 on success or an errno value; on failure the memory resource and
  * the resource manager set up here are released again.
  */
 static int
 ahci_attach(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	device_t child;
 	int	error, unit, speed, i;
 	uint32_t devid = pci_get_devid(dev);
 	uint8_t revid = pci_get_revid(dev);
 	u_int32_t version;
 
 	ctlr->dev = dev;
 	/*
 	 * Find the quirks of this chip.  When nothing matches, i stops at
 	 * the table terminator, whose quirks field is 0.
 	 */
 	i = 0;
 	while (ahci_ids[i].id != 0 &&
 	    (ahci_ids[i].id != devid ||
 	     ahci_ids[i].rev > revid))
 		i++;
 	ctlr->quirks = ahci_ids[i].quirks;
 	resource_int_value(device_get_name(dev),
 	    device_get_unit(dev), "ccc", &ctlr->ccc);
 	/* if we have a memory BAR(5) we are likely on an AHCI part */
 	ctlr->r_rid = PCIR_BAR(5);
 	if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &ctlr->r_rid, RF_ACTIVE)))
 		return ENXIO;
 	/* Setup our own memory management for channels. */
 	ctlr->sc_iomem.rm_start = rman_get_start(ctlr->r_mem);
 	ctlr->sc_iomem.rm_end = rman_get_end(ctlr->r_mem);
 	ctlr->sc_iomem.rm_type = RMAN_ARRAY;
 	ctlr->sc_iomem.rm_descr = "I/O memory addresses";
 	if ((error = rman_init(&ctlr->sc_iomem)) != 0) {
 		bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem);
 		return (error);
 	}
 	if ((error = rman_manage_region(&ctlr->sc_iomem,
 	    rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) {
 		bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem);
 		rman_fini(&ctlr->sc_iomem);
 		return (error);
 	}
 	pci_enable_busmaster(dev);
 	/* Reset controller */
 	if ((error = ahci_ctlr_reset(dev)) != 0) {
 		bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem);
 		rman_fini(&ctlr->sc_iomem);
 		return (error);
 	}
 	/* Get the HW capabilities */
 	version = ATA_INL(ctlr->r_mem, AHCI_VS);
 	ctlr->caps = ATA_INL(ctlr->r_mem, AHCI_CAP);
 	/*
 	 * CAP2 exists from AHCI 1.2 on.  The minor version lives in bits
 	 * 8-11 and 0-3 (see the decoding in the announcement below), so 1.2
 	 * reads as 0x00010200.
 	 */
 	if (version >= 0x00010200)
 		ctlr->caps2 = ATA_INL(ctlr->r_mem, AHCI_CAP2);
 	if (ctlr->caps & AHCI_CAP_EMS)
 		ctlr->capsem = ATA_INL(ctlr->r_mem, AHCI_EM_CTL);
 	ctlr->ichannels = ATA_INL(ctlr->r_mem, AHCI_PI);
 	/* Quirks limiting the number of usable ports to 1, 2 or 4. */
 	if (ctlr->quirks & AHCI_Q_1CH) {
 		ctlr->caps &= ~AHCI_CAP_NPMASK;
 		ctlr->ichannels &= 0x01;
 	}
 	if (ctlr->quirks & AHCI_Q_2CH) {
 		ctlr->caps &= ~AHCI_CAP_NPMASK;
 		ctlr->caps |= 1;
 		ctlr->ichannels &= 0x03;
 	}
 	if (ctlr->quirks & AHCI_Q_4CH) {
 		ctlr->caps &= ~AHCI_CAP_NPMASK;
 		ctlr->caps |= 3;
 		ctlr->ichannels &= 0x0f;
 	}
 	/* Cover both the highest implemented port and the advertised count. */
 	ctlr->channels = MAX(flsl(ctlr->ichannels),
 	    (ctlr->caps & AHCI_CAP_NPMASK) + 1);
 	if (ctlr->quirks & AHCI_Q_NOPMP)
 		ctlr->caps &= ~AHCI_CAP_SPM;
 	if (ctlr->quirks & AHCI_Q_NONCQ)
 		ctlr->caps &= ~AHCI_CAP_SNCQ;
 	/* Ignore the "ccc" hint when the hardware lacks CCC support. */
 	if ((ctlr->caps & AHCI_CAP_CCCS) == 0)
 		ctlr->ccc = 0;
 	ahci_ctlr_setup(dev);
 	/* Setup interrupts. */
 	if (ahci_setup_interrupt(dev)) {
 		bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem);
 		rman_fini(&ctlr->sc_iomem);
 		return ENXIO;
 	}
 	/* Announce HW capabilities. */
 	speed = (ctlr->caps & AHCI_CAP_ISS) >> AHCI_CAP_ISS_SHIFT;
 	device_printf(dev,
 		    "AHCI v%x.%02x with %d %sGbps ports, Port Multiplier %s%s\n",
 		    ((version >> 20) & 0xf0) + ((version >> 16) & 0x0f),
 		    ((version >> 4) & 0xf0) + (version & 0x0f),
 		    (ctlr->caps & AHCI_CAP_NPMASK) + 1,
 		    ((speed == 1) ? "1.5":((speed == 2) ? "3":
 		    ((speed == 3) ? "6":"?"))),
 		    (ctlr->caps & AHCI_CAP_SPM) ?
 		    "supported" : "not supported",
 		    (ctlr->caps & AHCI_CAP_FBSS) ?
 		    " with FBS" : "");
 	if (bootverbose) {
 		device_printf(dev, "Caps:%s%s%s%s%s%s%s%s %sGbps",
 		    (ctlr->caps & AHCI_CAP_64BIT) ? " 64bit":"",
 		    (ctlr->caps & AHCI_CAP_SNCQ) ? " NCQ":"",
 		    (ctlr->caps & AHCI_CAP_SSNTF) ? " SNTF":"",
 		    (ctlr->caps & AHCI_CAP_SMPS) ? " MPS":"",
 		    (ctlr->caps & AHCI_CAP_SSS) ? " SS":"",
 		    (ctlr->caps & AHCI_CAP_SALP) ? " ALP":"",
 		    (ctlr->caps & AHCI_CAP_SAL) ? " AL":"",
 		    (ctlr->caps & AHCI_CAP_SCLO) ? " CLO":"",
 		    ((speed == 1) ? "1.5":((speed == 2) ? "3":
 		    ((speed == 3) ? "6":"?"))));
 		printf("%s%s%s%s%s%s %dcmd%s%s%s %dports\n",
 		    (ctlr->caps & AHCI_CAP_SAM) ? " AM":"",
 		    (ctlr->caps & AHCI_CAP_SPM) ? " PM":"",
 		    (ctlr->caps & AHCI_CAP_FBSS) ? " FBS":"",
 		    (ctlr->caps & AHCI_CAP_PMD) ? " PMD":"",
 		    (ctlr->caps & AHCI_CAP_SSC) ? " SSC":"",
 		    (ctlr->caps & AHCI_CAP_PSC) ? " PSC":"",
 		    ((ctlr->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1,
 		    (ctlr->caps & AHCI_CAP_CCCS) ? " CCC":"",
 		    (ctlr->caps & AHCI_CAP_EMS) ? " EM":"",
 		    (ctlr->caps & AHCI_CAP_SXS) ? " eSATA":"",
 		    (ctlr->caps & AHCI_CAP_NPMASK) + 1);
 	}
 	if (bootverbose && version >= 0x00010200) {
 		device_printf(dev, "Caps2:%s%s%s\n",
 		    (ctlr->caps2 & AHCI_CAP2_APST) ? " APST":"",
 		    (ctlr->caps2 & AHCI_CAP2_NVMP) ? " NVMP":"",
 		    (ctlr->caps2 & AHCI_CAP2_BOH) ? " BOH":"");
 	}
 	if (bootverbose && (ctlr->caps & AHCI_CAP_EMS)) {
 		device_printf(dev, "EM Caps:%s%s%s%s%s%s%s%s\n",
 		    (ctlr->capsem & AHCI_EM_PM) ? " PM":"",
 		    (ctlr->capsem & AHCI_EM_ALHD) ? " ALHD":"",
 		    (ctlr->capsem & AHCI_EM_XMT) ? " XMT":"",
 		    (ctlr->capsem & AHCI_EM_SMB) ? " SMB":"",
 		    (ctlr->capsem & AHCI_EM_SGPIO) ? " SGPIO":"",
 		    (ctlr->capsem & AHCI_EM_SES2) ? " SES-2":"",
 		    (ctlr->capsem & AHCI_EM_SAFTE) ? " SAF-TE":"",
 		    (ctlr->capsem & AHCI_EM_LED) ? " LED":"");
 	}
 	/* Attach all channels on this controller */
 	for (unit = 0; unit < ctlr->channels; unit++) {
 		/* Unsigned shift: unit may reach 31. */
 		if ((ctlr->ichannels & (1U << unit)) == 0)
 			continue;
 		child = device_add_child(dev, "ahcich", -1);
 		if (child == NULL)
 			device_printf(dev, "failed to add channel device\n");
 		else
 			device_set_ivars(child, (void *)(intptr_t)unit);
 	}
 	bus_generic_attach(dev);
 	return 0;
 }
 
 /*
  * Detach method of the controller.
  *
  * Deletes all child devices, tears down and releases every allocated
  * interrupt, releases MSI, shuts down the private resource manager and
  * finally gives back the register memory resource.  Always returns 0.
  */
 static int
 ahci_detach(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	struct ahci_controller_irq *irq;
 	device_t *kids;
 	int nkids, n;
 
 	/* Detach & delete all children */
 	if (device_get_children(dev, &kids, &nkids) == 0) {
 		for (n = 0; n < nkids; n++)
 			device_delete_child(dev, kids[n]);
 		free(kids, M_TEMP);
 	}
 	/* Free interrupts; vectors that were never allocated are skipped. */
 	for (n = 0; n < ctlr->numirqs; n++) {
 		irq = &ctlr->irqs[n];
 		if (irq->r_irq == NULL)
 			continue;
 		bus_teardown_intr(dev, irq->r_irq, irq->handle);
 		bus_release_resource(dev, SYS_RES_IRQ, irq->r_irq_rid,
 		    irq->r_irq);
 	}
 	pci_release_msi(dev);
 	/* Free memory. */
 	rman_fini(&ctlr->sc_iomem);
 	if (ctlr->r_mem != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid,
 		    ctlr->r_mem);
 	}
 	return (0);
 }
 
 /*
  * Reset the controller and leave it in AHCI mode.
  *
  * Sets AHCI_GHC_HR and polls in 1000-unit DELAY() steps, at most 1000 times,
  * until the bit reads back as clear.  Returns 0 on success, or ENXIO when the
  * bit is still set after the last poll.
  */
 static int
 ahci_ctlr_reset(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	int remaining;
 
 	/*
 	 * Chip specific step: when the ID dword at config offset 0 reads
 	 * 0x28298086 and the byte at 0x92 has value 0x04 or 0x05, write
 	 * 0x01 to that byte.
 	 */
 	if (pci_read_config(dev, 0x00, 4) == 0x28298086 &&
 	    (pci_read_config(dev, 0x92, 1) & 0xfe) == 0x04)
 		pci_write_config(dev, 0x92, 0x01, 1);
 	/* Enable AHCI mode */
 	ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE);
 	/* Reset AHCI controller */
 	ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE|AHCI_GHC_HR);
 	remaining = 1000;
 	while (remaining > 0) {
 		DELAY(1000);
 		if ((ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_HR) == 0)
 			break;
 		remaining--;
 	}
 	if (remaining == 0) {
 		device_printf(dev, "AHCI controller reset failure\n");
 		return (ENXIO);
 	}
 	/* Reenable AHCI mode */
 	ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE);
 	return (0);
 }
 
 /*
  * Bring the controller into its operating state: acknowledge any pending
  * interrupt status, program command completion coalescing when ctlr->ccc is
  * non-zero, and turn on the global interrupt enable.  Always returns 0.
  */
 static int
 ahci_ctlr_setup(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	uint32_t pending, ports, cccc, ghc;
 
 	/* Clear interrupts by writing back whatever is currently set. */
 	pending = ATA_INL(ctlr->r_mem, AHCI_IS);
 	ATA_OUTL(ctlr->r_mem, AHCI_IS, pending);
 	/* Configure CCC */
 	if (ctlr->ccc != 0) {
 		/* Coalesce on every implemented port. */
 		ports = ATA_INL(ctlr->r_mem, AHCI_PI);
 		ATA_OUTL(ctlr->r_mem, AHCI_CCCP, ports);
 		/* Timeout from the hint, 4 completions, enabled. */
 		ATA_OUTL(ctlr->r_mem, AHCI_CCCC,
 		    (ctlr->ccc << AHCI_CCCC_TV_SHIFT) |
 		    (4 << AHCI_CCCC_CC_SHIFT) |
 		    AHCI_CCCC_EN);
 		/* Read back which vector the hardware uses for CCC. */
 		cccc = ATA_INL(ctlr->r_mem, AHCI_CCCC);
 		ctlr->cccv = (cccc & AHCI_CCCC_INT_MASK) >>
 		    AHCI_CCCC_INT_SHIFT;
 		if (bootverbose) {
 			device_printf(dev,
 			    "CCC with %dms/4cmd enabled on vector %d\n",
 			    ctlr->ccc, ctlr->cccv);
 		}
 	}
 	/* Enable AHCI interrupts */
 	ghc = ATA_INL(ctlr->r_mem, AHCI_GHC);
 	ATA_OUTL(ctlr->r_mem, AHCI_GHC, ghc | AHCI_GHC_IE);
 	return (0);
 }
 
 /*
  * Suspend method of the controller: suspend the children first, then mask
  * the controller's global interrupt enable.  Always returns 0.
  */
 static int
 ahci_suspend(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	uint32_t ghc;
 
 	bus_generic_suspend(dev);
 	/* Disable interrupts, so the state change(s) don't trigger any. */
 	ghc = ATA_INL(ctlr->r_mem, AHCI_GHC);
 	ATA_OUTL(ctlr->r_mem, AHCI_GHC, ghc & ~AHCI_GHC_IE);
 	return (0);
 }
 
 /*
  * Resume method of the controller: reset and reconfigure the hardware, then
  * resume the children.  Returns the reset error if the reset fails, otherwise
  * the result of bus_generic_resume().
  */
 static int
 ahci_resume(device_t dev)
 {
 	int error;
 
 	error = ahci_ctlr_reset(dev);
 	if (error != 0)
 		return (error);
 	ahci_ctlr_setup(dev);
 	return (bus_generic_resume(dev));
 }
 
 /*
  * Allocate and hook up the controller's interrupt vector(s).
  *
  * The "msi" hint selects the policy: negative or 0 disables MSI, 1 (the
  * default) asks for at most one message, larger values ask for as many
  * messages as the device offers.  When MSI cannot be allocated a single
  * legacy interrupt is used.  Each vector is given a mode deciding which
  * handler serves it (ahci_intr_one for AHCI_IRQ_MODE_ONE, ahci_intr for the
  * rest).
  *
  * Returns 0 on success and ENXIO on failure.  On failure everything this
  * function allocated (interrupt handlers, IRQ resources, MSI messages) is
  * released again, so the caller only has to undo its own work.
  */
 static int
 ahci_setup_interrupt(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	int i, msi = 1;
 
 	/* Process hints. */
 	resource_int_value(device_get_name(dev),
 	    device_get_unit(dev), "msi", &msi);
 	if (msi < 0)
 		msi = 0;
 	else if (msi == 1)
 		msi = min(1, pci_msi_count(dev));
 	else if (msi > 1)
 		msi = pci_msi_count(dev);
 	/* Allocate MSI if needed/present. */
 	if (msi && pci_alloc_msi(dev, &msi) == 0) {
 		ctlr->numirqs = msi;
 	} else {
 		msi = 0;
 		ctlr->numirqs = 1;
 	}
 	/* Check for single MSI vector fallback. */
 	if (ctlr->numirqs > 1 &&
 	    (ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_MRSM) != 0) {
 		device_printf(dev, "Falling back to one MSI\n");
 		ctlr->numirqs = 1;
 	}
 	/* Allocate all IRQs. */
 	for (i = 0; i < ctlr->numirqs; i++) {
 		ctlr->irqs[i].ctlr = ctlr;
 		/* MSI resource IDs start at 1, the legacy interrupt is 0. */
 		ctlr->irqs[i].r_irq_rid = i + (msi ? 1 : 0);
 		if (ctlr->numirqs == 1 || i >= ctlr->channels ||
 		    (ctlr->ccc && i == ctlr->cccv))
 			ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL;
 		else if (i == ctlr->numirqs - 1)
 			ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER;
 		else
 			ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE;
 		if (!(ctlr->irqs[i].r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 		    &ctlr->irqs[i].r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) {
 			device_printf(dev, "unable to map interrupt\n");
 			goto fail;
 		}
 		if ((bus_setup_intr(dev, ctlr->irqs[i].r_irq, ATA_INTR_FLAGS, NULL,
 		    (ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE) ? ahci_intr_one : ahci_intr,
 		    &ctlr->irqs[i], &ctlr->irqs[i].handle))) {
 			device_printf(dev, "unable to setup interrupt\n");
 			/* This vector has a resource but no handler yet. */
 			bus_release_resource(dev, SYS_RES_IRQ,
 			    ctlr->irqs[i].r_irq_rid, ctlr->irqs[i].r_irq);
 			ctlr->irqs[i].r_irq = NULL;
 			goto fail;
 		}
 		if (ctlr->numirqs > 1) {
 			bus_describe_intr(dev, ctlr->irqs[i].r_irq,
 			    ctlr->irqs[i].handle,
 			    ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE ?
 			    "ch%d" : "%d", i);
 		}
 	}
 	return (0);
 
 fail:
 	/* Undo the vectors that were completely set up before the failure. */
 	while (--i >= 0) {
 		bus_teardown_intr(dev, ctlr->irqs[i].r_irq,
 		    ctlr->irqs[i].handle);
 		bus_release_resource(dev, SYS_RES_IRQ,
 		    ctlr->irqs[i].r_irq_rid, ctlr->irqs[i].r_irq);
 		ctlr->irqs[i].r_irq = NULL;
 	}
 	/* msi is non-zero only when pci_alloc_msi() succeeded above. */
 	if (msi)
 		pci_release_msi(dev);
 	return (ENXIO);
 }
 
 /*
  * Common case interrupt handler.
  *
  * data points at the struct ahci_controller_irq of the vector that fired.
  * In AHCI_IRQ_MODE_ALL every channel is examined; otherwise
  * (AHCI_IRQ_MODE_AFTER) only the channels starting at the one matching this
  * vector.  For each channel whose bit is set in the status word the handler
  * registered through ahci_setup_intr() is called, if any.
  */
 static void
 ahci_intr(void *data)
 {
 	struct ahci_controller_irq *irq = data;
 	struct ahci_controller *ctlr = irq->ctlr;
 	u_int32_t is, ise = 0;
 	void *arg;
 	int unit;
 
 	if (irq->mode == AHCI_IRQ_MODE_ALL) {
 		unit = 0;
 		/* With CCC active, poll every implemented port instead. */
 		if (ctlr->ccc)
 			is = ctlr->ichannels;
 		else
 			is = ATA_INL(ctlr->r_mem, AHCI_IS);
 	} else {	/* AHCI_IRQ_MODE_AFTER */
 		unit = irq->r_irq_rid - 1;
 		is = ATA_INL(ctlr->r_mem, AHCI_IS);
 	}
 	/*
 	 * CCC interrupt is edge triggered.  Unsigned shifts are used in this
 	 * function because the bit index can be as high as 31.
 	 */
 	if (ctlr->ccc)
 		ise = 1U << ctlr->cccv;
 	/* Some controllers have edge triggered IS. */
 	if (ctlr->quirks & AHCI_Q_EDGEIS)
 		ise |= is;
 	/* Edge triggered bits are acknowledged before the channels run. */
 	if (ise != 0)
 		ATA_OUTL(ctlr->r_mem, AHCI_IS, ise);
 	for (; unit < ctlr->channels; unit++) {
 		if ((is & (1U << unit)) != 0 &&
 		    (arg = ctlr->interrupt[unit].argument)) {
 				ctlr->interrupt[unit].function(arg);
 		}
 	}
 	/* AHCI declares level triggered IS. */
 	if (!(ctlr->quirks & AHCI_Q_EDGEIS))
 		ATA_OUTL(ctlr->r_mem, AHCI_IS, is);
 }
 
 /*
  * Simplified interrupt handler for multivector MSI mode.
  *
  * Serves exactly one channel, whose number is derived from the resource ID
  * of the vector that fired.  The channel's status bit is acknowledged before
  * the channel handler runs on controllers with the AHCI_Q_EDGEIS quirk and
  * after it otherwise.
  */
 static void
 ahci_intr_one(void *data)
 {
 	struct ahci_controller_irq *irq = data;
 	struct ahci_controller *ctlr = irq->ctlr;
 	void *arg;
 	int unit;
 
 	unit = irq->r_irq_rid - 1;
 	/*
 	 * Some controllers have edge triggered IS.  The shifts below are
 	 * unsigned so that a bit index of 31 stays well defined.
 	 */
 	if (ctlr->quirks & AHCI_Q_EDGEIS)
 		ATA_OUTL(ctlr->r_mem, AHCI_IS, 1U << unit);
 	if ((arg = ctlr->interrupt[unit].argument))
 		ctlr->interrupt[unit].function(arg);
 	/* AHCI declares level triggered IS. */
 	if (!(ctlr->quirks & AHCI_Q_EDGEIS))
 		ATA_OUTL(ctlr->r_mem, AHCI_IS, 1U << unit);
 }
 
 /*
  * bus_alloc_resource method serving the channel children.
  *
  * SYS_RES_MEMORY: reserves the channel's 128-byte register window, located
  * AHCI_OFFSET + unit * 128 bytes into the controller's register resource,
  * from the controller's private resource manager and gives it a bus space
  * handle that is a subregion of the controller's mapping.
  * SYS_RES_IRQ: hands out the controller's first interrupt resource when the
  * requested rid is ATA_IRQ_RID.
  *
  * The start, end, count and flags arguments are not used.  Returns the
  * resource, or NULL when nothing could be provided.
  */
 static struct resource *
 ahci_alloc_resource(device_t dev, device_t child, int type, int *rid,
 		       u_long start, u_long end, u_long count, u_int flags)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	struct ahci_channel *ch = device_get_softc(child);
 	int unit = ch->unit;
 	int offset = AHCI_OFFSET + (unit << 7);
 	struct resource *res = NULL;
 	bus_space_handle_t bsh;
 	bus_space_tag_t bst;
 	long base;
 
 	if (type == SYS_RES_MEMORY) {
 		base = rman_get_start(ctlr->r_mem);
 		res = rman_reserve_resource(&ctlr->sc_iomem, base + offset,
 		    base + offset + 127, 128, RF_ACTIVE, child);
 		if (res != NULL) {
 			/* Carve the window out of the parent's mapping. */
 			bsh = rman_get_bushandle(ctlr->r_mem);
 			bst = rman_get_bustag(ctlr->r_mem);
 			bus_space_subregion(bst, bsh, offset, 128, &bsh);
 			rman_set_bushandle(res, bsh);
 			rman_set_bustag(res, bst);
 		}
 	} else if (type == SYS_RES_IRQ) {
 		if (*rid == ATA_IRQ_RID)
 			res = ctlr->irqs[0].r_irq;
 	}
 	return (res);
 }
 
 /*
  * bus_release_resource method serving the channel children.
  *
  * Memory windows go back to the resource manager.  For interrupts nothing
  * is released; only the rid is validated.  Returns 0 on success, ENOENT for
  * an interrupt rid other than ATA_IRQ_RID and EINVAL for any other resource
  * type.
  */
 static int
 ahci_release_resource(device_t dev, device_t child, int type, int rid,
 			 struct resource *r)
 {
 
 	if (type == SYS_RES_MEMORY) {
 		rman_release_resource(r);
 		return (0);
 	}
 	if (type == SYS_RES_IRQ)
 		return ((rid != ATA_IRQ_RID) ? ENOENT : 0);
 	return (EINVAL);
 }
 
 /*
  * bus_setup_intr method serving the channel children.
  *
  * No real interrupt is hooked up here; the handler and its argument are
  * recorded in the controller's per-channel table, from where the controller
  * interrupt handlers call them.  Filters are rejected with EINVAL; otherwise
  * 0 is returned.  irq, flags and cookiep are not used.
  */
 static int
 ahci_setup_intr(device_t dev, device_t child, struct resource *irq, 
 		   int flags, driver_filter_t *filter, driver_intr_t *function, 
 		   void *argument, void **cookiep)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
 	int chan = (intptr_t)device_get_ivars(child);
 
 	if (filter == NULL) {
 		ctlr->interrupt[chan].function = function;
 		ctlr->interrupt[chan].argument = argument;
 		return (0);
 	}
 	printf("ahci.c: we cannot use a filter here\n");
 	return (EINVAL);
 }
 
 /*
  * bus_teardown_intr method serving the channel children: forget the handler
  * recorded for the child's channel.  Always returns 0; irq and cookie are
  * not used.
  */
 static int
 ahci_teardown_intr(device_t dev, device_t child, struct resource *irq,
 		      void *cookie)
 {
 	struct ahci_controller *ctlr;
 	int chan;
 
 	ctlr = device_get_softc(dev);
 	chan = (intptr_t)device_get_ivars(child);
 	ctlr->interrupt[chan].function = NULL;
 	ctlr->interrupt[chan].argument = NULL;
 	return (0);
 }
 
 /*
  * bus_print_child method: print the standard header, the channel number
  * stored in the child's ivars, and the standard footer.  Returns the sum of
  * the values returned by the three print calls.
  */
 static int
 ahci_print_child(device_t dev, device_t child)
 {
 	int chan = (int)(intptr_t)device_get_ivars(child);
 	int printed = 0;
 
 	printed += bus_print_child_header(dev, child);
 	printed += printf(" at channel %d", chan);
 	printed += bus_print_child_footer(dev, child);
 	return (printed);
 }
 
 /*
  * bus_child_location_str method: describe the child's location as
  * "channel=N", N being the channel number stored in its ivars.  Always
  * returns 0.
  */
 static int
 ahci_child_location_str(device_t dev, device_t child, char *buf,
     size_t buflen)
 {
 	int chan = (int)(intptr_t)device_get_ivars(child);
 
 	snprintf(buf, buflen, "channel=%d", chan);
 	return (0);
 }
 
 /* Device class shared by both "ahci" driver registrations below. */
 devclass_t ahci_devclass;
 /*
  * Method table of the "ahci" driver registered on the "pci" bus.  Besides
  * the device lifecycle methods it implements the bus methods through which
  * the channel children obtain their register window and interrupt.
  */
 static device_method_t ahci_methods[] = {
 	DEVMETHOD(device_probe,     ahci_probe),
 	DEVMETHOD(device_attach,    ahci_attach),
 	DEVMETHOD(device_detach,    ahci_detach),
 	DEVMETHOD(device_suspend,   ahci_suspend),
 	DEVMETHOD(device_resume,    ahci_resume),
 	DEVMETHOD(bus_print_child,  ahci_print_child),
 	DEVMETHOD(bus_alloc_resource,       ahci_alloc_resource),
 	DEVMETHOD(bus_release_resource,     ahci_release_resource),
 	DEVMETHOD(bus_setup_intr,   ahci_setup_intr),
 	DEVMETHOD(bus_teardown_intr,ahci_teardown_intr),
 	DEVMETHOD(bus_child_location_str, ahci_child_location_str),
 	{ 0, 0 }
 };
 /* Driver description: name, methods, and softc size. */
 static driver_t ahci_driver = {
         "ahci",
         ahci_methods,
         sizeof(struct ahci_controller)
 };
 DRIVER_MODULE(ahci, pci, ahci_driver, ahci_devclass, 0, 0);
 /*
  * Method table of the "ahci" driver registered on the "atapci" bus.  It
  * differs from ahci_methods only in the probe routine.
  */
 static device_method_t ahci_ata_methods[] = {
 	DEVMETHOD(device_probe,     ahci_ata_probe),
 	DEVMETHOD(device_attach,    ahci_attach),
 	DEVMETHOD(device_detach,    ahci_detach),
 	DEVMETHOD(device_suspend,   ahci_suspend),
 	DEVMETHOD(device_resume,    ahci_resume),
 	DEVMETHOD(bus_print_child,  ahci_print_child),
 	DEVMETHOD(bus_alloc_resource,       ahci_alloc_resource),
 	DEVMETHOD(bus_release_resource,     ahci_release_resource),
 	DEVMETHOD(bus_setup_intr,   ahci_setup_intr),
 	DEVMETHOD(bus_teardown_intr,ahci_teardown_intr),
 	DEVMETHOD(bus_child_location_str, ahci_child_location_str),
 	{ 0, 0 }
 };
 /* Same name and softc as ahci_driver, but with the atapci method table. */
 static driver_t ahci_ata_driver = {
         "ahci",
         ahci_ata_methods,
         sizeof(struct ahci_controller)
 };
 DRIVER_MODULE(ahci, atapci, ahci_ata_driver, ahci_devclass, 0, 0);
 /* Module version and the dependency on the "cam" module. */
 MODULE_VERSION(ahci, 1);
 MODULE_DEPEND(ahci, cam, 1, 1, 1);
 
 /*
  * Probe method of the channel driver: every child created by the controller
  * is accepted, so this only sets the description.  Always returns 0.
  */
 static int
 ahci_ch_probe(device_t dev)
 {
 	const char *desc = "AHCI channel";
 
 	device_set_desc_copy(dev, desc);
 	return (0);
 }
 
 /*
  * Attach method of a channel (one AHCI port).
  *
  * Copies the controller's capabilities and quirks, applies the "pm_level"
  * and "sata_rev" hints, fills in the default per-target transfer settings,
  * maps the port registers, sets up DMA areas and command slots, starts the
  * port, hooks up the interrupt, and registers a CAM SIM and wildcard path
  * for the channel.
  *
  * Returns 0 on success or an errno value.
  *
  * NOTE(review): the error paths do not undo ahci_dmainit(),
  * ahci_slotsalloc() and ahci_ch_init(), and the early ENXIO return leaves
  * the mutex initialized - confirm whether that needs a follow-up.
  */
 static int
 ahci_ch_attach(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev));
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct cam_devq *devq;
 	int rid, error, i, sata_rev = 0;
 	u_int32_t version;
 
 	ch->dev = dev;
 	ch->unit = (intptr_t)device_get_ivars(dev);
 	ch->caps = ctlr->caps;
 	ch->caps2 = ctlr->caps2;
 	ch->quirks = ctlr->quirks;
-	ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1,
+	ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1;
 	mtx_init(&ch->mtx, "AHCI channel lock", NULL, MTX_DEF);
 	resource_int_value(device_get_name(dev),
 	    device_get_unit(dev), "pm_level", &ch->pm_level);
 	/* The power management timer is only used for levels above 3. */
 	if (ch->pm_level > 3)
 		callout_init_mtx(&ch->pm_timer, &ch->mtx, 0);
 	/* Limit speed for my onboard JMicron external port.
 	 * It is not eSATA really. */
 	if (pci_get_devid(ctlr->dev) == 0x2363197b &&
 	    pci_get_subvendor(ctlr->dev) == 0x1043 &&
 	    pci_get_subdevice(ctlr->dev) == 0x81e4 &&
 	    ch->unit == 0)
 		sata_rev = 1;
 	if (ch->quirks & AHCI_Q_SATA2)
 		sata_rev = 2;
 	/* The hint, when present, overrides the defaults chosen above. */
 	resource_int_value(device_get_name(dev),
 	    device_get_unit(dev), "sata_rev", &sata_rev);
 	for (i = 0; i < 16; i++) {
 		ch->user[i].revision = sata_rev;
 		ch->user[i].mode = 0;
 		ch->user[i].bytecount = 8192;
 		ch->user[i].tags = ch->numslots;
 		ch->user[i].caps = 0;
 		/* Current settings start out without the caps set below. */
 		ch->curr[i] = ch->user[i];
 		if (ch->pm_level) {
 			ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ |
 			    CTS_SATA_CAPS_H_APST |
 			    CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST;
 		}
 		ch->user[i].caps |= CTS_SATA_CAPS_H_DMAAA;
 	}
 	rid = ch->unit;
 	if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &rid, RF_ACTIVE)))
 		return (ENXIO);
 	/*
 	 * Work out the port capabilities before ahci_dmainit(), which sizes
 	 * the FIS receive area from AHCI_P_CMD_FBSCP in ch->chcaps.  The
 	 * capability bits are expected to read the same before and after the
 	 * port is started.  Controllers older than AHCI 1.2 (0x00010200)
 	 * that advertise FBS are treated as having FBS capable ports.
 	 */
 	ch->chcaps = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	version = ATA_INL(ctlr->r_mem, AHCI_VS);
 	if (version < 0x00010200 && (ctlr->caps & AHCI_CAP_FBSS))
 		ch->chcaps |= AHCI_P_CMD_FBSCP;
 	ahci_dmainit(dev);
 	ahci_slotsalloc(dev);
 	ahci_ch_init(dev);
 	mtx_lock(&ch->mtx);
 	rid = ATA_IRQ_RID;
 	if (!(ch->r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 	    &rid, RF_SHAREABLE | RF_ACTIVE))) {
 		device_printf(dev, "Unable to map interrupt\n");
 		error = ENXIO;
 		goto err0;
 	}
 	if ((bus_setup_intr(dev, ch->r_irq, ATA_INTR_FLAGS, NULL,
 	    ahci_ch_intr_locked, dev, &ch->ih))) {
 		device_printf(dev, "Unable to setup interrupt\n");
 		error = ENXIO;
 		goto err_irq;
 	}
 	if (bootverbose) {
 		device_printf(dev, "Caps:%s%s%s%s%s\n",
 		    (ch->chcaps & AHCI_P_CMD_HPCP) ? " HPCP":"",
 		    (ch->chcaps & AHCI_P_CMD_MPSP) ? " MPSP":"",
 		    (ch->chcaps & AHCI_P_CMD_CPD) ? " CPD":"",
 		    (ch->chcaps & AHCI_P_CMD_ESP) ? " ESP":"",
 		    (ch->chcaps & AHCI_P_CMD_FBSCP) ? " FBSCP":"");
 	}
 	/* Create the device queue for our SIM. */
 	devq = cam_simq_alloc(ch->numslots);
 	if (devq == NULL) {
 		device_printf(dev, "Unable to allocate simq\n");
 		error = ENOMEM;
 		goto err1;
 	}
 	/* Construct SIM entry */
 	ch->sim = cam_sim_alloc(ahciaction, ahcipoll, "ahcich", ch,
 	    device_get_unit(dev), &ch->mtx,
 	    min(2, ch->numslots),
 	    (ch->caps & AHCI_CAP_SNCQ) ? ch->numslots : 0,
 	    devq);
 	if (ch->sim == NULL) {
 		cam_simq_free(devq);
 		device_printf(dev, "unable to allocate sim\n");
 		error = ENOMEM;
 		goto err1;
 	}
 	if (xpt_bus_register(ch->sim, dev, 0) != CAM_SUCCESS) {
 		device_printf(dev, "unable to register xpt bus\n");
 		error = ENXIO;
 		goto err2;
 	}
 	if (xpt_create_path(&ch->path, /*periph*/NULL, cam_sim_path(ch->sim),
 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 		device_printf(dev, "unable to create path\n");
 		error = ENXIO;
 		goto err3;
 	}
 	if (ch->pm_level > 3) {
 		callout_reset(&ch->pm_timer,
 		    (ch->pm_level == 4) ? hz / 1000 : hz / 8,
 		    ahci_ch_pm, dev);
 	}
 	mtx_unlock(&ch->mtx);
 	return (0);
 
 err3:
 	xpt_bus_deregister(cam_sim_path(ch->sim));
 err2:
 	cam_sim_free(ch->sim, /*free_devq*/TRUE);
 err1:
 	/* The handler was installed; remove it before its lock goes away. */
 	bus_teardown_intr(dev, ch->r_irq, ch->ih);
 err_irq:
 	bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq);
 err0:
 	bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem);
 	mtx_unlock(&ch->mtx);
 	mtx_destroy(&ch->mtx);
 	return (error);
 }
 
 /*
  * Detach method of a channel: undo ahci_ch_attach() in reverse order.
  * Always returns 0.
  */
 static int
 ahci_ch_detach(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 
 	/* Report the devices lost and remove the SIM, under the channel lock. */
 	mtx_lock(&ch->mtx);
 	xpt_async(AC_LOST_DEVICE, ch->path, NULL);
 	xpt_free_path(ch->path);
 	xpt_bus_deregister(cam_sim_path(ch->sim));
 	cam_sim_free(ch->sim, /*free_devq*/TRUE);
 	mtx_unlock(&ch->mtx);
 
 	/* The timer exists only for pm_level > 3 (see ahci_ch_attach()). */
 	if (ch->pm_level > 3)
 		callout_drain(&ch->pm_timer);
 	bus_teardown_intr(dev, ch->r_irq, ch->ih);
 	bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq);
 
 	/* Stop the port, then free the slots and the DMA areas. */
 	ahci_ch_deinit(dev);
 	ahci_slotsfree(dev);
 	ahci_dmafini(dev);
 
 	bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem);
 	mtx_destroy(&ch->mtx);
 	return (0);
 }
 
 /*
  * Program a port for operation: mask its interrupts, point it at the
  * command list and FIS receive areas, write the command register with the
  * power management bits selected by ch->pm_level, and start FIS reception
  * and command processing.  Always returns 0.
  */
 static int
 ahci_ch_init(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	uint64_t clb, fb;
 	uint32_t cmd;
 
 	/* Disable port interrupts */
 	ATA_OUTL(ch->r_mem, AHCI_P_IE, 0);
 	/* Setup work areas: each bus address is written as two 32-bit halves. */
 	clb = ch->dma.work_bus + AHCI_CL_OFFSET;
 	ATA_OUTL(ch->r_mem, AHCI_P_CLB, clb & 0xffffffff);
 	ATA_OUTL(ch->r_mem, AHCI_P_CLBU, clb >> 32);
 	fb = ch->dma.rfis_bus;
 	ATA_OUTL(ch->r_mem, AHCI_P_FB, fb & 0xffffffff);
 	ATA_OUTL(ch->r_mem, AHCI_P_FBU, fb >> 32);
 	/* Activate the channel and power/spin up device */
 	cmd = AHCI_P_CMD_ACTIVE | AHCI_P_CMD_POD | AHCI_P_CMD_SUD;
 	if (ch->pm_level == 2 || ch->pm_level == 3)
 		cmd |= AHCI_P_CMD_ALPE;
 	if (ch->pm_level > 2)
 		cmd |= AHCI_P_CMD_ASP;
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd);
 	ahci_start_fr(dev);
 	ahci_start(dev, 1);
 	return (0);
 }
 
 /*
  * Shut a port down: mask its interrupts, stop command processing and FIS
  * reception, request slumber and finally disable the PHY.  The register
  * writes are order dependent.  Always returns 0.
  */
 static int
 ahci_ch_deinit(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 
 	/* Disable port interrupts. */
 	ATA_OUTL(ch->r_mem, AHCI_P_IE, 0);
 	/* Reset command register. */
 	ahci_stop(dev);
 	ahci_stop_fr(dev);
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, 0);
 	/* Allow everything, including partial and slumber modes. */
 	ATA_OUTL(ch->r_mem, AHCI_P_SCTL, 0);
 	/* Request slumber mode transition and give some time to get there. */
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, AHCI_P_CMD_SLUMBER);
 	DELAY(100);
 	/* Disable PHY. */
 	ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE);
 	return (0);
 }
 
 /*
  * Suspend method of a channel: freeze the SIM queue, wait until ch->oslots
  * reads zero, then shut the port down.  Always returns 0.
  */
 static int
 ahci_ch_suspend(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 
 	mtx_lock(&ch->mtx);
 	xpt_freeze_simq(ch->sim, 1);
 	/* Sleep in hz/100 tick steps while slots are still occupied. */
 	while (ch->oslots != 0) {
 		msleep(ch, &ch->mtx, PRIBIO, "ahcisusp", hz/100);
 	}
 	ahci_ch_deinit(dev);
 	mtx_unlock(&ch->mtx);
 	return (0);
 }
 
 /*
  * Resume method of a channel: reprogram and reset the port, then release
  * the SIM queue frozen by ahci_ch_suspend().  Always returns 0.
  */
 static int
 ahci_ch_resume(device_t dev)
 {
 	struct ahci_channel *chan;
 
 	chan = device_get_softc(dev);
 	mtx_lock(&chan->mtx);
 	ahci_ch_init(dev);
 	ahci_reset(dev);
 	xpt_release_simq(chan->sim, TRUE);
 	mtx_unlock(&chan->mtx);
 	return (0);
 }
 
 /* Device class of the per-port "ahcich" channel driver. */
 devclass_t ahcich_devclass;
 /* Lifecycle methods of a channel; it implements no bus methods. */
 static device_method_t ahcich_methods[] = {
 	DEVMETHOD(device_probe,     ahci_ch_probe),
 	DEVMETHOD(device_attach,    ahci_ch_attach),
 	DEVMETHOD(device_detach,    ahci_ch_detach),
 	DEVMETHOD(device_suspend,   ahci_ch_suspend),
 	DEVMETHOD(device_resume,    ahci_ch_resume),
 	{ 0, 0 }
 };
 /* Driver description: name, methods, and softc size. */
 static driver_t ahcich_driver = {
         "ahcich",
         ahcich_methods,
         sizeof(struct ahci_channel)
 };
 /* Channels attach below the "ahci" controller driver. */
 DRIVER_MODULE(ahcich, ahci, ahcich_driver, ahcich_devclass, 0, 0);
 
 /*
  * Result of a DMA map load, filled in by ahci_dmasetupc_cb() and read back
  * by ahci_dmainit().
  */
 struct ahci_dc_cb_args {
 	bus_addr_t maddr;	/* Address of the first segment; set only when error is 0. */
 	int error;		/* Error value passed to the callback. */
 };
 
 /*
  * Create the channel's DMA resources: the command ("work") area, the FIS
  * receive area, and the tag from which per-slot data maps are created.
  *
  * Nothing is returned; on any failure a warning is printed and
  * ahci_dmafini() is called to release what was set up.
  */
 static void
 ahci_dmainit(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct ahci_dc_cb_args cb;
 	size_t rfsize;
 
 	/* Without 64-bit capability all DMA must stay below 4GB. */
 	ch->dma.max_address = (ch->caps & AHCI_CAP_64BIT) ?
 	    BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT;
 	/* Command area. */
 	if (bus_dma_tag_create(bus_get_dma_tag(dev), 1024, 0,
 	    ch->dma.max_address, BUS_SPACE_MAXADDR,
 	    NULL, NULL, AHCI_WORK_SIZE, 1, AHCI_WORK_SIZE,
 	    0, NULL, NULL, &ch->dma.work_tag) != 0)
 		goto fail;
 	if (bus_dmamem_alloc(ch->dma.work_tag, (void **)&ch->dma.work, 0,
 	    &ch->dma.work_map) != 0)
 		goto fail;
 	if (bus_dmamap_load(ch->dma.work_tag, ch->dma.work_map, ch->dma.work,
 	    AHCI_WORK_SIZE, ahci_dmasetupc_cb, &cb, 0) != 0 ||
 	    cb.error != 0) {
 		/* work_bus is still 0, so ahci_dmafini() would not free this. */
 		bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map);
 		goto fail;
 	}
 	ch->dma.work_bus = cb.maddr;
 	/* FIS receive area; it is larger for FBS capable ports. */
 	rfsize = (ch->chcaps & AHCI_P_CMD_FBSCP) ? 4096 : 256;
 	if (bus_dma_tag_create(bus_get_dma_tag(dev), rfsize, 0,
 	    ch->dma.max_address, BUS_SPACE_MAXADDR,
 	    NULL, NULL, rfsize, 1, rfsize,
 	    0, NULL, NULL, &ch->dma.rfis_tag) != 0)
 		goto fail;
 	if (bus_dmamem_alloc(ch->dma.rfis_tag, (void **)&ch->dma.rfis, 0,
 	    &ch->dma.rfis_map) != 0)
 		goto fail;
 	if (bus_dmamap_load(ch->dma.rfis_tag, ch->dma.rfis_map, ch->dma.rfis,
 	    rfsize, ahci_dmasetupc_cb, &cb, 0) != 0 || cb.error != 0) {
 		bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map);
 		goto fail;
 	}
 	ch->dma.rfis_bus = cb.maddr;
 	/* Data area. */
 	if (bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0,
 	    ch->dma.max_address, BUS_SPACE_MAXADDR,
 	    NULL, NULL,
 	    AHCI_SG_ENTRIES * PAGE_SIZE * ch->numslots,
 	    AHCI_SG_ENTRIES, AHCI_PRD_MAX,
 	    0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag) != 0)
 		goto fail;
 	return;
 
 fail:
 	device_printf(dev, "WARNING - DMA initialization failed\n");
 	ahci_dmafini(dev);
 }
 
 /*
  * DMA map load callback: record the error value in the struct
  * ahci_dc_cb_args passed as xsc and, when there is no error, the address of
  * the first segment.
  */
 static void
 ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct ahci_dc_cb_args *result = xsc;
 
 	result->error = error;
 	if (error == 0)
 		result->maddr = segs[0].ds_addr;
 }
 
 /*
  * Release the DMA resources created by ahci_dmainit().
  *
  * Each piece is released only when it is present and is cleared afterwards,
  * so the function can be called on a partially initialized channel.
  */
 static void
 ahci_dmafini(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 
 	if (ch->dma.data_tag) {
 		bus_dma_tag_destroy(ch->dma.data_tag);
 		ch->dma.data_tag = NULL;
 	}
 	/* A non-zero bus address means the area was allocated and loaded. */
 	if (ch->dma.rfis_bus) {
 		bus_dmamap_unload(ch->dma.rfis_tag, ch->dma.rfis_map);
 		bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map);
 		ch->dma.rfis_bus = 0;
 		ch->dma.rfis_map = NULL;
 		ch->dma.rfis = NULL;
 	}
 	/* The FIS receive tag must be destroyed as well, after its memory. */
 	if (ch->dma.rfis_tag) {
 		bus_dma_tag_destroy(ch->dma.rfis_tag);
 		ch->dma.rfis_tag = NULL;
 	}
 	if (ch->dma.work_bus) {
 		bus_dmamap_unload(ch->dma.work_tag, ch->dma.work_map);
 		bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map);
 		ch->dma.work_bus = 0;
 		ch->dma.work_map = NULL;
 		ch->dma.work = NULL;
 	}
 	if (ch->dma.work_tag) {
 		bus_dma_tag_destroy(ch->dma.work_tag);
 		ch->dma.work_tag = NULL;
 	}
 }
 
 /*
  * Initialize the channel's command slots: clear the whole slot array, then
  * give each of the ch->numslots usable slots its number, an empty state, a
  * timeout callout tied to the channel lock and a data DMA map.  A failure
  * to create a map is only reported.
  */
 static void
 ahci_slotsalloc(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct ahci_slot *slot;
 	int n;
 
 	/* Alloc and setup command/dma slots */
 	bzero(ch->slot, sizeof(ch->slot));
 	for (n = 0; n < ch->numslots; n++) {
 		slot = &ch->slot[n];
 		slot->dev = dev;
 		slot->slot = n;
 		slot->state = AHCI_SLOT_EMPTY;
 		slot->ccb = NULL;
 		callout_init_mtx(&slot->timeout, &ch->mtx, 0);
 
 		if (bus_dmamap_create(ch->dma.data_tag, 0,
 		    &slot->dma.data_map) != 0)
 			device_printf(ch->dev, "FAILURE - create data_map\n");
 	}
 }
 
 /*
  * Counterpart of ahci_slotsalloc(): for every usable slot drain the timeout
  * callout and destroy the data DMA map, if one exists.
  */
 static void
 ahci_slotsfree(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct ahci_slot *slot;
 	int n;
 
 	/* Free all dma slots */
 	for (n = 0; n < ch->numslots; n++) {
 		slot = &ch->slot[n];
 		callout_drain(&slot->timeout);
 		if (slot->dma.data_map == NULL)
 			continue;
 		bus_dmamap_destroy(ch->dma.data_tag, slot->dma.data_map);
 		slot->dma.data_map = NULL;
 	}
 }
 
 /*
  * React to a PHY change reported in serr.
  *
  * Nothing happens unless ATA_SE_PHY_CHANGED is set and ch->pm_level is 0.
  * Otherwise the channel is reset and a rescan of the whole bus is
  * requested; the rescan is silently skipped when no CCB or path can be
  * allocated.
  */
 static void
 ahci_phy_check_events(device_t dev, u_int32_t serr)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	u_int32_t ssts;
 	union ccb *ccb;
 
 	if ((serr & ATA_SE_PHY_CHANGED) == 0 || ch->pm_level != 0)
 		return;
 	ssts = ATA_INL(ch->r_mem, AHCI_P_SSTS);
 	if (bootverbose) {
 		/* Online, with a negotiated speed and active: a connect. */
 		if ((ssts & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE &&
 		    (ssts & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED &&
 		    (ssts & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE)
 			device_printf(dev, "CONNECT requested\n");
 		else
 			device_printf(dev, "DISCONNECT requested\n");
 	}
 	ahci_reset(dev);
 	ccb = xpt_alloc_ccb_nowait();
 	if (ccb == NULL)
 		return;
 	if (xpt_create_path(&ccb->ccb_h.path, NULL,
 	    cam_sim_path(ch->sim),
 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 		xpt_free_ccb(ccb);
 		return;
 	}
 	xpt_rescan(ccb);
 }
 
 /*
  * Deliver asynchronous notifications.
  *
  * status holds one bit per target (0-15).  When the controller has
  * AHCI_CAP_SSNTF, status is first written to the port's SNTF register.  Then
  * an AC_SCSI_AEN event is posted for LUN 0 of every target whose bit is set.
  */
 static void
 ahci_notify_events(device_t dev, u_int32_t status)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct cam_path *tpath;
 	int target;
 
 	if (ch->caps & AHCI_CAP_SSNTF)
 		ATA_OUTL(ch->r_mem, AHCI_P_SNTF, status);
 	if (bootverbose)
 		device_printf(dev, "SNTF 0x%04x\n", status);
 	for (target = 0; target < 16; target++) {
 		/* Targets without a pending notification are skipped. */
 		if ((status & (1 << target)) != 0 &&
 		    xpt_create_path(&tpath, NULL,
 		    xpt_path_path_id(ch->path), target, 0) == CAM_REQ_CMP) {
 			xpt_async(AC_SCSI_AEN, tpath, NULL);
 			xpt_free_path(tpath);
 		}
 	}
 }
 
 static void
 ahci_ch_intr_locked(void *data)
 {
 	device_t dev = (device_t)data;
 	struct ahci_channel *ch = device_get_softc(dev);
 
 	mtx_lock(&ch->mtx);
 	ahci_ch_intr(data);
 	mtx_unlock(&ch->mtx);
 }
 
 /*
  * Power management timer handler (armed for pm_level > 3).
  *
  * While ch->numrslots is non-zero nothing is done.  Otherwise the port is
  * asked to enter partial state for pm_level 4 and slumber for any other
  * level.  arg is the channel's device_t.
  */
 static void
 ahci_ch_pm(void *arg)
 {
 	device_t dev = (device_t)arg;
 	struct ahci_channel *ch = device_get_softc(dev);
 	uint32_t cmd;
 
 	if (ch->numrslots != 0)
 		return;
 	cmd = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	cmd |= (ch->pm_level == 4) ? AHCI_P_CMD_PARTIAL : AHCI_P_CMD_SLUMBER;
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd);
 }
 
 /*
  * Channel interrupt handler.
  *
  * Reads and acknowledges the port interrupt status, then:
  *  - gathers asynchronous notification bits (SDB interrupt),
  *  - handles PHY events reported through SERR,
  *  - completes every running slot that is no longer set in CI/SACT,
  *  - on a command error, finishes the affected slots with an error type
  *    derived from the interrupt cause,
  *  - finally forwards collected notifications to CAM.
  *
  * "data" is the channel device_t.  Within this file it is invoked through
  * ahci_ch_intr_locked(), which takes the channel mutex first.
  */
 static void
 ahci_ch_intr(void *data)
 {
 	device_t dev = (device_t)data;
 	struct ahci_channel *ch = device_get_softc(dev);
 	uint32_t istatus, sstatus, cstatus, serr = 0, sntf = 0, ok, err;
 	enum ahci_err_type et;
 	int i, ccs, port;
 
 	/* Read and clear interrupt statuses. */
 	istatus = ATA_INL(ch->r_mem, AHCI_P_IS);
 	if (istatus == 0)
 		return;
 	ATA_OUTL(ch->r_mem, AHCI_P_IS, istatus);
 	/* Read command statuses. */
 	sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT);
 	cstatus = ATA_INL(ch->r_mem, AHCI_P_CI);
 	/*
 	 * Build the notification bitmap (one bit per target) in sntf:
 	 * from the SNTF register if the HBA has one, otherwise from the
 	 * received FIS area at offset 0x58 (one 256-byte area per target
 	 * when FBS is enabled).
 	 */
 	if (istatus & AHCI_P_IX_SDB) {
 		if (ch->caps & AHCI_CAP_SSNTF)
 			sntf = ATA_INL(ch->r_mem, AHCI_P_SNTF);
 		else if (ch->fbs_enabled) {
 			u_int8_t *fis = ch->dma.rfis + 0x58;
 
 			for (i = 0; i < 16; i++) {
 				/* Bit 7 flags a notification; consume it. */
 				if (fis[1] & 0x80) {
 					fis[1] &= 0x7f;
 	    				sntf |= 1 << i;
 	    			}
 	    			fis += 256;
 	    		}
 		} else {
 			u_int8_t *fis = ch->dma.rfis + 0x58;
 
 			/* Low nibble of fis[1] selects the target bit. */
 			if (fis[1] & 0x80)
 				sntf = (1 << (fis[1] & 0x0f));
 		}
 	}
 	/* Process PHY events */
 	if (istatus & (AHCI_P_IX_PC | AHCI_P_IX_PRC | AHCI_P_IX_OF |
 	    AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) {
 		serr = ATA_INL(ch->r_mem, AHCI_P_SERR);
 		if (serr) {
 			/* Acknowledge the error bits before handling them. */
 			ATA_OUTL(ch->r_mem, AHCI_P_SERR, serr);
 			ahci_phy_check_events(dev, serr);
 		}
 	}
 	/* Process command errors */
 	if (istatus & (AHCI_P_IX_OF | AHCI_P_IX_IF |
 	    AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) {
 		/* Current command slot as reported by the port. */
 		ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK)
 		    >> AHCI_P_CMD_CCS_SHIFT;
 //device_printf(dev, "%s ERROR is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x fbs %08x ccs %d\n",
 //    __func__, istatus, cstatus, sstatus, ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD),
 //    serr, ATA_INL(ch->r_mem, AHCI_P_FBS), ccs);
 		/*
 		 * Work out which target the error belongs to:
 		 *   port >= 0  - a single, known target;
 		 *   port == -1 - unknown / not applicable (no FBS, or no
 		 *                target has running commands);
 		 *   port == -2 - FBS is on and more than one target has
 		 *                running commands, so it is ambiguous.
 		 */
 		port = -1;
 		if (ch->fbs_enabled) {
 			uint32_t fbs = ATA_INL(ch->r_mem, AHCI_P_FBS);
 			if (fbs & AHCI_P_FBS_SDE) {
 				/* The FBS register names the device. */
 				port = (fbs & AHCI_P_FBS_DWE)
 				    >> AHCI_P_FBS_DWE_SHIFT;
 			} else {
 				for (i = 0; i < 16; i++) {
 					if (ch->numrslotspd[i] == 0)
 						continue;
 					if (port == -1)
 						port = i;
 					else if (port != i) {
 						port = -2;
 						break;
 					}
 				}
 			}
 		}
 		/* Running slots still pending in hardware are in error. */
 		err = ch->rslots & (cstatus | sstatus);
 	} else {
 		ccs = 0;
 		err = 0;
 		port = -1;
 	}
 	/* Complete all successful commands. */
 	ok = ch->rslots & ~(cstatus | sstatus);
 	for (i = 0; i < ch->numslots; i++) {
 		if ((ok >> i) & 1)
 			ahci_end_transaction(&ch->slot[i], AHCI_ERR_NONE);
 	}
 	/* On error, complete the rest of commands with error statuses. */
 	if (err) {
 		/* Hand a frozen (not yet started) CCB back for requeue. */
 		if (ch->frozen) {
 			union ccb *fccb = ch->frozen;
 			ch->frozen = NULL;
 			fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ;
 			if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) {
 				xpt_freeze_devq(fccb->ccb_h.path, 1);
 				fccb->ccb_h.status |= CAM_DEV_QFRZN;
 			}
 			xpt_done(fccb);
 		}
 		for (i = 0; i < ch->numslots; i++) {
 			/* XXX: requests in loading state. */
 			if (((err >> i) & 1) == 0)
 				continue;
 			/* With a known target, leave other targets alone. */
 			if (port >= 0 &&
 			    ch->slot[i].ccb->ccb_h.target_id != port)
 				continue;
 			if (istatus & AHCI_P_IX_TFE) {
 			    if (port != -2) {
 				/* Task File Error */
 				if (ch->numtslotspd[
 				    ch->slot[i].ccb->ccb_h.target_id] == 0) {
 					/* Untagged operation. */
 					if (i == ccs)
 						et = AHCI_ERR_TFE;
 					else
 						et = AHCI_ERR_INNOCENT;
 				} else {
 					/* Tagged operation. */
 					et = AHCI_ERR_NCQ;
 				}
 			    } else {
 				/* Ambiguous target: fail it and flag fatal. */
 				et = AHCI_ERR_TFE;
 				ch->fatalerr = 1;
 			    }
 			} else if (istatus & AHCI_P_IX_IF) {
 				/* Interface error. */
 				if (ch->numtslots == 0 && i != ccs && port != -2)
 					et = AHCI_ERR_INNOCENT;
 				else
 					et = AHCI_ERR_SATA;
 			} else
 				et = AHCI_ERR_INVALID;
 			ahci_end_transaction(&ch->slot[i], et);
 		}
 		/*
 		 * We can't reinit port if there are some other
 		 * commands active, use resume to complete them.
 		 */
 		if (ch->rslots != 0) 
 			ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | AHCI_P_FBS_DEC);
 	}
 	/* Process NOTIFY events */
 	if (sntf)
 		ahci_notify_events(dev, sntf);
 }
 
 /*
  * Decide whether "ccb" may be started right now.
  *
  * Returns 1 when the request conflicts with commands already occupying the
  * channel (so it must wait) and 0 when it can be issued.
  *
  * Must be called with channel locked.
  */
 static int
 ahci_check_collision(device_t dev, union ccb *ccb)
 {
 	struct ahci_channel *ch;
 	int is_ata, target;
 
 	ch = device_get_softc(dev);
 	target = ccb->ccb_h.target_id;
 	is_ata = (ccb->ccb_h.func_code == XPT_ATA_IO);
 
 	if (is_ata && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) {
 		/* Tagged command while we have no supported tag free. */
 		if ((~ch->oslots & (0xffffffff >>
 		    (32 - ch->curr[target].tags))) == 0)
 			return (1);
 		if (ch->fbs_enabled) {
 			/* FBS: untagged commands active on this target. */
 			if (ch->numrslotspd[target] != 0 &&
 			    ch->numtslotspd[target] == 0)
 				return (1);
 		} else {
 			/* No FBS: untagged commands active on the channel. */
 			if (ch->numrslots != 0 && ch->numtslots == 0)
 				return (1);
 			/* No FBS: tagged commands active for another target. */
 			if (ch->numtslots != 0 &&
 			    ch->taggedtarget != ccb->ccb_h.target_id)
 				return (1);
 		}
 	} else if (ch->fbs_enabled) {
 		/* FBS: untagged command while tagged ones run on the target. */
 		if (ch->numrslotspd[target] != 0 &&
 		    ch->numtslotspd[target] != 0)
 			return (1);
 	} else {
 		/* No FBS: untagged command while tagged ones are active. */
 		if (ch->numrslots != 0 && ch->numtslots != 0)
 			return (1);
 	}
 	/* Atomic command while anything is active. */
 	if (is_ata &&
 	    (ccb->ataio.cmd.flags &
 	    (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT)) &&
 	    ch->numrslots != 0)
 		return (1);
 	/* Some atomic command is already running. */
 	return ((ch->aslots != 0) ? 1 : 0);
 }
 
 /*
  * Claim a free command slot for "ccb", update the channel bookkeeping and
  * either start loading the data buffer for DMA (the load callback is
  * ahci_dmasetprd()) or, for requests without data, execute immediately.
  *
  * The slot search loops until it finds an empty slot.
  *
  * Must be called with channel locked.
  */
 static void
 ahci_begin_transaction(device_t dev, union ccb *ccb)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct ahci_slot *slot;
 	void *data;
 	bus_size_t len;
 	int fpdma, atomic, limit, tag;
 
 	fpdma = (ccb->ccb_h.func_code == XPT_ATA_IO) &&
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA);
 	atomic = (ccb->ccb_h.func_code == XPT_ATA_IO) &&
 	    (ccb->ataio.cmd.flags &
 	    (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT));
 
 	/* Tagged commands may only use as many slots as the device has tags. */
 	limit = fpdma ? ch->curr[ccb->ccb_h.target_id].tags : ch->numslots;
 	/* Choose empty slot, scanning round-robin from the last one used. */
 	for (tag = ch->lastslot; ; tag++) {
 		if (tag >= limit)
 			tag = 0;
 		if (ch->slot[tag].state == AHCI_SLOT_EMPTY)
 			break;
 	}
 	ch->lastslot = tag;
 	/* Occupy chosen slot. */
 	slot = &ch->slot[tag];
 	slot->ccb = ccb;
 	/* Stop PM timer: the channel is about to become busy. */
 	if (ch->numrslots == 0 && ch->pm_level > 3)
 		callout_stop(&ch->pm_timer);
 	/* Update channel stats. */
 	ch->oslots |= (1 << slot->slot);
 	ch->numrslots++;
 	ch->numrslotspd[ccb->ccb_h.target_id]++;
 	if (fpdma) {
 		ch->numtslots++;
 		ch->numtslotspd[ccb->ccb_h.target_id]++;
 		ch->taggedtarget = ccb->ccb_h.target_id;
 	}
 	if (atomic)
 		ch->aslots |= (1 << slot->slot);
 	slot->dma.nsegs = 0;
 	/* Requests that move no data can be executed right away. */
 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) {
 		ahci_execute_transaction(slot);
 		return;
 	}
 	/* Otherwise set up and load the S/G list first. */
 	slot->state = AHCI_SLOT_LOADING;
 	if (ccb->ccb_h.func_code == XPT_ATA_IO) {
 		data = ccb->ataio.data_ptr;
 		len = ccb->ataio.dxfer_len;
 	} else {
 		data = ccb->csio.data_ptr;
 		len = ccb->csio.dxfer_len;
 	}
 	bus_dmamap_load(ch->dma.data_tag, slot->dma.data_map,
 	    data, len, ahci_dmasetprd, slot, 0);
 }
 
 /*
  * busdma load callback: copy the DMA segment list into the slot's PRD
  * table, sync the data map for the transfer direction and execute the
  * request.  On a load error the request is finished as invalid.
  *
  * Locked by busdma engine.
  */
 static void
 ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct ahci_slot *slot;
 	struct ahci_channel *ch;
 	struct ahci_cmd_tab *ctp;
 	struct ahci_dma_prd *prd;
 	int n;
 
 	slot = arg;
 	ch = device_get_softc(slot->dev);
 	if (error) {
 		device_printf(slot->dev, "DMA load error\n");
 		ahci_end_transaction(slot, AHCI_ERR_INVALID);
 		return;
 	}
 	KASSERT(nsegs <= AHCI_SG_ENTRIES, ("too many DMA segment entries\n"));
 	/* Locate this slot's command table inside the work area. */
 	ctp = (struct ahci_cmd_tab *)
 	    (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot));
 	/* Fill S/G table; the byte count is stored as length minus one. */
 	prd = ctp->prd_tab;
 	for (n = 0; n < nsegs; n++, prd++) {
 		prd->dba = htole64(segs[n].ds_addr);
 		prd->dbc = htole32((segs[n].ds_len - 1) & AHCI_PRD_MASK);
 	}
 	slot->dma.nsegs = nsegs;
 	bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map,
 	    ((slot->ccb->ccb_h.flags & CAM_DIR_IN) ?
 	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE));
 	ahci_execute_transaction(slot);
 }
 
 /* Must be called with channel locked. */
 static void
 ahci_execute_transaction(struct ahci_slot *slot)
 {
 	device_t dev = slot->dev;
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct ahci_cmd_tab *ctp;
 	struct ahci_cmd_list *clp;
 	union ccb *ccb = slot->ccb;
 	int port = ccb->ccb_h.target_id & 0x0f;
 	int fis_size, i;
 	uint8_t *fis = ch->dma.rfis + 0x40;
 	uint8_t val;
 
 	/* Get a piece of the workspace for this request */
 	ctp = (struct ahci_cmd_tab *)
 		(ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot));
 	/* Setup the FIS for this request */
 	if (!(fis_size = ahci_setup_fis(dev, ctp, ccb, slot->slot))) {
 		device_printf(ch->dev, "Setting up SATA FIS failed\n");
 		ahci_end_transaction(slot, AHCI_ERR_INVALID);
 		return;
 	}
 	/* Setup the command list entry */
 	clp = (struct ahci_cmd_list *)
 	    (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot));
 	clp->cmd_flags = htole16(
 		    (ccb->ccb_h.flags & CAM_DIR_OUT ? AHCI_CMD_WRITE : 0) |
 		    (ccb->ccb_h.func_code == XPT_SCSI_IO ?
 		     (AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH) : 0) |
 		    (fis_size / sizeof(u_int32_t)) |
 		    (port << 12));
 	clp->prd_length = htole16(slot->dma.nsegs);
 	/* Special handling for Soft Reset command. */
 	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) {
 		if (ccb->ataio.cmd.control & ATA_A_RESET) {
 			/* Kick controller into sane state */
 			ahci_stop(dev);
 			ahci_clo(dev);
 			ahci_start(dev, 0);
 			clp->cmd_flags |= AHCI_CMD_RESET | AHCI_CMD_CLR_BUSY;
 		} else {
 			/* Prepare FIS receive area for check. */
 			for (i = 0; i < 20; i++)
 				fis[i] = 0xff;
 		}
 	}
 	clp->bytecount = 0;
 	clp->cmd_table_phys = htole64(ch->dma.work_bus + AHCI_CT_OFFSET +
 				  (AHCI_CT_SIZE * slot->slot));
 	bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map,
 	    BUS_DMASYNC_PREREAD);
 	/* Set ACTIVE bit for NCQ commands. */
 	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) {
 		ATA_OUTL(ch->r_mem, AHCI_P_SACT, 1 << slot->slot);
 	}
 	/* If FBS is enabled, set PMP port. */
 	if (ch->fbs_enabled) {
 		ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN |
 		    (port << AHCI_P_FBS_DEV_SHIFT));
 	}
 	/* Issue command to the controller. */
 	slot->state = AHCI_SLOT_RUNNING;
 	ch->rslots |= (1 << slot->slot);
 	ATA_OUTL(ch->r_mem, AHCI_P_CI, (1 << slot->slot));
 	/* Device reset commands doesn't interrupt. Poll them. */
 	if (ccb->ccb_h.func_code == XPT_ATA_IO &&
 	    (ccb->ataio.cmd.command == ATA_DEVICE_RESET ||
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL))) {
 		int count, timeout = ccb->ccb_h.timeout;
 		enum ahci_err_type et = AHCI_ERR_NONE;
 
 		for (count = 0; count < timeout; count++) {
 			DELAY(1000);
 			if (!(ATA_INL(ch->r_mem, AHCI_P_CI) & (1 << slot->slot)))
 				break;
 			if (ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) {
 				device_printf(ch->dev,
 				    "Poll error on slot %d, TFD: %04x\n",
 				    slot->slot, ATA_INL(ch->r_mem, AHCI_P_TFD));
 				et = AHCI_ERR_TFE;
 				break;
 			}
 			/* Workaround for ATI SB600/SB700 chipsets. */
 			if (ccb->ccb_h.target_id == 15 &&
 			    pci_get_vendor(device_get_parent(dev)) == 0x1002 &&
 			    (ATA_INL(ch->r_mem, AHCI_P_IS) & AHCI_P_IX_IPM)) {
 				et = AHCI_ERR_TIMEOUT;
 				break;
 			}
 		}
 		if (timeout && (count >= timeout)) {
 			device_printf(ch->dev,
 			    "Poll timeout on slot %d\n", slot->slot);
 			device_printf(dev, "is %08x cs %08x ss %08x "
 			    "rs %08x tfd %02x serr %08x\n",
 			    ATA_INL(ch->r_mem, AHCI_P_IS),
 			    ATA_INL(ch->r_mem, AHCI_P_CI),
 			    ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots,
 			    ATA_INL(ch->r_mem, AHCI_P_TFD),
 			    ATA_INL(ch->r_mem, AHCI_P_SERR));
 			et = AHCI_ERR_TIMEOUT;
 		}
 		/* Marvell controllers do not wait for readyness. */
 		if ((ch->quirks & AHCI_Q_NOBSYRES) &&
 		    (ccb->ccb_h.func_code == XPT_ATA_IO) &&
 		    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) &&
 		    (ccb->ataio.cmd.control & ATA_A_RESET) == 0) {
 			while ((val = fis[2]) & (ATA_S_BUSY | ATA_S_DRQ)) {
 				DELAY(1000);
 				if (count++ >= timeout) {
 					device_printf(dev, "device is not "
 					    "ready after soft-reset: "
 					    "tfd = %08x\n", val);
 	    				et = AHCI_ERR_TIMEOUT;
 	    				break;
 				}
 			} 
 		}
 		ahci_end_transaction(slot, et);
 		/* Kick controller into sane state and enable FBS. */
 		if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
 		    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) &&
 		    (ccb->ataio.cmd.control & ATA_A_RESET) == 0) {
 			ahci_stop(ch->dev);
 			ahci_start(ch->dev, 1);
 		}
 		return;
 	}
 	/* Start command execution timeout */
 	callout_reset(&slot->timeout, (int)ccb->ccb_h.timeout * hz / 2000,
 	    (timeout_t*)ahci_timeout, slot);
 	return;
 }
 
 /*
  * Finish every slot that is in the running state (or later) with a
  * timeout error.
  *
  * Must be called with channel locked.
  */
 static void
 ahci_process_timeout(device_t dev)
 {
 	struct ahci_channel *ch;
 	struct ahci_slot *slot;
 	int n;
 
 	ch = device_get_softc(dev);
 	mtx_assert(&ch->mtx, MA_OWNED);
 	for (n = 0; n < ch->numslots; n++) {
 		slot = &ch->slot[n];
 		/* Only slots with a running request are affected. */
 		if (slot->state >= AHCI_SLOT_RUNNING)
 			ahci_end_transaction(slot, AHCI_ERR_TIMEOUT);
 	}
 }
 
 /*
  * Restart the timeout callout of every running slot that is currently
  * marked as timed out in ch->toslots.
  *
  * Must be called with channel locked.
  */
 static void
 ahci_rearm_timeout(device_t dev)
 {
 	struct ahci_channel *ch;
 	struct ahci_slot *slot;
 	int n;
 
 	ch = device_get_softc(dev);
 	mtx_assert(&ch->mtx, MA_OWNED);
 	for (n = 0; n < ch->numslots; n++) {
 		slot = &ch->slot[n];
 		/* Needs a running request that has already timed out once. */
 		if (slot->state >= AHCI_SLOT_RUNNING &&
 		    (ch->toslots & (1 << n)) != 0) {
 			callout_reset(&slot->timeout,
 			    (int)slot->ccb->ccb_h.timeout * hz / 2000,
 			    (timeout_t*)ahci_timeout, slot);
 		}
 	}
 }
 
 /*
  * Per-slot timeout callout handler.
  *
  * The callout is armed with half of the CCB timeout, so the handler runs
  * in two stages: the first expiry only records whether the command has
  * started executing and re-arms the callout; a later expiry on a slot
  * already seen executing is treated as a real timeout.
  *
  * Without FBS the timed-out command is finished with AHCI_ERR_TIMEOUT and
  * all other running commands as AHCI_ERR_INNOCENT.  With FBS the slot is
  * only marked in ch->toslots, and timeouts are processed once every
  * running slot has timed out.
  *
  * Locked by callout mechanism.
  */
 static void
 ahci_timeout(struct ahci_slot *slot)
 {
 	device_t dev = slot->dev;
 	struct ahci_channel *ch = device_get_softc(dev);
 	uint32_t sstatus;
 	int ccs;
 	int i;
 
 	/* Check for stale timeout. */
 	if (slot->state < AHCI_SLOT_RUNNING)
 		return;
 
 	/* Check if slot was not being executed last time we checked. */
 	if (slot->state < AHCI_SLOT_EXECUTING) {
 		/* Check if slot started executing. */
 		sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT);
 		ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK)
 		    >> AHCI_P_CMD_CCS_SHIFT;
 		/* With FBS enabled the slot is always assumed to be executing. */
 		if ((sstatus & (1 << slot->slot)) != 0 || ccs == slot->slot ||
 		    ch->fbs_enabled)
 			slot->state = AHCI_SLOT_EXECUTING;
 
 		/* Give the command another interval before declaring timeout. */
 		callout_reset(&slot->timeout,
 		    (int)slot->ccb->ccb_h.timeout * hz / 2000,
 		    (timeout_t*)ahci_timeout, slot);
 		return;
 	}
 
 	device_printf(dev, "Timeout on slot %d\n", slot->slot);
 	device_printf(dev, "is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x\n",
 	    ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI),
 	    ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots,
 	    ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR));
 
 	/* Handle frozen command: hand it back to CAM for requeue. */
 	if (ch->frozen) {
 		union ccb *fccb = ch->frozen;
 		ch->frozen = NULL;
 		fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ;
 		if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) {
 			xpt_freeze_devq(fccb->ccb_h.path, 1);
 			fccb->ccb_h.status |= CAM_DEV_QFRZN;
 		}
 		xpt_done(fccb);
 	}
 	if (!ch->fbs_enabled) {
 		/* Without FBS we know real timeout source. */
 		ch->fatalerr = 1;
 		/* Handle command with timeout. */
 		ahci_end_transaction(&ch->slot[slot->slot], AHCI_ERR_TIMEOUT);
 		/* Handle the rest of commands. */
 		for (i = 0; i < ch->numslots; i++) {
 			/* Do we have a running request on slot? */
 			if (ch->slot[i].state < AHCI_SLOT_RUNNING)
 				continue;
 			ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT);
 		}
 	} else {
 		/* With FBS we wait for other commands timeout and pray. */
 		/* Freeze the SIM queue on the first timed-out slot. */
 		if (ch->toslots == 0)
 			xpt_freeze_simq(ch->sim, 1);
 		ch->toslots |= (1 << slot->slot);
 		/* Once every running slot has timed out, fail them all. */
 		if ((ch->rslots & ~ch->toslots) == 0)
 			ahci_process_timeout(dev);
 		else
 			device_printf(dev, " ... waiting for slots %08x\n",
 			    ch->rslots & ~ch->toslots);
 	}
 }
 
 /*
  * Finish the request occupying "slot" with error type "et".
  *
  * Steps, in order:
  *  - collect result registers and residual counts into the CCB,
  *  - sync and unload the data DMA map,
  *  - translate "et" into a CAM status (freezing queues where needed),
  *  - release the slot and update channel counters,
  *  - chain the second half of a soft reset, hand the CCB to the READ LOG
  *    handler, put an NCQ error on hold, or complete the CCB to CAM,
  *  - restart a frozen CCB if it no longer collides,
  *  - when the channel went idle: reset, reinit or issue READ LOG as
  *    required, and schedule the power-management timer.
  *
  * Must be called with channel locked.
  */
 static void
 ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et)
 {
 	device_t dev = slot->dev;
 	struct ahci_channel *ch = device_get_softc(dev);
 	union ccb *ccb = slot->ccb;
 	struct ahci_cmd_list *clp;
 	int lastto;
 
 	bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	clp = (struct ahci_cmd_list *)
 	    (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot));
 	/* Read result registers to the result struct
 	 * May be incorrect if several commands finished same time,
 	 * so read only when sure or have to.
 	 */
 	if (ccb->ccb_h.func_code == XPT_ATA_IO) {
 		struct ata_res *res = &ccb->ataio.res;
 
 		if ((et == AHCI_ERR_TFE) ||
 		    (ccb->ataio.cmd.flags & CAM_ATAIO_NEEDRESULT)) {
 			u_int8_t *fis = ch->dma.rfis + 0x40;
 
 			bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map,
 			    BUS_DMASYNC_POSTREAD);
 			if (ch->fbs_enabled) {
 				/* With FBS each target has its own 256-byte area. */
 				fis += ccb->ccb_h.target_id * 256;
 				res->status = fis[2];
 				res->error = fis[3];
 			} else {
 				/* Low byte of TFD is status, next byte is error. */
 				uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD);
 
 				res->status = tfd;
 				res->error = tfd >> 8;
 			}
 			res->lba_low = fis[4];
 			res->lba_mid = fis[5];
 			res->lba_high = fis[6];
 			res->device = fis[7];
 			res->lba_low_exp = fis[8];
 			res->lba_mid_exp = fis[9];
 			res->lba_high_exp = fis[10];
 			res->sector_count = fis[12];
 			res->sector_count_exp = fis[13];
 		} else
 			bzero(res, sizeof(*res));
 		/* Residual is only computed for non-NCQ data transfers. */
 		if ((ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) == 0 &&
 		    (ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
 			ccb->ataio.resid =
 			    ccb->ataio.dxfer_len - le32toh(clp->bytecount);
 		}
 	} else {
 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
 			ccb->csio.resid =
 			    ccb->csio.dxfer_len - le32toh(clp->bytecount);
 		}
 	}
 	/* Release the data buffer mapping. */
 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
 		bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map,
 		    (ccb->ccb_h.flags & CAM_DIR_IN) ?
 		    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(ch->dma.data_tag, slot->dma.data_map);
 	}
 	/* Remember slots that ended in error; cleared by ahci_stop(). */
 	if (et != AHCI_ERR_NONE)
 		ch->eslots |= (1 << slot->slot);
 	/* In case of error, freeze device for proper recovery. */
 	if ((et != AHCI_ERR_NONE) && (!ch->readlog) &&
 	    !(ccb->ccb_h.status & CAM_DEV_QFRZN)) {
 		xpt_freeze_devq(ccb->ccb_h.path, 1);
 		ccb->ccb_h.status |= CAM_DEV_QFRZN;
 	}
 	/* Set proper result status. */
 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
 	switch (et) {
 	case AHCI_ERR_NONE:
 		ccb->ccb_h.status |= CAM_REQ_CMP;
 		if (ccb->ccb_h.func_code == XPT_SCSI_IO)
 			ccb->csio.scsi_status = SCSI_STATUS_OK;
 		break;
 	case AHCI_ERR_INVALID:
 		ch->fatalerr = 1;
 		ccb->ccb_h.status |= CAM_REQ_INVALID;
 		break;
 	case AHCI_ERR_INNOCENT:
 		/* Not this command's fault: ask CAM to requeue it. */
 		ccb->ccb_h.status |= CAM_REQUEUE_REQ;
 		break;
 	case AHCI_ERR_TFE:
 	case AHCI_ERR_NCQ:
 		if (ccb->ccb_h.func_code == XPT_SCSI_IO) {
 			ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
 			ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND;
 		} else {
 			ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR;
 		}
 		break;
 	case AHCI_ERR_SATA:
 		ch->fatalerr = 1;
 		if (!ch->readlog) {
 			xpt_freeze_simq(ch->sim, 1);
 			ccb->ccb_h.status &= ~CAM_STATUS_MASK;
 			ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 		}
 		ccb->ccb_h.status |= CAM_UNCOR_PARITY;
 		break;
 	case AHCI_ERR_TIMEOUT:
 		if (!ch->readlog) {
 			xpt_freeze_simq(ch->sim, 1);
 			ccb->ccb_h.status &= ~CAM_STATUS_MASK;
 			ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 		}
 		ccb->ccb_h.status |= CAM_CMD_TIMEOUT;
 		break;
 	default:
 		ch->fatalerr = 1;
 		ccb->ccb_h.status |= CAM_REQ_CMP_ERR;
 	}
 	/* Free slot. */
 	ch->oslots &= ~(1 << slot->slot);
 	ch->rslots &= ~(1 << slot->slot);
 	ch->aslots &= ~(1 << slot->slot);
 	slot->state = AHCI_SLOT_EMPTY;
 	slot->ccb = NULL;
 	/* Update channel stats. */
 	ch->numrslots--;
 	ch->numrslotspd[ccb->ccb_h.target_id]--;
 	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) {
 		ch->numtslots--;
 		ch->numtslotspd[ccb->ccb_h.target_id]--;
 	}
 	/* Cancel timeout state if request completed normally. */
 	if (et != AHCI_ERR_TIMEOUT) {
 		/* Release the SIM queue when the last timed-out slot ends. */
 		lastto = (ch->toslots == (1 << slot->slot));
 		ch->toslots &= ~(1 << slot->slot);
 		if (lastto)
 			xpt_release_simq(ch->sim, TRUE);
 	}
 	/* If it was first request of reset sequence and there is no error,
 	 * proceed to second request. */
 	if ((ccb->ccb_h.func_code == XPT_ATA_IO) &&
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) &&
 	    (ccb->ataio.cmd.control & ATA_A_RESET) &&
 	    et == AHCI_ERR_NONE) {
 		ccb->ataio.cmd.control &= ~ATA_A_RESET;
 		ahci_begin_transaction(dev, ccb);
 		return;
 	}
 	/* If it was our READ LOG command - process it. */
 	if (ch->readlog) {
 		ahci_process_read_log(dev, ccb);
 	/* If it was NCQ command error, put result on hold. */
 	} else if (et == AHCI_ERR_NCQ) {
 		ch->hold[slot->slot] = ccb;
 		ch->numhslots++;
 	} else
 		xpt_done(ccb);
 	/* Unfreeze frozen command. */
 	if (ch->frozen && !ahci_check_collision(dev, ch->frozen)) {
 		union ccb *fccb = ch->frozen;
 		ch->frozen = NULL;
 		ahci_begin_transaction(dev, fccb);
 		xpt_release_simq(ch->sim, TRUE);
 	}
 	/* If we have no other active commands, ... */
 	if (ch->rslots == 0) {
 		/* if there was fatal error - reset port. */
 		if (ch->toslots != 0 || ch->fatalerr) {
 			ahci_reset(dev);
 		} else {
 			/* if we have slots in error, we can reinit port. */
 			if (ch->eslots != 0) {
 				ahci_stop(dev);
 				ahci_start(dev, 1);
 			}
 			/* if there commands on hold, we can do READ LOG. */
 			if (!ch->readlog && ch->numhslots)
 				ahci_issue_read_log(dev);
 		}
 	/* If all the rest of commands are in timeout - give them chance. */
 	} else if ((ch->rslots & ~ch->toslots) == 0 &&
 	    et != AHCI_ERR_TIMEOUT)
 		ahci_rearm_timeout(dev);
 	/* Start PM timer. */
 	if (ch->numrslots == 0 && ch->pm_level > 3 &&
 	    (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) {
 		/* pm_level 4 uses a short delay, other levels a longer one. */
 		callout_schedule(&ch->pm_timer,
 		    (ch->pm_level == 4) ? hz / 1000 : hz / 8);
 	}
 }
 
 /*
  * Issue a READ LOG EXT (log page 0x10) to fetch error details for NCQ
  * commands that are on hold in ch->hold[].
  *
  * The CCB header of one held command is reused for the internal request.
  * ch->readlog is set only once the request is fully built, because
  * ahci_end_transaction() treats the next completed CCB as the READ LOG
  * command whenever the flag is set.  If the request cannot be allocated,
  * the held commands are completed with the error status they already
  * carry instead of being left on hold indefinitely.
  */
 static void
 ahci_issue_read_log(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	union ccb *ccb;
 	struct ccb_ataio *ataio;
 	int i;
 
 	/* Find some held command. */
 	for (i = 0; i < ch->numslots; i++) {
 		if (ch->hold[i])
 			break;
 	}
 	/* Nothing on hold: nothing to query (and no header to borrow). */
 	if (i >= ch->numslots)
 		return;
 	ccb = xpt_alloc_ccb_nowait();
 	if (ccb == NULL) {
 		device_printf(dev, "Unable allocate READ LOG command\n");
 		goto fail;
 	}
 	ccb->ccb_h = ch->hold[i]->ccb_h;	/* Reuse old header. */
 	ccb->ccb_h.func_code = XPT_ATA_IO;
 	ccb->ccb_h.flags = CAM_DIR_IN;
 	ccb->ccb_h.timeout = 1000;	/* 1s should be enough. */
 	ataio = &ccb->ataio;
 	ataio->data_ptr = malloc(512, M_AHCI, M_NOWAIT);
 	if (ataio->data_ptr == NULL) {
 		xpt_free_ccb(ccb);
 		device_printf(dev,
 		    "Unable allocate memory for READ LOG command\n");
 		goto fail;
 	}
 	ataio->dxfer_len = 512;
 	bzero(&ataio->cmd, sizeof(ataio->cmd));
 	ataio->cmd.flags = CAM_ATAIO_48BIT;
 	ataio->cmd.command = 0x2F;	/* READ LOG EXT */
 	ataio->cmd.sector_count = 1;
 	ataio->cmd.sector_count_exp = 0;
 	ataio->cmd.lba_low = 0x10;
 	ataio->cmd.lba_mid = 0;
 	ataio->cmd.lba_mid_exp = 0;
 	/* From here on the next completion belongs to READ LOG EXT. */
 	ch->readlog = 1;
 	/* Freeze SIM while doing READ LOG EXT. */
 	xpt_freeze_simq(ch->sim, 1);
 	ahci_begin_transaction(dev, ccb);
 	return;
 
 fail:
 	/* We can't fetch details -- complete held commands as they are. */
 	for (i = 0; i < ch->numslots; i++) {
 		if (!ch->hold[i])
 			continue;
 		xpt_done(ch->hold[i]);
 		ch->hold[i] = NULL;
 		ch->numhslots--;
 	}
 }
 
 /*
  * Completion handler for the driver's own READ LOG EXT request.
  *
  * On success with bit 7 of data[0] clear, the low five bits of data[0]
  * name the failed tag: the held command in that slot gets the logged
  * result registers, all other held commands are marked for requeue.  In
  * every other case the held commands are completed unchanged.  Finally
  * the internal CCB and its buffer are freed and the SIM queue frozen by
  * ahci_issue_read_log() is released.
  */
 static void
 ahci_process_read_log(device_t dev, union ccb *ccb)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	uint8_t *data;
 	struct ata_res *res;
 	int i;
 
 	ch->readlog = 0;
 
 	data = ccb->ataio.data_ptr;
 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP &&
 	    (data[0] & 0x80) == 0) {
 		for (i = 0; i < ch->numslots; i++) {
 			if (!ch->hold[i])
 				continue;
 			if ((data[0] & 0x1F) == i) {
 				/* This is the command the log describes. */
 				res = &ch->hold[i]->ataio.res;
 				res->status = data[2];
 				res->error = data[3];
 				res->lba_low = data[4];
 				res->lba_mid = data[5];
 				res->lba_high = data[6];
 				res->device = data[7];
 				res->lba_low_exp = data[8];
 				res->lba_mid_exp = data[9];
 				res->lba_high_exp = data[10];
 				res->sector_count = data[12];
 				res->sector_count_exp = data[13];
 			} else {
 				/* Collateral damage: let CAM requeue it. */
 				ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK;
 				ch->hold[i]->ccb_h.status |= CAM_REQUEUE_REQ;
 			}
 			xpt_done(ch->hold[i]);
 			ch->hold[i] = NULL;
 			ch->numhslots--;
 		}
 	} else {
 		/*
 		 * Either the READ LOG EXT itself failed, or it completed
 		 * with bit 7 of data[0] set.  The old code tested for the
 		 * bit being clear here, which can never be true in this
 		 * branch, so the second message was never printed.
 		 */
 		if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
 			device_printf(dev, "Error while READ LOG EXT\n");
 		else
 			device_printf(dev, "Non-queued command error in READ LOG EXT\n");
 		for (i = 0; i < ch->numslots; i++) {
 			if (!ch->hold[i])
 				continue;
 			xpt_done(ch->hold[i]);
 			ch->hold[i] = NULL;
 			ch->numhslots--;
 		}
 	}
 	free(ccb->ataio.data_ptr, M_AHCI);
 	xpt_free_ccb(ccb);
 	xpt_release_simq(ch->sim, TRUE);
 }
 
 /*
  * Start command processing on the channel.
  *
  * Clears SERR and pending interrupts, configures FIS-based switching when
  * the port supports it (enabled only if "fbs" is non-zero and a port
  * multiplier is present), then sets the ST bit.  The PMA bit is set or
  * cleared according to ch->pm_present.
  */
 static void
 ahci_start(device_t dev, int fbs)
 {
 	struct ahci_channel *ch;
 	u_int32_t cmd;
 
 	ch = device_get_softc(dev);
 	/* Clear SATA error register */
 	ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xFFFFFFFF);
 	/* Clear any interrupts pending on this channel */
 	ATA_OUTL(ch->r_mem, AHCI_P_IS, 0xFFFFFFFF);
 	/* Configure FIS-based switching if supported. */
 	if (ch->chcaps & AHCI_P_CMD_FBSCP) {
 		if (fbs && ch->pm_present) {
 			ch->fbs_enabled = 1;
 			ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN);
 		} else {
 			ch->fbs_enabled = 0;
 			ATA_OUTL(ch->r_mem, AHCI_P_FBS, 0);
 		}
 	}
 	/* Start operations on this channel */
 	cmd = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	cmd &= ~AHCI_P_CMD_PMA;
 	cmd |= AHCI_P_CMD_ST;
 	if (ch->pm_present)
 		cmd |= AHCI_P_CMD_PMA;
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd);
 }
 
 /*
  * Stop command processing on the channel: clear the ST bit and poll in
  * 1 ms steps until the CR bit drops.  Logs a message and gives up if it
  * takes too long.  The error-slot mask is cleared in either case.
  */
 static void
 ahci_stop(device_t dev)
 {
 	struct ahci_channel *ch;
 	u_int32_t cmd;
 	int waited;
 
 	ch = device_get_softc(dev);
 	/* Kill all activity on this channel */
 	cmd = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_ST);
 	/* Wait for activity stop. */
 	for (waited = 0; ; waited++) {
 		DELAY(1000);
 		if (waited > 1000) {
 			device_printf(dev, "stopping AHCI engine failed\n");
 			break;
 		}
 		if ((ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CR) == 0)
 			break;
 	}
 	ch->eslots = 0;
 }
 
 /*
  * Issue a Command List Override if the controller supports it, then poll
  * in 1 ms steps until the CLO bit clears.  Logs a message if it does not
  * clear in time.
  */
 static void
 ahci_clo(device_t dev)
 {
 	struct ahci_channel *ch;
 	u_int32_t cmd;
 	int waited;
 
 	ch = device_get_softc(dev);
 	/* Nothing to do when CLO is not supported. */
 	if ((ch->caps & AHCI_CAP_SCLO) == 0)
 		return;
 	cmd = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	cmd |= AHCI_P_CMD_CLO;
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd);
 	for (waited = 0; ; waited++) {
 		DELAY(1000);
 		if (waited > 1000) {
 			device_printf(dev, "executing CLO failed\n");
 			break;
 		}
 		if ((ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CLO) == 0)
 			break;
 	}
 }
 
 /*
  * Stop FIS reception on the channel: clear the FRE bit and poll in 1 ms
  * steps until the FR bit drops.  Logs a message and gives up if it takes
  * too long.
  */
 static void
 ahci_stop_fr(device_t dev)
 {
 	struct ahci_channel *ch;
 	u_int32_t cmd;
 	int waited;
 
 	ch = device_get_softc(dev);
 	/* Kill all FIS reception on this channel */
 	cmd = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_FRE);
 	/* Wait for FIS reception stop. */
 	for (waited = 0; ; waited++) {
 		DELAY(1000);
 		if (waited > 1000) {
 			device_printf(dev, "stopping AHCI FR engine failed\n");
 			break;
 		}
 		if ((ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_FR) == 0)
 			break;
 	}
 }
 
 /* Start FIS reception on the channel by setting the FRE bit. */
 static void
 ahci_start_fr(device_t dev)
 {
 	struct ahci_channel *ch;
 	u_int32_t cmd;
 
 	ch = device_get_softc(dev);
 	cmd = ATA_INL(ch->r_mem, AHCI_P_CMD);
 	cmd |= AHCI_P_CMD_FRE;
 	ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd);
 }
 
 /*
  * Poll the task file data register in 1 ms steps until both BUSY and DRQ
  * are clear.
  *
  * "t" is the wait limit, counted in polling steps.  Returns 0 once the
  * device is ready, or EBUSY (after logging the last TFD value) when the
  * limit is exceeded.
  */
 static int
 ahci_wait_ready(device_t dev, int t)
 {
 	struct ahci_channel *ch;
 	uint32_t tfd;
 	int elapsed;
 
 	ch = device_get_softc(dev);
 	for (elapsed = 0; ; elapsed++) {
 		tfd = ATA_INL(ch->r_mem, AHCI_P_TFD);
 		if ((tfd & (ATA_S_BUSY | ATA_S_DRQ)) == 0)
 			break;
 		DELAY(1000);
 		if (elapsed > t) {
 			device_printf(dev, "device is not ready (timeout %dms) "
 			    "tfd = %08x\n", t, tfd);
 			return (EBUSY);
 		}
 	}
 	if (bootverbose)
 		device_printf(dev, "ready wait time=%dms\n", elapsed);
 	return (0);
 }
 
 /*
  * Reset the channel.
  *
  * With the SIM queue frozen for the duration: requeues a frozen CCB, stops
  * the engine, finishes all running commands as AHCI_ERR_INNOCENT,
  * completes commands on hold, clears the error/timeout bookkeeping,
  * notifies CAM of the bus reset and resets the PHY.  If a device is found
  * the engine is restarted and the full interrupt set is enabled; otherwise
  * only the CPD, PRC and PC interrupts are enabled.
  */
 static void
 ahci_reset(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev));
 	int i;
 
 	/* Keep CAM from queueing new requests until the reset is done. */
 	xpt_freeze_simq(ch->sim, 1);
 	if (bootverbose)
 		device_printf(dev, "AHCI reset...\n");
 	/* Requeue frozen command. */
 	if (ch->frozen) {
 		union ccb *fccb = ch->frozen;
 		ch->frozen = NULL;
 		fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ;
 		if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) {
 			xpt_freeze_devq(fccb->ccb_h.path, 1);
 			fccb->ccb_h.status |= CAM_DEV_QFRZN;
 		}
 		xpt_done(fccb);
 	}
 	/* Kill the engine and requeue all running commands. */
 	ahci_stop(dev);
 	for (i = 0; i < ch->numslots; i++) {
 		/* Do we have a running request on slot? */
 		if (ch->slot[i].state < AHCI_SLOT_RUNNING)
 			continue;
 		/* XXX; Commands in loading state. */
 		ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT);
 	}
 	/* Complete commands that were on hold, with their current status. */
 	for (i = 0; i < ch->numslots; i++) {
 		if (!ch->hold[i])
 			continue;
 		xpt_done(ch->hold[i]);
 		ch->hold[i] = NULL;
 		ch->numhslots--;
 	}
 	/* Drop the SIM queue freeze taken when timeouts started. */
 	if (ch->toslots != 0)
 		xpt_release_simq(ch->sim, TRUE);
 	ch->eslots = 0;
 	ch->toslots = 0;
 	ch->fatalerr = 0;
 	/* Tell the XPT about the event */
 	xpt_async(AC_BUS_RESET, ch->path, NULL);
 	/* Disable port interrupts */
 	ATA_OUTL(ch->r_mem, AHCI_P_IE, 0);
 	/* Reset and reconnect PHY, */
 	if (!ahci_sata_phy_reset(dev)) {
 		if (bootverbose)
 			device_printf(dev,
 			    "AHCI reset done: phy reset found no device\n");
 		ch->devices = 0;
 		/* Enable wanted port interrupts */
 		ATA_OUTL(ch->r_mem, AHCI_P_IE,
 		    (AHCI_P_IX_CPD | AHCI_P_IX_PRC | AHCI_P_IX_PC));
 		xpt_release_simq(ch->sim, TRUE);
 		return;
 	}
 	/* Wait for clearing busy status. */
 	/* If the device stays busy, force it with a command list override. */
 	if (ahci_wait_ready(dev, 15000))
 		ahci_clo(dev);
 	ahci_start(dev, 1);
 	ch->devices = 1;
 	/* Enable wanted port interrupts */
 	/*
 	 * PRC/PC are enabled only when pm_level is 0; SDB and DHR are left
 	 * out when ctlr->ccc is set.
 	 */
 	ATA_OUTL(ch->r_mem, AHCI_P_IE,
 	     (AHCI_P_IX_CPD | AHCI_P_IX_TFE | AHCI_P_IX_HBF |
 	      AHCI_P_IX_HBD | AHCI_P_IX_IF | AHCI_P_IX_OF |
 	      ((ch->pm_level == 0) ? AHCI_P_IX_PRC | AHCI_P_IX_PC : 0) |
 	      AHCI_P_IX_DP | AHCI_P_IX_UF | (ctlr->ccc ? 0 : AHCI_P_IX_SDB) |
 	      AHCI_P_IX_DS | AHCI_P_IX_PS | (ctlr->ccc ? 0 : AHCI_P_IX_DHR)));
 	if (bootverbose)
 		device_printf(dev, "AHCI reset done: device found\n");
 	xpt_release_simq(ch->sim, TRUE);
 }
 
 /*
  * Build the host-to-device register FIS for "ccb" in the command table.
  *
  * Three request kinds are handled: control requests (only the control
  * byte is filled), SCSI I/O (sent as an ATA PACKET command with the CDB
  * copied into the ATAPI command area) and regular ATA commands.  For NCQ
  * commands "tag" is encoded into the sector count field.
  *
  * Always returns 20; the caller treats 0 as failure.
  */
 static int
 ahci_setup_fis(device_t dev, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag)
 {
 	struct ahci_channel *ch;
 	u_int8_t *fis;
 
 	ch = device_get_softc(dev);
 	fis = ctp->cfis;
 	bzero(ctp->cfis, 64);
 	fis[0] = 0x27;			/* host to device */
 	fis[1] = (ccb->ccb_h.target_id & 0x0f);
 
 	/* Control request: only the control register is transferred. */
 	if (ccb->ccb_h.func_code != XPT_SCSI_IO &&
 	    (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) != 0) {
 		fis[15] = ccb->ataio.cmd.control;
 		return (20);
 	}
 
 	/* Everything else updates the command register. */
 	fis[1] |= 0x80;
 	fis[15] = ATA_A_4BIT;
 	if (ccb->ccb_h.func_code == XPT_SCSI_IO) {
 		fis[2] = ATA_PACKET_CMD;
 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE &&
 		    ch->curr[ccb->ccb_h.target_id].mode >= ATA_DMA) {
 			fis[3] = ATA_F_DMA;
 		} else {
 			/* Transfer length goes into bytes 5 and 6. */
 			fis[5] = ccb->csio.dxfer_len;
 			fis[6] = ccb->csio.dxfer_len >> 8;
 		}
 		fis[7] = ATA_D_LBA;
 		bzero(ctp->acmd, 32);
 		bcopy((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
 		    ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes,
 		    ctp->acmd, ccb->csio.cdb_len);
 		return (20);
 	}
 
 	/* Regular ATA command: copy the task file registers. */
 	fis[2] = ccb->ataio.cmd.command;
 	fis[3] = ccb->ataio.cmd.features;
 	fis[4] = ccb->ataio.cmd.lba_low;
 	fis[5] = ccb->ataio.cmd.lba_mid;
 	fis[6] = ccb->ataio.cmd.lba_high;
 	fis[7] = ccb->ataio.cmd.device;
 	fis[8] = ccb->ataio.cmd.lba_low_exp;
 	fis[9] = ccb->ataio.cmd.lba_mid_exp;
 	fis[10] = ccb->ataio.cmd.lba_high_exp;
 	fis[11] = ccb->ataio.cmd.features_exp;
 	if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) {
 		/* NCQ: the tag goes where the sector count would be. */
 		fis[12] = tag << 3;
 		fis[13] = 0;
 	} else {
 		fis[12] = ccb->ataio.cmd.sector_count;
 		fis[13] = ccb->ataio.cmd.sector_count_exp;
 	}
 	return (20);
 }
 
 /*
  * Wait for the SATA link to come up.
  *
  * Polls SSTS in 1 ms steps for up to 100 ms.  Returns 1 (after clearing
  * SERR) once the PHY is online, a speed has been negotiated and the
  * interface is active; returns 0 if the PHY reports offline or the wait
  * expires.
  */
 static int
 ahci_sata_connect(struct ahci_channel *ch)
 {
 	u_int32_t status;
 	int timeout;
 
 	/* Wait up to 100ms for "connect well" */
 	timeout = 0;
 	while (timeout < 100) {
 		status = ATA_INL(ch->r_mem, AHCI_P_SSTS);
 		if ((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE &&
 		    (status & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED &&
 		    (status & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE) {
 			if (bootverbose) {
 				device_printf(ch->dev,
 				    "SATA connect time=%dms status=%08x\n",
 				    timeout, status);
 			}
 			/* Clear SATA error register */
 			ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xffffffff);
 			return (1);
 		}
 		if ((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_OFFLINE) {
 			if (bootverbose) {
 				device_printf(ch->dev,
 				    "SATA offline status=%08x\n", status);
 			}
 			return (0);
 		}
 		DELAY(1000);
 		timeout++;
 	}
 	if (bootverbose) {
 		device_printf(ch->dev, "SATA connect timeout status=%08x\n",
 		    status);
 	}
 	return (0);
 }
 
 /*
  * Reset the SATA PHY of a channel through AHCI_P_SCTL and wait for the
  * link to come up.  The speed written comes from the user-configured
  * revision (entry 15 when a port multiplier is present, entry 0 otherwise).
  *
  * Returns 1 if ahci_sata_connect() succeeds, 0 otherwise.
  */
 static int
 ahci_sata_phy_reset(device_t dev)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	int sata_rev;
 	uint32_t ipm, spd;
 
 	sata_rev = ch->user[ch->pm_present ? 15 : 0].revision;
 	switch (sata_rev) {
 	case 1:
 		spd = ATA_SC_SPD_SPEED_GEN1;
 		break;
 	case 2:
 		spd = ATA_SC_SPD_SPEED_GEN2;
 		break;
 	case 3:
 		spd = ATA_SC_SPD_SPEED_GEN3;
 		break;
 	default:
 		spd = 0;
 		break;
 	}
 
 	/* Assert reset with partial/slumber transitions disabled. */
 	ATA_OUTL(ch->r_mem, AHCI_P_SCTL,
 	    ATA_SC_DET_RESET | spd |
 	    ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER);
 	DELAY(5000);
 
 	/* Back to idle; keep the IPM restrictions unless pm_level > 0. */
 	if (ch->pm_level > 0)
 		ipm = 0;
 	else
 		ipm = ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER;
 	ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_IDLE | spd | ipm);
 	DELAY(5000);
 
 	if (ahci_sata_connect(ch))
 		return (1);
 
 	/* Nothing connected: with pm_level > 0 write ATA_SC_DET_DISABLE. */
 	if (ch->pm_level > 0)
 		ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE);
 	return (0);
 }
 
 /*
  * Validate the target and LUN of a CCB for this channel.  Targets above 0
  * are only accepted when the channel has AHCI_CAP_SPM (then up to 15); only
  * LUN 0 is accepted.
  *
  * On failure the CCB is completed here with CAM_TID_INVALID or
  * CAM_LUN_INVALID and -1 is returned; otherwise 0 is returned and the CCB
  * is left untouched.
  */
 static int
 ahci_check_ids(device_t dev, union ccb *ccb)
 {
 	struct ahci_channel *ch = device_get_softc(dev);
 	int max_target;
 
 	max_target = (ch->caps & AHCI_CAP_SPM) ? 15 : 0;
 	if (ccb->ccb_h.target_id > max_target) {
 		ccb->ccb_h.status = CAM_TID_INVALID;
 		xpt_done(ccb);
 		return (-1);
 	}
 	if (ccb->ccb_h.target_lun != 0) {
 		ccb->ccb_h.status = CAM_LUN_INVALID;
 		xpt_done(ccb);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * CAM action routine of an AHCI channel: dispatch on the CCB function code.
  *
  * Requests that are finished here leave the switch with "break" and are
  * completed by the xpt_done() call at the bottom.  Paths that "return"
  * bypass it: the CCB was either already completed by ahci_check_ids(),
  * handed to ahci_begin_transaction(), or parked in ch->frozen.
  */
 static void
 ahciaction(struct cam_sim *sim, union ccb *ccb)
 {
 	device_t dev, parent;
 	struct ahci_channel *ch;
 
 	CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("ahciaction func_code=%x\n",
 	    ccb->ccb_h.func_code));
 
 	ch = (struct ahci_channel *)cam_sim_softc(sim);
 	dev = ch->dev;
 	switch (ccb->ccb_h.func_code) {
 	/* Common cases first */
 	case XPT_ATA_IO:	/* Execute the requested I/O operation */
 	case XPT_SCSI_IO:
 		if (ahci_check_ids(dev, ccb))
 			return;
 		/*
 		 * Fail with a selection timeout if the channel has no
 		 * devices, or if targets 1..14 are addressed while no port
 		 * multiplier is present.
 		 */
 		if (ch->devices == 0 ||
 		    (ch->pm_present == 0 &&
 		     ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) {
 			ccb->ccb_h.status = CAM_SEL_TIMEOUT;
 			break;
 		}
 		/* Check for command collision. */
 		if (ahci_check_collision(dev, ccb)) {
 			/* Freeze command. */
 			ch->frozen = ccb;
 			/* We have only one frozen slot, so freeze simq also. */
 			xpt_freeze_simq(ch->sim, 1);
 			return;
 		}
 		ahci_begin_transaction(dev, ccb);
 		return;
 	case XPT_EN_LUN:		/* Enable LUN as a target */
 	case XPT_TARGET_IO:		/* Execute target I/O request */
 	case XPT_ACCEPT_TARGET_IO:	/* Accept Host Target Mode CDB */
 	case XPT_CONT_TARGET_IO:	/* Continue Host Target I/O Connection*/
 	case XPT_ABORT:			/* Abort the specified CCB */
 		/* XXX Implement */
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		break;
 	case XPT_SET_TRAN_SETTINGS:
 	/* Store the fields flagged valid into the current or user settings. */
 	{
 		struct	ccb_trans_settings *cts = &ccb->cts;
 		struct	ahci_device *d; 
 
 		if (ahci_check_ids(dev, ccb))
 			return;
 		if (cts->type == CTS_TYPE_CURRENT_SETTINGS)
 			d = &ch->curr[ccb->ccb_h.target_id];
 		else
 			d = &ch->user[ccb->ccb_h.target_id];
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_REVISION)
 			d->revision = cts->xport_specific.sata.revision;
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_MODE)
 			d->mode = cts->xport_specific.sata.mode;
 		/* The byte count is clamped to 8192. */
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_BYTECOUNT)
 			d->bytecount = min(8192, cts->xport_specific.sata.bytecount);
 		/* The tag count is clamped to the channel's slot count. */
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS)
 			d->tags = min(ch->numslots, cts->xport_specific.sata.tags);
 		/* pm_present is stored per channel, not per device. */
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_PM)
 			ch->pm_present = cts->xport_specific.sata.pm_present;
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI)
 			d->atapi = cts->xport_specific.sata.atapi;
 		if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS)
 			d->caps = cts->xport_specific.sata.caps;
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_GET_TRAN_SETTINGS:
 	/* Get default/user set transfer settings for the target */
 	{
 		struct	ccb_trans_settings *cts = &ccb->cts;
 		struct  ahci_device *d;
 		uint32_t status;
 
 		if (ahci_check_ids(dev, ccb))
 			return;
 		if (cts->type == CTS_TYPE_CURRENT_SETTINGS)
 			d = &ch->curr[ccb->ccb_h.target_id];
 		else
 			d = &ch->user[ccb->ccb_h.target_id];
 		cts->protocol = PROTO_ATA;
 		cts->protocol_version = PROTO_VERSION_UNSPECIFIED;
 		cts->transport = XPORT_SATA;
 		cts->transport_version = XPORT_VERSION_UNSPECIFIED;
 		cts->proto_specific.valid = 0;
 		cts->xport_specific.sata.valid = 0;
 		/*
 		 * For the current settings of target 15, or of target 0 while
 		 * no port multiplier is present, the revision is taken from
 		 * the speed field of AHCI_P_SSTS (only if non-zero) and the
 		 * capabilities are derived from the channel; all other
 		 * requests report the stored values.
 		 */
 		if (cts->type == CTS_TYPE_CURRENT_SETTINGS &&
 		    (ccb->ccb_h.target_id == 15 ||
 		    (ccb->ccb_h.target_id == 0 && !ch->pm_present))) {
 			status = ATA_INL(ch->r_mem, AHCI_P_SSTS) & ATA_SS_SPD_MASK;
 			if (status & 0x0f0) {
 				cts->xport_specific.sata.revision =
 				    (status & 0x0f0) >> 4;
 				cts->xport_specific.sata.valid |=
 				    CTS_SATA_VALID_REVISION;
 			}
 			/* Start from the CTS_SATA_CAPS_D bits of the device. */
 			cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D;
 			if (ch->pm_level) {
 				if (ch->caps & (AHCI_CAP_PSC | AHCI_CAP_SSC))
 					cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ;
 				if (ch->caps2 & AHCI_CAP2_APST)
 					cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_APST;
 			}
 			if ((ch->caps & AHCI_CAP_SNCQ) &&
 			    (ch->quirks & AHCI_Q_NOAA) == 0)
 				cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_DMAAA;
 			/* Mask the result with the user-configured caps. */
 			cts->xport_specific.sata.caps &=
 			    ch->user[ccb->ccb_h.target_id].caps;
 			cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS;
 		} else {
 			cts->xport_specific.sata.revision = d->revision;
 			cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION;
 			cts->xport_specific.sata.caps = d->caps;
 			cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS;
 		}
 		cts->xport_specific.sata.mode = d->mode;
 		cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE;
 		cts->xport_specific.sata.bytecount = d->bytecount;
 		cts->xport_specific.sata.valid |= CTS_SATA_VALID_BYTECOUNT;
 		cts->xport_specific.sata.pm_present = ch->pm_present;
 		cts->xport_specific.sata.valid |= CTS_SATA_VALID_PM;
 		cts->xport_specific.sata.tags = d->tags;
 		cts->xport_specific.sata.valid |= CTS_SATA_VALID_TAGS;
 		cts->xport_specific.sata.atapi = d->atapi;
 		cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI;
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_RESET_BUS:		/* Reset the specified SCSI bus */
 	case XPT_RESET_DEV:	/* Bus Device Reset the specified SCSI device */
 		/* Both requests are served by resetting the whole channel. */
 		ahci_reset(dev);
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	case XPT_TERM_IO:		/* Terminate the I/O process */
 		/* XXX Implement */
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		break;
 	case XPT_PATH_INQ:		/* Path routing inquiry */
 	{
 		struct ccb_pathinq *cpi = &ccb->cpi;
 
 		parent = device_get_parent(dev);
 		cpi->version_num = 1; /* XXX??? */
 		cpi->hba_inquiry = PI_SDTR_ABLE;
 		if (ch->caps & AHCI_CAP_SNCQ)
 			cpi->hba_inquiry |= PI_TAG_ABLE;
 		if (ch->caps & AHCI_CAP_SPM)
 			cpi->hba_inquiry |= PI_SATAPM;
 		cpi->target_sprt = 0;
 		cpi->hba_misc = PIM_SEQSCAN;
 		cpi->hba_eng_cnt = 0;
 		/* Same target limit as enforced by ahci_check_ids(). */
 		if (ch->caps & AHCI_CAP_SPM)
 			cpi->max_target = 15;
 		else
 			cpi->max_target = 0;
 		cpi->max_lun = 0;
 		cpi->initiator_id = 0;
 		cpi->bus_id = cam_sim_bus(sim);
 		cpi->base_transfer_speed = 150000;
 		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
 		strncpy(cpi->hba_vid, "AHCI", HBA_IDLEN);
 		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
 		cpi->unit_number = cam_sim_unit(sim);
 		cpi->transport = XPORT_SATA;
 		cpi->transport_version = XPORT_VERSION_UNSPECIFIED;
 		cpi->protocol = PROTO_ATA;
 		cpi->protocol_version = PROTO_VERSION_UNSPECIFIED;
 		cpi->maxio = MAXPHYS;
 		/* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */
 		if (pci_get_devid(parent) == 0x43801002)
 			cpi->maxio = min(cpi->maxio, 128 * 512);
 		cpi->hba_vendor = pci_get_vendor(parent);
 		cpi->hba_device = pci_get_device(parent);
 		cpi->hba_subvendor = pci_get_subvendor(parent);
 		cpi->hba_subdevice = pci_get_subdevice(parent);
 		cpi->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	default:
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		break;
 	}
 	/* Complete every request that left the switch with "break". */
 	xpt_done(ccb);
 }
 
 /*
  * Poll routine of the channel's SIM: run the channel interrupt handler
  * directly on the device that owns the SIM.
  */
 static void
 ahcipoll(struct cam_sim *sim)
 {
 	struct ahci_channel *ch;
 
 	ch = (struct ahci_channel *)cam_sim_softc(sim);
 	ahci_ch_intr(ch->dev);
 }
Index: projects/binutils-2.17/sys/dev/cas/if_cas.c
===================================================================
--- projects/binutils-2.17/sys/dev/cas/if_cas.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/cas/if_cas.c	(revision 215830)
@@ -1,2939 +1,2936 @@
 /*-
  * Copyright (C) 2001 Eduardo Horvath.
  * Copyright (c) 2001-2003 Thomas Moestl
  * Copyright (c) 2007-2009 Marius Strobl <marius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: NetBSD: gem.c,v 1.21 2002/06/01 23:50:58 lukem Exp
  *	from: FreeBSD: if_gem.c 182060 2008-08-23 15:03:26Z marius
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * driver for Sun Cassini/Cassini+ and National Semiconductor DP83065
  * Saturn Gigabit Ethernet controllers
  */
 
 #if 0
 #define	CAS_DEBUG
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/endian.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/resource.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/taskqueue.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <machine/bus.h>
 #if defined(__powerpc__) || defined(__sparc64__)
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/openfirm.h>
 #include <machine/ofw_machdep.h>
 #endif
 #include <machine/resource.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
 #include <dev/cas/if_casreg.h>
 #include <dev/cas/if_casvar.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "miibus_if.h"
 
 /*
  * Compile-time check that ring size n is a power of two and lies within
  * [min, max].
  */
 #define RINGASSERT(n , min, max)					\
 	CTASSERT(powerof2(n) && (n) >= (min) && (n) <= (max))
 
 RINGASSERT(CAS_NRXCOMP, 128, 32768);
 RINGASSERT(CAS_NRXDESC, 32, 8192);
 RINGASSERT(CAS_NRXDESC2, 32, 8192);
 RINGASSERT(CAS_NTXDESC, 32, 8192);
 
 #undef RINGASSERT
 
 /*
  * Compile-time check that member m of struct cas_control_data sits at an
  * offset that is a multiple of a (the mask test assumes a is a power of
  * two).
  */
 #define	CCDASSERT(m, a)							\
 	CTASSERT((offsetof(struct cas_control_data, m) & ((a) - 1)) == 0)
 
 CCDASSERT(ccd_rxcomps, CAS_RX_COMP_ALIGN);
 CCDASSERT(ccd_rxdescs, CAS_RX_DESC_ALIGN);
 CCDASSERT(ccd_rxdescs2, CAS_RX_DESC_ALIGN);
 
 #undef CCDASSERT
 
 /* Number of polls cas_bitwait() performs before giving up. */
 #define	CAS_TRIES	10000
 
 /*
  * According to the documentation, the hardware supports basic TCP checksum
  * offloading only.  In practice, however, this can also be used for UDP
  * (i.e. the problem of previous Sun NICs that a checksum of 0x0 is not
  * converted to 0xffff no longer exists).
  */
 #define	CAS_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)
 
 /* Forward declarations of the file-local functions. */
 static inline void cas_add_rxdesc(struct cas_softc *sc, u_int idx);
 static int	cas_attach(struct cas_softc *sc);
 static int	cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr,
 		    uint32_t set);
 static void	cas_cddma_callback(void *xsc, bus_dma_segment_t *segs,
 		    int nsegs, int error);
 static void	cas_detach(struct cas_softc *sc);
 static int	cas_disable_rx(struct cas_softc *sc);
 static int	cas_disable_tx(struct cas_softc *sc);
 static void	cas_eint(struct cas_softc *sc, u_int status);
 static void	cas_free(void *arg1, void* arg2);
 static void	cas_init(void *xsc);
 static void	cas_init_locked(struct cas_softc *sc);
 static void	cas_init_regs(struct cas_softc *sc);
 static int	cas_intr(void *v);
 static void	cas_intr_task(void *arg, int pending __unused);
 static int	cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
 static int	cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head);
 static int	cas_mediachange(struct ifnet *ifp);
 static void	cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr);
 static void	cas_meminit(struct cas_softc *sc);
 static void	cas_mifinit(struct cas_softc *sc);
 static int	cas_mii_readreg(device_t dev, int phy, int reg);
 static void	cas_mii_statchg(device_t dev);
 static int	cas_mii_writereg(device_t dev, int phy, int reg, int val);
 static void	cas_reset(struct cas_softc *sc);
 static int	cas_reset_rx(struct cas_softc *sc);
 static int	cas_reset_tx(struct cas_softc *sc);
 static void	cas_resume(struct cas_softc *sc);
 static u_int	cas_descsize(u_int sz);
 static void	cas_rint(struct cas_softc *sc);
 static void	cas_rint_timeout(void *arg);
 static inline void cas_rxcksum(struct mbuf *m, uint16_t cksum);
 static inline void cas_rxcompinit(struct cas_rx_comp *rxcomp);
 static u_int	cas_rxcompsize(u_int sz);
 static void	cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs,
 		    int nsegs, int error);
 static void	cas_setladrf(struct cas_softc *sc);
 static void	cas_start(struct ifnet *ifp);
 static void	cas_stop(struct ifnet *ifp);
 static void	cas_suspend(struct cas_softc *sc);
 static void	cas_tick(void *arg);
 static void	cas_tint(struct cas_softc *sc);
 static void	cas_tx_task(void *arg, int pending __unused);
 static inline void cas_txkick(struct cas_softc *sc);
 static void	cas_watchdog(struct cas_softc *sc);
 
 static devclass_t cas_devclass;
 
 /* This module depends on the ether and miibus modules. */
 MODULE_DEPEND(cas, ether, 1, 1, 1);
 MODULE_DEPEND(cas, miibus, 1, 1, 1);
 
 /* With CAS_DEBUG defined, the CTR2() trace calls use class KTR_SPARE2. */
 #ifdef CAS_DEBUG
 #include <sys/ktr.h>
 #define	KTR_CAS		KTR_SPARE2
 #endif
 
 /*
  * Attach the device described by sc: allocate and initialize the ifnet,
  * the callouts and the taskqueue, reset the chip, create the DMA tags,
  * maps and buffers, attach the PHY(s) via mii_attach() and finally attach
  * the Ethernet interface.
  *
  * Returns 0 on success.  On failure the resources acquired so far are
  * released through the fail_* label chain at the bottom and the value of
  * "error" is returned.
  */
 static int
 cas_attach(struct cas_softc *sc)
 {
 	struct cas_txsoft *txs;
 	struct ifnet *ifp;
 	int error, i;
 	uint32_t v;
 
 	/* Set up ifnet structure. */
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL)
 		return (ENOSPC);
 	ifp->if_softc = sc;
 	if_initname(ifp, device_get_name(sc->sc_dev),
 	    device_get_unit(sc->sc_dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_start = cas_start;
 	ifp->if_ioctl = cas_ioctl;
 	ifp->if_init = cas_init;
 	IFQ_SET_MAXLEN(&ifp->if_snd, CAS_TXQUEUELEN);
 	ifp->if_snd.ifq_drv_maxlen = CAS_TXQUEUELEN;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/* The tick callout is tied to the softc mutex; the RX one is not. */
 	callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0);
 	callout_init(&sc->sc_rx_ch, 1);
 	/* Create local taskq. */
 	TASK_INIT(&sc->sc_intr_task, 0, cas_intr_task, sc);
 	TASK_INIT(&sc->sc_tx_task, 1, cas_tx_task, ifp);
 	sc->sc_tq = taskqueue_create_fast("cas_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &sc->sc_tq);
 	if (sc->sc_tq == NULL) {
 		device_printf(sc->sc_dev, "could not create taskqueue\n");
 		error = ENXIO;
 		goto fail_ifnet;
 	}
 	taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq",
 	    device_get_nameunit(sc->sc_dev));
 
 	/* Make sure the chip is stopped. */
 	cas_reset(sc);
 
 	/* Parent DMA tag from which the RX, TX and control tags derive. */
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    BUS_SPACE_MAXSIZE, 0, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
 	    &sc->sc_pdmatag);
 	if (error != 0)
 		goto fail_taskq;
 
 	/* RX buffer tag: one segment of CAS_PAGE_SIZE bytes. */
 	error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    CAS_PAGE_SIZE, 1, CAS_PAGE_SIZE, 0, NULL, NULL, &sc->sc_rdmatag);
 	if (error != 0)
 		goto fail_ptag;
 
 	/* TX buffer tag: up to CAS_NTXSEGS segments of MCLBYTES each. */
 	error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    MCLBYTES * CAS_NTXSEGS, CAS_NTXSEGS, MCLBYTES,
 	    BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_tdmatag);
 	if (error != 0)
 		goto fail_rtag;
 
 	/* Control data tag: a single CAS_TX_DESC_ALIGN-aligned segment. */
 	error = bus_dma_tag_create(sc->sc_pdmatag, CAS_TX_DESC_ALIGN, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    sizeof(struct cas_control_data), 1,
 	    sizeof(struct cas_control_data), 0,
 	    NULL, NULL, &sc->sc_cdmatag);
 	if (error != 0)
 		goto fail_ttag;
 
 	/*
 	 * Allocate the control data structures, create and load the
 	 * DMA map for it.
 	 */
 	if ((error = bus_dmamem_alloc(sc->sc_cdmatag,
 	    (void **)&sc->sc_control_data,
 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
 	    &sc->sc_cddmamap)) != 0) {
 		device_printf(sc->sc_dev,
 		    "unable to allocate control data, error = %d\n", error);
 		goto fail_ctag;
 	}
 
 	/*
 	 * cas_cddma_callback() stores the bus address in sc_cddma; a value
 	 * of 0 after the load is treated as failure.
 	 * NOTE(review): if the load returns 0 but sc_cddma is still 0,
 	 * "error" is 0 when jumping to fail_cmem, so this function would
 	 * return 0 after tearing everything down -- confirm whether that
 	 * combination can occur.
 	 */
 	sc->sc_cddma = 0;
 	if ((error = bus_dmamap_load(sc->sc_cdmatag, sc->sc_cddmamap,
 	    sc->sc_control_data, sizeof(struct cas_control_data),
 	    cas_cddma_callback, sc, 0)) != 0 || sc->sc_cddma == 0) {
 		device_printf(sc->sc_dev,
 		    "unable to load control data DMA map, error = %d\n",
 		    error);
 		goto fail_cmem;
 	}
 
 	/*
 	 * Initialize the transmit job descriptors.
 	 */
 	STAILQ_INIT(&sc->sc_txfreeq);
 	STAILQ_INIT(&sc->sc_txdirtyq);
 
 	/*
 	 * Create the transmit buffer DMA maps.
 	 */
 	error = ENOMEM;
 	for (i = 0; i < CAS_TXQUEUELEN; i++) {
 		txs = &sc->sc_txsoft[i];
 		txs->txs_mbuf = NULL;
 		txs->txs_ndescs = 0;
 		if ((error = bus_dmamap_create(sc->sc_tdmatag, 0,
 		    &txs->txs_dmamap)) != 0) {
 			device_printf(sc->sc_dev,
 			    "unable to create TX DMA map %d, error = %d\n",
 			    i, error);
 			goto fail_txd;
 		}
 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
 	}
 
 	/*
 	 * Allocate the receive buffers, create and load the DMA maps
 	 * for them.
 	 */
 	for (i = 0; i < CAS_NRXDESC; i++) {
 		if ((error = bus_dmamem_alloc(sc->sc_rdmatag,
 		    &sc->sc_rxdsoft[i].rxds_buf, BUS_DMA_WAITOK,
 		    &sc->sc_rxdsoft[i].rxds_dmamap)) != 0) {
 			device_printf(sc->sc_dev,
 			    "unable to allocate RX buffer %d, error = %d\n",
 			    i, error);
 			goto fail_rxmem;
 		}
 
 		/*
 		 * sc_rxdptr tells cas_rxdma_callback() which rxds_paddr
 		 * slot to fill in; 0 afterwards is treated as failure.
 		 * NOTE(review): as with the control data load above,
 		 * "error" may still be 0 on the rxds_paddr == 0 path.
 		 */
 		sc->sc_rxdptr = i;
 		sc->sc_rxdsoft[i].rxds_paddr = 0;
 		if ((error = bus_dmamap_load(sc->sc_rdmatag,
 		    sc->sc_rxdsoft[i].rxds_dmamap, sc->sc_rxdsoft[i].rxds_buf,
 		    CAS_PAGE_SIZE, cas_rxdma_callback, sc, 0)) != 0 ||
 		    sc->sc_rxdsoft[i].rxds_paddr == 0) {
 			device_printf(sc->sc_dev,
 			    "unable to load RX DMA map %d, error = %d\n",
 			    i, error);
 			goto fail_rxmap;
 		}
 	}
 
 	if ((sc->sc_flags & CAS_SERDES) == 0) {
 		CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_MII);
 		CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		cas_mifinit(sc);
 		/*
 		 * Look for an external PHY.
 		 */
 		/* error stays ENXIO unless one of the attaches succeeds. */
 		error = ENXIO;
 		v = CAS_READ_4(sc, CAS_MIF_CONF);
 		if ((v & CAS_MIF_CONF_MDI1) != 0) {
 			v |= CAS_MIF_CONF_PHY_SELECT;
 			CAS_WRITE_4(sc, CAS_MIF_CONF, v);
 			CAS_BARRIER(sc, CAS_MIF_CONF, 4,
 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 			/* Enable/unfreeze the GMII pins of Saturn. */
 			if (sc->sc_variant == CAS_SATURN) {
 				CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0);
 				CAS_BARRIER(sc, CAS_SATURN_PCFG, 4,
 				    BUS_SPACE_BARRIER_READ |
 				    BUS_SPACE_BARRIER_WRITE);
 			}
 			error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
 			    cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK,
-			    MII_PHY_ANY, MII_OFFSET_ANY, 0);
+			    MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE);
 		}
 		/*
 		 * Fall back on an internal PHY if no external PHY was found.
 		 */
 		if (error != 0 && (v & CAS_MIF_CONF_MDI0) != 0) {
 			v &= ~CAS_MIF_CONF_PHY_SELECT;
 			CAS_WRITE_4(sc, CAS_MIF_CONF, v);
 			CAS_BARRIER(sc, CAS_MIF_CONF, 4,
 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 			/* Freeze the GMII pins of Saturn for saving power. */
 			if (sc->sc_variant == CAS_SATURN) {
 				CAS_WRITE_4(sc, CAS_SATURN_PCFG,
 				    CAS_SATURN_PCFG_FSI);
 				CAS_BARRIER(sc, CAS_SATURN_PCFG, 4,
 				    BUS_SPACE_BARRIER_READ |
 				    BUS_SPACE_BARRIER_WRITE);
 			}
 			error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
 			    cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK,
-			    MII_PHY_ANY, MII_OFFSET_ANY, 0);
+			    MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE);
 		}
 	} else {
 		/*
 		 * Use the external PCS SERDES.
 		 */
 		CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_SERDES);
 		CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_WRITE);
 		/* Enable/unfreeze the SERDES pins of Saturn. */
 		if (sc->sc_variant == CAS_SATURN) {
 			CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0);
 			CAS_BARRIER(sc, CAS_SATURN_PCFG, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 		}
 		CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD);
 		CAS_BARRIER(sc, CAS_PCS_SERDES_CTRL, 4,
 		    BUS_SPACE_BARRIER_WRITE);
 		CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN);
 		CAS_BARRIER(sc, CAS_PCS_CONF, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
 		    cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK,
-		    CAS_PHYAD_EXTERNAL, MII_OFFSET_ANY, 0);
+		    CAS_PHYAD_EXTERNAL, MII_OFFSET_ANY, MIIF_DOPAUSE);
 	}
 	if (error != 0) {
 		device_printf(sc->sc_dev, "attaching PHYs failed\n");
 		goto fail_rxmap;
 	}
 	sc->sc_mii = device_get_softc(sc->sc_miibus);
 
 	/*
 	 * From this point forward, the attachment cannot fail.  A failure
 	 * before this point releases all resources that may have been
 	 * allocated.
 	 */
 
 	/* Announce FIFO sizes. */
 	v = CAS_READ_4(sc, CAS_TX_FIFO_SIZE);
 	device_printf(sc->sc_dev, "%ukB RX FIFO, %ukB TX FIFO\n",
 	    CAS_RX_FIFO_SIZE / 1024, v / 16);
 
 	/* Attach the interface. */
 	ether_ifattach(ifp, sc->sc_enaddr);
 
 	/*
 	 * Tell the upper layer(s) we support long frames/checksum offloads.
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities = IFCAP_VLAN_MTU;
 	if ((sc->sc_flags & CAS_NO_CSUM) == 0) {
 		ifp->if_capabilities |= IFCAP_HWCSUM;
 		ifp->if_hwassist = CAS_CSUM_FEATURES;
 	}
 	ifp->if_capenable = ifp->if_capabilities;
 
 	return (0);
 
 	/*
 	 * Free any resources we've allocated during the failed attach
 	 * attempt.  Do this in reverse order and fall through.
 	 */
  fail_rxmap:
 	/* Only maps whose load produced an address are unloaded. */
 	for (i = 0; i < CAS_NRXDESC; i++)
 		if (sc->sc_rxdsoft[i].rxds_paddr != 0)
 			bus_dmamap_unload(sc->sc_rdmatag,
 			    sc->sc_rxdsoft[i].rxds_dmamap);
  fail_rxmem:
 	for (i = 0; i < CAS_NRXDESC; i++)
 		if (sc->sc_rxdsoft[i].rxds_buf != NULL)
 			bus_dmamem_free(sc->sc_rdmatag,
 			    sc->sc_rxdsoft[i].rxds_buf,
 			    sc->sc_rxdsoft[i].rxds_dmamap);
  fail_txd:
 	for (i = 0; i < CAS_TXQUEUELEN; i++)
 		if (sc->sc_txsoft[i].txs_dmamap != NULL)
 			bus_dmamap_destroy(sc->sc_tdmatag,
 			    sc->sc_txsoft[i].txs_dmamap);
 	bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap);
  fail_cmem:
 	bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data,
 	    sc->sc_cddmamap);
  fail_ctag:
 	bus_dma_tag_destroy(sc->sc_cdmatag);
  fail_ttag:
 	bus_dma_tag_destroy(sc->sc_tdmatag);
  fail_rtag:
 	bus_dma_tag_destroy(sc->sc_rdmatag);
  fail_ptag:
 	bus_dma_tag_destroy(sc->sc_pdmatag);
  fail_taskq:
 	taskqueue_free(sc->sc_tq);
  fail_ifnet:
 	if_free(ifp);
 	return (error);
 }
 
 /*
  * Undo cas_attach(): detach the interface, stop the chip, drain the
  * callouts and tasks, free the ifnet, taskqueue and miibus child, and then
  * release the DMA resources (RX buffers, TX maps, control data, tags) in
  * the reverse order of their creation.
  */
 static void
 cas_detach(struct cas_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	int i;
 
 	ether_ifdetach(ifp);
 	/* cas_stop() is called with the softc lock held. */
 	CAS_LOCK(sc);
 	cas_stop(ifp);
 	CAS_UNLOCK(sc);
 	/* Wait for callouts and queued tasks to finish. */
 	callout_drain(&sc->sc_tick_ch);
 	callout_drain(&sc->sc_rx_ch);
 	taskqueue_drain(sc->sc_tq, &sc->sc_intr_task);
 	taskqueue_drain(sc->sc_tq, &sc->sc_tx_task);
 	if_free(ifp);
 	taskqueue_free(sc->sc_tq);
 	device_delete_child(sc->sc_dev, sc->sc_miibus);
 
 	/* RX buffers: sync, unload and free, one pass each. */
 	for (i = 0; i < CAS_NRXDESC; i++)
 		if (sc->sc_rxdsoft[i].rxds_dmamap != NULL)
 			bus_dmamap_sync(sc->sc_rdmatag,
 			    sc->sc_rxdsoft[i].rxds_dmamap,
 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	for (i = 0; i < CAS_NRXDESC; i++)
 		if (sc->sc_rxdsoft[i].rxds_paddr != 0)
 			bus_dmamap_unload(sc->sc_rdmatag,
 			    sc->sc_rxdsoft[i].rxds_dmamap);
 	for (i = 0; i < CAS_NRXDESC; i++)
 		if (sc->sc_rxdsoft[i].rxds_buf != NULL)
 			bus_dmamem_free(sc->sc_rdmatag,
 			    sc->sc_rxdsoft[i].rxds_buf,
 			    sc->sc_rxdsoft[i].rxds_dmamap);
 	/* TX DMA maps. */
 	for (i = 0; i < CAS_TXQUEUELEN; i++)
 		if (sc->sc_txsoft[i].txs_dmamap != NULL)
 			bus_dmamap_destroy(sc->sc_tdmatag,
 			    sc->sc_txsoft[i].txs_dmamap);
 	/* Control data, then the tags (child tags before the parent). */
 	CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap);
 	bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data,
 	    sc->sc_cddmamap);
 	bus_dma_tag_destroy(sc->sc_cdmatag);
 	bus_dma_tag_destroy(sc->sc_tdmatag);
 	bus_dma_tag_destroy(sc->sc_rdmatag);
 	bus_dma_tag_destroy(sc->sc_pdmatag);
 }
 
 /*
  * Suspend handler: stop the interface while holding the softc lock.
  */
 static void
 cas_suspend(struct cas_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->sc_ifp;
 	CAS_LOCK(sc);
 	cas_stop(ifp);
 	CAS_UNLOCK(sc);
 }
 
 /*
  * Resume handler: forget that the chip was initialized and, if the
  * interface is administratively up, initialize it again.  Runs with the
  * softc lock held.
  */
 static void
 cas_resume(struct cas_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->sc_ifp;
 	CAS_LOCK(sc);
 	/*
 	 * All registers must be programmed again after a resume, just as
 	 * after power-on, so drop the "already initialized" flag first.
 	 */
 	sc->sc_flags &= ~CAS_INITED;
 	if ((ifp->if_flags & IFF_UP) != 0)
 		cas_init_locked(sc);
 	CAS_UNLOCK(sc);
 }
 
 /*
  * Pass a hardware-computed RX checksum on to the stack.
  *
  * Only unfragmented IPv4 TCP or UDP frames whose IP total length matches
  * the packet length are handled; UDP datagrams with a zero checksum field
  * are skipped.  For everything else the mbuf is left untouched.  Otherwise
  * the complemented checksum, with the 16-bit words of any IP options
  * subtracted, is stored in csum_data and CSUM_DATA_VALID is set.
  */
 static inline void
 cas_rxcksum(struct mbuf *m, uint16_t cksum)
 {
 	struct ether_header *eh;
 	struct ip *ip;
 	struct udphdr *uh;
 	uint16_t *optp;
 	int32_t hlen, optlen, pktlen;
 	uint32_t sum;
 
 	pktlen = m->m_pkthdr.len;
 	if (pktlen < sizeof(struct ether_header) + sizeof(struct ip))
 		return;
 	eh = mtod(m, struct ether_header *);
 	if (eh->ether_type != htons(ETHERTYPE_IP))
 		return;
 	ip = (struct ip *)(eh + 1);
 	if (ip->ip_v != IPVERSION)
 		return;
 
 	/* Sanity-check the IP header length and the total length. */
 	hlen = ip->ip_hl << 2;
 	pktlen -= sizeof(struct ether_header);
 	if (hlen < sizeof(struct ip) || ntohs(ip->ip_len) < hlen ||
 	    ntohs(ip->ip_len) != pktlen)
 		return;
 	if ((ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0)
 		return;	/* Cannot handle fragmented packet. */
 
 	if (ip->ip_p == IPPROTO_TCP) {
 		if (pktlen < (hlen + sizeof(struct tcphdr)))
 			return;
 	} else if (ip->ip_p == IPPROTO_UDP) {
 		if (pktlen < (hlen + sizeof(struct udphdr)))
 			return;
 		uh = (struct udphdr *)((uint8_t *)ip + hlen);
 		if (uh->uh_sum == 0)
 			return; /* no checksum */
 	} else {
 		return;
 	}
 
 	cksum = ~cksum;
 	/* checksum fixup for IP options */
 	optp = (uint16_t *)(ip + 1);
 	optlen = hlen - sizeof(struct ip);
 	while (optlen > 0) {
 		sum = cksum - *optp;
 		sum = (sum >> 16) + (sum & 65535);
 		cksum = sum & 65535;
 		optlen -= sizeof(uint16_t);
 		optp++;
 	}
 	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
 	m->m_pkthdr.csum_data = cksum;
 }
 
 /*
  * DMA load callback for the control data: on success record the bus
  * address of the single segment in sc_cddma.  On error nothing is stored;
  * more than one segment is a panic.
  */
 static void
 cas_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct cas_softc *sc;
 
 	if (error != 0)
 		return;
 	if (nsegs != 1)
 		panic("%s: bad control buffer segment count", __func__);
 	sc = xsc;
 	sc->sc_cddma = segs[0].ds_addr;
 }
 
 /*
  * DMA load callback for an RX buffer: on success record the bus address of
  * the single segment in the rxds_paddr slot selected by sc_rxdptr.  On
  * error nothing is stored; more than one segment is a panic.
  */
 static void
 cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct cas_softc *sc;
 
 	if (error != 0)
 		return;
 	if (nsegs != 1)
 		panic("%s: bad RX buffer segment count", __func__);
 	sc = xsc;
 	sc->sc_rxdsoft[sc->sc_rxdptr].rxds_paddr = segs[0].ds_addr;
 }
 
 /*
  * Periodic callout handler; it re-arms itself to run again after hz ticks.
  * Folds the MAC collision and RX error counters into the ifnet statistics
  * and clears them, ticks the MII layer, reaps completed transmissions if
  * any descriptors are in use and runs the watchdog.  The softc lock must be
  * held by the caller (asserted below).
  */
 static void
 cas_tick(void *arg)
 {
 	struct cas_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t v;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	/*
 	 * Unload collision and error counters.
 	 */
 	ifp->if_collisions +=
 	    CAS_READ_4(sc, CAS_MAC_NORM_COLL_CNT) +
 	    CAS_READ_4(sc, CAS_MAC_FIRST_COLL_CNT);
 	/* Excess and late collisions also count as output errors. */
 	v = CAS_READ_4(sc, CAS_MAC_EXCESS_COLL_CNT) +
 	    CAS_READ_4(sc, CAS_MAC_LATE_COLL_CNT);
 	ifp->if_collisions += v;
 	ifp->if_oerrors += v;
 	ifp->if_ierrors +=
 	    CAS_READ_4(sc, CAS_MAC_RX_LEN_ERR_CNT) +
 	    CAS_READ_4(sc, CAS_MAC_RX_ALIGN_ERR) +
 	    CAS_READ_4(sc, CAS_MAC_RX_CRC_ERR_CNT) +
 	    CAS_READ_4(sc, CAS_MAC_RX_CODE_VIOL);
 
 	/*
 	 * Then clear the hardware counters.
 	 */
 	CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0);
 
 	mii_tick(sc->sc_mii);
 
 	/* Some TX descriptors are in use: check for completions. */
 	if (sc->sc_txfree != CAS_MAXTXFREE)
 		cas_tint(sc);
 
 	cas_watchdog(sc);
 
 	callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc);
 }
 
 /*
  * Poll register r until all bits in clr read as zero and all bits in set
  * read as one.  The register is sampled up to CAS_TRIES times with a
  * DELAY(100) after every unsuccessful sample.
  *
  * Returns 1 if the condition was met, 0 if all tries were used up.
  */
 static int
 cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr, uint32_t set)
 {
 	uint32_t val;
 	int tries;
 
 	for (tries = 0; tries < CAS_TRIES; tries++) {
 		val = CAS_READ_4(sc, r);
 		if ((val & clr) == 0 && (val & set) == set)
 			return (1);
 		DELAY(100);
 	}
 	return (0);
 }
 
 static void
 cas_reset(struct cas_softc *sc)
 {
 
 #ifdef CAS_DEBUG
 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 	/* Disable all interrupts in order to avoid spurious ones. */
 	CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff);
 
 	cas_reset_rx(sc);
 	cas_reset_tx(sc);
 
 	/*
 	 * Do a full reset modulo the result of the last auto-negotiation
 	 * when using the SERDES.
 	 */
 	CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX |
 	    ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0));
 	CAS_BARRIER(sc, CAS_RESET, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	DELAY(3000);
 	if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0))
 		device_printf(sc->sc_dev, "cannot reset device\n");
 }
 
 /*
  * Stop the interface: cancel the callouts, mask all interrupts, reset the
  * TX and RX sides, move every entry of the dirty TX queue back to the free
  * queue (unloading its DMA map and freeing its mbuf where present) and
  * clear the running/link state and the watchdog timer.
  */
 static void
 cas_stop(struct ifnet *ifp)
 {
 	struct cas_softc *sc;
 	struct cas_txsoft *txs;
 
 	sc = ifp->if_softc;
 #ifdef CAS_DEBUG
 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 
 	callout_stop(&sc->sc_tick_ch);
 	callout_stop(&sc->sc_rx_ch);
 
 	/* Mask every interrupt source so that no spurious ones fire. */
 	CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff);
 
 	cas_reset_tx(sc);
 	cas_reset_rx(sc);
 
 	/* Drain the dirty queue, releasing any queued transmit buffers. */
 	for (;;) {
 		txs = STAILQ_FIRST(&sc->sc_txdirtyq);
 		if (txs == NULL)
 			break;
 		STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q);
 		if (txs->txs_ndescs != 0) {
 			bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
 			if (txs->txs_mbuf != NULL) {
 				m_freem(txs->txs_mbuf);
 				txs->txs_mbuf = NULL;
 			}
 		}
 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
 	}
 
 	/* Mark the interface down and cancel the watchdog timer. */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 	sc->sc_flags &= ~CAS_LINK;
 	sc->sc_wdog_timer = 0;
 }
 
 /*
  * Reset the receive side.  Returns 0 on success and 1 if the reset bits
  * did not clear in time.
  */
 static int
 cas_reset_rx(struct cas_softc *sc)
 {
 	uint32_t rstbits;
 
 	/*
 	 * A reset issued while DMA is still running can hang the bus,
 	 * hence the MAC and the RX DMA engine are turned off beforehand.
 	 */
 	cas_disable_rx(sc);
 	CAS_WRITE_4(sc, CAS_RX_CONF, 0);
 	CAS_BARRIER(sc, CAS_RX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (cas_bitwait(sc, CAS_RX_CONF, CAS_RX_CONF_RXDMA_EN, 0) == 0)
 		device_printf(sc->sc_dev, "cannot disable RX DMA\n");
 
 	/* Now the ERX itself can be reset. */
 	rstbits = CAS_RESET_RX;
 	if ((sc->sc_flags & CAS_SERDES) != 0)
 		rstbits |= CAS_RESET_PCS_DIS;
 	CAS_WRITE_4(sc, CAS_RESET, rstbits);
 	CAS_BARRIER(sc, CAS_RESET, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0) != 0)
 		return (0);
 
 	device_printf(sc->sc_dev, "cannot reset receiver\n");
 	return (1);
 }
 
 /*
  * Reset the transmit side.  Returns 0 on success and 1 if the reset bits
  * did not clear in time.
  */
 static int
 cas_reset_tx(struct cas_softc *sc)
 {
 	uint32_t rstbits;
 
 	/*
 	 * A reset issued while DMA is still running can hang the bus,
 	 * hence the MAC and the TX DMA engine are turned off beforehand.
 	 */
 	cas_disable_tx(sc);
 	CAS_WRITE_4(sc, CAS_TX_CONF, 0);
 	CAS_BARRIER(sc, CAS_TX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (cas_bitwait(sc, CAS_TX_CONF, CAS_TX_CONF_TXDMA_EN, 0) == 0)
 		device_printf(sc->sc_dev, "cannot disable TX DMA\n");
 
 	/* Now the ETX itself can be reset. */
 	rstbits = CAS_RESET_TX;
 	if ((sc->sc_flags & CAS_SERDES) != 0)
 		rstbits |= CAS_RESET_PCS_DIS;
 	CAS_WRITE_4(sc, CAS_RESET, rstbits);
 	CAS_BARRIER(sc, CAS_RESET, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0) != 0)
 		return (0);
 
 	device_printf(sc->sc_dev, "cannot reset transmitter\n");
 	return (1);
 }
 
 /*
  * Clear the enable bit of the RX MAC and wait for it to read back as
  * cleared.  Returns the result of cas_bitwait(): 1 once the bit is
  * clear, 0 if it is still set after all retries.
  */
 static int
 cas_disable_rx(struct cas_softc *sc)
 {
 	uint32_t conf;
 
 	conf = CAS_READ_4(sc, CAS_MAC_RX_CONF);
 	conf &= ~CAS_MAC_RX_CONF_EN;
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, conf);
 	CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	return (cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0));
 }
 
 /*
  * Clear the enable bit of the TX MAC and wait for it to read back as
  * cleared.  Returns the result of cas_bitwait(): 1 once the bit is
  * clear, 0 if it is still set after all retries.
  */
 static int
 cas_disable_tx(struct cas_softc *sc)
 {
 	uint32_t conf;
 
 	conf = CAS_READ_4(sc, CAS_MAC_TX_CONF);
 	conf &= ~CAS_MAC_TX_CONF_EN;
 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, conf);
 	CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	return (cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0));
 }
 
 /*
  * Put a receive completion ring entry into its initial state: words 1
  * and 2 are zeroed, word 3 carries the checksum start offset (Ethernet
  * header plus a struct ip) and word 4 has CAS_RC4_ZERO set.  All values
  * are stored little-endian.
  */
 static inline void
 cas_rxcompinit(struct cas_rx_comp *rxcomp)
 {
 
 	rxcomp->crc_word1 = rxcomp->crc_word2 = 0;
 	rxcomp->crc_word3 = htole64(
 	    CAS_SET(ETHER_HDR_LEN + sizeof(struct ip), CAS_RC3_CSO));
 	rxcomp->crc_word4 = htole64(CAS_RC4_ZERO);
 }
 
 /*
  * Bring the host-side descriptor memory into its initial state: clear
  * the TX descriptors, re-initialize every RX completion entry and RX
  * descriptor, reset the software ring indices and sync the control data.
  * The softc lock must be held.
  */
 static void
 cas_meminit(struct cas_softc *sc)
 {
 	int n;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Transmit descriptor ring and its bookkeeping. */
 	n = 0;
 	while (n < CAS_NTXDESC) {
 		sc->sc_txdescs[n].cd_flags = 0;
 		sc->sc_txdescs[n].cd_buf_ptr = 0;
 		n++;
 	}
 	sc->sc_txfree = CAS_MAXTXFREE;
 	sc->sc_txnext = 0;
 	sc->sc_txwin = 0;
 
 	/* Receive completion ring. */
 	n = 0;
 	while (n < CAS_NRXCOMP) {
 		cas_rxcompinit(&sc->sc_rxcomps[n]);
 		n++;
 	}
 	sc->sc_rxcptr = 0;
 
 	/*
 	 * First receive descriptor ring.  The second one is not used
 	 * and therefore simply stays zeroed.
 	 */
 	n = 0;
 	while (n < CAS_NRXDESC) {
 		CAS_INIT_RXDESC(sc, n, n);
 		n++;
 	}
 	sc->sc_rxdptr = 0;
 
 	CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Translate a descriptor ring size (number of entries) into the
  * corresponding CAS_DESC_* register encoding.  Sizes other than the
  * powers of two from 32 to 8192 are reported on the console and
  * encoded as CAS_DESC_32.
  */
 static u_int
 cas_descsize(u_int sz)
 {
 
 	switch (sz) {
 	case 32:
 		return (CAS_DESC_32);
 	case 64:
 		return (CAS_DESC_64);
 	case 128:
 		return (CAS_DESC_128);
 	case 256:
 		return (CAS_DESC_256);
 	case 512:
 		return (CAS_DESC_512);
 	case 1024:
 		return (CAS_DESC_1K);
 	case 2048:
 		return (CAS_DESC_2K);
 	case 4096:
 		return (CAS_DESC_4K);
 	case 8192:
 		return (CAS_DESC_8K);
 	default:
 		/* sz is unsigned, so it has to be printed with %u. */
 		printf("%s: invalid descriptor ring size %u\n", __func__, sz);
 		return (CAS_DESC_32);
 	}
 }
 
 /*
  * Translate a receive completion ring size (number of entries) into
  * the corresponding CAS_RX_CONF_COMP_* register encoding.  Sizes other
  * than the powers of two from 128 to 32768 are reported on the console
  * and encoded as CAS_RX_CONF_COMP_128.
  */
 static u_int
 cas_rxcompsize(u_int sz)
 {
 
 	switch (sz) {
 	case 128:
 		return (CAS_RX_CONF_COMP_128);
 	case 256:
 		return (CAS_RX_CONF_COMP_256);
 	case 512:
 		return (CAS_RX_CONF_COMP_512);
 	case 1024:
 		return (CAS_RX_CONF_COMP_1K);
 	case 2048:
 		return (CAS_RX_CONF_COMP_2K);
 	case 4096:
 		return (CAS_RX_CONF_COMP_4K);
 	case 8192:
 		return (CAS_RX_CONF_COMP_8K);
 	case 16384:
 		return (CAS_RX_CONF_COMP_16K);
 	case 32768:
 		return (CAS_RX_CONF_COMP_32K);
 	default:
 		/* sz is unsigned, so it has to be printed with %u. */
 		printf("%s: invalid completion ring size %u\n", __func__, sz);
 		return (CAS_RX_CONF_COMP_128);
 	}
 }
 
 /*
  * Locking wrapper around cas_init_locked(); xsc is the softc.
  */
 static void
 cas_init(void *xsc)
 {
 	struct cas_softc *sc;
 
 	sc = xsc;
 	CAS_LOCK(sc);
 	cas_init_locked(sc);
 	CAS_UNLOCK(sc);
 }
 
 /*
  * Initialization of interface; set up initialization block
  * and transmit/receive descriptor rings.
  *
  * Returns immediately if the interface is already marked
  * IFF_DRV_RUNNING.  Otherwise the chip is stopped and reset, the
  * host-side rings are re-initialized, the MAC and DMA registers are
  * programmed, RX/TX are enabled, the media is (re)selected and the
  * one second tick callout is armed.
  *
  * The softc lock must be held by the caller.
  */
 static void
 cas_init_locked(struct cas_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t v;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Nothing to do when the interface is already up and running. */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		return;
 
 #ifdef CAS_DEBUG
 	CTR2(KTR_CAS, "%s: %s: calling stop", device_get_name(sc->sc_dev),
 	    __func__);
 #endif
 	/*
 	 * Initialization sequence.  The numbered steps below correspond
 	 * to the sequence outlined in section 6.3.5.1 in the Ethernet
 	 * Channel Engine manual (part of the PCIO manual).
 	 * See also the STP2002-STQ document from Sun Microsystems.
 	 */
 
 	/* step 1 & 2.  Reset the Ethernet Channel. */
 	cas_stop(ifp);
 	cas_reset(sc);
 #ifdef CAS_DEBUG
 	CTR2(KTR_CAS, "%s: %s: restarting", device_get_name(sc->sc_dev),
 	    __func__);
 #endif
 
 	if ((sc->sc_flags & CAS_SERDES) == 0)
 		/* Re-initialize the MIF. */
 		cas_mifinit(sc);
 
 	/* step 3.  Setup data structures in host memory. */
 	cas_meminit(sc);
 
 	/* step 4.  TX MAC registers & counters */
 	cas_init_regs(sc);
 
 	/* step 5.  RX MAC registers & counters */
 	cas_setladrf(sc);
 
 	/* step 6 & 7.  Program Ring Base Addresses. */
 	CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_HI,
 	    (((uint64_t)CAS_CDTXDADDR(sc, 0)) >> 32));
 	CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_LO,
 	    CAS_CDTXDADDR(sc, 0) & 0xffffffff);
 
 	CAS_WRITE_4(sc, CAS_RX_COMP_BASE_HI,
 	    (((uint64_t)CAS_CDRXCADDR(sc, 0)) >> 32));
 	CAS_WRITE_4(sc, CAS_RX_COMP_BASE_LO,
 	    CAS_CDRXCADDR(sc, 0) & 0xffffffff);
 
 	CAS_WRITE_4(sc, CAS_RX_DESC_BASE_HI,
 	    (((uint64_t)CAS_CDRXDADDR(sc, 0)) >> 32));
 	CAS_WRITE_4(sc, CAS_RX_DESC_BASE_LO,
 	    CAS_CDRXDADDR(sc, 0) & 0xffffffff);
 
 	/* The second RX descriptor ring is only programmed with CAS_REG_PLUS. */
 	if ((sc->sc_flags & CAS_REG_PLUS) != 0) {
 		CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_HI,
 		    (((uint64_t)CAS_CDRXD2ADDR(sc, 0)) >> 32));
 		CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_LO,
 		    CAS_CDRXD2ADDR(sc, 0) & 0xffffffff);
 	}
 
 #ifdef CAS_DEBUG
 	CTR5(KTR_CAS,
 	    "loading TXDR %lx, RXCR %lx, RXDR %lx, RXD2R %lx, cddma %lx",
 	    CAS_CDTXDADDR(sc, 0), CAS_CDRXCADDR(sc, 0), CAS_CDRXDADDR(sc, 0),
 	    CAS_CDRXD2ADDR(sc, 0), sc->sc_cddma);
 #endif
 
 	/* step 8.  Global Configuration & Interrupt Masks */
 
 	/* Disable weighted round robin. */
 	CAS_WRITE_4(sc, CAS_CAW, CAS_CAW_RR_DIS);
 
 	/*
 	 * Enable infinite bursts for revisions without PCI issues if
 	 * applicable.  Doing so greatly improves the TX performance on
 	 * !__sparc64__.
 	 */
 	CAS_WRITE_4(sc, CAS_INF_BURST,
 #if !defined(__sparc64__)
 	    (sc->sc_flags & CAS_TABORT) == 0 ? CAS_INF_BURST_EN :
 #endif
 	    0);
 
 	/* Set up interrupts. */
 	CAS_WRITE_4(sc, CAS_INTMASK,
 	    ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR |
 	    CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR |
 	    CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY |
 	    CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH |
 	    CAS_INTR_PCI_ERROR_INT
 #ifdef CAS_DEBUG
 	    | CAS_INTR_PCS_INT | CAS_INTR_MIF
 #endif
 	    ));
 	/* Don't clear top level interrupts when CAS_STATUS_ALIAS is read. */
 	CAS_WRITE_4(sc, CAS_CLEAR_ALIAS, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_MASK, ~CAS_MAC_RX_OVERFLOW);
 	CAS_WRITE_4(sc, CAS_MAC_TX_MASK,
 	    ~(CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR));
 #ifdef CAS_DEBUG
 	CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK,
 	    ~(CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE |
 	    CAS_MAC_CTRL_NON_PAUSE));
 #else
 	CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK,
 	    CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE |
 	    CAS_MAC_CTRL_NON_PAUSE);
 #endif
 
 	/* Enable PCI error interrupts. */
 	CAS_WRITE_4(sc, CAS_ERROR_MASK,
 	    ~(CAS_ERROR_DTRTO | CAS_ERROR_OTHER | CAS_ERROR_DMAW_ZERO |
 	    CAS_ERROR_DMAR_ZERO | CAS_ERROR_RTRTO));
 
 	/* Enable PCI error interrupts in BIM configuration. */
 	CAS_WRITE_4(sc, CAS_BIM_CONF,
 	    CAS_BIM_CONF_DPAR_EN | CAS_BIM_CONF_RMA_EN | CAS_BIM_CONF_RTA_EN);
 
 	/*
 	 * step 9.  ETX Configuration: encode transmit descriptor ring size,
 	 * enable DMA and disable pre-interrupt writeback completion.
 	 */
 	v = cas_descsize(CAS_NTXDESC) << CAS_TX_CONF_DESC3_SHFT;
 	CAS_WRITE_4(sc, CAS_TX_CONF, v | CAS_TX_CONF_TXDMA_EN |
 	    CAS_TX_CONF_RDPP_DIS | CAS_TX_CONF_PICWB_DIS);
 
 	/* step 10.  ERX Configuration */
 
 	/*
 	 * Encode receive completion and descriptor ring sizes, set the
 	 * swivel offset.
 	 */
 	v = cas_rxcompsize(CAS_NRXCOMP) << CAS_RX_CONF_COMP_SHFT;
 	v |= cas_descsize(CAS_NRXDESC) << CAS_RX_CONF_DESC_SHFT;
 	if ((sc->sc_flags & CAS_REG_PLUS) != 0)
 		v |= cas_descsize(CAS_NRXDESC2) << CAS_RX_CONF_DESC2_SHFT;
 	CAS_WRITE_4(sc, CAS_RX_CONF,
 	    v | (ETHER_ALIGN << CAS_RX_CONF_SOFF_SHFT));
 
 	/* Set the PAUSE thresholds.  We use the maximum OFF threshold. */
 	CAS_WRITE_4(sc, CAS_RX_PTHRS,
-	    ((111 * 64) << CAS_RX_PTHRS_XOFF_SHFT) |
-	    ((15 * 64) << CAS_RX_PTHRS_XON_SHFT));
+	    (111 << CAS_RX_PTHRS_XOFF_SHFT) | (15 << CAS_RX_PTHRS_XON_SHFT));
 
 	/* RX blanking */
 	CAS_WRITE_4(sc, CAS_RX_BLANK,
 	    (15 << CAS_RX_BLANK_TIME_SHFT) | (5 << CAS_RX_BLANK_PKTS_SHFT));
 
 	/* Set RX_COMP_AFULL threshold to half of the RX completions. */
 	CAS_WRITE_4(sc, CAS_RX_AEMPTY_THRS,
 	    (CAS_NRXCOMP / 2) << CAS_RX_AEMPTY_COMP_SHFT);
 
 	/* Initialize the RX page size register as appropriate for 8k. */
 	CAS_WRITE_4(sc, CAS_RX_PSZ,
 	    (CAS_RX_PSZ_8K << CAS_RX_PSZ_SHFT) |
 	    (4 << CAS_RX_PSZ_MB_CNT_SHFT) |
 	    (CAS_RX_PSZ_MB_STRD_2K << CAS_RX_PSZ_MB_STRD_SHFT) |
 	    (CAS_RX_PSZ_MB_OFF_64 << CAS_RX_PSZ_MB_OFF_SHFT));
 
 	/* Disable RX random early detection. */
 	CAS_WRITE_4(sc,	CAS_RX_RED, 0);
 
 	/* Zero the RX reassembly DMA table. */
 	for (v = 0; v <= CAS_RX_REAS_DMA_ADDR_LC; v++) {
 		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_ADDR, v);
 		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_DATA_LO, 0);
 		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_DATA_MD, 0);
 		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_DATA_HI, 0);
 	}
 
 	/* Ensure the RX control FIFO and RX IPP FIFO addresses are zero. */
 	CAS_WRITE_4(sc, CAS_RX_CTRL_FIFO, 0);
 	CAS_WRITE_4(sc, CAS_RX_IPP_ADDR, 0);
 
 	/* Finally, enable RX DMA. */
 	CAS_WRITE_4(sc, CAS_RX_CONF,
 	    CAS_READ_4(sc, CAS_RX_CONF) | CAS_RX_CONF_RXDMA_EN);
 
 	/* step 11.  Configure Media. */
 
 	/*
 	 * step 12.  RX_MAC Configuration Register
 	 * The register is first written as 0 and the enable bit is waited
 	 * on before the new value (computed up front in v) is written.
 	 */
 	v = CAS_READ_4(sc, CAS_MAC_RX_CONF) & ~CAS_MAC_RX_CONF_STRPPAD;
 	v |= CAS_MAC_RX_CONF_EN | CAS_MAC_RX_CONF_STRPFCS;
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, 0);
 	CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0))
 		device_printf(sc->sc_dev, "cannot configure RX MAC\n");
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v);
 
 	/* step 13.  TX_MAC Configuration Register (same pattern as step 12) */
 	v = CAS_READ_4(sc, CAS_MAC_TX_CONF);
 	v |= CAS_MAC_TX_CONF_EN;
 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, 0);
 	CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0))
 		device_printf(sc->sc_dev, "cannot configure TX MAC\n");
 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, v);
 
 	/* step 14.  Issue Transmit Pending command. */
 
 	/* step 15.  Give the receiver a swift kick. */
 	CAS_WRITE_4(sc, CAS_RX_KICK, CAS_NRXDESC - 4);
 	CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, 0);
 	if ((sc->sc_flags & CAS_REG_PLUS) != 0)
 		CAS_WRITE_4(sc, CAS_RX_KICK2, CAS_NRXDESC2 - 4);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	mii_mediachg(sc->sc_mii);
 
 	/* Start the one second timer. */
 	sc->sc_wdog_timer = 0;
 	callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc);
 }
 
 /*
  * Map the mbuf chain *m_head for DMA and fill in the TX descriptors
  * describing it.
  *
  * Returns 0 on success, in which case the chain is recorded in the TX
  * soft state that was moved from the free to the dirty queue.  On
  * failure an errno value is returned; *m_head is then either NULL (the
  * chain has been freed here) or still points at a chain owned by the
  * caller, which may be a replacement produced by m_dup(), m_pullup()
  * or m_collapse().
  *
  * The softc lock must be held by the caller.
  */
 static int
 cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head)
 {
 	bus_dma_segment_t txsegs[CAS_NTXSEGS];
 	struct cas_txsoft *txs;
 	struct ip *ip;
 	struct mbuf *m;
 	uint64_t cflags;
 	int error, nexttx, nsegs, offset, seg;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Get a work queue entry. */
 	if ((txs = STAILQ_FIRST(&sc->sc_txfreeq)) == NULL) {
 		/* Ran out of descriptors. */
 		return (ENOBUFS);
 	}
 
 	/*
 	 * When checksum offloading was requested, locate the end of the
 	 * IP header and encode the checksum start/stuff offsets.  The
 	 * headers have to be contiguous and writable for that.
 	 */
 	cflags = 0;
 	if (((*m_head)->m_pkthdr.csum_flags & CAS_CSUM_FEATURES) != 0) {
 		if (M_WRITABLE(*m_head) == 0) {
 			m = m_dup(*m_head, M_DONTWAIT);
 			m_freem(*m_head);
 			*m_head = m;
 			if (m == NULL)
 				return (ENOBUFS);
 		}
 		offset = sizeof(struct ether_header);
 		m = m_pullup(*m_head, offset + sizeof(struct ip));
 		if (m == NULL) {
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		ip = (struct ip *)(mtod(m, caddr_t) + offset);
 		offset += (ip->ip_hl << 2);
 		cflags = (offset << CAS_TD_CKSUM_START_SHFT) |
 		    ((offset + m->m_pkthdr.csum_data) <<
 		    CAS_TD_CKSUM_STUFF_SHFT) | CAS_TD_CKSUM_EN;
 		*m_head = m;
 	}
 
 	/* Load the chain; on EFBIG collapse it once and try again. */
 	error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap,
 	    *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT);
 	if (error == EFBIG) {
 		m = m_collapse(*m_head, M_DONTWAIT, CAS_NTXSEGS);
 		if (m == NULL) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		*m_head = m;
 		error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag,
 		    txs->txs_dmamap, *m_head, txsegs, &nsegs,
 		    BUS_DMA_NOWAIT);
 		if (error != 0) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (error);
 		}
 	} else if (error != 0)
 		return (error);
 	/* If nsegs is wrong then the stack is corrupt. */
 	KASSERT(nsegs <= CAS_NTXSEGS,
 	    ("%s: too many DMA segments (%d)", __func__, nsegs));
 	if (nsegs == 0) {
 		m_freem(*m_head);
 		*m_head = NULL;
 		return (EIO);
 	}
 
 	/*
 	 * Ensure we have enough descriptors free to describe
 	 * the packet.  Note, we always reserve one descriptor
 	 * at the end of the ring as a termination point, in
 	 * order to prevent wrap-around.
 	 */
 	if (nsegs > sc->sc_txfree - 1) {
 		txs->txs_ndescs = 0;
 		bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
 		return (ENOBUFS);
 	}
 
 	/* One descriptor per DMA segment, starting at sc_txnext. */
 	txs->txs_ndescs = nsegs;
 	txs->txs_firstdesc = sc->sc_txnext;
 	nexttx = txs->txs_firstdesc;
 	for (seg = 0; seg < nsegs; seg++, nexttx = CAS_NEXTTX(nexttx)) {
 #ifdef CAS_DEBUG
 		CTR6(KTR_CAS,
 		    "%s: mapping seg %d (txd %d), len %lx, addr %#lx (%#lx)",
 		    __func__, seg, nexttx, txsegs[seg].ds_len,
 		    txsegs[seg].ds_addr, htole64(txsegs[seg].ds_addr));
 #endif
 		sc->sc_txdescs[nexttx].cd_buf_ptr =
 		    htole64(txsegs[seg].ds_addr);
 		KASSERT(txsegs[seg].ds_len <
 		    CAS_TD_BUF_LEN_MASK >> CAS_TD_BUF_LEN_SHFT,
 		    ("%s: segment size too large!", __func__));
 		sc->sc_txdescs[nexttx].cd_flags =
 		    htole64(txsegs[seg].ds_len << CAS_TD_BUF_LEN_SHFT);
 		txs->txs_lastdesc = nexttx;
 	}
 
 	/* Set EOF on the last descriptor. */
 #ifdef CAS_DEBUG
 	CTR3(KTR_CAS, "%s: end of frame at segment %d, TX %d",
 	    __func__, seg, nexttx);
 #endif
 	sc->sc_txdescs[txs->txs_lastdesc].cd_flags |=
 	    htole64(CAS_TD_END_OF_FRAME);
 
 	/* Lastly set SOF on the first descriptor. */
 #ifdef CAS_DEBUG
 	CTR3(KTR_CAS, "%s: start of frame at segment %d, TX %d",
 	    __func__, seg, nexttx);
 #endif
 	/*
 	 * Account for the descriptors just used and request a TX
 	 * interrupt (CAS_TD_INT_ME) once more than 2/3 of the ring has
 	 * been queued since the last request.  The parentheses around
 	 * the compound assignment are essential: without them the
 	 * comparison result (0 or 1) would be added instead of nsegs.
 	 */
 	if ((sc->sc_txwin += nsegs) > CAS_MAXTXFREE * 2 / 3) {
 		sc->sc_txwin = 0;
 		sc->sc_txdescs[txs->txs_firstdesc].cd_flags |=
 		    htole64(cflags | CAS_TD_START_OF_FRAME | CAS_TD_INT_ME);
 	} else
 		sc->sc_txdescs[txs->txs_firstdesc].cd_flags |=
 		    htole64(cflags | CAS_TD_START_OF_FRAME);
 
 	/* Sync the DMA map. */
 	bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 
 #ifdef CAS_DEBUG
 	CTR4(KTR_CAS, "%s: setting firstdesc=%d, lastdesc=%d, ndescs=%d",
 	    __func__, txs->txs_firstdesc, txs->txs_lastdesc,
 	    txs->txs_ndescs);
 #endif
 	/* The soft descriptor is in flight now and owns the mbuf chain. */
 	STAILQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q);
 	STAILQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q);
 	txs->txs_mbuf = *m_head;
 
 	sc->sc_txnext = CAS_NEXTTX(txs->txs_lastdesc);
 	sc->sc_txfree -= txs->txs_ndescs;
 
 	return (0);
 }
 
 /*
  * Program the MAC registers.  The registers in the first part are only
  * written once per softc (tracked with the CAS_INITED flag); the
  * counters, the XOFF PAUSE time, the station address and the XIF
  * configuration are written on every call.
  *
  * The softc lock must be held by the caller.
  */
 static void
 cas_init_regs(struct cas_softc *sc)
 {
 	int i;
 	const u_char *laddr = IF_LLADDR(sc->sc_ifp);
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* These registers are not cleared on reset. */
 	if ((sc->sc_flags & CAS_INITED) == 0) {
 		/* magic values */
 		CAS_WRITE_4(sc, CAS_MAC_IPG0, 0);
 		CAS_WRITE_4(sc, CAS_MAC_IPG1, 8);
 		CAS_WRITE_4(sc, CAS_MAC_IPG2, 4);
 
 		/* min frame length */
 		CAS_WRITE_4(sc, CAS_MAC_MIN_FRAME, ETHER_MIN_LEN);
 		/* max frame length and max burst size */
 		CAS_WRITE_4(sc, CAS_MAC_MAX_BF,
 		    ((ETHER_MAX_LEN_JUMBO + ETHER_VLAN_ENCAP_LEN) <<
 		    CAS_MAC_MAX_BF_FRM_SHFT) |
 		    (0x2000 << CAS_MAC_MAX_BF_BST_SHFT));
 
 		/* more magic values */
 		CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7);
 		CAS_WRITE_4(sc, CAS_MAC_JAM_SIZE, 0x4);
 		CAS_WRITE_4(sc, CAS_MAC_ATTEMPT_LIMIT, 0x10);
-		CAS_WRITE_4(sc, CAS_MAC_CTRL_TYPE, 0x8088);
+		CAS_WRITE_4(sc, CAS_MAC_CTRL_TYPE, 0x8808);
 
 		/* random number seed */
 		CAS_WRITE_4(sc, CAS_MAC_RANDOM_SEED,
 		    ((laddr[5] << 8) | laddr[4]) & 0x3ff);
 
 		/* secondary MAC addresses: 0:0:0:0:0:0 */
 		for (i = CAS_MAC_ADDR3; i <= CAS_MAC_ADDR41;
 		    i += CAS_MAC_ADDR4 - CAS_MAC_ADDR3)
 			CAS_WRITE_4(sc, i, 0);
 
 		/* MAC control address: 01:80:c2:00:00:01 */
 		CAS_WRITE_4(sc, CAS_MAC_ADDR42, 0x0001);
 		CAS_WRITE_4(sc, CAS_MAC_ADDR43, 0xc200);
 		CAS_WRITE_4(sc, CAS_MAC_ADDR44, 0x0180);
 
 		/* MAC filter address: 0:0:0:0:0:0 */
 		CAS_WRITE_4(sc, CAS_MAC_AFILTER0, 0);
 		CAS_WRITE_4(sc, CAS_MAC_AFILTER1, 0);
 		CAS_WRITE_4(sc, CAS_MAC_AFILTER2, 0);
 		CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK1_2, 0);
 		CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK0, 0);
 
 		/* Zero the hash table. */
 		for (i = CAS_MAC_HASH0; i <= CAS_MAC_HASH15;
 		    i += CAS_MAC_HASH1 - CAS_MAC_HASH0)
 			CAS_WRITE_4(sc, i, 0);
 
 		/* Remember that the one-time part has been done. */
 		sc->sc_flags |= CAS_INITED;
 	}
 
 	/* Counters need to be zeroed. */
 	CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_DEFER_TMR_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_PEAK_ATTEMPTS, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_FRAME_COUNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0);
 	CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0);
 
 	/* Set XOFF PAUSE time. */
 	CAS_WRITE_4(sc, CAS_MAC_SPC, 0x1BF0 << CAS_MAC_SPC_TIME_SHFT);
 
 	/*
 	 * Set the station address.  Each register takes two bytes of the
 	 * link-level address, the last pair going into CAS_MAC_ADDR0.
 	 */
 	CAS_WRITE_4(sc, CAS_MAC_ADDR0, (laddr[4] << 8) | laddr[5]);
 	CAS_WRITE_4(sc, CAS_MAC_ADDR1, (laddr[2] << 8) | laddr[3]);
 	CAS_WRITE_4(sc, CAS_MAC_ADDR2, (laddr[0] << 8) | laddr[1]);
 
 	/* Enable MII outputs. */
 	CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, CAS_MAC_XIF_CONF_TX_OE);
 }
 
 /*
  * Task handler that kicks off transmission; arg is the ifnet and the
  * pending count is ignored.
  */
 static void
 cas_tx_task(void *arg, int pending __unused)
 {
 
 	cas_start((struct ifnet *)arg);
 }
 
 /*
  * Tell the chip about newly queued TX descriptors by writing sc_txnext
  * to the TX kick register.  That register has to point to the
  * descriptor following the last valid one; as the DMA engine fetches
  * and updates descriptors in batches of 4, advancing it in multiples
  * of 4 gives the best performance.
  */
 static inline void
 cas_txkick(struct cas_softc *sc)
 {
 
 #ifdef CAS_DEBUG
 	CTR3(KTR_CAS, "%s: %s: kicking TX %d",
 	    device_get_name(sc->sc_dev), __func__, sc->sc_txnext);
 #endif
 	/* Sync the control data before the kick register is updated. */
 	CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CAS_WRITE_4(sc, CAS_TX_KICK3, sc->sc_txnext);
 }
 
 /*
  * Start output on the interface: dequeue packets from the send queue,
  * load them into the TX ring, kick the chip and arm the watchdog.
  * Takes and releases the softc lock itself.
  */
 static void
 cas_start(struct ifnet *ifp)
 {
 	struct cas_softc *sc;
 	struct mbuf *m;
 	int kicked, ntx;
 
 	sc = ifp->if_softc;
 	CAS_LOCK(sc);
 
 	/* Only proceed when running, not output-active and with link. */
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING || (sc->sc_flags & CAS_LINK) == 0)
 		goto done;
 
 	/* Reclaim completed descriptors when the ring is getting full. */
 	if (sc->sc_txfree < CAS_MAXTXFREE / 4)
 		cas_tint(sc);
 
 #ifdef CAS_DEBUG
 	CTR4(KTR_CAS, "%s: %s: txfree %d, txnext %d",
 	    device_get_name(sc->sc_dev), __func__, sc->sc_txfree,
 	    sc->sc_txnext);
 #endif
 	ntx = 0;
 	kicked = 0;
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->sc_txfree > 1) {
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
 		if (m == NULL)
 			break;
 		if (cas_load_txmbuf(sc, &m) != 0) {
 			/*
 			 * If the chain survived, put it back and stall
 			 * output until descriptors are freed.
 			 */
 			if (m != NULL) {
 				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 				IFQ_DRV_PREPEND(&ifp->if_snd, m);
 			}
 			break;
 		}
 		/* Kick only when the ring index is a multiple of 4. */
 		kicked = ((sc->sc_txnext % 4) == 0);
 		if (kicked != 0)
 			cas_txkick(sc);
 		ntx++;
 		BPF_MTAP(ifp, m);
 	}
 
 	if (ntx == 0)
 		goto done;
 
 	/* Make sure the last packet(s) queued are kicked as well. */
 	if (kicked == 0)
 		cas_txkick(sc);
 #ifdef CAS_DEBUG
 	CTR2(KTR_CAS, "%s: packets enqueued, OWN on %d",
 	    device_get_name(sc->sc_dev), sc->sc_txnext);
 #endif
 
 	/* Set a watchdog timer in case the chip flakes out. */
 	sc->sc_wdog_timer = 5;
 #ifdef CAS_DEBUG
 	CTR3(KTR_CAS, "%s: %s: watchdog %d",
 	    device_get_name(sc->sc_dev), __func__,
 	    sc->sc_wdog_timer);
 #endif
 
  done:
 	CAS_UNLOCK(sc);
 }
 
 /*
  * TX completion handling: walk the dirty queue, and for every TX soft
  * descriptor the chip is done with (judged by CAS_TX_COMP3) unload its
  * DMA map, free its mbuf, return its descriptors to the free count and
  * move it back to the free queue.  When anything was reclaimed,
  * IFF_DRV_OACTIVE is cleared and the watchdog is disarmed once the
  * dirty queue is empty.
  */
 static void
 cas_tint(struct cas_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct cas_txsoft *txs;
 	int progress;
 	uint32_t txlast;
 #ifdef CAS_DEBUG
 	int i;
 
 	/*
 	 * NOTE(review): this lock assertion sits inside the CAS_DEBUG
 	 * conditional and so is not compiled in otherwise -- confirm
 	 * whether that is intended.
 	 */
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 
 	/*
 	 * Go through our TX list and free mbufs for those
 	 * frames that have been transmitted.
 	 */
 	progress = 0;
 	CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD);
 	while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) {
 #ifdef CAS_DEBUG
 		if ((ifp->if_flags & IFF_DEBUG) != 0) {
 			printf("    txsoft %p transmit chain:\n", txs);
 			for (i = txs->txs_firstdesc;; i = CAS_NEXTTX(i)) {
 				printf("descriptor %d: ", i);
 				printf("cd_flags: 0x%016llx\t",
 				    (long long)le64toh(
 				    sc->sc_txdescs[i].cd_flags));
 				printf("cd_buf_ptr: 0x%016llx\n",
 				    (long long)le64toh(
 				    sc->sc_txdescs[i].cd_buf_ptr));
 				if (i == txs->txs_lastdesc)
 					break;
 			}
 		}
 #endif
 
 		/*
 		 * In theory, we could harvest some descriptors before
 		 * the ring is empty, but that's a bit complicated.
 		 *
 		 * CAS_TX_COMPn points to the last descriptor
 		 * processed + 1.
 		 */
 		txlast = CAS_READ_4(sc, CAS_TX_COMP3);
 #ifdef CAS_DEBUG
 		CTR4(KTR_CAS, "%s: txs->txs_firstdesc = %d, "
 		    "txs->txs_lastdesc = %d, txlast = %d",
 		    __func__, txs->txs_firstdesc, txs->txs_lastdesc, txlast);
 #endif
 		/*
 		 * Stop at the first packet whose descriptor range still
 		 * contains the completion index, i.e. which the chip has
 		 * not finished yet.
 		 */
 		if (txs->txs_firstdesc <= txs->txs_lastdesc) {
 			if ((txlast >= txs->txs_firstdesc) &&
 			    (txlast <= txs->txs_lastdesc))
 				break;
 		} else {
 			/* Ick -- this command wraps. */
 			if ((txlast >= txs->txs_firstdesc) ||
 			    (txlast <= txs->txs_lastdesc))
 				break;
 		}
 
 #ifdef CAS_DEBUG
 		CTR1(KTR_CAS, "%s: releasing a descriptor", __func__);
 #endif
 		STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q);
 
 		sc->sc_txfree += txs->txs_ndescs;
 
 		bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
 		    BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
 		if (txs->txs_mbuf != NULL) {
 			m_freem(txs->txs_mbuf);
 			txs->txs_mbuf = NULL;
 		}
 
 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
 
 		ifp->if_opackets++;
 		progress = 1;
 	}
 
 #ifdef CAS_DEBUG
-	CTR4(KTR_CAS, "%s: CAS_TX_STATE_MACHINE %x CAS_TX_DESC_BASE %llx "
+	CTR5(KTR_CAS, "%s: CAS_TX_SM1 %x CAS_TX_SM2 %x CAS_TX_DESC_BASE %llx "
 	    "CAS_TX_COMP3 %x",
-	    __func__, CAS_READ_4(sc, CAS_TX_STATE_MACHINE),
-	    ((long long)CAS_READ_4(sc, CAS_TX_DESC_BASE_HI3) << 32) |
-	    CAS_READ_4(sc, CAS_TX_DESC_BASE_LO3),
+	    __func__, CAS_READ_4(sc, CAS_TX_SM1), CAS_READ_4(sc, CAS_TX_SM2),
+	    ((long long)CAS_READ_4(sc, CAS_TX_DESC3_BASE_HI) << 32) |
+	    CAS_READ_4(sc, CAS_TX_DESC3_BASE_LO),
 	    CAS_READ_4(sc, CAS_TX_COMP3));
 #endif
 
 	if (progress) {
 		/* We freed some descriptors, so reset IFF_DRV_OACTIVE. */
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		if (STAILQ_EMPTY(&sc->sc_txdirtyq))
 			sc->sc_wdog_timer = 0;
 	}
 
 #ifdef CAS_DEBUG
 	CTR3(KTR_CAS, "%s: %s: watchdog %d",
 	    device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer);
 #endif
 }
 
 /*
  * Callout handler that simply re-runs the RX completion processing;
  * arg is the softc.  The softc lock must not be held.
  */
 static void
 cas_rint_timeout(void *arg)
 {
 	struct cas_softc *sc;
 
 	sc = arg;
 	CAS_LOCK_ASSERT(sc, MA_NOTOWNED);
 	cas_rint(sc);
 }
 
 /*
  * RX completion handling.  Walks the receive completion ring from
  * sc_rxcptr up to the head index read from the chip.  Every completion
  * describes either a header buffer or a data buffer (optionally split
  * across a second buffer); the buffer is attached to a fresh mbuf as
  * external storage (freed through cas_free()) and passed to if_input.
  * Processed completion entries are re-initialized and the new tail is
  * written back to the chip at the end.
  *
  * Must be called without the softc lock held.
  */
 static void
 cas_rint(struct cas_softc *sc)
 {
 	struct cas_rxdsoft *rxds, *rxds2;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct mbuf *m, *m2;
 	uint64_t word1, word2, word3, word4;
 	uint32_t rxhead;
 	u_int idx, idx2, len, off, skip;
 
 	CAS_LOCK_ASSERT(sc, MA_NOTOWNED);
 
 	callout_stop(&sc->sc_rx_ch);
 
 #ifdef CAS_DEBUG
 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 
 /* Helpers operating on the completion entry at sc_rxcptr. */
 #define	PRINTWORD(n, delimiter)						\
 	printf("word ## n: 0x%016llx%c", (long long)word ## n, delimiter)
 
 #define	SKIPASSERT(n)							\
 	KASSERT(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n == 0,	\
 	    ("%s: word ## n not 0", __func__))
 
 #define	WORDTOH(n)							\
 	word ## n = le64toh(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n)
 
 	/*
 	 * Read the completion head register once.  This limits
 	 * how long the following loop can execute.
 	 */
 	rxhead = CAS_READ_4(sc, CAS_RX_COMP_HEAD);
 #ifdef CAS_DEBUG
 	CTR4(KTR_CAS, "%s: sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d",
-	    __func__, sc->rxcptr, sc->sc_rxdptr, rxhead);
+	    __func__, sc->sc_rxcptr, sc->sc_rxdptr, rxhead);
 #endif
 	skip = 0;
 	CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	for (; sc->sc_rxcptr != rxhead;
 	    sc->sc_rxcptr = CAS_NEXTRXCOMP(sc->sc_rxcptr)) {
 		/*
 		 * Entries announced via the skip count of a previous
 		 * completion are only checked and re-initialized.
 		 */
 		if (skip != 0) {
 			SKIPASSERT(1);
 			SKIPASSERT(2);
 			SKIPASSERT(3);
 
 			--skip;
 			goto skip;
 		}
 
 		WORDTOH(1);
 		WORDTOH(2);
 		WORDTOH(3);
 		WORDTOH(4);
 
 #ifdef CAS_DEBUG
 		if ((ifp->if_flags & IFF_DEBUG) != 0) {
 			printf("    completion %d: ", sc->sc_rxcptr);
 			PRINTWORD(1, '\t');
 			PRINTWORD(2, '\t');
 			PRINTWORD(3, '\t');
 			PRINTWORD(4, '\n');
 		}
 #endif
 
 		if (__predict_false(
 		    (word1 & CAS_RC1_TYPE_MASK) == CAS_RC1_TYPE_HW ||
 		    (word4 & CAS_RC4_ZERO) != 0)) {
 			/*
 			 * The descriptor is still marked as owned, although
 			 * it is supposed to have completed.  This has been
 			 * observed on some machines.  Just exiting here
 			 * might leave the packet sitting around until another
 			 * one arrives to trigger a new interrupt, which is
 			 * generally undesirable, so set up a timeout.
 			 */
 			callout_reset(&sc->sc_rx_ch, CAS_RXOWN_TICKS,
 			    cas_rint_timeout, sc);
 			break;
 		}
 
 		/*
 		 * NOTE(review): the continue below bypasses the skip: label,
 		 * so for such an entry neither cas_rxcompinit() is run nor
 		 * are any buffers released or the skip count picked up --
 		 * confirm that this is intended.
 		 */
 		if (__predict_false(
 		    (word4 & (CAS_RC4_BAD | CAS_RC4_LEN_MMATCH)) != 0)) {
 			ifp->if_ierrors++;
 			device_printf(sc->sc_dev,
 			    "receive error: CRC error\n");
 			continue;
 		}
 
 		KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 ||
 		    CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0,
 		    ("%s: data and header present", __func__));
 		KASSERT((word1 & CAS_RC1_SPLIT_PKT) == 0 ||
 		    CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0,
 		    ("%s: split and header present", __func__));
 		KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 ||
 		    (word1 & CAS_RC1_RELEASE_HDR) == 0,
 		    ("%s: data present but header release", __func__));
 		KASSERT(CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0 ||
 		    (word1 & CAS_RC1_RELEASE_DATA) == 0,
 		    ("%s: header present but data release", __func__));
 
 		/* Case 1: the packet sits in a header buffer. */
 		if ((len = CAS_GET(word2, CAS_RC2_HDR_SIZE)) != 0) {
 			idx = CAS_GET(word2, CAS_RC2_HDR_INDEX);
 			off = CAS_GET(word2, CAS_RC2_HDR_OFF);
 #ifdef CAS_DEBUG
 			CTR4(KTR_CAS, "%s: hdr at idx %d, off %d, len %d",
 			    __func__, idx, off, len);
 #endif
 			rxds = &sc->sc_rxdsoft[idx];
 			MGETHDR(m, M_DONTWAIT, MT_DATA);
 			if (m != NULL) {
 				/* The mbuf holds its own buffer reference. */
 				refcount_acquire(&rxds->rxds_refcount);
 				bus_dmamap_sync(sc->sc_rdmatag,
 				    rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD);
 #if __FreeBSD_version < 800016
 				MEXTADD(m, (caddr_t)rxds->rxds_buf +
 				    off * 256 + ETHER_ALIGN, len, cas_free,
 				    rxds, M_RDONLY, EXT_NET_DRV);
 #else
 				MEXTADD(m, (caddr_t)rxds->rxds_buf +
 				    off * 256 + ETHER_ALIGN, len, cas_free,
 				    sc, (void *)(uintptr_t)idx,
 				    M_RDONLY, EXT_NET_DRV);
 #endif
 				if ((m->m_flags & M_EXT) == 0) {
 					m_freem(m);
 					m = NULL;
 				}
 			}
 			if (m != NULL) {
 				m->m_pkthdr.rcvif = ifp;
 				m->m_pkthdr.len = m->m_len = len;
 				ifp->if_ipackets++;
 				if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 					cas_rxcksum(m, CAS_GET(word4,
 					    CAS_RC4_TCP_CSUM));
 				/* Pass it on. */
 				(*ifp->if_input)(ifp, m);
 			} else
 				ifp->if_ierrors++;
 
 			/*
 			 * Drop a reference when the chip releases the buffer;
 			 * if it was the last one, hand the buffer back.
 			 */
 			if ((word1 & CAS_RC1_RELEASE_HDR) != 0 &&
 			    refcount_release(&rxds->rxds_refcount) != 0)
 				cas_add_rxdesc(sc, idx);
 		/* Case 2: the packet sits in one or two data buffers. */
 		} else if ((len = CAS_GET(word1, CAS_RC1_DATA_SIZE)) != 0) {
 			idx = CAS_GET(word1, CAS_RC1_DATA_INDEX);
 			off = CAS_GET(word1, CAS_RC1_DATA_OFF);
 #ifdef CAS_DEBUG
 			CTR4(KTR_CAS, "%s: data at idx %d, off %d, len %d",
 			    __func__, idx, off, len);
 #endif
 			rxds = &sc->sc_rxdsoft[idx];
 			MGETHDR(m, M_DONTWAIT, MT_DATA);
 			if (m != NULL) {
 				refcount_acquire(&rxds->rxds_refcount);
 				off += ETHER_ALIGN;
 				/* At most what is left of the first page. */
 				m->m_len = min(CAS_PAGE_SIZE - off, len);
 				bus_dmamap_sync(sc->sc_rdmatag,
 				    rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD);
 #if __FreeBSD_version < 800016
 				MEXTADD(m, (caddr_t)rxds->rxds_buf + off,
 				    m->m_len, cas_free, rxds, M_RDONLY,
 				    EXT_NET_DRV);
 #else
 				MEXTADD(m, (caddr_t)rxds->rxds_buf + off,
 				    m->m_len, cas_free, sc,
 				    (void *)(uintptr_t)idx, M_RDONLY,
 				    EXT_NET_DRV);
 #endif
 				if ((m->m_flags & M_EXT) == 0) {
 					m_freem(m);
 					m = NULL;
 				}
 			}
 			idx2 = 0;
 			m2 = NULL;
 			rxds2 = NULL;
 			/* The remainder of a split packet is in buffer idx2. */
 			if ((word1 & CAS_RC1_SPLIT_PKT) != 0) {
 				KASSERT((word1 & CAS_RC1_RELEASE_NEXT) != 0,
 				    ("%s: split but no release next",
 				    __func__));
 
 				idx2 = CAS_GET(word2, CAS_RC2_NEXT_INDEX);
 #ifdef CAS_DEBUG
 				CTR2(KTR_CAS, "%s: split at idx %d",
 				    __func__, idx2);
 #endif
 				rxds2 = &sc->sc_rxdsoft[idx2];
 				if (m != NULL) {
 					MGET(m2, M_DONTWAIT, MT_DATA);
 					if (m2 != NULL) {
 						refcount_acquire(
 						    &rxds2->rxds_refcount);
 						m2->m_len = len - m->m_len;
 						bus_dmamap_sync(
 						    sc->sc_rdmatag,
 						    rxds2->rxds_dmamap,
 						    BUS_DMASYNC_POSTREAD);
 #if __FreeBSD_version < 800016
 						MEXTADD(m2,
 						    (caddr_t)rxds2->rxds_buf,
 						    m2->m_len, cas_free,
 						    rxds2, M_RDONLY,
 						    EXT_NET_DRV);
 #else
 						MEXTADD(m2,
 						    (caddr_t)rxds2->rxds_buf,
 						    m2->m_len, cas_free, sc,
 						    (void *)(uintptr_t)idx2,
 						    M_RDONLY, EXT_NET_DRV);
 #endif
 						if ((m2->m_flags & M_EXT) ==
 						    0) {
 							m_freem(m2);
 							m2 = NULL;
 						}
 					}
 				}
 				/* Without the second half the first is dropped. */
 				if (m2 != NULL)
 					m->m_next = m2;
 				else if (m != NULL) {
 					m_freem(m);
 					m = NULL;
 				}
 			}
 			if (m != NULL) {
 				m->m_pkthdr.rcvif = ifp;
 				m->m_pkthdr.len = len;
 				ifp->if_ipackets++;
 				if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 					cas_rxcksum(m, CAS_GET(word4,
 					    CAS_RC4_TCP_CSUM));
 				/* Pass it on. */
 				(*ifp->if_input)(ifp, m);
 			} else
 				ifp->if_ierrors++;
 
 			if ((word1 & CAS_RC1_RELEASE_DATA) != 0 &&
 			    refcount_release(&rxds->rxds_refcount) != 0)
 				cas_add_rxdesc(sc, idx);
 			if ((word1 & CAS_RC1_SPLIT_PKT) != 0 &&
 			    refcount_release(&rxds2->rxds_refcount) != 0)
 				cas_add_rxdesc(sc, idx2);
 		}
 
 		/* Number of following completion entries to pass over. */
 		skip = CAS_GET(word1, CAS_RC1_SKIP);
 
  skip:
 		cas_rxcompinit(&sc->sc_rxcomps[sc->sc_rxcptr]);
 		/* Bail out if the interface was stopped in the meantime. */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 	}
 	CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, sc->sc_rxcptr);
 
 #undef PRINTWORD
 #undef SKIPASSERT
 #undef WORDTOH
 
 #ifdef CAS_DEBUG
 	CTR4(KTR_CAS, "%s: done sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d",
-	    __func__, sc->rxcptr, sc->sc_rxdptr,
+	    __func__, sc->sc_rxcptr, sc->sc_rxdptr,
 	    CAS_READ_4(sc, CAS_RX_COMP_HEAD));
 #endif
 }
 
 /*
  * Free routine registered with MEXTADD() for RX buffers that were
  * attached to mbufs as external storage.  Drops one reference on the
  * RX descriptor slot and, if refcount_release() reports non-zero, gives
  * the slot back to the chip via cas_add_rxdesc().
  *
  * The meaning of arg1/arg2 depends on __FreeBSD_version: older kernels
  * pass the soft descriptor in arg2, newer ones pass the softc in arg1
  * and the slot index (cast to a pointer) in arg2.
  */
 static void
 cas_free(void *arg1, void *arg2)
 {
 	struct cas_softc *softc;
 	struct cas_rxdsoft *slot;
 	u_int slotidx;
 
 #if __FreeBSD_version < 800016
 	slot = arg2;
 	softc = slot->rxds_sc;
 	slotidx = slot->rxds_idx;
 #else
 	softc = arg1;
 	slotidx = (uintptr_t)arg2;
 	slot = &softc->sc_rxdsoft[slotidx];
 #endif
 
 	/*
 	 * NB: this function can be called via m_freem(9) within
 	 * this driver!
 	 */
 	if (refcount_release(&slot->rxds_refcount) != 0)
 		cas_add_rxdesc(softc, slotidx);
 }
 
 /*
  * Hand RX buffer slot idx back to the chip: sync its DMA map for the
  * next read, store it in the descriptor at sc_rxdptr and advance
  * sc_rxdptr.  The softc lock is taken here only if CAS_LOCK_OWNED()
  * says the caller does not hold it already.
  */
 static inline void
 cas_add_rxdesc(struct cas_softc *sc, u_int idx)
 {
 	u_int owned;
 
 	/* Remember whether we have to do the locking ourselves. */
 	owned = CAS_LOCK_OWNED(sc);
 	if (owned == 0)
 		CAS_LOCK(sc);
 
 	bus_dmamap_sync(sc->sc_rdmatag, sc->sc_rxdsoft[idx].rxds_dmamap,
 	    BUS_DMASYNC_PREREAD);
 	CAS_UPDATE_RXDESC(sc, sc->sc_rxdptr, idx);
 	sc->sc_rxdptr = CAS_NEXTRXDESC(sc->sc_rxdptr);
 
 	/*
 	 * The RX kick register must point to the descriptor following
 	 * the last valid one (before the current batch).  As the DMA
 	 * engine fetches/updates descriptors in batches of 4, only
 	 * bump it once a full batch of 4 has been refilled.
 	 */
 	if ((sc->sc_rxdptr % 4) == 0) {
 		CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		CAS_WRITE_4(sc, CAS_RX_KICK,
 		    (sc->sc_rxdptr + CAS_NRXDESC - 4) & CAS_NRXDESC_MASK);
 	}
 
 	/* Only drop the lock if it was acquired above. */
 	if (owned == 0)
 		CAS_UNLOCK(sc);
 }
 
 /*
  * Error interrupt handling: count an input error, print the interrupt
  * status (plus PCI error details when flagged), then reinitialize the
  * chip and restart transmission if packets are queued.  Must be called
  * without the softc lock held.
  */
 static void
 cas_eint(struct cas_softc *sc, u_int status)
 {
 	struct ifnet *ifp;
 	device_t dev;
 
 	CAS_LOCK_ASSERT(sc, MA_NOTOWNED);
 
 	ifp = sc->sc_ifp;
 	dev = sc->sc_dev;
 
 	ifp->if_ierrors++;
 
 	/* One log line, assembled piecewise and terminated below. */
 	device_printf(dev, "%s: status 0x%x", __func__, status);
 	if ((status & CAS_INTR_PCI_ERROR_INT) != 0) {
 		status = CAS_READ_4(sc, CAS_ERROR_STATUS);
 		printf(", PCI bus error 0x%x", status);
 		if ((status & CAS_ERROR_OTHER) != 0) {
 			/* Report the PCI status and write it back. */
 			status = pci_read_config(dev, PCIR_STATUS, 2);
 			printf(", PCI status 0x%x", status);
 			pci_write_config(dev, PCIR_STATUS, status, 2);
 		}
 	}
 	printf("\n");
 
 	/* Mark the interface as stopped and bring the chip up again. */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	cas_init(sc);
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
 }
 
 static int
 cas_intr(void *v)
 {
 	struct cas_softc *sc = v;
 
 	if (__predict_false((CAS_READ_4(sc, CAS_STATUS_ALIAS) &
 	    CAS_INTR_SUMMARY) == 0))
 		return (FILTER_STRAY);
 
 	/* Disable interrupts. */
 	CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff);
 	taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task);
 
 	return (FILTER_HANDLED);
 }
 
 /*
  * Interrupt task: reads CAS_STATUS and services the causes found there
  * (errors, MAC TX/RX events, RX and TX completions), then either
  * reschedules itself or unmasks the chip interrupts again.  Must be
  * entered without the softc lock held.
  *
  * arg is the driver softc; pending is unused.
  */
 static void
 cas_intr_task(void *arg, int pending __unused)
 {
 	struct cas_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t status, status2;
 
 	CAS_LOCK_ASSERT(sc, MA_NOTOWNED);
 
 	/* Nothing to service while the interface is not running. */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	status = CAS_READ_4(sc, CAS_STATUS);
 	/* No summary bit set: go straight to unmasking. */
 	if (__predict_false((status & CAS_INTR_SUMMARY) == 0))
 		goto done;
 
 #ifdef CAS_DEBUG
 	CTR4(KTR_CAS, "%s: %s: cplt %x, status %x",
 	    device_get_name(sc->sc_dev), __func__,
-	    (status >> CAS_STATUS_TX_COMP3_SHIFT), (u_int)status);
+	    (status >> CAS_STATUS_TX_COMP3_SHFT), (u_int)status);
 
 	/*
 	 * PCS interrupts must be cleared, otherwise no traffic is passed!
 	 */
 	if ((status & CAS_INTR_PCS_INT) != 0) {
 		/* The PCS interrupt status register is read twice. */
 		status2 =
 		    CAS_READ_4(sc, CAS_PCS_INTR_STATUS) |
 		    CAS_READ_4(sc, CAS_PCS_INTR_STATUS);
 		if ((status2 & CAS_PCS_INTR_LINK) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: PCS link status changed\n", __func__);
 	}
 	/*
 	 * NOTE(review): this tests the interrupt status word against
 	 * CAS_MAC_CTRL_STATUS, which is used as a register offset on the
 	 * next line -- confirm that an interrupt bit was not intended.
 	 */
 	if ((status & CAS_MAC_CTRL_STATUS) != 0) {
 		status2 = CAS_READ_4(sc, CAS_MAC_CTRL_STATUS);
 		if ((status2 & CAS_MAC_CTRL_PAUSE) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: PAUSE received (PAUSE time %d slots)\n",
 			    __func__,
 			    (status2 & CAS_MAC_CTRL_STATUS_PT_MASK) >>
 			    CAS_MAC_CTRL_STATUS_PT_SHFT);
 		/*
 		 * NOTE(review): same bit as the test above; confirm
 		 * whether a distinct "PAUSE state" bit was meant here.
 		 */
 		if ((status2 & CAS_MAC_CTRL_PAUSE) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: transited to PAUSE state\n", __func__);
 		if ((status2 & CAS_MAC_CTRL_NON_PAUSE) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: transited to non-PAUSE state\n", __func__);
 	}
 	if ((status & CAS_INTR_MIF) != 0)
 		device_printf(sc->sc_dev, "%s: MIF interrupt\n", __func__);
 #endif
 
 	/*
 	 * Tag, length mismatch and PCI errors are handed to cas_eint(),
 	 * which reinitializes the chip; return without unmasking here.
 	 */
 	if (__predict_false((status &
 	    (CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_TAG_ERR |
 	    CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT)) != 0)) {
 		cas_eint(sc, status);
 		return;
 	}
 
 	/* TX MAC event: count underrun/oversize as output errors. */
 	if (__predict_false(status & CAS_INTR_TX_MAC_INT)) {
 		status2 = CAS_READ_4(sc, CAS_MAC_TX_STATUS);
 		if ((status2 &
 		    (CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR)) != 0)
 			sc->sc_ifp->if_oerrors++;
 		else if ((status2 & ~CAS_MAC_TX_FRAME_XMTD) != 0)
 			device_printf(sc->sc_dev,
 			    "MAC TX fault, status %x\n", status2);
 	}
 
 	/* RX MAC event: count overflow as an input error. */
 	if (__predict_false(status & CAS_INTR_RX_MAC_INT)) {
 		status2 = CAS_READ_4(sc, CAS_MAC_RX_STATUS);
 		if ((status2 & CAS_MAC_RX_OVERFLOW) != 0)
 			sc->sc_ifp->if_ierrors++;
 		else if ((status2 & ~CAS_MAC_RX_FRAME_RCVD) != 0)
 			device_printf(sc->sc_dev,
 			    "MAC RX fault, status %x\n", status2);
 	}
 
 	/* RX completions and RX resource conditions all run cas_rint(). */
 	if ((status &
 	    (CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL |
 	    CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0) {
 		cas_rint(sc);
 #ifdef CAS_DEBUG
 		if (__predict_false((status &
 		    (CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL |
 		    CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0))
 			device_printf(sc->sc_dev,
 			    "RX fault, status %x\n", status);
 #endif
 	}
 
 	/* TX completions are processed with the softc lock held. */
 	if ((status &
 	    (CAS_INTR_TX_INT_ME | CAS_INTR_TX_ALL | CAS_INTR_TX_DONE)) != 0) {
 		CAS_LOCK(sc);
 		cas_tint(sc);
 		CAS_UNLOCK(sc);
 	}
 
 	/*
 	 * If the interface was stopped in the meantime, leave without
 	 * unmasking; otherwise kick the TX task if packets are queued.
 	 */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 	else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
 
 	/* More events pending: run again instead of unmasking. */
 	status = CAS_READ_4(sc, CAS_STATUS_ALIAS);
 	if (__predict_false((status & CAS_INTR_SUMMARY) != 0)) {
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task);
 		return;
 	}
 
  done:
 	/*
 	 * Re-enable interrupts.  The mask is the complement of the
 	 * sources we want to receive (cas_intr() writes 0xffffffff to
 	 * disable everything).
 	 */
 	CAS_WRITE_4(sc, CAS_INTMASK,
 	    ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR |
 	    CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR |
 	    CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY |
 	    CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH |
 	    CAS_INTR_PCI_ERROR_INT
 #ifdef CAS_DEBUG
 	    | CAS_INTR_PCS_INT | CAS_INTR_MIF
 #endif
 	));
 }
 
 /*
  * Watchdog, called with the softc lock held.  Counts sc_wdog_timer
  * down; when it reaches zero a device timeout is reported, an output
  * error is counted and the chip is reinitialized.  A timer value of 0
  * means the watchdog is not armed.
  */
 static void
 cas_watchdog(struct cas_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 #ifdef CAS_DEBUG
 	CTR4(KTR_CAS,
-	    "%s: CAS_RX_CONFIG %x CAS_MAC_RX_STATUS %x CAS_MAC_RX_CONFIG %x",
-	    __func__, CAS_READ_4(sc, CAS_RX_CONFIG),
+	    "%s: CAS_RX_CONF %x CAS_MAC_RX_STATUS %x CAS_MAC_RX_CONF %x",
+	    __func__, CAS_READ_4(sc, CAS_RX_CONF),
 	    CAS_READ_4(sc, CAS_MAC_RX_STATUS),
-	    CAS_READ_4(sc, CAS_MAC_RX_CONFIG));
+	    CAS_READ_4(sc, CAS_MAC_RX_CONF));
 	CTR4(KTR_CAS,
-	    "%s: CAS_TX_CONFIG %x CAS_MAC_TX_STATUS %x CAS_MAC_TX_CONFIG %x",
-	    __func__, CAS_READ_4(sc, CAS_TX_CONFIG),
+	    "%s: CAS_TX_CONF %x CAS_MAC_TX_STATUS %x CAS_MAC_TX_CONF %x",
+	    __func__, CAS_READ_4(sc, CAS_TX_CONF),
 	    CAS_READ_4(sc, CAS_MAC_TX_STATUS),
-	    CAS_READ_4(sc, CAS_MAC_TX_CONFIG));
+	    CAS_READ_4(sc, CAS_MAC_TX_CONF));
 #endif
 
 	/* Not armed, or armed but not yet expired after this tick. */
 	if (sc->sc_wdog_timer == 0 || --sc->sc_wdog_timer != 0)
 		return;
 
 	/* Without link the timeout is only reported when bootverbose. */
 	if ((sc->sc_flags & CAS_LINK) != 0)
 		device_printf(sc->sc_dev, "device timeout\n");
 	else if (bootverbose)
 		device_printf(sc->sc_dev, "device timeout (no link)\n");
 	++ifp->if_oerrors;
 
 	/* Try to get more packets going. */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	cas_init_locked(sc);
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
 }
 
 static void
 cas_mifinit(struct cas_softc *sc)
 {
 
 	/* Configure the MIF in frame mode. */
 	CAS_WRITE_4(sc, CAS_MIF_CONF,
 	    CAS_READ_4(sc, CAS_MIF_CONF) & ~CAS_MIF_CONF_BB_MODE);
 	CAS_BARRIER(sc, CAS_MIF_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 }
 
 /*
  * MII interface
  *
  * The MII interface supports at least three different operating modes:
  *
  * Bitbang mode is implemented using data, clock and output enable registers.
  *
  * Frame mode is implemented by loading a complete frame into the frame
  * register and polling the valid bit for completion.
  *
  * Polling mode uses the frame register but completion is indicated by
  * an interrupt.
  *
  */
 /*
  * miibus read method: read MII register reg of PHY phy.
  *
  * With CAS_SERDES set the MII register numbers are translated to PCS
  * registers (or answered with fixed values) and phy is not used.
  * Otherwise a MIF frame read is issued and polled for completion.
  *
  * Returns the register value; 0 is returned for unhandled registers
  * and when the frame operation times out.
  */
 static int
 cas_mii_readreg(device_t dev, int phy, int reg)
 {
 	struct cas_softc *sc;
 	int n;
 	uint32_t v;
 
 #ifdef CAS_DEBUG_PHY
 	printf("%s: phy %d reg %d\n", __func__, phy, reg);
 #endif
 
 	sc = device_get_softc(dev);
 	if ((sc->sc_flags & CAS_SERDES) != 0) {
 		/* Map the MII register onto its PCS counterpart. */
 		switch (reg) {
 		case MII_BMCR:
 			reg = CAS_PCS_CTRL;
 			break;
 		case MII_BMSR:
 			reg = CAS_PCS_STATUS;
 			break;
 		case MII_PHYIDR1:
 		case MII_PHYIDR2:
 			/* The ID registers always read as 0 here. */
 			return (0);
 		case MII_ANAR:
 			reg = CAS_PCS_ANAR;
 			break;
 		case MII_ANLPAR:
 			reg = CAS_PCS_ANLPAR;
 			break;
 		case MII_EXTSR:
 			/* Fixed answer: 1000BASE-X full and half duplex. */
 			return (EXTSR_1000XFDX | EXTSR_1000XHDX);
 		default:
 			device_printf(sc->sc_dev,
 			    "%s: unhandled register %d\n", __func__, reg);
 			return (0);
 		}
 		return (CAS_READ_4(sc, reg));
 	}
 
 	/* Construct the frame command. */
 	v = CAS_MIF_FRAME_READ |
 	    (phy << CAS_MIF_FRAME_PHY_SHFT) |
 	    (reg << CAS_MIF_FRAME_REG_SHFT);
 
 	CAS_WRITE_4(sc, CAS_MIF_FRAME, v);
 	CAS_BARRIER(sc, CAS_MIF_FRAME, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	/* Poll up to 100 times, DELAY(1) apart, for the TA LSB. */
 	for (n = 0; n < 100; n++) {
 		DELAY(1);
 		v = CAS_READ_4(sc, CAS_MIF_FRAME);
 		if (v & CAS_MIF_FRAME_TA_LSB)
 			return (v & CAS_MIF_FRAME_DATA);
 	}
 
 	device_printf(sc->sc_dev, "%s: timed out\n", __func__);
 	return (0);
 }
 
 /*
  * miibus write method: write val to MII register reg of PHY phy.
  *
  * With CAS_SERDES set the MII register numbers are translated to PCS
  * registers and phy is not used; this path always returns 0.
  * Otherwise a MIF frame write is issued and polled for completion,
  * returning 1 once the frame completed and 0 on timeout.
  */
 static int
 cas_mii_writereg(device_t dev, int phy, int reg, int val)
 {
 	struct cas_softc *sc;
 	int n;
 	uint32_t v;
 
 #ifdef CAS_DEBUG_PHY
 	/* Arguments must follow the format: function name first. */
 	printf("%s: phy %d reg %d val %x\n", __func__, phy, reg, val);
 #endif
 
 	sc = device_get_softc(dev);
 	if ((sc->sc_flags & CAS_SERDES) != 0) {
 		switch (reg) {
 		case MII_BMSR:
 			reg = CAS_PCS_STATUS;
 			break;
 		case MII_BMCR:
 			reg = CAS_PCS_CTRL;
 			/* Without a reset request this is a plain write. */
 			if ((val & CAS_PCS_CTRL_RESET) == 0)
 				break;
 			/* Reset the PCS and wait for the bit to clear. */
 			CAS_WRITE_4(sc, CAS_PCS_CTRL, val);
 			CAS_BARRIER(sc, CAS_PCS_CTRL, 4,
 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 			if (!cas_bitwait(sc, CAS_PCS_CTRL,
 			    CAS_PCS_CTRL_RESET, 0))
 				device_printf(sc->sc_dev,
 				    "cannot reset PCS\n");
 			/* FALLTHROUGH */
 		case MII_ANAR:
 			/*
 			 * Disable the PCS, load the advertisement, enable
 			 * the SERDES and finally re-enable the PCS.
 			 */
 			CAS_WRITE_4(sc, CAS_PCS_CONF, 0);
 			CAS_BARRIER(sc, CAS_PCS_CONF, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			CAS_WRITE_4(sc, CAS_PCS_ANAR, val);
 			CAS_BARRIER(sc, CAS_PCS_ANAR, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL,
 			    CAS_PCS_SERDES_CTRL_ESD);
 			/*
 			 * NOTE(review): this barrier names CAS_PCS_CONF
 			 * although CAS_PCS_SERDES_CTRL was just written;
 			 * confirm whether that is intentional.
 			 */
 			CAS_BARRIER(sc, CAS_PCS_CONF, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			CAS_WRITE_4(sc, CAS_PCS_CONF,
 			    CAS_PCS_CONF_EN);
 			CAS_BARRIER(sc, CAS_PCS_CONF, 4,
 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 			return (0);
 		case MII_ANLPAR:
 			reg = CAS_PCS_ANLPAR;
 			break;
 		default:
 			device_printf(sc->sc_dev,
 			    "%s: unhandled register %d\n", __func__, reg);
 			return (0);
 		}
 		CAS_WRITE_4(sc, reg, val);
 		CAS_BARRIER(sc, reg, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		return (0);
 	}
 
 	/* Construct the frame command. */
 	v = CAS_MIF_FRAME_WRITE |
 	    (phy << CAS_MIF_FRAME_PHY_SHFT) |
 	    (reg << CAS_MIF_FRAME_REG_SHFT) |
 	    (val & CAS_MIF_FRAME_DATA);
 
 	CAS_WRITE_4(sc, CAS_MIF_FRAME, v);
 	CAS_BARRIER(sc, CAS_MIF_FRAME, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	/* Poll up to 100 times, DELAY(1) apart, for the TA LSB. */
 	for (n = 0; n < 100; n++) {
 		DELAY(1);
 		v = CAS_READ_4(sc, CAS_MIF_FRAME);
 		if (v & CAS_MIF_FRAME_TA_LSB)
 			return (1);
 	}
 
 	device_printf(sc->sc_dev, "%s: timed out\n", __func__);
 	return (0);
 }
 
 /*
  * miibus status change method, called with the softc lock held.
  * Updates the CAS_LINK flag from the current media status and
  * reprograms the MAC (TX/RX configuration, pause handling, checksum
  * offload state, slot time and XIF) to match the negotiated media.
  * The TX and RX MACs are only re-enabled when the interface is running
  * and link is up.
  */
 static void
 cas_mii_statchg(device_t dev)
 {
 	struct cas_softc *sc;
 	struct ifnet *ifp;
 	int gigabit;
 	uint32_t rxcfg, txcfg, v;
 
 	sc = device_get_softc(dev);
 	ifp = sc->sc_ifp;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 #ifdef CAS_DEBUG
 	if ((ifp->if_flags & IFF_DEBUG) != 0)
 		device_printf(sc->sc_dev, "%s: status change\n", __func__);
 #endif
 
 	/* Link is up only with an active, non-NONE media subtype. */
 	if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) != 0 &&
 	    IFM_SUBTYPE(sc->sc_mii->mii_media_active) != IFM_NONE)
 		sc->sc_flags |= CAS_LINK;
 	else
 		sc->sc_flags &= ~CAS_LINK;
 
 	switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) {
 	case IFM_1000_SX:
 	case IFM_1000_LX:
 	case IFM_1000_CX:
 	case IFM_1000_T:
 		gigabit = 1;
 		break;
 	default:
 		gigabit = 0;
 	}
 
 	/*
 	 * The configuration done here corresponds to the steps F) and
 	 * G) and as far as enabling of RX and TX MAC goes also step H)
 	 * of the initialization sequence outlined in section 11.2.1 of
 	 * the Cassini+ ASIC Specification.
 	 */
 
 	rxcfg = CAS_READ_4(sc, CAS_MAC_RX_CONF);
 	rxcfg &= ~(CAS_MAC_RX_CONF_EN | CAS_MAC_RX_CONF_CARR);
 	txcfg = CAS_MAC_TX_CONF_EN_IPG0 | CAS_MAC_TX_CONF_NGU |
 	    CAS_MAC_TX_CONF_NGUL;
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0)
 		txcfg |= CAS_MAC_TX_CONF_ICARR | CAS_MAC_TX_CONF_ICOLLIS;
 	else if (gigabit != 0) {
 		rxcfg |= CAS_MAC_RX_CONF_CARR;
 		txcfg |= CAS_MAC_TX_CONF_CARR;
 	}
 	/* Disable each MAC and wait for that before reconfiguring it. */
 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, 0);
 	CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0))
 		device_printf(sc->sc_dev, "cannot disable TX MAC\n");
 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, txcfg);
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, 0);
 	CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0))
 		device_printf(sc->sc_dev, "cannot disable RX MAC\n");
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, rxcfg);
 
 	/* Pause frame handling follows the negotiated media options. */
 	v = CAS_READ_4(sc, CAS_MAC_CTRL_CONF) &
 	    ~(CAS_MAC_CTRL_CONF_TXP | CAS_MAC_CTRL_CONF_RXP);
-#ifdef notyet
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
 	    IFM_ETH_RXPAUSE) != 0)
 		v |= CAS_MAC_CTRL_CONF_RXP;
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
 	    IFM_ETH_TXPAUSE) != 0)
 		v |= CAS_MAC_CTRL_CONF_TXP;
-#endif
 	CAS_WRITE_4(sc, CAS_MAC_CTRL_CONF, v);
 
 	/*
 	 * All supported chips have a bug causing incorrect checksum
 	 * to be calculated when letting them strip the FCS in half-
 	 * duplex mode.  In theory we could disable FCS stripping and
 	 * manually adjust the checksum accordingly.  It seems to make
 	 * more sense to optimize for the common case and just disable
 	 * hardware checksumming in half-duplex mode though.
 	 */
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) {
 		ifp->if_capenable &= ~IFCAP_HWCSUM;
 		ifp->if_hwassist = 0;
 	} else if ((sc->sc_flags & CAS_NO_CSUM) == 0) {
 		ifp->if_capenable = ifp->if_capabilities;
 		ifp->if_hwassist = CAS_CSUM_FEATURES;
 	}
 
 	if (sc->sc_variant == CAS_SATURN) {
 		if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0)
 			/* silicon bug workaround */
 			CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x41);
 		else
 			CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7);
 	}
 
 	/* Half-duplex gigabit uses the carrier extension slot time. */
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0 &&
 	    gigabit != 0)
 		CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME,
 		    CAS_MAC_SLOT_TIME_CARR);
 	else
 		CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME,
 		    CAS_MAC_SLOT_TIME_NORM);
 
 	/* XIF Configuration */
 	v = CAS_MAC_XIF_CONF_TX_OE | CAS_MAC_XIF_CONF_LNKLED;
 	if ((sc->sc_flags & CAS_SERDES) == 0) {
 		if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0)
 			v |= CAS_MAC_XIF_CONF_NOECHO;
 		v |= CAS_MAC_XIF_CONF_BUF_OE;
 	}
 	if (gigabit != 0)
 		v |= CAS_MAC_XIF_CONF_GMII;
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0)
 		v |= CAS_MAC_XIF_CONF_FDXLED;
 	CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, v);
 
 	/* Only turn the MACs back on when running with link. */
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 	    (sc->sc_flags & CAS_LINK) != 0) {
 		CAS_WRITE_4(sc, CAS_MAC_TX_CONF,
 		    txcfg | CAS_MAC_TX_CONF_EN);
 		CAS_WRITE_4(sc, CAS_MAC_RX_CONF,
 		    rxcfg | CAS_MAC_RX_CONF_EN);
 	}
 }
 
 /*
  * ifmedia change callback: runs mii_mediachg() under the softc lock
  * and returns its result.
  */
 static int
 cas_mediachange(struct ifnet *ifp)
 {
 	struct cas_softc *sc;
 	int rv;
 
 	/* XXX add support for serial media. */
 
 	sc = ifp->if_softc;
 	CAS_LOCK(sc);
 	rv = mii_mediachg(sc->sc_mii);
 	CAS_UNLOCK(sc);
 
 	return (rv);
 }
 
 /*
  * ifmedia status callback: when the interface is up, polls the PHY
  * and copies the active media and status words into ifmr.  ifmr is
  * left untouched while the interface is down.
  */
 static void
 cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct cas_softc *sc;
 
 	sc = ifp->if_softc;
 
 	CAS_LOCK(sc);
 	if ((ifp->if_flags & IFF_UP) != 0) {
 		mii_pollstat(sc->sc_mii);
 		ifmr->ifm_active = sc->sc_mii->mii_media_active;
 		ifmr->ifm_status = sc->sc_mii->mii_media_status;
 	}
 	CAS_UNLOCK(sc);
 }
 
 /*
  * Interface ioctl handler.  Handles flag, capability, multicast, MTU
  * and media requests itself and passes everything else on to
  * ether_ioctl().  Returns 0 or an errno value.
  */
 static int
 cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct cas_softc *sc;
 	struct ifreq *ifr;
 	int error;
 
 	sc = ifp->if_softc;
 	ifr = (struct ifreq *)data;
 	error = 0;
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:
 		CAS_LOCK(sc);
 		if ((ifp->if_flags & IFF_UP) == 0) {
 			/* Going down: stop the chip if it is running. */
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 				cas_stop(ifp);
 		} else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 		    ((ifp->if_flags ^ sc->sc_ifflags) &
 		    (IFF_ALLMULTI | IFF_PROMISC)) != 0) {
 			/* Running and a filter flag changed: refilter. */
 			cas_setladrf(sc);
 		} else {
 			cas_init_locked(sc);
 		}
 		/* Remember the flags for the next comparison. */
 		sc->sc_ifflags = ifp->if_flags;
 		CAS_UNLOCK(sc);
 		break;
 	case SIOCSIFCAP:
 		CAS_LOCK(sc);
 		if ((sc->sc_flags & CAS_NO_CSUM) == 0) {
 			ifp->if_capenable = ifr->ifr_reqcap;
 			if ((ifp->if_capenable & IFCAP_TXCSUM) == 0)
 				ifp->if_hwassist = 0;
 			else
 				ifp->if_hwassist = CAS_CSUM_FEATURES;
 		} else {
 			/* Capabilities are fixed with CAS_NO_CSUM set. */
 			error = EINVAL;
 		}
 		CAS_UNLOCK(sc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		CAS_LOCK(sc);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 			cas_setladrf(sc);
 		CAS_UNLOCK(sc);
 		break;
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu >= ETHERMIN &&
 		    ifr->ifr_mtu <= ETHERMTU_JUMBO)
 			ifp->if_mtu = ifr->ifr_mtu;
 		else
 			error = EINVAL;
 		break;
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
 		break;
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Program the RX address filter according to the interface flags and
  * multicast list.  Called with the softc lock held.
  *
  * IFF_PROMISC selects promiscuous mode, IFF_ALLMULTI promiscuous group
  * mode; otherwise the hash filter is loaded from the multicast
  * addresses and enabled.
  */
 static void
 cas_setladrf(struct cas_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ifmultiaddr *inm;
 	int i;
 	uint32_t hash[16];
 	uint32_t crc, v;
 
 	CAS_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Get the current RX configuration. */
 	v = CAS_READ_4(sc, CAS_MAC_RX_CONF);
 
 	/*
 	 * Turn off promiscuous mode, promiscuous group mode (all multicast),
 	 * and hash filter.  Depending on the case, the right bit will be
 	 * enabled.
 	 */
 	v &= ~(CAS_MAC_RX_CONF_PROMISC | CAS_MAC_RX_CONF_HFILTER |
 	    CAS_MAC_RX_CONF_PGRP);
 
 	/* Write that back and wait for the hash filter bit to clear. */
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v);
 	CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_HFILTER, 0))
 		device_printf(sc->sc_dev, "cannot disable RX hash filter\n");
 
 	/* Promiscuous and all-multicast modes bypass the hash table. */
 	if ((ifp->if_flags & IFF_PROMISC) != 0) {
 		v |= CAS_MAC_RX_CONF_PROMISC;
 		goto chipit;
 	}
 	if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
 		v |= CAS_MAC_RX_CONF_PGRP;
 		goto chipit;
 	}
 
 	/*
 	 * Set up multicast address filter by passing all multicast
 	 * addresses through a crc generator, and then using the high
 	 * order 8 bits as an index into the 256 bit logical address
 	 * filter.  The high order 4 bits selects the word, while the
 	 * other 4 bits select the bit within the word (where bit 0
 	 * is the MSB).
 	 */
 
 	/* Clear the hash table. */
 	memset(hash, 0, sizeof(hash));
 
 	/* Walk the multicast list under its read lock. */
 	if_maddr_rlock(ifp);
 	TAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) {
 		if (inm->ifma_addr->sa_family != AF_LINK)
 			continue;
 		crc = ether_crc32_le(LLADDR((struct sockaddr_dl *)
 		    inm->ifma_addr), ETHER_ADDR_LEN);
 
 		/* We just want the 8 most significant bits. */
 		crc >>= 24;
 
 		/* Set the corresponding bit in the filter. */
 		hash[crc >> 4] |= 1 << (15 - (crc & 15));
 	}
 	if_maddr_runlock(ifp);
 
 	v |= CAS_MAC_RX_CONF_HFILTER;
 
 	/* Now load the hash table into the chip (if we are using it). */
 	for (i = 0; i < 16; i++)
 		CAS_WRITE_4(sc,
 		    CAS_MAC_HASH0 + i * (CAS_MAC_HASH1 - CAS_MAC_HASH0),
 		    hash[i]);
 
  chipit:
 	/* Commit the final RX configuration. */
 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v);
 }
 
 static int	cas_pci_attach(device_t dev);
 static int	cas_pci_detach(device_t dev);
 static int	cas_pci_probe(device_t dev);
 static int	cas_pci_resume(device_t dev);
 static int	cas_pci_suspend(device_t dev);
 
 /*
  * Method table of the PCI front-end: device, bus and MII bus methods
  * implemented by this driver.
  */
 static device_method_t cas_pci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		cas_pci_probe),
 	DEVMETHOD(device_attach,	cas_pci_attach),
 	DEVMETHOD(device_detach,	cas_pci_detach),
 	DEVMETHOD(device_suspend,	cas_pci_suspend),
 	DEVMETHOD(device_resume,	cas_pci_resume),
 	/* Use the suspend handler here, it is all that is required. */
 	DEVMETHOD(device_shutdown,	cas_pci_suspend),
 
 	/* bus interface */
 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
 
 	/* MII interface */
 	DEVMETHOD(miibus_readreg,	cas_mii_readreg),
 	DEVMETHOD(miibus_writereg,	cas_mii_writereg),
 	DEVMETHOD(miibus_statchg,	cas_mii_statchg),
 
 	/* Table terminator. */
 	KOBJMETHOD_END
 };
 
 /* Driver description: name, method table and softc size. */
 static driver_t cas_pci_driver = {
 	"cas",
 	cas_pci_methods,
 	sizeof(struct cas_softc)
 };
 
 /* Attach cas to pci and miibus to cas; cas depends on the pci module. */
 DRIVER_MODULE(cas, pci, cas_pci_driver, cas_devclass, 0, 0);
 DRIVER_MODULE(miibus, cas, miibus_driver, miibus_devclass, 0, 0);
 MODULE_DEPEND(cas, pci, 1, 1, 1);
 
 /*
  * Table of supported adapters, terminated by an entry with a NULL
  * description.  cas_pci_probe() and cas_pci_attach() take the first
  * entry whose device ID matches and whose cpd_revid is not above the
  * device's revision, so entries sharing a device ID must be listed
  * with the highest minimum revision first.
  */
 static const struct cas_pci_dev {
 	uint32_t	cpd_devid;	/* combined ID from pci_get_devid() */
 	uint8_t		cpd_revid;	/* minimum revision ID */
 	int		cpd_variant;	/* CAS_* chip variant */
 	const char	*cpd_desc;	/* device description */
 } cas_pci_devlist[] = {
 	{ 0x0035100b, 0x0, CAS_SATURN, "NS DP83065 Saturn Gigabit Ethernet" },
 	{ 0xabba108e, 0x10, CAS_CASPLUS, "Sun Cassini+ Gigabit Ethernet" },
 	{ 0xabba108e, 0x0, CAS_CAS, "Sun Cassini Gigabit Ethernet" },
 	{ 0, 0, 0, NULL }
 };
 
 /*
  * Probe method: look the device up in cas_pci_devlist.  On the first
  * entry with a matching device ID and a minimum revision not above
  * the device's revision, set the description and claim the device
  * with BUS_PROBE_DEFAULT; otherwise return ENXIO.
  */
 static int
 cas_pci_probe(device_t dev)
 {
 	const struct cas_pci_dev *cpd;
 
 	for (cpd = cas_pci_devlist; cpd->cpd_desc != NULL; cpd++) {
 		if (pci_get_devid(dev) != cpd->cpd_devid ||
 		    pci_get_revid(dev) < cpd->cpd_revid)
 			continue;
 		device_set_desc(dev, cpd->cpd_desc);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	return (ENXIO);
 }
 
 /*
  * Bus resources requested in cas_pci_attach(): one shareable interrupt
  * and the memory BAR 0.  The list is terminated by a type of -1.
  */
 static struct resource_spec cas_pci_res_spec[] = {
 	{ SYS_RES_IRQ, 0, RF_SHAREABLE | RF_ACTIVE },	/* CAS_RES_INTR */
 	{ SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE },	/* CAS_RES_MEM */
 	{ -1, 0 }
 };
 
 #define	CAS_LOCAL_MAC_ADDRESS	"local-mac-address"
 #define	CAS_PHY_INTERFACE	"phy-interface"
 #define	CAS_PHY_TYPE		"phy-type"
 #define	CAS_PHY_TYPE_PCS	"pcs"
 
 /*
  * Attach method of the PCI front-end.
  *
  * Determines the chip variant and its quirk flags, allocates the bus
  * resources, obtains the Ethernet address and PHY type (from Open
  * Firmware on powerpc/sparc64, from the VPD in the PCI Expansion ROM
  * elsewhere), attaches the bus-independent part via cas_attach() and
  * finally hooks up the interrupt filter.
  *
  * Returns 0 on success and ENXIO on any failure.
  */
 static int
 cas_pci_attach(device_t dev)
 {
 	char buf[sizeof(CAS_LOCAL_MAC_ADDRESS)];
 	struct cas_softc *sc;
 	int i;
 #if !(defined(__powerpc__) || defined(__sparc64__))
 	u_char enaddr[4][ETHER_ADDR_LEN];
 	u_int j, k, lma, pcs[4], phy;
 #endif
 
 	sc = device_get_softc(dev);
 	sc->sc_variant = CAS_UNKNOWN;
 	for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) {
 		if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid &&
 		    pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) {
 			sc->sc_variant = cas_pci_devlist[i].cpd_variant;
 			break;
 		}
 	}
 	if (sc->sc_variant == CAS_UNKNOWN) {
 		device_printf(dev, "unknown adaptor\n");
 		return (ENXIO);
 	}
 
 	pci_enable_busmaster(dev);
 
 	sc->sc_dev = dev;
 	/*
 	 * The quirk depends on the chip revision.  Comparing the combined
 	 * device/vendor ID against 0x02 could never be true for any of
 	 * the IDs in cas_pci_devlist.
 	 */
 	if (sc->sc_variant == CAS_CAS && pci_get_revid(dev) < 0x02)
 		/* Hardware checksumming may hang TX. */
 		sc->sc_flags |= CAS_NO_CSUM;
 	if (sc->sc_variant == CAS_CASPLUS || sc->sc_variant == CAS_SATURN)
 		sc->sc_flags |= CAS_REG_PLUS;
 	if (sc->sc_variant == CAS_CAS ||
 	    (sc->sc_variant == CAS_CASPLUS && pci_get_revid(dev) < 0x11))
 		sc->sc_flags |= CAS_TABORT;
 	if (bootverbose)
 		device_printf(dev, "flags=0x%x\n", sc->sc_flags);
 
 	if (bus_alloc_resources(dev, cas_pci_res_spec, sc->sc_res)) {
 		device_printf(dev, "failed to allocate resources\n");
 		bus_release_resources(dev, cas_pci_res_spec, sc->sc_res);
 		return (ENXIO);
 	}
 
 	CAS_LOCK_INIT(sc, device_get_nameunit(dev));
 
 #if defined(__powerpc__) || defined(__sparc64__)
 	/* The firmware provides the MAC address and the PHY type. */
 	OF_getetheraddr(dev, sc->sc_enaddr);
 	if (OF_getprop(ofw_bus_get_node(dev), CAS_PHY_INTERFACE, buf,
 	    sizeof(buf)) > 0 || OF_getprop(ofw_bus_get_node(dev),
 	    CAS_PHY_TYPE, buf, sizeof(buf)) > 0) {
 		buf[sizeof(buf) - 1] = '\0';
 		if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0)
 			sc->sc_flags |= CAS_SERDES;
 	}
 #else
 	/*
 	 * Dig out VPD (vital product data) and read the MAC address as well
 	 * as the PHY type.  The VPD resides in the PCI Expansion ROM (PCI
 	 * FCode) and can't be accessed via the PCI capability pointer.
 	 * SUNW,pci-ce and SUNW,pci-qge use the Enhanced VPD format described
 	 * in the free US Patent 7149820.
 	 */
 
 #define	PCI_ROMHDR_SIZE			0x1c
 #define	PCI_ROMHDR_SIG			0x00
 #define	PCI_ROMHDR_SIG_MAGIC		0xaa55		/* little endian */
 #define	PCI_ROMHDR_PTR_DATA		0x18
 #define	PCI_ROM_SIZE			0x18
 #define	PCI_ROM_SIG			0x00
 #define	PCI_ROM_SIG_MAGIC		0x52494350	/* "PCIR", endian */
 							/* reversed */
 #define	PCI_ROM_VENDOR			0x04
 #define	PCI_ROM_DEVICE			0x06
 #define	PCI_ROM_PTR_VPD			0x08
 #define	PCI_VPDRES_BYTE0		0x00
 #define	PCI_VPDRES_ISLARGE(x)		((x) & 0x80)
 #define	PCI_VPDRES_LARGE_NAME(x)	((x) & 0x7f)
 #define	PCI_VPDRES_LARGE_LEN_LSB	0x01
 #define	PCI_VPDRES_LARGE_LEN_MSB	0x02
 #define	PCI_VPDRES_LARGE_SIZE		0x03
 #define	PCI_VPDRES_TYPE_ID_STRING	0x02		/* large */
 #define	PCI_VPDRES_TYPE_VPD		0x10		/* large */
 #define	PCI_VPD_KEY0			0x00
 #define	PCI_VPD_KEY1			0x01
 #define	PCI_VPD_LEN			0x02
 #define	PCI_VPD_SIZE			0x03
 
 #define	CAS_ROM_READ_1(sc, offs)					\
 	CAS_READ_1((sc), CAS_PCI_ROM_OFFSET + (offs))
 #define	CAS_ROM_READ_2(sc, offs)					\
 	CAS_READ_2((sc), CAS_PCI_ROM_OFFSET + (offs))
 #define	CAS_ROM_READ_4(sc, offs)					\
 	CAS_READ_4((sc), CAS_PCI_ROM_OFFSET + (offs))
 
 	/* lma and phy count the MAC address and PHY properties found. */
 	lma = phy = 0;
 	memset(enaddr, 0, sizeof(enaddr));
 	memset(pcs, 0, sizeof(pcs));
 
 	/* Enable PCI Expansion ROM access. */
 	CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN,
 	    CAS_BIM_LDEV_OEN_PAD | CAS_BIM_LDEV_OEN_PROM);
 
 	/* Read PCI Expansion ROM header. */
 	if (CAS_ROM_READ_2(sc, PCI_ROMHDR_SIG) != PCI_ROMHDR_SIG_MAGIC ||
 	    (i = CAS_ROM_READ_2(sc, PCI_ROMHDR_PTR_DATA)) <
 	    PCI_ROMHDR_SIZE) {
 		device_printf(dev, "unexpected PCI Expansion ROM header\n");
 		goto fail_prom;
 	}
 
 	/* Read PCI Expansion ROM data. */
 	if (CAS_ROM_READ_4(sc, i + PCI_ROM_SIG) != PCI_ROM_SIG_MAGIC ||
 	    CAS_ROM_READ_2(sc, i + PCI_ROM_VENDOR) != pci_get_vendor(dev) ||
 	    CAS_ROM_READ_2(sc, i + PCI_ROM_DEVICE) != pci_get_device(dev) ||
 	    (j = CAS_ROM_READ_2(sc, i + PCI_ROM_PTR_VPD)) <
 	    i + PCI_ROM_SIZE) {
 		device_printf(dev, "unexpected PCI Expansion ROM data\n");
 		goto fail_prom;
 	}
 
 	/* Read PCI VPD. */
  next:
 	if (PCI_VPDRES_ISLARGE(CAS_ROM_READ_1(sc,
 	    j + PCI_VPDRES_BYTE0)) == 0) {
 		device_printf(dev, "no large PCI VPD\n");
 		goto fail_prom;
 	}
 
 	/* i now holds the length of the large resource at offset j. */
 	i = (CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_MSB) << 8) |
 	    CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_LSB);
 	switch (PCI_VPDRES_LARGE_NAME(CAS_ROM_READ_1(sc,
 	    j + PCI_VPDRES_BYTE0))) {
 	case PCI_VPDRES_TYPE_ID_STRING:
 		/* Skip identifier string. */
 		j += PCI_VPDRES_LARGE_SIZE + i;
 		goto next;
 	case PCI_VPDRES_TYPE_VPD:
 		/* Walk the VPD keywords, collecting up to 4 of each kind. */
 		for (j += PCI_VPDRES_LARGE_SIZE; i > 0;
 		    i -= PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN),
 		    j += PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN)) {
 			if (CAS_ROM_READ_1(sc, j + PCI_VPD_KEY0) != 'Z')
 				/* no Enhanced VPD */
 				continue;
 			if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE) != 'I')
 				/* no instance property */
 				continue;
 			if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'B') {
 				/* byte array */
 				if (CAS_ROM_READ_1(sc,
 				    j + PCI_VPD_SIZE + 4) != ETHER_ADDR_LEN)
 					continue;
 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5,
 				    buf, sizeof(buf));
 				buf[sizeof(buf) - 1] = '\0';
 				if (strcmp(buf, CAS_LOCAL_MAC_ADDRESS) != 0)
 					continue;
 				/* The address follows the property name. */
 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE +
 				    5 + sizeof(CAS_LOCAL_MAC_ADDRESS),
 				    enaddr[lma], sizeof(enaddr[lma]));
 				lma++;
 				if (lma == 4 && phy == 4)
 					break;
 			} else if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) ==
 			   'S') {
 				/* string */
 				if (CAS_ROM_READ_1(sc,
 				    j + PCI_VPD_SIZE + 4) !=
 				    sizeof(CAS_PHY_TYPE_PCS))
 					continue;
 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5,
 				    buf, sizeof(buf));
 				buf[sizeof(buf) - 1] = '\0';
 				/* k is the offset of the property value. */
 				if (strcmp(buf, CAS_PHY_INTERFACE) == 0)
 					k = sizeof(CAS_PHY_INTERFACE);
 				else if (strcmp(buf, CAS_PHY_TYPE) == 0)
 					k = sizeof(CAS_PHY_TYPE);
 				else
 					continue;
 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE +
 				    5 + k, buf, sizeof(buf));
 				buf[sizeof(buf) - 1] = '\0';
 				if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0)
 					pcs[phy] = 1;
 				phy++;
 				if (lma == 4 && phy == 4)
 					break;
 			}
 		}
 		break;
 	default:
 		device_printf(dev, "unexpected PCI VPD\n");
 		goto fail_prom;
 	}
 
  fail_prom:
 	/* Reached on success as well: disable ROM access again. */
 	CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, 0);
 
 	if (lma == 0) {
 		device_printf(dev, "could not determine Ethernet address\n");
 		goto fail;
 	}
 	/* With several addresses present, select by PCI slot number. */
 	i = 0;
 	if (lma > 1 && pci_get_slot(dev) < sizeof(enaddr) / sizeof(*enaddr))
 		i = pci_get_slot(dev);
 	memcpy(sc->sc_enaddr, enaddr[i], ETHER_ADDR_LEN);
 
 	if (phy == 0) {
 		device_printf(dev, "could not determine PHY type\n");
 		goto fail;
 	}
 	/* Likewise for the PHY type. */
 	i = 0;
 	if (phy > 1 && pci_get_slot(dev) < sizeof(pcs) / sizeof(*pcs))
 		i = pci_get_slot(dev);
 	if (pcs[i] != 0)
 		sc->sc_flags |= CAS_SERDES;
 #endif
 
 	if (cas_attach(sc) != 0) {
 		device_printf(dev, "could not be attached\n");
 		goto fail;
 	}
 
 	/* Only a filter routine is installed; cas_intr() defers the work. */
 	if (bus_setup_intr(dev, sc->sc_res[CAS_RES_INTR], INTR_TYPE_NET |
 	    INTR_MPSAFE, cas_intr, NULL, sc, &sc->sc_ih) != 0) {
 		device_printf(dev, "failed to set up interrupt\n");
 		cas_detach(sc);
 		goto fail;
 	}
 	return (0);
 
  fail:
 	CAS_LOCK_DESTROY(sc);
 	bus_release_resources(dev, cas_pci_res_spec, sc->sc_res);
 	return (ENXIO);
 }
 
 /*
  * Detach method: tear down the interrupt handler, detach the
  * bus-independent part, then destroy the lock and release the bus
  * resources.  Always returns 0.
  */
 static int
 cas_pci_detach(device_t dev)
 {
 	struct cas_softc *sc = device_get_softc(dev);
 
 	/* Remove the interrupt handler before detaching the driver. */
 	bus_teardown_intr(dev, sc->sc_res[CAS_RES_INTR], sc->sc_ih);
 	cas_detach(sc);
 
 	CAS_LOCK_DESTROY(sc);
 	bus_release_resources(dev, cas_pci_res_spec, sc->sc_res);
 
 	return (0);
 }
 
 /*
  * Suspend method (also registered as the shutdown method): hands the
  * softc to cas_suspend().  Always returns 0.
  */
 static int
 cas_pci_suspend(device_t dev)
 {
 	struct cas_softc *sc;
 
 	sc = device_get_softc(dev);
 	cas_suspend(sc);
 
 	return (0);
 }
 
 /*
  * Resume method: hands the softc to cas_resume().  Always returns 0.
  */
 static int
 cas_pci_resume(device_t dev)
 {
 	struct cas_softc *sc;
 
 	sc = device_get_softc(dev);
 	cas_resume(sc);
 
 	return (0);
 }
Index: projects/binutils-2.17/sys/dev/e1000/e1000_api.c
===================================================================
--- projects/binutils-2.17/sys/dev/e1000/e1000_api.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/e1000/e1000_api.c	(revision 215830)
@@ -1,1334 +1,1336 @@
 /******************************************************************************
 
   Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 #include "e1000_api.h"
 
 /**
  *  e1000_init_mac_params - Initialize MAC function pointers
  *  @hw: pointer to the HW structure
  *
  *  Runs the init_params handler installed in hw->mac.ops and returns its
  *  status.  Returns -E1000_ERR_CONFIG when no handler is installed.
  *  Called by drivers or by e1000_setup_init_funcs.
  **/
 s32 e1000_init_mac_params(struct e1000_hw *hw)
 {
 	s32 status;
 
 	/* Without a handler there is nothing that can be initialized. */
 	if (!hw->mac.ops.init_params) {
 		DEBUGOUT("mac.init_mac_params was NULL\n");
 		return -E1000_ERR_CONFIG;
 	}
 
 	status = hw->mac.ops.init_params(hw);
 	if (status) {
 		DEBUGOUT("MAC Initialization Error\n");
 	}
 
 	return status;
 }
 
 /**
  *  e1000_init_nvm_params - Initialize NVM function pointers
  *  @hw: pointer to the HW structure
  *
  *  Runs the init_params handler installed in hw->nvm.ops and returns its
  *  status.  Returns -E1000_ERR_CONFIG when no handler is installed.
  *  Called by drivers or by e1000_setup_init_funcs.
  **/
 s32 e1000_init_nvm_params(struct e1000_hw *hw)
 {
 	s32 status;
 
 	/* Without a handler there is nothing that can be initialized. */
 	if (!hw->nvm.ops.init_params) {
 		DEBUGOUT("nvm.init_nvm_params was NULL\n");
 		return -E1000_ERR_CONFIG;
 	}
 
 	status = hw->nvm.ops.init_params(hw);
 	if (status) {
 		DEBUGOUT("NVM Initialization Error\n");
 	}
 
 	return status;
 }
 
 /**
  *  e1000_init_phy_params - Initialize PHY function pointers
  *  @hw: pointer to the HW structure
  *
  *  Runs the init_params handler installed in hw->phy.ops and returns its
  *  status.  Returns -E1000_ERR_CONFIG when no handler is installed.
  *  Called by drivers or by e1000_setup_init_funcs.
  **/
 s32 e1000_init_phy_params(struct e1000_hw *hw)
 {
 	s32 status;
 
 	/* Without a handler there is nothing that can be initialized. */
 	if (!hw->phy.ops.init_params) {
 		DEBUGOUT("phy.init_phy_params was NULL\n");
 		return -E1000_ERR_CONFIG;
 	}
 
 	status = hw->phy.ops.init_params(hw);
 	if (status) {
 		DEBUGOUT("PHY Initialization Error\n");
 	}
 
 	return status;
 }
 
 /**
  *  e1000_init_mbx_params - Initialize mailbox function pointers
  *  @hw: pointer to the HW structure
  *
  *  Runs the init_params handler installed in hw->mbx.ops, which sets up
  *  the function pointers for the mailbox set of functions, and returns
  *  its status.  Returns -E1000_ERR_CONFIG when no handler is installed.
  *  Called by drivers or by e1000_setup_init_funcs.
  **/
 s32 e1000_init_mbx_params(struct e1000_hw *hw)
 {
 	s32 status;
 
 	/* Without a handler there is nothing that can be initialized. */
 	if (!hw->mbx.ops.init_params) {
 		DEBUGOUT("mbx.init_mbx_params was NULL\n");
 		return -E1000_ERR_CONFIG;
 	}
 
 	status = hw->mbx.ops.init_params(hw);
 	if (status) {
 		DEBUGOUT("Mailbox Initialization Error\n");
 	}
 
 	return status;
 }
 
 /**
  *  e1000_set_mac_type - Sets MAC type
  *  @hw: pointer to the HW structure
  *
  *  This function sets the mac type of the adapter based on the
  *  device ID stored in the hw structure.
  *  MUST BE FIRST FUNCTION CALLED (explicitly or through
  *  e1000_setup_init_funcs()).
  *
  *  Returns E1000_SUCCESS when hw->device_id is one of the IDs listed
  *  below.  For any other ID, hw->mac.type is left unmodified and
  *  -E1000_ERR_MAC_INIT is returned.
  **/
 s32 e1000_set_mac_type(struct e1000_hw *hw)
 {
 	struct e1000_mac_info *mac = &hw->mac;
 	s32 ret_val = E1000_SUCCESS;
 
 	DEBUGFUNC("e1000_set_mac_type");
 
 	/* Each group of device IDs shares a single MAC type. */
 	switch (hw->device_id) {
 	case E1000_DEV_ID_82542:
 		mac->type = e1000_82542;
 		break;
 	case E1000_DEV_ID_82543GC_FIBER:
 	case E1000_DEV_ID_82543GC_COPPER:
 		mac->type = e1000_82543;
 		break;
 	case E1000_DEV_ID_82544EI_COPPER:
 	case E1000_DEV_ID_82544EI_FIBER:
 	case E1000_DEV_ID_82544GC_COPPER:
 	case E1000_DEV_ID_82544GC_LOM:
 		mac->type = e1000_82544;
 		break;
 	case E1000_DEV_ID_82540EM:
 	case E1000_DEV_ID_82540EM_LOM:
 	case E1000_DEV_ID_82540EP:
 	case E1000_DEV_ID_82540EP_LOM:
 	case E1000_DEV_ID_82540EP_LP:
 		mac->type = e1000_82540;
 		break;
 	case E1000_DEV_ID_82545EM_COPPER:
 	case E1000_DEV_ID_82545EM_FIBER:
 		mac->type = e1000_82545;
 		break;
 	case E1000_DEV_ID_82545GM_COPPER:
 	case E1000_DEV_ID_82545GM_FIBER:
 	case E1000_DEV_ID_82545GM_SERDES:
 		mac->type = e1000_82545_rev_3;
 		break;
 	case E1000_DEV_ID_82546EB_COPPER:
 	case E1000_DEV_ID_82546EB_FIBER:
 	case E1000_DEV_ID_82546EB_QUAD_COPPER:
 		mac->type = e1000_82546;
 		break;
 	case E1000_DEV_ID_82546GB_COPPER:
 	case E1000_DEV_ID_82546GB_FIBER:
 	case E1000_DEV_ID_82546GB_SERDES:
 	case E1000_DEV_ID_82546GB_PCIE:
 	case E1000_DEV_ID_82546GB_QUAD_COPPER:
 	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
 		mac->type = e1000_82546_rev_3;
 		break;
 	case E1000_DEV_ID_82541EI:
 	case E1000_DEV_ID_82541EI_MOBILE:
 	case E1000_DEV_ID_82541ER_LOM:
 		mac->type = e1000_82541;
 		break;
 	case E1000_DEV_ID_82541ER:
 	case E1000_DEV_ID_82541GI:
 	case E1000_DEV_ID_82541GI_LF:
 	case E1000_DEV_ID_82541GI_MOBILE:
 		mac->type = e1000_82541_rev_2;
 		break;
 	case E1000_DEV_ID_82547EI:
 	case E1000_DEV_ID_82547EI_MOBILE:
 		mac->type = e1000_82547;
 		break;
 	case E1000_DEV_ID_82547GI:
 		mac->type = e1000_82547_rev_2;
 		break;
 	case E1000_DEV_ID_82571EB_COPPER:
 	case E1000_DEV_ID_82571EB_FIBER:
 	case E1000_DEV_ID_82571EB_SERDES:
 	case E1000_DEV_ID_82571EB_SERDES_DUAL:
 	case E1000_DEV_ID_82571EB_SERDES_QUAD:
 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
 	case E1000_DEV_ID_82571PT_QUAD_COPPER:
 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
 		mac->type = e1000_82571;
 		break;
 	case E1000_DEV_ID_82572EI:
 	case E1000_DEV_ID_82572EI_COPPER:
 	case E1000_DEV_ID_82572EI_FIBER:
 	case E1000_DEV_ID_82572EI_SERDES:
 		mac->type = e1000_82572;
 		break;
 	case E1000_DEV_ID_82573E:
 	case E1000_DEV_ID_82573E_IAMT:
 	case E1000_DEV_ID_82573L:
 		mac->type = e1000_82573;
 		break;
 	case E1000_DEV_ID_82574L:
 	case E1000_DEV_ID_82574LA:
 		mac->type = e1000_82574;
 		break;
 	case E1000_DEV_ID_82583V:
 		mac->type = e1000_82583;
 		break;
 	case E1000_DEV_ID_80003ES2LAN_COPPER_DPT:
 	case E1000_DEV_ID_80003ES2LAN_SERDES_DPT:
 	case E1000_DEV_ID_80003ES2LAN_COPPER_SPT:
 	case E1000_DEV_ID_80003ES2LAN_SERDES_SPT:
 		mac->type = e1000_80003es2lan;
 		break;
 	case E1000_DEV_ID_ICH8_IFE:
 	case E1000_DEV_ID_ICH8_IFE_GT:
 	case E1000_DEV_ID_ICH8_IFE_G:
 	case E1000_DEV_ID_ICH8_IGP_M:
 	case E1000_DEV_ID_ICH8_IGP_M_AMT:
 	case E1000_DEV_ID_ICH8_IGP_AMT:
 	case E1000_DEV_ID_ICH8_IGP_C:
 	case E1000_DEV_ID_ICH8_82567V_3:
 		mac->type = e1000_ich8lan;
 		break;
 	case E1000_DEV_ID_ICH9_IFE:
 	case E1000_DEV_ID_ICH9_IFE_GT:
 	case E1000_DEV_ID_ICH9_IFE_G:
 	case E1000_DEV_ID_ICH9_IGP_M:
 	case E1000_DEV_ID_ICH9_IGP_M_AMT:
 	case E1000_DEV_ID_ICH9_IGP_M_V:
 	case E1000_DEV_ID_ICH9_IGP_AMT:
 	case E1000_DEV_ID_ICH9_BM:
 	case E1000_DEV_ID_ICH9_IGP_C:
 	case E1000_DEV_ID_ICH10_R_BM_LM:
 	case E1000_DEV_ID_ICH10_R_BM_LF:
 	case E1000_DEV_ID_ICH10_R_BM_V:
 		mac->type = e1000_ich9lan;
 		break;
 	case E1000_DEV_ID_ICH10_D_BM_LM:
 	case E1000_DEV_ID_ICH10_D_BM_LF:
 	case E1000_DEV_ID_ICH10_D_BM_V:
 	case E1000_DEV_ID_ICH10_HANKSVILLE:
 		mac->type = e1000_ich10lan;
 		break;
 	case E1000_DEV_ID_PCH_D_HV_DM:
 	case E1000_DEV_ID_PCH_D_HV_DC:
 	case E1000_DEV_ID_PCH_M_HV_LM:
 	case E1000_DEV_ID_PCH_M_HV_LC:
 		mac->type = e1000_pchlan;
 		break;
 	case E1000_DEV_ID_PCH2_LV_LM:
 	case E1000_DEV_ID_PCH2_LV_V:
 		mac->type = e1000_pch2lan;
 		break;
 	case E1000_DEV_ID_82575EB_COPPER:
 	case E1000_DEV_ID_82575EB_FIBER_SERDES:
 	case E1000_DEV_ID_82575GB_QUAD_COPPER:
 	case E1000_DEV_ID_82575GB_QUAD_COPPER_PM:
 		mac->type = e1000_82575;
 		break;
 	case E1000_DEV_ID_82576:
 	case E1000_DEV_ID_82576_FIBER:
 	case E1000_DEV_ID_82576_SERDES:
 	case E1000_DEV_ID_82576_QUAD_COPPER:
 	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
 	case E1000_DEV_ID_82576_NS:
 	case E1000_DEV_ID_82576_NS_SERDES:
 	case E1000_DEV_ID_82576_SERDES_QUAD:
 		mac->type = e1000_82576;
 		break;
 	case E1000_DEV_ID_82580_COPPER:
 	case E1000_DEV_ID_82580_FIBER:
 	case E1000_DEV_ID_82580_SERDES:
 	case E1000_DEV_ID_82580_SGMII:
 	case E1000_DEV_ID_82580_COPPER_DUAL:
 	case E1000_DEV_ID_82580_QUAD_FIBER:
+	case E1000_DEV_ID_DH89XXCC_SGMII:
+	case E1000_DEV_ID_DH89XXCC_SERDES:
 		mac->type = e1000_82580;
 		break;
 	case E1000_DEV_ID_82576_VF:
 		mac->type = e1000_vfadapt;
 		break;
 	default:
 		/* Should never have loaded on this device */
 		ret_val = -E1000_ERR_MAC_INIT;
 		break;
 	}
 
 	return ret_val;
 }
 
 /**
  *  e1000_setup_init_funcs - Initializes function pointers
  *  @hw: pointer to the HW structure
  *  @init_device: TRUE additionally runs the MAC, NVM, PHY and mailbox
  *                 init_params steps, getting the device ready for use.
  *                 FALSE only sets the MAC type and the function pointers
  *                 for the other init functions.  Passing FALSE will not
  *                 generate any hardware reads or writes.
  *
  *  This function must be called by a driver in order to use the rest
  *  of the 'shared' code files. Called by drivers only.
  *
  *  Returns the first non-zero status produced by any step, or the status
  *  of the last step executed.
  **/
 s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device)
 {
 	s32 status;
 
 	/* The MAC type selects everything that follows; resolve it first. */
 	status = e1000_set_mac_type(hw);
 	if (status) {
 		DEBUGOUT("ERROR: MAC type could not be set properly.\n");
 		return status;
 	}
 
 	if (!hw->hw_addr) {
 		DEBUGOUT("ERROR: Registers not mapped\n");
 		return -E1000_ERR_CONFIG;
 	}
 
 	/*
 	 * Install the generic implementations before the family-specific
 	 * ones so that a driver module can override them afterward.
 	 */
 	e1000_init_mac_ops_generic(hw);
 	e1000_init_phy_ops_generic(hw);
 	e1000_init_nvm_ops_generic(hw);
 	e1000_init_mbx_ops_generic(hw);
 
 	/*
 	 * Hand over to the adapter family file, which sets up the function
 	 * pointers for the rest of the functions in that family.
 	 */
 	switch (hw->mac.type) {
 	case e1000_82542:
 		e1000_init_function_pointers_82542(hw);
 		break;
 	case e1000_82543:
 	case e1000_82544:
 		e1000_init_function_pointers_82543(hw);
 		break;
 	case e1000_82540:
 	case e1000_82545:
 	case e1000_82545_rev_3:
 	case e1000_82546:
 	case e1000_82546_rev_3:
 		e1000_init_function_pointers_82540(hw);
 		break;
 	case e1000_82541:
 	case e1000_82541_rev_2:
 	case e1000_82547:
 	case e1000_82547_rev_2:
 		e1000_init_function_pointers_82541(hw);
 		break;
 	case e1000_82571:
 	case e1000_82572:
 	case e1000_82573:
 	case e1000_82574:
 	case e1000_82583:
 		e1000_init_function_pointers_82571(hw);
 		break;
 	case e1000_80003es2lan:
 		e1000_init_function_pointers_80003es2lan(hw);
 		break;
 	case e1000_ich8lan:
 	case e1000_ich9lan:
 	case e1000_ich10lan:
 	case e1000_pchlan:
 	case e1000_pch2lan:
 		e1000_init_function_pointers_ich8lan(hw);
 		break;
 	case e1000_82575:
 	case e1000_82576:
 	case e1000_82580:
 		e1000_init_function_pointers_82575(hw);
 		break;
 	case e1000_vfadapt:
 		e1000_init_function_pointers_vf(hw);
 		break;
 	default:
 		DEBUGOUT("Hardware not supported\n");
 		status = -E1000_ERR_CONFIG;
 		break;
 	}
 
 	/* Stop here on an unsupported MAC or when only pointers were wanted. */
 	if (status || !init_device)
 		return status;
 
 	/*
 	 * Initialize the rest of the function pointers. These require some
 	 * register reads/writes in some cases.  The first failure aborts
 	 * the sequence.
 	 */
 	status = e1000_init_mac_params(hw);
 	if (status)
 		return status;
 
 	status = e1000_init_nvm_params(hw);
 	if (status)
 		return status;
 
 	status = e1000_init_phy_params(hw);
 	if (status)
 		return status;
 
 	return e1000_init_mbx_params(hw);
 }
 
 /**
  *  e1000_get_bus_info - Obtain bus information for adapter
  *  @hw: pointer to the HW structure
  *
  *  Calls the get_bus_info handler in hw->mac.ops, which obtains
  *  information about the HW bus the adapter is attached to and stores it
  *  in the hw structure.  Returns E1000_SUCCESS without doing anything
  *  when no handler is installed.  This is a function pointer entry point
  *  called by drivers.
  **/
 s32 e1000_get_bus_info(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.get_bus_info)
 		return E1000_SUCCESS;
 
 	return hw->mac.ops.get_bus_info(hw);
 }
 
 /**
  *  e1000_clear_vfta - Clear VLAN filter table
  *  @hw: pointer to the HW structure
  *
  *  Calls the clear_vfta handler in hw->mac.ops to clear the VLAN filter
  *  table on the adapter; a no-op when no handler is installed.  This is
  *  a function pointer entry point called by drivers.
  **/
 void e1000_clear_vfta(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.clear_vfta)
 		return;
 
 	hw->mac.ops.clear_vfta(hw);
 }
 
 /**
  *  e1000_write_vfta - Write value to VLAN filter table
  *  @hw: pointer to the HW structure
  *  @offset: the 32-bit offset in which to write the value to.
  *  @value: the 32-bit value to write at location offset.
  *
  *  Calls the write_vfta handler in hw->mac.ops to write a 32-bit value
  *  to a 32-bit offset in the VLAN filter table; a no-op when no handler
  *  is installed.  This is a function pointer entry point called by
  *  drivers.
  **/
 void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
 {
 	if (!hw->mac.ops.write_vfta)
 		return;
 
 	hw->mac.ops.write_vfta(hw, offset, value);
 }
 
 /**
  *  e1000_update_mc_addr_list - Update Multicast addresses
  *  @hw: pointer to the HW structure
  *  @mc_addr_list: array of multicast addresses to program
  *  @mc_addr_count: number of multicast addresses to program
  *
  *  Calls the update_mc_addr_list handler in hw->mac.ops to update the
  *  Multicast Table Array; a no-op when no handler is installed.
  *  The caller must have a packed mc_addr_list of multicast addresses.
  **/
 void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
     u32 mc_addr_count)
 {
 	if (!hw->mac.ops.update_mc_addr_list)
 		return;
 
 	hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count);
 }
 
 /**
  *  e1000_force_mac_fc - Force MAC flow control
  *  @hw: pointer to the HW structure
  *
  *  Force the MAC's flow control settings. Currently no func pointer exists
  *  and all implementations are handled in the generic version of this
  *  function.
  *
  *  Returns the status reported by e1000_force_mac_fc_generic().
  **/
 s32 e1000_force_mac_fc(struct e1000_hw *hw)
 {
 	return e1000_force_mac_fc_generic(hw);
 }
 
 /**
  *  e1000_check_for_link - Check/Store link connection
  *  @hw: pointer to the HW structure
  *
  *  Calls the check_for_link handler in hw->mac.ops, which checks the link
  *  condition of the adapter and stores the results in the hw->mac
  *  structure.  Returns -E1000_ERR_CONFIG when no handler is installed.
  *  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_check_for_link(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.check_for_link)
 		return -E1000_ERR_CONFIG;
 
 	return hw->mac.ops.check_for_link(hw);
 }
 
 /**
  *  e1000_check_mng_mode - Check management mode
  *  @hw: pointer to the HW structure
  *
  *  Calls the check_mng_mode handler in hw->mac.ops to check whether the
  *  adapter has manageability enabled.  Returns FALSE when no handler is
  *  installed.  This is a function pointer entry point called by drivers.
  **/
 bool e1000_check_mng_mode(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.check_mng_mode)
 		return FALSE;
 
 	return hw->mac.ops.check_mng_mode(hw);
 }
 
 /**
  *  e1000_mng_write_dhcp_info - Writes DHCP info to host interface
  *  @hw: pointer to the HW structure
  *  @buffer: pointer to the host interface
  *  @length: size of the buffer
  *
  *  Writes the DHCP information to the host interface.  No function
  *  pointer is consulted; the call is forwarded unchanged to
  *  e1000_mng_write_dhcp_info_generic() and its status is returned.
  **/
 s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length)
 {
 	return e1000_mng_write_dhcp_info_generic(hw, buffer, length);
 }
 
 /**
  *  e1000_reset_hw - Reset hardware
  *  @hw: pointer to the HW structure
  *
  *  Calls the reset_hw handler in hw->mac.ops, which resets the hardware
  *  into a known state.  Returns -E1000_ERR_CONFIG when no handler is
  *  installed.  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_reset_hw(struct e1000_hw *hw)
 {
 	return hw->mac.ops.reset_hw ?
 	    hw->mac.ops.reset_hw(hw) : -E1000_ERR_CONFIG;
 }
 
 /**
  *  e1000_init_hw - Initialize hardware
  *  @hw: pointer to the HW structure
  *
  *  Calls the init_hw handler in hw->mac.ops, which inits the hardware,
  *  readying it for operation.  Returns -E1000_ERR_CONFIG when no handler
  *  is installed.  This is a function pointer entry point called by
  *  drivers.
  **/
 s32 e1000_init_hw(struct e1000_hw *hw)
 {
 	return hw->mac.ops.init_hw ?
 	    hw->mac.ops.init_hw(hw) : -E1000_ERR_CONFIG;
 }
 
 /**
  *  e1000_setup_link - Configures link and flow control
  *  @hw: pointer to the HW structure
  *
  *  Calls the setup_link handler in hw->mac.ops, which configures link and
  *  flow control settings for the adapter.  Returns -E1000_ERR_CONFIG when
  *  no handler is installed.  This is a function pointer entry point
  *  called by drivers. While modules can also call this, they probably
  *  call their own version of this function.
  **/
 s32 e1000_setup_link(struct e1000_hw *hw)
 {
 	return hw->mac.ops.setup_link ?
 	    hw->mac.ops.setup_link(hw) : -E1000_ERR_CONFIG;
 }
 
 /**
  *  e1000_get_speed_and_duplex - Returns current speed and duplex
  *  @hw: pointer to the HW structure
  *  @speed: pointer to a 16-bit value to store the speed
  *  @duplex: pointer to a 16-bit value to store the duplex.
  *
  *  Calls the get_link_up_info handler in hw->mac.ops, passing it the two
  *  'out' variables.  When no handler is installed, -E1000_ERR_CONFIG is
  *  returned and neither 'out' variable is written.  This is a function
  *  pointer entry point called by drivers.
  **/
 s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex)
 {
 	if (!hw->mac.ops.get_link_up_info)
 		return -E1000_ERR_CONFIG;
 
 	return hw->mac.ops.get_link_up_info(hw, speed, duplex);
 }
 
 /**
  *  e1000_setup_led - Configures SW controllable LED
  *  @hw: pointer to the HW structure
  *
  *  Calls the setup_led handler in hw->mac.ops, which prepares the SW
  *  controllable LED for use and saves the current state of the LED so it
  *  can be later restored.  Returns E1000_SUCCESS when no handler is
  *  installed.  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_setup_led(struct e1000_hw *hw)
 {
 	return hw->mac.ops.setup_led ?
 	    hw->mac.ops.setup_led(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_cleanup_led - Restores SW controllable LED
  *  @hw: pointer to the HW structure
  *
  *  Calls the cleanup_led handler in hw->mac.ops, which restores the SW
  *  controllable LED to the value saved off by e1000_setup_led.  Returns
  *  E1000_SUCCESS when no handler is installed.  This is a function
  *  pointer entry point called by drivers.
  **/
 s32 e1000_cleanup_led(struct e1000_hw *hw)
 {
 	return hw->mac.ops.cleanup_led ?
 	    hw->mac.ops.cleanup_led(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_blink_led - Blink SW controllable LED
  *  @hw: pointer to the HW structure
  *
  *  Calls the blink_led handler in hw->mac.ops to start the adapter LED
  *  blinking.  Request the LED to be setup first and cleaned up after.
  *  Returns E1000_SUCCESS when no handler is installed.  This is a
  *  function pointer entry point called by drivers.
  **/
 s32 e1000_blink_led(struct e1000_hw *hw)
 {
 	return hw->mac.ops.blink_led ?
 	    hw->mac.ops.blink_led(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_id_led_init - store LED configurations in SW
  *  @hw: pointer to the HW structure
  *
  *  Calls the id_led_init handler in hw->mac.ops, which initializes the
  *  LED config in SW.  Returns E1000_SUCCESS when no handler is installed.
  *  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_id_led_init(struct e1000_hw *hw)
 {
 	return hw->mac.ops.id_led_init ?
 	    hw->mac.ops.id_led_init(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_led_on - Turn on SW controllable LED
  *  @hw: pointer to the HW structure
  *
  *  Calls the led_on handler in hw->mac.ops to turn the SW defined LED on.
  *  Returns E1000_SUCCESS when no handler is installed.  This is a
  *  function pointer entry point called by drivers.
  **/
 s32 e1000_led_on(struct e1000_hw *hw)
 {
 	return hw->mac.ops.led_on ?
 	    hw->mac.ops.led_on(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_led_off - Turn off SW controllable LED
  *  @hw: pointer to the HW structure
  *
  *  Calls the led_off handler in hw->mac.ops to turn the SW defined LED
  *  off.  Returns E1000_SUCCESS when no handler is installed.  This is a
  *  function pointer entry point called by drivers.
  **/
 s32 e1000_led_off(struct e1000_hw *hw)
 {
 	return hw->mac.ops.led_off ?
 	    hw->mac.ops.led_off(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_reset_adaptive - Reset adaptive IFS
  *  @hw: pointer to the HW structure
  *
  *  Resets the adaptive IFS. Currently no func pointer exists and all
  *  implementations are handled in the generic version of this function,
  *  e1000_reset_adaptive_generic(), which is called unconditionally.
  **/
 void e1000_reset_adaptive(struct e1000_hw *hw)
 {
 	e1000_reset_adaptive_generic(hw);
 }
 
 /**
  *  e1000_update_adaptive - Update adaptive IFS
  *  @hw: pointer to the HW structure
  *
  *  Updates the adaptive IFS. Currently no func pointer exists and all
  *  implementations are handled in the generic version of this function,
  *  e1000_update_adaptive_generic(), which is called unconditionally.
  **/
 void e1000_update_adaptive(struct e1000_hw *hw)
 {
 	e1000_update_adaptive_generic(hw);
 }
 
 /**
  *  e1000_disable_pcie_master - Disable PCI-Express master access
  *  @hw: pointer to the HW structure
  *
  *  Disables PCI-Express master access and verifies there are no pending
  *  requests. Currently no func pointer exists and all implementations are
  *  handled in the generic version of this function.
  *
  *  Returns the status reported by e1000_disable_pcie_master_generic().
  **/
 s32 e1000_disable_pcie_master(struct e1000_hw *hw)
 {
 	return e1000_disable_pcie_master_generic(hw);
 }
 
 /**
  *  e1000_config_collision_dist - Configure collision distance
  *  @hw: pointer to the HW structure
  *
  *  Calls the config_collision_dist handler in hw->mac.ops, which
  *  configures the collision distance to the default value and is used
  *  during link setup.  A no-op when no handler is installed.
  **/
 void e1000_config_collision_dist(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.config_collision_dist)
 		return;
 
 	hw->mac.ops.config_collision_dist(hw);
 }
 
 /**
  *  e1000_rar_set - Sets a receive address register
  *  @hw: pointer to the HW structure
  *  @addr: address to set the RAR to
  *  @index: the RAR to set
  *
  *  Calls the rar_set handler in hw->mac.ops to set a Receive Address
  *  Register (RAR) to the specified address.  A no-op when no handler is
  *  installed.
  **/
 void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
 {
 	if (!hw->mac.ops.rar_set)
 		return;
 
 	hw->mac.ops.rar_set(hw, addr, index);
 }
 
 /**
  *  e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state
  *  @hw: pointer to the HW structure
  *
  *  Calls the validate_mdi_setting handler in hw->mac.ops, which ensures
  *  that the MDI/MDIX SW state is valid.  Returns E1000_SUCCESS when no
  *  handler is installed.
  **/
 s32 e1000_validate_mdi_setting(struct e1000_hw *hw)
 {
 	return hw->mac.ops.validate_mdi_setting ?
 	    hw->mac.ops.validate_mdi_setting(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_hash_mc_addr - Determines address location in multicast table
  *  @hw: pointer to the HW structure
  *  @mc_addr: Multicast address to hash.
  *
  *  This hashes an address to determine its location in the multicast
  *  table. Currently no func pointer exists and all implementations
  *  are handled in the generic version of this function.
  *
  *  Returns the value produced by e1000_hash_mc_addr_generic().
  **/
 u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
 {
 	return e1000_hash_mc_addr_generic(hw, mc_addr);
 }
 
 /**
  *  e1000_enable_tx_pkt_filtering - Enable packet filtering on TX
  *  @hw: pointer to the HW structure
  *
  *  Enables packet filtering on transmit packets if manageability is enabled
  *  and host interface is enabled.
  *  Currently no func pointer exists and all implementations are handled in the
  *  generic version of this function.
  *
  *  Returns the value produced by e1000_enable_tx_pkt_filtering_generic().
  **/
 bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw)
 {
 	return e1000_enable_tx_pkt_filtering_generic(hw);
 }
 
 /**
  *  e1000_mng_host_if_write - Writes to the manageability host interface
  *  @hw: pointer to the HW structure
  *  @buffer: pointer to the host interface buffer
  *  @length: size of the buffer
  *  @offset: location in the buffer to write to
  *  @sum: sum of the data (not checksum)
  *
  *  Calls the mng_host_if_write handler in hw->mac.ops, which writes the
  *  buffer content at the given offset on the host interface.  All
  *  arguments are passed through unchanged.  Returns
  *  E1000_NOT_IMPLEMENTED when no handler is installed.
  **/
 s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
     u16 offset, u8 *sum)
 {
 	if (!hw->mac.ops.mng_host_if_write)
 		return E1000_NOT_IMPLEMENTED;
 
 	return hw->mac.ops.mng_host_if_write(hw, buffer, length, offset, sum);
 }
 
 /**
  *  e1000_mng_write_cmd_header - Writes manageability command header
  *  @hw: pointer to the HW structure
  *  @hdr: pointer to the host interface command header
  *
  *  Calls the mng_write_cmd_header handler in hw->mac.ops, which writes
  *  the command header after doing the checksum calculation.  Returns
  *  E1000_NOT_IMPLEMENTED when no handler is installed.
  **/
 s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
     struct e1000_host_mng_command_header *hdr)
 {
 	if (!hw->mac.ops.mng_write_cmd_header)
 		return E1000_NOT_IMPLEMENTED;
 
 	return hw->mac.ops.mng_write_cmd_header(hw, hdr);
 }
 
 /**
  *  e1000_mng_enable_host_if - Checks host interface is enabled
  *  @hw: pointer to the HW structure
  *
  *  Calls the mng_enable_host_if handler in hw->mac.ops and returns its
  *  status.  Per the handler's contract, it checks whether the HOST IF is
  *  enabled for command operation and whether the previous command is
  *  completed, busy waiting if it is not.  Returns E1000_NOT_IMPLEMENTED
  *  when no handler is installed.
  **/
 s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.mng_enable_host_if)
 		return E1000_NOT_IMPLEMENTED;
 
 	return hw->mac.ops.mng_enable_host_if(hw);
 }
 
 /**
  *  e1000_wait_autoneg - Waits for autonegotiation completion
  *  @hw: pointer to the HW structure
  *
  *  Calls the wait_autoneg handler in hw->mac.ops, which waits for autoneg
  *  to complete, and returns its status.  Returns E1000_SUCCESS without
  *  waiting when no handler is installed.
  **/
 s32 e1000_wait_autoneg(struct e1000_hw *hw)
 {
 	return hw->mac.ops.wait_autoneg ?
 	    hw->mac.ops.wait_autoneg(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_check_reset_block - Verifies PHY can be reset
  *  @hw: pointer to the HW structure
  *
  *  Calls the check_reset_block handler in hw->phy.ops, which checks if
  *  the PHY is in a state that can be reset or if manageability has it
  *  tied up.  Returns E1000_SUCCESS when no handler is installed.  This is
  *  a function pointer entry point called by drivers.
  **/
 s32 e1000_check_reset_block(struct e1000_hw *hw)
 {
 	return hw->phy.ops.check_reset_block ?
 	    hw->phy.ops.check_reset_block(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_read_phy_reg - Reads PHY register
  *  @hw: pointer to the HW structure
  *  @offset: the register to read
  *  @data: the buffer to store the 16-bit read.
  *
  *  Calls the read_reg handler in hw->phy.ops to read the PHY register
  *  into data.  When no handler is installed, E1000_SUCCESS is returned
  *  and data is not written.  This is a function pointer entry point
  *  called by drivers.
  **/
 s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data)
 {
 	if (!hw->phy.ops.read_reg)
 		return E1000_SUCCESS;
 
 	return hw->phy.ops.read_reg(hw, offset, data);
 }
 
 /**
  *  e1000_write_phy_reg - Writes PHY register
  *  @hw: pointer to the HW structure
  *  @offset: the register to write
  *  @data: the value to write.
  *
  *  Calls the write_reg handler in hw->phy.ops to write the PHY register
  *  at offset with the value in data.  When no handler is installed,
  *  E1000_SUCCESS is returned and nothing is written.  This is a function
  *  pointer entry point called by drivers.
  **/
 s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data)
 {
 	if (!hw->phy.ops.write_reg)
 		return E1000_SUCCESS;
 
 	return hw->phy.ops.write_reg(hw, offset, data);
 }
 
 /**
  *  e1000_release_phy - Generic release PHY
  *  @hw: pointer to the HW structure
  *
  *  Calls the release handler in hw->phy.ops.  Simply returns when no
  *  handler is installed, i.e. when the silicon family does not require a
  *  semaphore when accessing the PHY.
  **/
 void e1000_release_phy(struct e1000_hw *hw)
 {
 	if (!hw->phy.ops.release)
 		return;
 
 	hw->phy.ops.release(hw);
 }
 
 /**
  *  e1000_acquire_phy - Generic acquire PHY
  *  @hw: pointer to the HW structure
  *
  *  Calls the acquire handler in hw->phy.ops and returns its status.
  *  Returns E1000_SUCCESS when no handler is installed, i.e. when the
  *  silicon family does not require a semaphore when accessing the PHY.
  **/
 s32 e1000_acquire_phy(struct e1000_hw *hw)
 {
 	return hw->phy.ops.acquire ?
 	    hw->phy.ops.acquire(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_cfg_on_link_up - Configure PHY upon link up
  *  @hw: pointer to the HW structure
  *
  *  Calls the cfg_on_link_up handler in hw->phy.ops and returns its
  *  status.  Returns E1000_SUCCESS when no handler is installed.
  **/
 s32 e1000_cfg_on_link_up(struct e1000_hw *hw)
 {
 	return hw->phy.ops.cfg_on_link_up ?
 	    hw->phy.ops.cfg_on_link_up(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_read_kmrn_reg - Reads register using Kumeran interface
  *  @hw: pointer to the HW structure
  *  @offset: the register to read
  *  @data: the location to store the 16-bit value read.
  *
  *  Reads a register out of the Kumeran interface. Currently no func pointer
  *  exists and all implementations are handled in the generic version of
  *  this function.
  *
  *  Returns the status reported by e1000_read_kmrn_reg_generic().
  **/
 s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data)
 {
 	return e1000_read_kmrn_reg_generic(hw, offset, data);
 }
 
 /**
  *  e1000_write_kmrn_reg - Writes register using Kumeran interface
  *  @hw: pointer to the HW structure
  *  @offset: the register to write
  *  @data: the value to write.
  *
  *  Writes a register to the Kumeran interface. Currently no func pointer
  *  exists and all implementations are handled in the generic version of
  *  this function.
  *
  *  Returns the status reported by e1000_write_kmrn_reg_generic().
  **/
 s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data)
 {
 	return e1000_write_kmrn_reg_generic(hw, offset, data);
 }
 
 /**
  *  e1000_get_cable_length - Retrieves cable length estimation
  *  @hw: pointer to the HW structure
  *
  *  Calls the get_cable_length handler in hw->phy.ops, which estimates the
  *  cable length and stores the result in hw->phy.min_length and
  *  hw->phy.max_length.  Returns E1000_SUCCESS when no handler is
  *  installed.  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_get_cable_length(struct e1000_hw *hw)
 {
 	return hw->phy.ops.get_cable_length ?
 	    hw->phy.ops.get_cable_length(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_get_phy_info - Retrieves PHY information from registers
  *  @hw: pointer to the HW structure
  *
  *  Calls the get_info handler in hw->phy.ops, which gets some information
  *  from various PHY registers and populates hw->phy values with it.
  *  Returns E1000_SUCCESS when no handler is installed.  This is a
  *  function pointer entry point called by drivers.
  **/
 s32 e1000_get_phy_info(struct e1000_hw *hw)
 {
 	return hw->phy.ops.get_info ?
 	    hw->phy.ops.get_info(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_phy_hw_reset - Hard PHY reset
  *  @hw: pointer to the HW structure
  *
  *  Calls the reset handler in hw->phy.ops to perform a hard PHY reset.
  *  Returns E1000_SUCCESS when no handler is installed.  This is a
  *  function pointer entry point called by drivers.
  **/
 s32 e1000_phy_hw_reset(struct e1000_hw *hw)
 {
 	return hw->phy.ops.reset ?
 	    hw->phy.ops.reset(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_phy_commit - Soft PHY reset
  *  @hw: pointer to the HW structure
  *
  *  Calls the commit handler in hw->phy.ops, which performs a soft PHY
  *  reset on those that apply.  Returns E1000_SUCCESS when no handler is
  *  installed.  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_phy_commit(struct e1000_hw *hw)
 {
 	return hw->phy.ops.commit ?
 	    hw->phy.ops.commit(hw) : E1000_SUCCESS;
 }
 
 /**
  *  e1000_set_d0_lplu_state - Sets low power link up state for D0
  *  @hw: pointer to the HW structure
  *  @active: boolean used to enable/disable lplu
  *
  *  Calls the set_d0_lplu_state handler in hw->phy.ops and returns its
  *  status; returns E1000_SUCCESS when no handler is installed.
  *
  *  The low power link up (lplu) state is set to the power management level D0
  *  and SmartSpeed is disabled when active is TRUE, else clear lplu for D0
  *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
  *  is used during Dx states where the power conservation is most important.
  *  During driver activity, SmartSpeed should be enabled so performance is
  *  maintained.  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active)
 {
 	if (!hw->phy.ops.set_d0_lplu_state)
 		return E1000_SUCCESS;
 
 	return hw->phy.ops.set_d0_lplu_state(hw, active);
 }
 
 /**
  *  e1000_set_d3_lplu_state - Sets low power link up state for D3
  *  @hw: pointer to the HW structure
  *  @active: boolean used to enable/disable lplu
  *
  *  Calls the set_d3_lplu_state handler in hw->phy.ops and returns its
  *  status; returns E1000_SUCCESS when no handler is installed.
  *
  *  The low power link up (lplu) state is set to the power management level D3
  *  and SmartSpeed is disabled when active is TRUE, else clear lplu for D3
  *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
  *  is used during Dx states where the power conservation is most important.
  *  During driver activity, SmartSpeed should be enabled so performance is
  *  maintained.  This is a function pointer entry point called by drivers.
  **/
 s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
 {
 	if (!hw->phy.ops.set_d3_lplu_state)
 		return E1000_SUCCESS;
 
 	return hw->phy.ops.set_d3_lplu_state(hw, active);
 }
 
 /**
  *  e1000_read_mac_addr - Reads MAC address
  *  @hw: pointer to the HW structure
  *
  *  Reads the MAC address out of the adapter and stores it in the HW
  *  structure.  A MAC-specific read_mac_addr handler in hw->mac.ops takes
  *  precedence when one is registered; otherwise the generic implementation
  *  e1000_read_mac_addr_generic() is used.  Returns the status of whichever
  *  routine ran.
  **/
 s32 e1000_read_mac_addr(struct e1000_hw *hw)
 {
 	s32 ret_val;
 
 	if (hw->mac.ops.read_mac_addr)
 		ret_val = hw->mac.ops.read_mac_addr(hw);
 	else
 		ret_val = e1000_read_mac_addr_generic(hw);
 
 	return ret_val;
 }
 
 /**
  *  e1000_read_pba_string - Read device part number string
  *  @hw: pointer to the HW structure
  *  @pba_num: pointer to device part number
  *  @pba_num_size: size of part number buffer
  *
  *  Fetches the product board assembly (PBA) number as a string into
  *  @pba_num.  There is no per-device hook for this operation: the call is
  *  always served by e1000_read_pba_string_generic(), whose status is
  *  returned unchanged.
  **/
 s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size)
 {
 	s32 ret_val;
 
 	ret_val = e1000_read_pba_string_generic(hw, pba_num, pba_num_size);
 
 	return ret_val;
 }
 
 /**
  *  e1000_read_pba_length - Read device part number string length
  *  @hw: pointer to the HW structure
  *  @pba_num_size: where the part number length is stored
  *
  *  Fetches the length of the product board assembly (PBA) number and
  *  stores it through @pba_num_size.  There is no per-device hook for this
  *  operation: the call is always served by e1000_read_pba_length_generic(),
  *  whose status is returned unchanged.
  **/
 s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size)
 {
 	s32 ret_val;
 
 	ret_val = e1000_read_pba_length_generic(hw, pba_num_size);
 
 	return ret_val;
 }
 
 /**
  *  e1000_read_pba_num - Read device part number
  *  @hw: pointer to the HW structure
  *  @pba_num: pointer to device part number
  *
  *  Fetches the product board assembly (PBA) number and stores it through
  *  @pba_num.  There is no per-device hook for this operation: the call is
  *  always served by e1000_read_pba_num_generic(), whose status is returned
  *  unchanged.
  **/
 s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *pba_num)
 {
 	s32 ret_val;
 
 	ret_val = e1000_read_pba_num_generic(hw, pba_num);
 
 	return ret_val;
 }
 
 /**
  *  e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum
  *  @hw: pointer to the HW structure
  *
  *  Driver-facing entry point.  Forwards to the validate handler registered
  *  in hw->nvm.ops and returns its status.  A missing handler is reported as
  *  -E1000_ERR_CONFIG.
  **/
 s32 e1000_validate_nvm_checksum(struct e1000_hw *hw)
 {
 	s32 ret_val = -E1000_ERR_CONFIG;
 
 	if (hw->nvm.ops.validate)
 		ret_val = hw->nvm.ops.validate(hw);
 
 	return ret_val;
 }
 
 /**
  *  e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum
  *  @hw: pointer to the HW structure
  *
  *  Driver-facing entry point.  Forwards to the update handler registered
  *  in hw->nvm.ops and returns its status.  A missing handler is reported as
  *  -E1000_ERR_CONFIG.
  **/
 s32 e1000_update_nvm_checksum(struct e1000_hw *hw)
 {
 	s32 ret_val = -E1000_ERR_CONFIG;
 
 	if (hw->nvm.ops.update)
 		ret_val = hw->nvm.ops.update(hw);
 
 	return ret_val;
 }
 
 /**
  *  e1000_reload_nvm - Reloads EEPROM
  *  @hw: pointer to the HW structure
  *
  *  Invokes the reload handler registered in hw->nvm.ops, if any.  The
  *  handler is expected to reload the EEPROM by setting the "Reinitialize
  *  from EEPROM" bit in the extended control register; that register access
  *  happens inside the handler, not here.
  **/
 void e1000_reload_nvm(struct e1000_hw *hw)
 {
 	if (!hw->nvm.ops.reload)
 		return;
 
 	hw->nvm.ops.reload(hw);
 }
 
 /**
  *  e1000_read_nvm - Reads NVM (EEPROM)
  *  @hw: pointer to the HW structure
  *  @offset: the word offset to read
  *  @words: number of 16-bit words to read
  *  @data: pointer to the properly sized buffer for the data.
  *
  *  Driver-facing entry point for reading 16-bit chunks of data from the
  *  NVM (EEPROM).  Forwards all arguments to the read handler registered in
  *  hw->nvm.ops and returns its status.  A missing handler is reported as
  *  -E1000_ERR_CONFIG.
  **/
 s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
 {
 	s32 ret_val = -E1000_ERR_CONFIG;
 
 	if (hw->nvm.ops.read)
 		ret_val = hw->nvm.ops.read(hw, offset, words, data);
 
 	return ret_val;
 }
 
 /**
  *  e1000_write_nvm - Writes to NVM (EEPROM)
  *  @hw: pointer to the HW structure
  *  @offset: the word offset to write
  *  @words: number of 16-bit words to write
  *  @data: pointer to the properly sized buffer for the data.
  *
  *  Driver-facing entry point for writing 16-bit chunks of data to the NVM
  *  (EEPROM).  Forwards all arguments to the write handler registered in
  *  hw->nvm.ops and returns its status.
  *
  *  NOTE(review): a missing handler yields E1000_SUCCESS here, whereas
  *  e1000_read_nvm() reports -E1000_ERR_CONFIG in the same situation;
  *  confirm the asymmetry is intentional before relying on it.
  **/
 s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
 {
 	s32 ret_val = E1000_SUCCESS;
 
 	if (hw->nvm.ops.write)
 		ret_val = hw->nvm.ops.write(hw, offset, words, data);
 
 	return ret_val;
 }
 
 /**
  *  e1000_write_8bit_ctrl_reg - Writes 8bit Control register
  *  @hw: pointer to the HW structure
  *  @reg: 32bit register offset
  *  @offset: the register to write
  *  @data: the value to write.
  *
  *  Writes the value in @data to the register selected by @offset.  No
  *  function pointer is consulted: the call always goes straight to
  *  e1000_write_8bit_ctrl_reg_generic(), whose status is returned unchanged.
  **/
 s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
                               u8 data)
 {
 	s32 ret_val;
 
 	ret_val = e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data);
 
 	return ret_val;
 }
 
 /**
  * e1000_power_up_phy - Restores link in case of PHY power down
  * @hw: pointer to the HW structure
  *
  * The phy may be powered down to save power, to turn off link when the
  * driver is unloaded, or wake on lan is not enabled (among others).  This
  * routine runs the power_up handler from hw->phy.ops when one is registered
  * and then, handler or not, calls e1000_setup_link().
  **/
 void e1000_power_up_phy(struct e1000_hw *hw)
 {
 	if (hw->phy.ops.power_up)
 		hw->phy.ops.power_up(hw);
 
 	/* Status of the link setup is discarded; this routine returns void. */
 	(void)e1000_setup_link(hw);
 }
 
 /**
  * e1000_power_down_phy - Power down PHY
  * @hw: pointer to the HW structure
  *
  * The phy may be powered down to save power, to turn off link when the
  * driver is unloaded, or wake on lan is not enabled (among others).  This
  * routine only runs the power_down handler from hw->phy.ops; it does
  * nothing when no handler is registered.
  **/
 void e1000_power_down_phy(struct e1000_hw *hw)
 {
 	if (!hw->phy.ops.power_down)
 		return;
 
 	hw->phy.ops.power_down(hw);
 }
 
 /**
  *  e1000_power_up_fiber_serdes_link - Power up serdes link
  *  @hw: pointer to the HW structure
  *
  *  Runs the power_up_serdes handler from hw->mac.ops, which is expected to
  *  power on the optics and PCS.  Does nothing when no handler is
  *  registered.
  **/
 void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.power_up_serdes)
 		return;
 
 	hw->mac.ops.power_up_serdes(hw);
 }
 
 /**
  *  e1000_shutdown_fiber_serdes_link - Remove link during power down
  *  @hw: pointer to the HW structure
  *
  *  Runs the shutdown_serdes handler from hw->mac.ops, which is expected to
  *  shut down the optics and PCS on driver unload.  Does nothing when no
  *  handler is registered.
  **/
 void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw)
 {
 	if (!hw->mac.ops.shutdown_serdes)
 		return;
 
 	hw->mac.ops.shutdown_serdes(hw);
 }
 
Index: projects/binutils-2.17/sys/dev/e1000/e1000_hw.h
===================================================================
--- projects/binutils-2.17/sys/dev/e1000/e1000_hw.h	(revision 215829)
+++ projects/binutils-2.17/sys/dev/e1000/e1000_hw.h	(revision 215830)
@@ -1,943 +1,945 @@
 /******************************************************************************
 
   Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 #ifndef _E1000_HW_H_
 #define _E1000_HW_H_
 
 #include "e1000_osdep.h"
 #include "e1000_regs.h"
 #include "e1000_defines.h"
 
 struct e1000_hw;
 
 /*
  * Device ID constants, one per adapter variant known to this shared code.
  * NOTE(review): presumably matched against e1000_hw.device_id when the MAC
  * type is selected -- confirm in the mac-type selection code.
  */
 #define E1000_DEV_ID_82542                    0x1000
 #define E1000_DEV_ID_82543GC_FIBER            0x1001
 #define E1000_DEV_ID_82543GC_COPPER           0x1004
 #define E1000_DEV_ID_82544EI_COPPER           0x1008
 #define E1000_DEV_ID_82544EI_FIBER            0x1009
 #define E1000_DEV_ID_82544GC_COPPER           0x100C
 #define E1000_DEV_ID_82544GC_LOM              0x100D
 #define E1000_DEV_ID_82540EM                  0x100E
 #define E1000_DEV_ID_82540EM_LOM              0x1015
 #define E1000_DEV_ID_82540EP_LOM              0x1016
 #define E1000_DEV_ID_82540EP                  0x1017
 #define E1000_DEV_ID_82540EP_LP               0x101E
 #define E1000_DEV_ID_82545EM_COPPER           0x100F
 #define E1000_DEV_ID_82545EM_FIBER            0x1011
 #define E1000_DEV_ID_82545GM_COPPER           0x1026
 #define E1000_DEV_ID_82545GM_FIBER            0x1027
 #define E1000_DEV_ID_82545GM_SERDES           0x1028
 #define E1000_DEV_ID_82546EB_COPPER           0x1010
 #define E1000_DEV_ID_82546EB_FIBER            0x1012
 #define E1000_DEV_ID_82546EB_QUAD_COPPER      0x101D
 #define E1000_DEV_ID_82546GB_COPPER           0x1079
 #define E1000_DEV_ID_82546GB_FIBER            0x107A
 #define E1000_DEV_ID_82546GB_SERDES           0x107B
 #define E1000_DEV_ID_82546GB_PCIE             0x108A
 #define E1000_DEV_ID_82546GB_QUAD_COPPER      0x1099
 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5
 #define E1000_DEV_ID_82541EI                  0x1013
 #define E1000_DEV_ID_82541EI_MOBILE           0x1018
 #define E1000_DEV_ID_82541ER_LOM              0x1014
 #define E1000_DEV_ID_82541ER                  0x1078
 #define E1000_DEV_ID_82541GI                  0x1076
 #define E1000_DEV_ID_82541GI_LF               0x107C
 #define E1000_DEV_ID_82541GI_MOBILE           0x1077
 #define E1000_DEV_ID_82547EI                  0x1019
 #define E1000_DEV_ID_82547EI_MOBILE           0x101A
 #define E1000_DEV_ID_82547GI                  0x1075
 #define E1000_DEV_ID_82571EB_COPPER           0x105E
 #define E1000_DEV_ID_82571EB_FIBER            0x105F
 #define E1000_DEV_ID_82571EB_SERDES           0x1060
 #define E1000_DEV_ID_82571EB_SERDES_DUAL      0x10D9
 #define E1000_DEV_ID_82571EB_SERDES_QUAD      0x10DA
 #define E1000_DEV_ID_82571EB_QUAD_COPPER      0x10A4
 #define E1000_DEV_ID_82571PT_QUAD_COPPER      0x10D5
 #define E1000_DEV_ID_82571EB_QUAD_FIBER       0x10A5
 #define E1000_DEV_ID_82571EB_QUAD_COPPER_LP   0x10BC
 #define E1000_DEV_ID_82572EI_COPPER           0x107D
 #define E1000_DEV_ID_82572EI_FIBER            0x107E
 #define E1000_DEV_ID_82572EI_SERDES           0x107F
 #define E1000_DEV_ID_82572EI                  0x10B9
 #define E1000_DEV_ID_82573E                   0x108B
 #define E1000_DEV_ID_82573E_IAMT              0x108C
 #define E1000_DEV_ID_82573L                   0x109A
 #define E1000_DEV_ID_82574L                   0x10D3
 #define E1000_DEV_ID_82574LA                  0x10F6
 #define E1000_DEV_ID_82583V                   0x150C
 #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT   0x1096
 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT   0x1098
 #define E1000_DEV_ID_80003ES2LAN_COPPER_SPT   0x10BA
 #define E1000_DEV_ID_80003ES2LAN_SERDES_SPT   0x10BB
 #define E1000_DEV_ID_ICH8_82567V_3            0x1501
 #define E1000_DEV_ID_ICH8_IGP_M_AMT           0x1049
 #define E1000_DEV_ID_ICH8_IGP_AMT             0x104A
 #define E1000_DEV_ID_ICH8_IGP_C               0x104B
 #define E1000_DEV_ID_ICH8_IFE                 0x104C
 #define E1000_DEV_ID_ICH8_IFE_GT              0x10C4
 #define E1000_DEV_ID_ICH8_IFE_G               0x10C5
 #define E1000_DEV_ID_ICH8_IGP_M               0x104D
 #define E1000_DEV_ID_ICH9_IGP_M               0x10BF
 #define E1000_DEV_ID_ICH9_IGP_M_AMT           0x10F5
 #define E1000_DEV_ID_ICH9_IGP_M_V             0x10CB
 #define E1000_DEV_ID_ICH9_IGP_AMT             0x10BD
 #define E1000_DEV_ID_ICH9_BM                  0x10E5
 #define E1000_DEV_ID_ICH9_IGP_C               0x294C
 #define E1000_DEV_ID_ICH9_IFE                 0x10C0
 #define E1000_DEV_ID_ICH9_IFE_GT              0x10C3
 #define E1000_DEV_ID_ICH9_IFE_G               0x10C2
 #define E1000_DEV_ID_ICH10_R_BM_LM            0x10CC
 #define E1000_DEV_ID_ICH10_R_BM_LF            0x10CD
 #define E1000_DEV_ID_ICH10_R_BM_V             0x10CE
 #define E1000_DEV_ID_ICH10_HANKSVILLE         0xF0FE
 #define E1000_DEV_ID_ICH10_D_BM_LM            0x10DE
 #define E1000_DEV_ID_ICH10_D_BM_LF            0x10DF
 #define E1000_DEV_ID_ICH10_D_BM_V             0x1525
 
 #define E1000_DEV_ID_PCH_M_HV_LM              0x10EA
 #define E1000_DEV_ID_PCH_M_HV_LC              0x10EB
 #define E1000_DEV_ID_PCH_D_HV_DM              0x10EF
 #define E1000_DEV_ID_PCH_D_HV_DC              0x10F0
 #define E1000_DEV_ID_PCH2_LV_LM               0x1502
 #define E1000_DEV_ID_PCH2_LV_V                0x1503
 #define E1000_DEV_ID_82576                    0x10C9
 #define E1000_DEV_ID_82576_FIBER              0x10E6
 #define E1000_DEV_ID_82576_SERDES             0x10E7
 #define E1000_DEV_ID_82576_QUAD_COPPER        0x10E8
 #define E1000_DEV_ID_82576_QUAD_COPPER_ET2    0x1526
 #define E1000_DEV_ID_82576_NS                 0x150A
 #define E1000_DEV_ID_82576_NS_SERDES          0x1518
 #define E1000_DEV_ID_82576_SERDES_QUAD        0x150D
 #define E1000_DEV_ID_82576_VF                 0x10CA
 #define E1000_DEV_ID_82575EB_COPPER           0x10A7
 #define E1000_DEV_ID_82575EB_FIBER_SERDES     0x10A9
 #define E1000_DEV_ID_82575GB_QUAD_COPPER      0x10D6
 #define E1000_DEV_ID_82575GB_QUAD_COPPER_PM   0x10E2
 #define E1000_DEV_ID_82580_COPPER             0x150E
 #define E1000_DEV_ID_82580_FIBER              0x150F
 #define E1000_DEV_ID_82580_SERDES             0x1510
 #define E1000_DEV_ID_82580_SGMII              0x1511
 #define E1000_DEV_ID_82580_COPPER_DUAL        0x1516
 #define E1000_DEV_ID_82580_QUAD_FIBER         0x1527
+#define E1000_DEV_ID_DH89XXCC_SGMII           0x0436
+#define E1000_DEV_ID_DH89XXCC_SERDES          0x0438
 /* Symbolic names for revision numbers 0 through 4. */
 #define E1000_REVISION_0 0
 #define E1000_REVISION_1 1
 #define E1000_REVISION_2 2
 #define E1000_REVISION_3 3
 #define E1000_REVISION_4 4
 
 /* Symbolic names for function numbers 0 through 3. */
 #define E1000_FUNC_0     0
 #define E1000_FUNC_1     1
 #define E1000_FUNC_2     2
 #define E1000_FUNC_3     3
 
 /*
  * Alternate MAC address offsets for LAN0..LAN3; consecutive entries are
  * 3 apart.  NOTE(review): units look like 16-bit NVM words (3 words = one
  * 6-byte address) -- confirm against the alternate MAC address reader.
  */
 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0   0
 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1   3
 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2   6
 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3   9
 
 /*
  * MAC family identifiers.  Only e1000_undefined has an explicit value (0);
  * every other enumerator takes the next consecutive integer, so inserting
  * or reordering entries changes the value of everything after it.
  */
 enum e1000_mac_type {
 	e1000_undefined = 0,
 	e1000_82542,
 	e1000_82543,
 	e1000_82544,
 	e1000_82540,
 	e1000_82545,
 	e1000_82545_rev_3,
 	e1000_82546,
 	e1000_82546_rev_3,
 	e1000_82541,
 	e1000_82541_rev_2,
 	e1000_82547,
 	e1000_82547_rev_2,
 	e1000_82571,
 	e1000_82572,
 	e1000_82573,
 	e1000_82574,
 	e1000_82583,
 	e1000_80003es2lan,
 	e1000_ich8lan,
 	e1000_ich9lan,
 	e1000_ich10lan,
 	e1000_pchlan,
 	e1000_pch2lan,
 	e1000_82575,
 	e1000_82576,
 	e1000_82580,
 	e1000_vfadapt,
 	e1000_num_macs  /* List is 1-based, so subtract 1 for TRUE count. */
 };
 
 /*
  * Physical media kinds.  Values 0-3 are assigned explicitly;
  * e1000_num_media_types follows implicitly as 4.
  */
 enum e1000_media_type {
 	e1000_media_type_unknown = 0,
 	e1000_media_type_copper = 1,
 	e1000_media_type_fiber = 2,
 	e1000_media_type_internal_serdes = 3,
 	e1000_num_media_types
 };
 
 /* Kinds of non-volatile memory; values run consecutively from 0. */
 enum e1000_nvm_type {
 	e1000_nvm_unknown = 0,
 	e1000_nvm_none,
 	e1000_nvm_eeprom_spi,
 	e1000_nvm_eeprom_microwire,
 	e1000_nvm_flash_hw,
 	e1000_nvm_flash_sw
 };
 
 /*
  * NVM override selections (stored in e1000_nvm_info.override); values run
  * consecutively from 0, with 0 meaning no override.
  */
 enum e1000_nvm_override {
 	e1000_nvm_override_none = 0,
 	e1000_nvm_override_spi_small,
 	e1000_nvm_override_spi_large,
 	e1000_nvm_override_microwire_small,
 	e1000_nvm_override_microwire_large
 };
 
 /*
  * PHY identifiers (stored in e1000_phy_info.type); values run
  * consecutively from e1000_phy_unknown = 0.
  */
 enum e1000_phy_type {
 	e1000_phy_unknown = 0,
 	e1000_phy_none,
 	e1000_phy_m88,
 	e1000_phy_igp,
 	e1000_phy_igp_2,
 	e1000_phy_gg82563,
 	e1000_phy_igp_3,
 	e1000_phy_ife,
 	e1000_phy_bm,
 	e1000_phy_82578,
 	e1000_phy_82577,
 	e1000_phy_82579,
 	e1000_phy_82580,
 	e1000_phy_vf,
 };
 
 /* Host bus kinds (stored in e1000_bus_info.type); consecutive from 0. */
 enum e1000_bus_type {
 	e1000_bus_type_unknown = 0,
 	e1000_bus_type_pci,
 	e1000_bus_type_pcix,
 	e1000_bus_type_pci_express,
 	e1000_bus_type_reserved
 };
 
 /*
  * Bus speed grades (stored in e1000_bus_info.speed).  The enumerator values
  * are consecutive indices starting at 0, not the speeds themselves.
  */
 enum e1000_bus_speed {
 	e1000_bus_speed_unknown = 0,
 	e1000_bus_speed_33,
 	e1000_bus_speed_66,
 	e1000_bus_speed_100,
 	e1000_bus_speed_120,
 	e1000_bus_speed_133,
 	e1000_bus_speed_2500,
 	e1000_bus_speed_5000,
 	e1000_bus_speed_reserved
 };
 
 /*
  * Bus widths (stored in e1000_bus_info.width).  The PCIe entries evaluate
  * to their lane count: x1 and x2 fall out of the implicit numbering, x4 and
  * x8 are forced explicitly.  Numbering then continues from 8, so
  * e1000_bus_width_32 is 9, e1000_bus_width_64 is 10 and
  * e1000_bus_width_reserved is 11.
  */
 enum e1000_bus_width {
 	e1000_bus_width_unknown = 0,
 	e1000_bus_width_pcie_x1,
 	e1000_bus_width_pcie_x2,
 	e1000_bus_width_pcie_x4 = 4,
 	e1000_bus_width_pcie_x8 = 8,
 	e1000_bus_width_32,
 	e1000_bus_width_64,
 	e1000_bus_width_reserved
 };
 
 /*
  * 1000T receiver status, used for both local_rx and remote_rx in
  * e1000_phy_info.  0xFF marks the value as undefined.
  */
 enum e1000_1000t_rx_status {
 	e1000_1000t_rx_status_not_ok = 0,
 	e1000_1000t_rx_status_ok,
 	e1000_1000t_rx_status_undefined = 0xFF
 };
 
 /*
  * Cable polarity state (stored in e1000_phy_info.cable_polarity).  0xFF
  * marks the value as undefined.
  */
 enum e1000_rev_polarity {
 	e1000_rev_polarity_normal = 0,
 	e1000_rev_polarity_reversed,
 	e1000_rev_polarity_undefined = 0xFF
 };
 
 /*
  * Flow control modes, used for both current_mode and requested_mode in
  * e1000_fc_info.  Values 0-3 are consecutive; e1000_fc_default is the
  * out-of-band value 0xFF.
  */
 enum e1000_fc_mode {
 	e1000_fc_none = 0,
 	e1000_fc_rx_pause,
 	e1000_fc_tx_pause,
 	e1000_fc_full,
 	e1000_fc_default = 0xFF
 };
 
 /* FFE configuration states (stored in e1000_dev_spec_82541.ffe_config). */
 enum e1000_ffe_config {
 	e1000_ffe_config_enabled = 0,
 	e1000_ffe_config_active,
 	e1000_ffe_config_blocked
 };
 
 /*
  * DSP configuration states (stored in e1000_dev_spec_82541.dsp_config).
  * 0xFF marks the value as undefined.
  */
 enum e1000_dsp_config {
 	e1000_dsp_config_disabled = 0,
 	e1000_dsp_config_enabled,
 	e1000_dsp_config_activated,
 	e1000_dsp_config_undefined = 0xFF
 };
 
 /*
  * Master/slave selection, used for both ms_type and original_ms_type in
  * e1000_phy_info.  Values run consecutively from 0.
  */
 enum e1000_ms_type {
 	e1000_ms_hw_default = 0,
 	e1000_ms_force_master,
 	e1000_ms_force_slave,
 	e1000_ms_auto
 };
 
 /* SmartSpeed setting (stored in e1000_phy_info.smart_speed). */
 enum e1000_smart_speed {
 	e1000_smart_speed_default = 0,
 	e1000_smart_speed_on,
 	e1000_smart_speed_off
 };
 
 /*
  * SerDes link states (stored in e1000_mac_info.serdes_link_state); values
  * run consecutively from 0.
  */
 enum e1000_serdes_link_state {
 	e1000_serdes_link_down = 0,
 	e1000_serdes_link_autoneg_progress,
 	e1000_serdes_link_autoneg_complete,
 	e1000_serdes_link_forced_up
 };
 
 /*
  * The __leNN names are plain aliases for the unsigned types of the same
  * width.  They only annotate descriptor fields; no byte-order conversion
  * or type checking is implied by using them.
  */
 #define __le16 u16
 #define __le32 u32
 #define __le64 u64
 /*
  * Receive Descriptor
  *
  * Field order and widths define the descriptor layout; do not reorder.
  */
 struct e1000_rx_desc {
 	__le64 buffer_addr; /* Address of the descriptor's data buffer */
 	__le16 length;      /* Length of data DMAed into data buffer */
 	__le16 csum;        /* Packet checksum */
 	u8  status;         /* Descriptor status */
 	u8  errors;         /* Descriptor Errors */
 	__le16 special;
 };
 
 /*
  * Receive Descriptor - Extended
  *
  * Two overlaid views of the same storage: "read" carries the buffer
  * address, "wb" (writeback) carries status, length and related fields.
  */
 union e1000_rx_desc_extended {
 	struct {
 		__le64 buffer_addr;
 		__le64 reserved;
 	} read;
 	struct {
 		struct {
 			__le32 mrq;           /* Multiple Rx Queues */
 			/* The RSS hash shares storage with IP id + checksum. */
 			union {
 				__le32 rss;         /* RSS Hash */
 				struct {
 					__le16 ip_id;  /* IP id */
 					__le16 csum;   /* Packet Checksum */
 				} csum_ip;
 			} hi_dword;
 		} lower;
 		struct {
 			__le32 status_error;  /* ext status/error */
 			__le16 length;
 			__le16 vlan;          /* VLAN tag */
 		} upper;
 	} wb;  /* writeback */
 };
 
 /* Number of buffer addresses in one packet-split receive descriptor. */
 #define MAX_PS_BUFFERS 4
 /*
  * Receive Descriptor - Packet Split
  *
  * Two overlaid views of the same storage: "read" carries the buffer
  * addresses, "wb" (writeback) carries status and per-buffer lengths.
  */
 union e1000_rx_desc_packet_split {
 	struct {
 		/* one buffer for protocol header(s), three data buffers */
 		__le64 buffer_addr[MAX_PS_BUFFERS];
 	} read;
 	struct {
 		struct {
 			__le32 mrq;           /* Multiple Rx Queues */
 			/* The RSS hash shares storage with IP id + checksum. */
 			union {
 				__le32 rss;           /* RSS Hash */
 				struct {
 					__le16 ip_id;    /* IP id */
 					__le16 csum;     /* Packet Checksum */
 				} csum_ip;
 			} hi_dword;
 		} lower;
 		struct {
 			__le32 status_error;  /* ext status/error */
 			__le16 length0;       /* length of buffer 0 */
 			__le16 vlan;          /* VLAN tag */
 		} middle;
 		struct {
 			__le16 header_status;
 			__le16 length[3];     /* length of buffers 1-3 */
 		} upper;
 		__le64 reserved;
 	} wb; /* writeback */
 };
 
 /*
  * Transmit Descriptor
  *
  * "lower" and "upper" can each be accessed as one 32-bit word (data) or as
  * the individual fields overlaid on that word.
  */
 struct e1000_tx_desc {
 	__le64 buffer_addr;   /* Address of the descriptor's data buffer */
 	union {
 		__le32 data;
 		struct {
 			__le16 length;    /* Data buffer length */
 			u8 cso;           /* Checksum offset */
 			u8 cmd;           /* Descriptor control */
 		} flags;
 	} lower;
 	union {
 		__le32 data;
 		struct {
 			u8 status;        /* Descriptor status */
 			u8 css;           /* Checksum start */
 			__le16 special;
 		} fields;
 	} upper;
 };
 
 /*
  * Offload Context Descriptor
  *
  * Holds the IP and TCP checksum start/offset/end values plus header length
  * and MSS.  Each union can be accessed as one 32-bit word or as the
  * individual fields overlaid on that word.
  */
 struct e1000_context_desc {
 	union {
 		__le32 ip_config;
 		struct {
 			u8 ipcss;         /* IP checksum start */
 			u8 ipcso;         /* IP checksum offset */
 			__le16 ipcse;     /* IP checksum end */
 		} ip_fields;
 	} lower_setup;
 	union {
 		__le32 tcp_config;
 		struct {
 			u8 tucss;         /* TCP checksum start */
 			u8 tucso;         /* TCP checksum offset */
 			__le16 tucse;     /* TCP checksum end */
 		} tcp_fields;
 	} upper_setup;
 	__le32 cmd_and_length;
 	union {
 		__le32 data;
 		struct {
 			u8 status;        /* Descriptor status */
 			u8 hdr_len;       /* Header length */
 			__le16 mss;       /* Maximum segment size */
 		} fields;
 	} tcp_seg_setup;
 };
 
 /*
  * Offload data descriptor
  *
  * Same overall shape as struct e1000_tx_desc, but the two single-byte
  * fields after the length are typ_len_ext/cmd and the upper word carries
  * packet options (popts) instead of a checksum start.
  */
 struct e1000_data_desc {
 	__le64 buffer_addr;   /* Address of the descriptor's buffer address */
 	union {
 		__le32 data;
 		struct {
 			__le16 length;    /* Data buffer length */
 			u8 typ_len_ext;
 			u8 cmd;
 		} flags;
 	} lower;
 	union {
 		__le32 data;
 		struct {
 			u8 status;        /* Descriptor status */
 			u8 popts;         /* Packet Options */
 			__le16 special;
 		} fields;
 	} upper;
 };
 
 /*
  * Statistics counters collected by the MAC
  *
  * Every member is a 64-bit software counter.  NOTE(review): member names
  * appear to follow the hardware statistics register mnemonics (e.g. the
  * prcNN / ptcNN size buckets) -- confirm against the register definitions
  * before documenting individual counters.
  */
 struct e1000_hw_stats {
 	u64 crcerrs;
 	u64 algnerrc;
 	u64 symerrs;
 	u64 rxerrc;
 	u64 mpc;
 	u64 scc;
 	u64 ecol;
 	u64 mcc;
 	u64 latecol;
 	u64 colc;
 	u64 dc;
 	u64 tncrs;
 	u64 sec;
 	u64 cexterr;
 	u64 rlec;
 	u64 xonrxc;
 	u64 xontxc;
 	u64 xoffrxc;
 	u64 xofftxc;
 	u64 fcruc;
 	u64 prc64;
 	u64 prc127;
 	u64 prc255;
 	u64 prc511;
 	u64 prc1023;
 	u64 prc1522;
 	u64 gprc;
 	u64 bprc;
 	u64 mprc;
 	u64 gptc;
 	u64 gorc;
 	u64 gotc;
 	u64 rnbc;
 	u64 ruc;
 	u64 rfc;
 	u64 roc;
 	u64 rjc;
 	u64 mgprc;
 	u64 mgpdc;
 	u64 mgptc;
 	u64 tor;
 	u64 tot;
 	u64 tpr;
 	u64 tpt;
 	u64 ptc64;
 	u64 ptc127;
 	u64 ptc255;
 	u64 ptc511;
 	u64 ptc1023;
 	u64 ptc1522;
 	u64 mptc;
 	u64 bptc;
 	u64 tsctc;
 	u64 tsctfc;
 	u64 iac;
 	u64 icrxptc;
 	u64 icrxatc;
 	u64 ictxptc;
 	u64 ictxatc;
 	u64 ictxqec;
 	u64 ictxqmtc;
 	u64 icrxdmtc;
 	u64 icrxoc;
 	u64 cbtmpc;
 	u64 htdpmc;
 	u64 cbrdpc;
 	u64 cbrmpc;
 	u64 rpthc;
 	u64 hgptc;
 	u64 htcbdpc;
 	u64 hgorc;
 	u64 hgotc;
 	u64 lenerrs;
 	u64 scvpc;
 	u64 hrmpc;
 	u64 doosync;
 };
 
 /*
  * Statistics kept for a virtual function.  The same nine counters appear
  * three times: as 64-bit base_* values, as 32-bit last_* values, and as
  * unprefixed 64-bit values.
  * NOTE(review): presumably last_* holds the previous raw 32-bit register
  * reading and base_* a reference point used to extend it to 64 bits --
  * confirm against the VF statistics update code.
  */
 struct e1000_vf_stats {
 	u64 base_gprc;
 	u64 base_gptc;
 	u64 base_gorc;
 	u64 base_gotc;
 	u64 base_mprc;
 	u64 base_gotlbc;
 	u64 base_gptlbc;
 	u64 base_gorlbc;
 	u64 base_gprlbc;
 
 	u32 last_gprc;
 	u32 last_gptc;
 	u32 last_gorc;
 	u32 last_gotc;
 	u32 last_mprc;
 	u32 last_gotlbc;
 	u32 last_gptlbc;
 	u32 last_gorlbc;
 	u32 last_gprlbc;
 
 	u64 gprc;
 	u64 gptc;
 	u64 gorc;
 	u64 gotc;
 	u64 mprc;
 	u64 gotlbc;
 	u64 gptlbc;
 	u64 gorlbc;
 	u64 gprlbc;
 };
 
 /* PHY-level error counters: idle errors and receive errors. */
 struct e1000_phy_stats {
 	u32 idle_errors;
 	u32 receive_errors;
 };
 
 /*
  * Manageability DHCP cookie; an instance is embedded in struct e1000_hw as
  * mng_cookie.  Carries a signature, a status byte, a VLAN id and a
  * checksum; the reservedN members are placeholders that keep the layout.
  */
 struct e1000_host_mng_dhcp_cookie {
 	u32 signature;
 	u8  status;
 	u8  reserved0;
 	u16 vlan_id;
 	u32 reserved1;
 	u16 reserved2;
 	u8  reserved3;
 	u8  checksum;
 };
 
 /*
  * Host Interface "Rev 1"
  *
  * Four single-byte fields that precede the data of a host command (see
  * struct e1000_host_command_info).
  */
 struct e1000_host_command_header {
 	u8 command_id;
 	u8 command_length;
 	u8 command_options;
 	u8 checksum;
 };
 
 /* Size in bytes of the data area of a "Rev 1" host command. */
 #define E1000_HI_MAX_DATA_LENGTH     252
 /* A complete "Rev 1" host command: header followed by the data bytes. */
 struct e1000_host_command_info {
 	struct e1000_host_command_header command_header;
 	u8 command_data[E1000_HI_MAX_DATA_LENGTH];
 };
 
 /*
  * Host Interface "Rev 2"
  *
  * Header that precedes the data of a manageability host command (see
  * struct e1000_host_mng_command_info).  Unlike the "Rev 1" header the
  * command length is 16 bits wide.
  */
 struct e1000_host_mng_command_header {
 	u8  command_id;
 	u8  checksum;
 	u16 reserved1;
 	u16 reserved2;
 	u16 command_length;
 };
 
 /* Size in bytes (0x6F8 = 1784) of the data area of a "Rev 2" command. */
 #define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8
 /* A complete "Rev 2" host command: header followed by the data bytes. */
 struct e1000_host_mng_command_info {
 	struct e1000_host_mng_command_header command_header;
 	u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];
 };
 
 #include "e1000_mac.h"
 #include "e1000_phy.h"
 #include "e1000_nvm.h"
 #include "e1000_manage.h"
 #include "e1000_mbx.h"
 
 /*
  * Table of MAC-level operations, embedded in struct e1000_mac_info as
  * "ops".  Entries may be left unset: the API wrappers for read_mac_addr,
  * power_up_serdes and shutdown_serdes test the pointer before calling it.
  */
 struct e1000_mac_operations {
 	/* Function pointers for the MAC. */
 	s32  (*init_params)(struct e1000_hw *);
 	s32  (*id_led_init)(struct e1000_hw *);
 	s32  (*blink_led)(struct e1000_hw *);
 	s32  (*check_for_link)(struct e1000_hw *);
 	bool (*check_mng_mode)(struct e1000_hw *hw);
 	s32  (*cleanup_led)(struct e1000_hw *);
 	void (*clear_hw_cntrs)(struct e1000_hw *);
 	void (*clear_vfta)(struct e1000_hw *);
 	s32  (*get_bus_info)(struct e1000_hw *);
 	void (*set_lan_id)(struct e1000_hw *);
 	s32  (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
 	s32  (*led_on)(struct e1000_hw *);
 	s32  (*led_off)(struct e1000_hw *);
 	void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32);
 	s32  (*reset_hw)(struct e1000_hw *);
 	s32  (*init_hw)(struct e1000_hw *);
 	void (*shutdown_serdes)(struct e1000_hw *);
 	void (*power_up_serdes)(struct e1000_hw *);
 	s32  (*setup_link)(struct e1000_hw *);
 	s32  (*setup_physical_interface)(struct e1000_hw *);
 	s32  (*setup_led)(struct e1000_hw *);
 	void (*write_vfta)(struct e1000_hw *, u32, u32);
 	void (*config_collision_dist)(struct e1000_hw *);
 	void (*rar_set)(struct e1000_hw *, u8*, u32);
 	s32  (*read_mac_addr)(struct e1000_hw *);
 	s32  (*validate_mdi_setting)(struct e1000_hw *);
 	s32  (*mng_host_if_write)(struct e1000_hw *, u8*, u16, u16, u8*);
 	s32  (*mng_write_cmd_header)(struct e1000_hw *hw,
                       struct e1000_host_mng_command_header*);
 	s32  (*mng_enable_host_if)(struct e1000_hw *);
 	s32  (*wait_autoneg)(struct e1000_hw *);
 };
 
 /*
  * Table of PHY-level operations, embedded in struct e1000_phy_info as
  * "ops".  Entries may be left unset: the API wrappers for get_info, reset,
  * commit, set_d0_lplu_state, set_d3_lplu_state, power_up and power_down
  * test the pointer before calling it.
  */
 struct e1000_phy_operations {
 	s32  (*init_params)(struct e1000_hw *);
 	s32  (*acquire)(struct e1000_hw *);
 	s32  (*cfg_on_link_up)(struct e1000_hw *);
 	s32  (*check_polarity)(struct e1000_hw *);
 	s32  (*check_reset_block)(struct e1000_hw *);
 	s32  (*commit)(struct e1000_hw *);
 	s32  (*force_speed_duplex)(struct e1000_hw *);
 	s32  (*get_cfg_done)(struct e1000_hw *hw);
 	s32  (*get_cable_length)(struct e1000_hw *);
 	s32  (*get_info)(struct e1000_hw *);
 	s32  (*read_reg)(struct e1000_hw *, u32, u16 *);
 	s32  (*read_reg_locked)(struct e1000_hw *, u32, u16 *);
 	void (*release)(struct e1000_hw *);
 	s32  (*reset)(struct e1000_hw *);
 	s32  (*set_d0_lplu_state)(struct e1000_hw *, bool);
 	s32  (*set_d3_lplu_state)(struct e1000_hw *, bool);
 	s32  (*write_reg)(struct e1000_hw *, u32, u16);
 	s32  (*write_reg_locked)(struct e1000_hw *, u32, u16);
 	void (*power_up)(struct e1000_hw *);
 	void (*power_down)(struct e1000_hw *);
 };
 
 /*
  * Table of NVM-level operations, embedded in struct e1000_nvm_info as
  * "ops".  Entries may be left unset: the API wrappers for read, write,
  * update, validate and reload test the pointer before calling it.  The
  * read and write hooks take (hw, word offset, word count, data buffer),
  * matching e1000_read_nvm() and e1000_write_nvm().
  */
 struct e1000_nvm_operations {
 	s32  (*init_params)(struct e1000_hw *);
 	s32  (*acquire)(struct e1000_hw *);
 	s32  (*read)(struct e1000_hw *, u16, u16, u16 *);
 	void (*release)(struct e1000_hw *);
 	void (*reload)(struct e1000_hw *);
 	s32  (*update)(struct e1000_hw *);
 	s32  (*valid_led_default)(struct e1000_hw *, u16 *);
 	s32  (*validate)(struct e1000_hw *);
 	s32  (*write)(struct e1000_hw *, u16, u16, u16 *);
 };
 
 /*
  * Per-adapter MAC state: the operations table, the current and permanent
  * 6-byte addresses, the MAC type, and assorted settings and flags.
  * Embedded in struct e1000_hw as "mac".
  */
 struct e1000_mac_info {
 	struct e1000_mac_operations ops;
 	u8 addr[6];
 	u8 perm_addr[6];
 
 	enum e1000_mac_type type;
 
 	u32 collision_delta;
 	u32 ledctl_default;
 	u32 ledctl_mode1;
 	u32 ledctl_mode2;
 	u32 mc_filter_type;
 	u32 tx_packet_delta;
 	u32 txcw;
 
 	u16 current_ifs_val;
 	u16 ifs_max_val;
 	u16 ifs_min_val;
 	u16 ifs_ratio;
 	u16 ifs_step_size;
 	u16 mta_reg_count;
 	u16 uta_reg_count;
 
 	/* Maximum size of the MTA register table in all supported adapters */
 	#define MAX_MTA_REG 128
 	/* Sized for the largest table; mta_reg_count is the per-adapter count. */
 	u32 mta_shadow[MAX_MTA_REG];
 	u16 rar_entry_count;
 
 	u8  forced_speed_duplex;
 
 	bool adaptive_ifs;
 	bool has_fwsm;
 	bool arc_subsystem_valid;
 	bool asf_firmware_present;
 	bool autoneg;
 	bool autoneg_failed;
 	bool get_link_status;
 	bool in_ifs_mode;
 	bool report_tx_early;
 	enum e1000_serdes_link_state serdes_link_state;
 	bool serdes_has_link;
 	bool tx_pkt_filtering;
 };
 
 /*
  * Per-adapter PHY state: the operations table, PHY type and id, link
  * partner and polarity status, autonegotiation settings, cable length
  * figures and assorted flags.  Embedded in struct e1000_hw as "phy".
  */
 struct e1000_phy_info {
 	struct e1000_phy_operations ops;
 	enum e1000_phy_type type;
 
 	enum e1000_1000t_rx_status local_rx;
 	enum e1000_1000t_rx_status remote_rx;
 	enum e1000_ms_type ms_type;
 	enum e1000_ms_type original_ms_type;
 	enum e1000_rev_polarity cable_polarity;
 	enum e1000_smart_speed smart_speed;
 
 	u32 addr;
 	u32 id;
 	u32 reset_delay_us; /* in usec */
 	u32 revision;
 
 	enum e1000_media_type media_type;
 
 	u16 autoneg_advertised;
 	u16 autoneg_mask;
 	u16 cable_length;
 	u16 max_cable_length;
 	u16 min_cable_length;
 
 	u8 mdix;
 
 	bool disable_polarity_correction;
 	bool is_mdix;
 	bool polarity_correction;
 	bool reset_disable;
 	bool speed_downgraded;
 	bool autoneg_wait_to_complete;
 };
 
 /*
  * Per-adapter NVM state: the operations table, the NVM type and override
  * selection, flash geometry, and access parameters.  Embedded in struct
  * e1000_hw as "nvm".
  */
 struct e1000_nvm_info {
 	struct e1000_nvm_operations ops;
 	enum e1000_nvm_type type;
 	enum e1000_nvm_override override;
 
 	u32 flash_bank_size;
 	u32 flash_base_addr;
 
 	u16 word_size;
 	u16 delay_usec;
 	u16 address_bits;
 	u16 opcode_bits;
 	u16 page_size;
 };
 
 /*
  * Host bus description: type, speed and width, plus the function number
  * and a copy of the PCI command word.  Embedded in struct e1000_hw as
  * "bus".
  */
 struct e1000_bus_info {
 	enum e1000_bus_type type;
 	enum e1000_bus_speed speed;
 	enum e1000_bus_width width;
 
 	u16 func;
 	u16 pci_cmd_word;
 };
 
 /*
  * Flow control settings.  Embedded in struct e1000_hw as "fc".  The mode
  * requested by the caller and the mode actually in effect are tracked
  * separately.
  */
 struct e1000_fc_info {
 	u32 high_water;          /* Flow control high-water mark */
 	u32 low_water;           /* Flow control low-water mark */
 	u16 pause_time;          /* Flow control pause timer */
 	u16 refresh_time;        /* Flow control refresh timer */
 	bool send_xon;           /* Flow control send XON */
 	bool strict_ieee;        /* Strict IEEE mode */
 	enum e1000_fc_mode current_mode; /* FC mode in effect */
 	enum e1000_fc_mode requested_mode; /* FC mode requested by caller */
 };
 
 /*
  * Table of mailbox operations, embedded in struct e1000_mbx_info as "ops".
  * NOTE(review): the read/write hooks take (hw, u32 *, u16, u16) --
  * presumably message buffer, size and mailbox id; confirm the parameter
  * meaning in e1000_mbx.h.
  */
 struct e1000_mbx_operations {
 	s32 (*init_params)(struct e1000_hw *hw);
 	s32 (*read)(struct e1000_hw *, u32 *, u16,  u16);
 	s32 (*write)(struct e1000_hw *, u32 *, u16, u16);
 	s32 (*read_posted)(struct e1000_hw *, u32 *, u16,  u16);
 	s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16);
 	s32 (*check_for_msg)(struct e1000_hw *, u16);
 	s32 (*check_for_ack)(struct e1000_hw *, u16);
 	s32 (*check_for_rst)(struct e1000_hw *, u16);
 };
 
 /*
  * Mailbox counters: messages transmitted and received, then acks, requests
  * and resets.
  */
 struct e1000_mbx_stats {
 	u32 msgs_tx;
 	u32 msgs_rx;
 
 	u32 acks;
 	u32 reqs;
 	u32 rsts;
 };
 
 /*
  * Mailbox state: operations table, counters, and the timeout, delay and
  * size settings.  Embedded in struct e1000_hw as "mbx".
  */
 struct e1000_mbx_info {
 	struct e1000_mbx_operations ops;
 	struct e1000_mbx_stats stats;
 	u32 timeout;
 	u32 usec_delay;
 	u16 size;
 };
 
 /* Device-specific state selected through e1000_hw.dev_spec._82541. */
 struct e1000_dev_spec_82541 {
 	enum e1000_dsp_config dsp_config;
 	enum e1000_ffe_config ffe_config;
 	u16 spd_default;
 	bool phy_init_script;
 };
 
 /* Device-specific state selected through e1000_hw.dev_spec._82542. */
 struct e1000_dev_spec_82542 {
 	bool dma_fairness;
 };
 
 /* Device-specific state selected through e1000_hw.dev_spec._82543. */
 struct e1000_dev_spec_82543 {
 	u32  tbi_compatibility;
 	bool dma_fairness;
 	bool init_phy_disabled;
 };
 
 /*
  * Device-specific state selected through e1000_hw.dev_spec._82571.
  * E1000_MUTEX is not defined in this header; it comes from elsewhere
  * (presumably e1000_osdep.h -- confirm).
  */
 struct e1000_dev_spec_82571 {
 	bool laa_is_present;
 	u32 smb_counter;
 	E1000_MUTEX swflag_mutex;
 };
 
 /* Device-specific state selected through e1000_hw.dev_spec._80003es2lan. */
 struct e1000_dev_spec_80003es2lan {
 	bool  mdic_wa_enable;
 };
 
 /*
  * One entry of the shadow RAM array in struct e1000_dev_spec_ich8lan: a
  * 16-bit value paired with a "modified" flag.
  */
 struct e1000_shadow_ram {
 	u16  value;
 	bool modified;
 };
 
 /* Number of entries in the ich8lan shadow RAM array. */
 #define E1000_SHADOW_RAM_WORDS		2048
 
 /*
  * Device-specific state selected through e1000_hw.dev_spec.ich8lan.  The
  * shadow_ram array makes this by far the largest member of the dev_spec
  * union.
  */
 struct e1000_dev_spec_ich8lan {
 	bool kmrn_lock_loss_workaround_enabled;
 	struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS];
 	E1000_MUTEX nvm_mutex;
 	E1000_MUTEX swflag_mutex;
 	bool nvm_k1_enabled;
 	bool eee_disable;
 };
 
 /* Device-specific state selected through e1000_hw.dev_spec._82575. */
 struct e1000_dev_spec_82575 {
 	bool sgmii_active;
 	bool global_device_reset;
 };
 
 /* Device-specific state selected through e1000_hw.dev_spec.vf. */
 struct e1000_dev_spec_vf {
 	u32	vf_number;
 	u32	v2p_mailbox;
 };
 
 /*
  * Top-level hardware context handed to every shared-code routine.  It
  * aggregates the MAC, flow control, PHY, NVM, bus and mailbox state, the
  * manageability DHCP cookie, one device-specific structure, and the
  * device identification values.
  */
 struct e1000_hw {
 	/* Opaque pointer; not interpreted anywhere in this header. */
 	void *back;
 
 	u8 *hw_addr;
 	u8 *flash_address;
 	unsigned long io_base;
 
 	struct e1000_mac_info  mac;
 	struct e1000_fc_info   fc;
 	struct e1000_phy_info  phy;
 	struct e1000_nvm_info  nvm;
 	struct e1000_bus_info  bus;
 	struct e1000_mbx_info mbx;
 	struct e1000_host_mng_dhcp_cookie mng_cookie;
 
 	/* Only one member is meaningful at a time; they share storage. */
 	union {
 		struct e1000_dev_spec_82541	_82541;
 		struct e1000_dev_spec_82542	_82542;
 		struct e1000_dev_spec_82543	_82543;
 		struct e1000_dev_spec_82571	_82571;
 		struct e1000_dev_spec_80003es2lan _80003es2lan;
 		struct e1000_dev_spec_ich8lan	ich8lan;
 		struct e1000_dev_spec_82575	_82575;
 		struct e1000_dev_spec_vf	vf;
 	} dev_spec;
 
 	u16 device_id;
 	u16 subsystem_vendor_id;
 	u16 subsystem_device_id;
 	u16 vendor_id;
 
 	u8  revision_id;
 };
 
 #include "e1000_82541.h"
 #include "e1000_82543.h"
 #include "e1000_82571.h"
 #include "e1000_80003es2lan.h"
 #include "e1000_ich8lan.h"
 #include "e1000_82575.h"
 
 /*
  * These functions must be implemented by drivers
  *
  * They are only declared here; no definition is provided by this header.
  * All PCI/PCIe register accessors pass the 16-bit value by pointer, for
  * writes as well as reads.  The PCIe capability accessors return an s32
  * status; the plain PCI config accessors return nothing.
  */
 void e1000_pci_clear_mwi(struct e1000_hw *hw);
 void e1000_pci_set_mwi(struct e1000_hw *hw);
 s32  e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
 s32  e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
 void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
 void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
 
 #endif
Index: projects/binutils-2.17/sys/dev/e1000/if_em.c
===================================================================
--- projects/binutils-2.17/sys/dev/e1000/if_em.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/e1000/if_em.c	(revision 215830)
@@ -1,5480 +1,5508 @@
 /******************************************************************************
 
   Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_device_polling.h"
 #include "opt_inet.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #if __FreeBSD_version >= 800000
 #include <sys/buf_ring.h>
 #endif
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <machine/in_cksum.h>
 #include <dev/led/led.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include "e1000_api.h"
 #include "e1000_82571.h"
 #include "if_em.h"
 
 /*********************************************************************
  *  Set this to one to display debug statistics
  *
  *  Not static, so the symbol is visible outside this file.
  *  Defaults to 0 (off).
  *********************************************************************/
 int	em_display_debug_stats = 0;
 
 /*********************************************************************
  *  Driver version:
  *
  *  em_probe() appends this string to the branding string when it
  *  builds the device description.
  *********************************************************************/
-char em_driver_version[] = "7.1.7";
+char em_driver_version[] = "7.1.8";
 
 /*********************************************************************
  *  PCI Device ID Table
  *
  *  Used by probe to select devices to load on
  *  Last field stores an index into em_strings
  *  Last entry must be all 0s (em_probe stops at vendor_id == 0)
  *
  *  PCI_ANY_ID in a subvendor/subdevice slot matches any value.
  *
  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
  *********************************************************************/
 
 static em_vendor_info_t em_vendor_info_array[] =
 {
 	/* Intel(R) PRO/1000 Network Connection */
 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
 
 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{ 0, 0, 0, 0, 0}
 };
 
 /*********************************************************************
  *  Table of branding strings for all supported NICs.
  *
  *  Indexed by the last ("String Index") field of each
  *  em_vendor_info_array entry; every entry currently uses index 0.
  *********************************************************************/
 
 static char *em_strings[] = {
 	"Intel(R) PRO/1000 Network Connection"
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 static int	em_probe(device_t);
 static int	em_attach(device_t);
 static int	em_detach(device_t);
 static int	em_shutdown(device_t);
 static int	em_suspend(device_t);
 static int	em_resume(device_t);
 static void	em_start(struct ifnet *);
 static void	em_start_locked(struct ifnet *, struct tx_ring *);
 #ifdef EM_MULTIQUEUE
 static int	em_mq_start(struct ifnet *, struct mbuf *);
 static int	em_mq_start_locked(struct ifnet *,
 		    struct tx_ring *, struct mbuf *);
 static void	em_qflush(struct ifnet *);
 #endif
 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
 static void	em_init(void *);
 static void	em_init_locked(struct adapter *);
 static void	em_stop(void *);
 static void	em_media_status(struct ifnet *, struct ifmediareq *);
 static int	em_media_change(struct ifnet *);
 static void	em_identify_hardware(struct adapter *);
 static int	em_allocate_pci_resources(struct adapter *);
 static int	em_allocate_legacy(struct adapter *);
 static int	em_allocate_msix(struct adapter *);
 static int	em_allocate_queues(struct adapter *);
 static int	em_setup_msix(struct adapter *);
 static void	em_free_pci_resources(struct adapter *);
 static void	em_local_timer(void *);
 static void	em_reset(struct adapter *);
 static int	em_setup_interface(device_t, struct adapter *);
 
 static void	em_setup_transmit_structures(struct adapter *);
 static void	em_initialize_transmit_unit(struct adapter *);
 static int	em_allocate_transmit_buffers(struct tx_ring *);
 static void	em_free_transmit_structures(struct adapter *);
 static void	em_free_transmit_buffers(struct tx_ring *);
 
 static int	em_setup_receive_structures(struct adapter *);
 static int	em_allocate_receive_buffers(struct rx_ring *);
 static void	em_initialize_receive_unit(struct adapter *);
 static void	em_free_receive_structures(struct adapter *);
 static void	em_free_receive_buffers(struct rx_ring *);
 
 static void	em_enable_intr(struct adapter *);
 static void	em_disable_intr(struct adapter *);
 static void	em_update_stats_counters(struct adapter *);
 static void	em_add_hw_stats(struct adapter *adapter);
 static bool	em_txeof(struct tx_ring *);
 static bool	em_rxeof(struct rx_ring *, int, int *);
 #ifndef __NO_STRICT_ALIGNMENT
 static int	em_fixup_rx(struct rx_ring *);
 #endif
 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
 		    struct ip *, u32 *, u32 *);
 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
 		    struct tcphdr *, u32 *, u32 *);
 static void	em_set_promisc(struct adapter *);
 static void	em_disable_promisc(struct adapter *);
 static void	em_set_multi(struct adapter *);
 static void	em_update_link_status(struct adapter *);
 static void	em_refresh_mbufs(struct rx_ring *, int);
 static void	em_register_vlan(void *, struct ifnet *, u16);
 static void	em_unregister_vlan(void *, struct ifnet *, u16);
 static void	em_setup_vlan_hw_support(struct adapter *);
 static int	em_xmit(struct tx_ring *, struct mbuf **);
 static int	em_dma_malloc(struct adapter *, bus_size_t,
 		    struct em_dma_alloc *, int);
 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
 static void	em_print_nvm_info(struct adapter *);
 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
 static void	em_print_debug_info(struct adapter *);
 static int 	em_is_valid_ether_addr(u8 *);
 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
 		    const char *, struct em_int_delay_info *, int, int);
 /* Management and WOL Support */
 static void	em_init_manageability(struct adapter *);
 static void	em_release_manageability(struct adapter *);
 static void     em_get_hw_control(struct adapter *);
 static void     em_release_hw_control(struct adapter *);
 static void	em_get_wakeup(device_t);
 static void     em_enable_wakeup(device_t);
 static int	em_enable_phy_wakeup(struct adapter *);
 static void	em_led_func(void *, int);
+static void	em_disable_aspm(struct adapter *);
 
 static int	em_irq_fast(void *);
 
 /* MSIX handlers */
 static void	em_msix_tx(void *);
 static void	em_msix_rx(void *);
 static void	em_msix_link(void *);
 static void	em_handle_tx(void *context, int pending);
 static void	em_handle_rx(void *context, int pending);
 static void	em_handle_link(void *context, int pending);
 
 static void	em_add_rx_process_limit(struct adapter *, const char *,
 		    const char *, int *, int);
 static void	em_set_flow_cntrl(struct adapter *, const char *,
 		    const char *, int *, int);
 
 static __inline void em_rx_discard(struct rx_ring *, int);
 
 #ifdef DEVICE_POLLING
 /* Polling handler, declared only when DEVICE_POLLING is configured. */
 static poll_handler_t em_poll;
 #endif /* DEVICE_POLLING */
 
 /*********************************************************************
  *  FreeBSD Device Interface Entry Points
  *
  *  Binds the generic device methods (probe, attach, detach, shutdown,
  *  suspend, resume) to this driver's handlers.  The trailing {0, 0}
  *  entry ends the table.
  *********************************************************************/
 
 static device_method_t em_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, em_probe),
 	DEVMETHOD(device_attach, em_attach),
 	DEVMETHOD(device_detach, em_detach),
 	DEVMETHOD(device_shutdown, em_shutdown),
 	DEVMETHOD(device_suspend, em_suspend),
 	DEVMETHOD(device_resume, em_resume),
 	{0, 0}
 };
 
 /*
  * Driver description: name "em", the method table above, and the size
  * of the per-device softc (struct adapter, as fetched with
  * device_get_softc() in the handlers).
  */
 static driver_t em_driver = {
 	"em", em_methods, sizeof(struct adapter),
 };
 
 /*
  * Register the "em" driver on the pci bus and declare its module
  * dependencies on the pci and ether modules.
  */
 devclass_t em_devclass;
 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
 MODULE_DEPEND(em, pci, 1, 1, 1);
 MODULE_DEPEND(em, ether, 1, 1, 1);
 
 /*********************************************************************
  *  Tunable default values.
  *
  *  Each TUNABLE_INT below names the hw.em.* tunable that can override
  *  the compiled-in default of the variable next to it.
  *********************************************************************/
 
 /*
  * Interrupt-delay unit conversion.  The arithmetic treats one tick as
  * 1.024 usecs and rounds to the nearest whole unit in both directions.
  */
 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
 /* NOTE(review): presumably a header length used by the TSO path -- confirm */
 #define M_TSO_LEN			66
 
 /* Allow common code without TSO */
 #ifndef CSUM_TSO
 #define CSUM_TSO	0
 #endif
 
 /* Default per-packet interrupt delays, expressed in usecs */
 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
 
 /* Default absolute interrupt delay limits, expressed in usecs */
 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
 
 /*
  * Requested descriptor ring sizes.  em_attach() validates them and
  * falls back to EM_DEFAULT_RXD / EM_DEFAULT_TXD when they are out of
  * range or misaligned.
  */
 static int em_rxd = EM_DEFAULT_RXD;
 static int em_txd = EM_DEFAULT_TXD;
 TUNABLE_INT("hw.em.rxd", &em_rxd);
 TUNABLE_INT("hw.em.txd", &em_txd);
 
 /* NOTE(review): presumably enables PHY smart power down -- confirm */
 static int em_smart_pwr_down = FALSE;
 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
 
 /* Controls whether promiscuous also shows bad packets */
 static int em_debug_sbp = FALSE;
 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
 
 /* MSI-X use is enabled by default */
 static int em_enable_msix = TRUE;
 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
 
 /* How many packets rxeof tries to clean at a time */
 static int em_rx_process_limit = 100;
 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
 
 /* Flow control setting - default to FULL */
 static int em_fc_setting = e1000_fc_full;
 TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
 
 /* Global used in WOL setup with multiport cards */
 static int global_quad_port_a = 0;
 
 /*********************************************************************
  *  Device identification routine
  *
  *  em_probe determines if the driver should be loaded on
  *  adapter based on PCI vendor/device id of the adapter.
  *
  *  The device's ids are compared against em_vendor_info_array, which
  *  is walked up to its all-zero terminator.  An entry matches when the
  *  vendor and device ids are equal and each subsystem id is either
  *  equal or listed as PCI_ANY_ID.  On a match the device description
  *  is set to "<branding string> <driver version>".
  *
  *  return BUS_PROBE_DEFAULT on success, positive on failure
  *********************************************************************/
 
 static int
 em_probe(device_t dev)
 {
 	char		adapter_name[60];
 	u16		pci_vendor_id = 0;
 	u16		pci_device_id = 0;
 	u16		pci_subvendor_id = 0;
 	u16		pci_subdevice_id = 0;
 	em_vendor_info_t *ent;
 
 	INIT_DEBUGOUT("em_probe: begin");
 
 	/* Cheap reject before reading the remaining ids */
 	pci_vendor_id = pci_get_vendor(dev);
 	if (pci_vendor_id != EM_VENDOR_ID)
 		return (ENXIO);
 
 	pci_device_id = pci_get_device(dev);
 	pci_subvendor_id = pci_get_subvendor(dev);
 	pci_subdevice_id = pci_get_subdevice(dev);
 
 	ent = em_vendor_info_array;
 	while (ent->vendor_id != 0) {
 		if ((pci_vendor_id == ent->vendor_id) &&
 		    (pci_device_id == ent->device_id) &&
 
 		    ((pci_subvendor_id == ent->subvendor_id) ||
 		    (ent->subvendor_id == PCI_ANY_ID)) &&
 
 		    ((pci_subdevice_id == ent->subdevice_id) ||
 		    (ent->subdevice_id == PCI_ANY_ID))) {
 			/*
 			 * Bounded formatting: the branding and version
 			 * strings are table data that may grow, so never
 			 * rely on them fitting the stack buffer.
 			 */
 			snprintf(adapter_name, sizeof(adapter_name), "%s %s",
 				em_strings[ent->index],
 				em_driver_version);
 			device_set_desc_copy(dev, adapter_name);
 			return (BUS_PROBE_DEFAULT);
 		}
 		ent++;
 	}
 
 	return (ENXIO);
 }
 
 /*********************************************************************
  *  Device initialization routine
  *
  *  The attach entry point is called when the driver is being loaded.
  *  This routine identifies the type of hardware, allocates all resources
  *  and initializes the hardware.
  *
  *  Error unwinding uses two labels: err_late releases the TX/RX
  *  structures, hardware control and the ifnet, then falls through to
  *  err_pci, which releases the PCI resources, the multicast array and
  *  the core lock.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 em_attach(device_t dev)
 {
 	struct adapter	*adapter;
 	int		error = 0;
 
 	INIT_DEBUGOUT("em_attach: begin");
 
 	adapter = device_get_softc(dev);
 	adapter->dev = adapter->osdep.dev = dev;
 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* SYSCTL stuff */
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_sysctl_nvm_info, "I", "NVM Information");
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_sysctl_debug_info, "I", "Debug Information");
 
 	/* The timer callout is tied to the core mutex */
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
 	em_identify_hardware(adapter);
 
 	/* Setup PCI resources */
 	if (em_allocate_pci_resources(adapter)) {
 		device_printf(dev, "Allocation of PCI resources failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	/*
 	** For ICH8 and family we need to
 	** map the flash memory, and this
 	** must happen after the MAC is
 	** identified
 	*/
 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
 	    (adapter->hw.mac.type == e1000_ich9lan) ||
 	    (adapter->hw.mac.type == e1000_ich10lan) ||
 	    (adapter->hw.mac.type == e1000_pchlan) ||
 	    (adapter->hw.mac.type == e1000_pch2lan)) {
 		int rid = EM_BAR_TYPE_FLASH;
 		adapter->flash = bus_alloc_resource_any(dev,
 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 		if (adapter->flash == NULL) {
 			device_printf(dev, "Mapping of Flash failed\n");
 			error = ENXIO;
 			goto err_pci;
 		}
 		/* This is used in the shared code */
 		adapter->hw.flash_address = (u8 *)adapter->flash;
 		adapter->osdep.flash_bus_space_tag =
 		    rman_get_bustag(adapter->flash);
 		adapter->osdep.flash_bus_space_handle =
 		    rman_get_bushandle(adapter->flash);
 	}
 
 	/* Do Shared Code initialization */
 	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
 		device_printf(dev, "Setup of Shared code failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	e1000_get_bus_info(&adapter->hw);
 
 	/* Set up some sysctls for the tunable interrupt delays */
 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
 	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
 	    "receive interrupt delay limit in usecs",
 	    &adapter->rx_abs_int_delay,
 	    E1000_REGISTER(&adapter->hw, E1000_RADV),
 	    em_rx_abs_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
 	    "transmit interrupt delay limit in usecs",
 	    &adapter->tx_abs_int_delay,
 	    E1000_REGISTER(&adapter->hw, E1000_TADV),
 	    em_tx_abs_int_delay_dflt);
 
 	/* Sysctl for limiting the amount of work done in the taskqueue */
 	em_add_rx_process_limit(adapter, "rx_processing_limit",
 	    "max number of rx packets to process", &adapter->rx_process_limit,
 	    em_rx_process_limit);
 
 	/*
 	 * Sysctl for setting the interface flow control.  The description
 	 * used to be a copy of the rx_processing_limit text above.
 	 */
 	em_set_flow_cntrl(adapter, "flow_control",
 	    "configure flow control",
 	    &adapter->fc_setting, em_fc_setting);
 
 	/*
 	 * Validate number of transmit and receive descriptors. It
 	 * must not exceed hardware maximum, and must be multiple
 	 * of E1000_DBA_ALIGN.
 	 */
 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
 		    EM_DEFAULT_TXD, em_txd);
 		adapter->num_tx_desc = EM_DEFAULT_TXD;
 	} else
 		adapter->num_tx_desc = em_txd;
 
 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
 		    EM_DEFAULT_RXD, em_rxd);
 		adapter->num_rx_desc = EM_DEFAULT_RXD;
 	} else
 		adapter->num_rx_desc = em_rxd;
 
 	adapter->hw.mac.autoneg = DO_AUTO_NEG;
 	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
 	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 
 	/* Copper options */
 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
 		adapter->hw.phy.mdix = AUTO_ALL_MODES;
 		adapter->hw.phy.disable_polarity_correction = FALSE;
 		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
 	}
 
 	/*
 	 * Set the frame limits assuming
 	 * standard ethernet sized frames.
 	 */
 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
 	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
 
 	/*
 	 * This controls when hardware reports transmit completion
 	 * status.
 	 */
 	adapter->hw.mac.report_tx_early = 1;
 
 	/*
 	** Get queue/ring memory
 	*/
 	if (em_allocate_queues(adapter)) {
 		error = ENOMEM;
 		goto err_pci;
 	}
 
 	/* Allocate multicast array memory. */
 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
 	if (adapter->mta == NULL) {
 		device_printf(dev, "Can not allocate multicast setup array\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/* Check SOL/IDER usage */
 	if (e1000_check_reset_block(&adapter->hw))
 		device_printf(dev, "PHY reset is blocked"
 		    " due to SOL/IDER session.\n");
 
 	/*
 	** Start from a known state, this is
 	** important in reading the nvm and
 	** mac from that.
 	*/
 	e1000_reset_hw(&adapter->hw);
 
 	/* Make sure we have a good EEPROM before we read from it */
 	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
 		/*
 		** Some PCI-E parts fail the first check due to
 		** the link being in sleep state, call it again,
 		** if it fails a second time its a real issue.
 		*/
 		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
 			device_printf(dev,
 			    "The EEPROM Checksum Is Not Valid\n");
 			error = EIO;
 			goto err_late;
 		}
 	}
 
 	/* Copy the permanent MAC address out of the EEPROM */
 	if (e1000_read_mac_addr(&adapter->hw) < 0) {
 		device_printf(dev, "EEPROM read error while reading MAC"
 		    " address\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
 		device_printf(dev, "Invalid MAC address\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/*
 	**  Do interrupt configuration
 	*/
 	if (adapter->msix > 1) /* Do MSIX */
 		error = em_allocate_msix(adapter);
 	else  /* MSI or Legacy */
 		error = em_allocate_legacy(adapter);
 	if (error)
 		goto err_late;
 
 	/*
 	 * Get Wake-on-Lan and Management info for later use
 	 */
 	em_get_wakeup(dev);
 
 	/*
 	 * Setup OS specific network interface.
 	 *
 	 * 'error' is still 0 at this point, so a failure here must set
 	 * it explicitly; otherwise the unwind below would release every
 	 * resource and then report a successful attach.
 	 */
 	if (em_setup_interface(dev, adapter) != 0) {
 		error = ENXIO;
 		goto err_late;
 	}
 
 	em_reset(adapter);
 
 	/* Initialize statistics */
 	em_update_stats_counters(adapter);
 
 	adapter->hw.mac.get_link_status = 1;
 	em_update_link_status(adapter);
 
 	/* Register for VLAN events */
 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 
 	em_add_hw_stats(adapter);
 
 	/* Non-AMT based hardware can now take control from firmware */
 	if (adapter->has_manage && !adapter->has_amt)
 		em_get_hw_control(adapter);
 
 	/* Tell the stack that the interface is not active */
 	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	adapter->led_dev = led_create(em_led_func, adapter,
 	    device_get_nameunit(dev));
 
 	INIT_DEBUGOUT("em_attach: end");
 
 	return (0);
 
 err_late:
 	em_free_transmit_structures(adapter);
 	em_free_receive_structures(adapter);
 	em_release_hw_control(adapter);
 	if (adapter->ifp != NULL)
 		if_free(adapter->ifp);
 err_pci:
 	em_free_pci_resources(adapter);
 	free(adapter->mta, M_DEVBUF);
 	EM_CORE_LOCK_DESTROY(adapter);
 
 	return (error);
 }
 
 /*********************************************************************
  *  Device removal routine
  *
  *  The detach entry point is called when the driver is being removed.
  *  This routine stops the adapter and deallocates all the resources
  *  that were allocated for driver operation.
  *
  *  Detach is refused with EBUSY while a vlan trunk is still attached
  *  to the interface.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 em_detach(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct ifnet	*ifp = adapter->ifp;
 
 	INIT_DEBUGOUT("em_detach: begin");
 
 	/* Make sure VLANS are not using driver */
 	if (adapter->ifp->if_vlantrunk != NULL) {
 		device_printf(dev,"Vlan in use, detach first\n");
 		return (EBUSY);
 	}
 
 #ifdef DEVICE_POLLING
 	if (ifp->if_capenable & IFCAP_POLLING)
 		ether_poll_deregister(ifp);
 #endif
 
 	if (adapter->led_dev != NULL)
 		led_destroy(adapter->led_dev);
 
 	/* Flag the detach (em_ioctl checks in_detach) and stop the adapter */
 	EM_CORE_LOCK(adapter);
 	adapter->in_detach = 1;
 	em_stop(adapter);
 	EM_CORE_UNLOCK(adapter);
 	/*
 	 * NOTE(review): the core lock is destroyed here, but
 	 * adapter->timer was initialized against core_mtx in em_attach()
 	 * and is only drained further down -- confirm this order is safe.
 	 */
 	EM_CORE_LOCK_DESTROY(adapter);
 
 	e1000_phy_hw_reset(&adapter->hw);
 
 	em_release_manageability(adapter);
 	em_release_hw_control(adapter);
 
 	/* Unregister VLAN events */
 	if (adapter->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
 	if (adapter->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
 
 	ether_ifdetach(adapter->ifp);
 	callout_drain(&adapter->timer);
 
 	em_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	if_free(ifp);
 
 	em_free_transmit_structures(adapter);
 	em_free_receive_structures(adapter);
 
 	/*
 	 * NOTE(review): second em_release_hw_control() call, now after
 	 * em_free_pci_resources() -- confirm it does not touch device
 	 * registers that have already been released.
 	 */
 	em_release_hw_control(adapter);
 	free(adapter->mta, M_DEVBUF);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Shutdown entry point
  *
  *  A shutdown request is handled exactly like a suspend request; the
  *  result of em_suspend() is handed straight back to the caller.
  *
  **********************************************************************/
 
 static int
 em_shutdown(device_t dev)
 {
 	int	rc;
 
 	rc = em_suspend(dev);
 	return (rc);
 }
 
 /*
  * Suspend/resume device methods.
  */
 /*
  * Suspend handler.  With the core lock held it calls
  * em_release_manageability(), em_release_hw_control() and
  * em_enable_wakeup(), in that order, then returns the result of
  * bus_generic_suspend().
  */
 static int
 em_suspend(device_t dev)
 {
 	struct adapter	*sc;
 
 	sc = device_get_softc(dev);
 
 	EM_CORE_LOCK(sc);
 	em_release_manageability(sc);
 	em_release_hw_control(sc);
 	em_enable_wakeup(dev);
 	EM_CORE_UNLOCK(sc);
 
 	return (bus_generic_suspend(dev));
 }
 
 /*
  * Resume handler.  Re-initializes the adapter and its manageability
  * setup under the core lock, kicks the transmit path once the lock is
  * dropped, and returns the result of bus_generic_resume().
  */
 static int
 em_resume(device_t dev)
 {
 	struct adapter	*sc;
 	struct ifnet	*ifn;
 
 	sc = device_get_softc(dev);
 	ifn = sc->ifp;
 
 	EM_CORE_LOCK(sc);
 	em_init_locked(sc);
 	em_init_manageability(sc);
 	EM_CORE_UNLOCK(sc);
 
 	/* Restart transmission outside the core lock */
 	em_start(ifn);
 
 	return (bus_generic_resume(dev));
 }
 
 
 /*********************************************************************
  *  Transmit entry point
  *
  *  em_start is called by the stack to initiate a transmit.
  *  The driver will remain in this routine as long as there are
  *  packets to transmit and transmit resources are available.
  *  In case resources are not available stack is notified and
  *  the packet is requeued.
  **********************************************************************/
 
 #ifdef EM_MULTIQUEUE
 /*
  * Multiqueue transmit worker for one TX ring.
  *
  * 'm' may be NULL, in which case only packets already queued on the
  * ring's buf_ring (txr->br) are sent.  The visible caller,
  * em_mq_start(), holds the ring's TX lock around this call.
  *
  * Returns 0, or the error from drbr_enqueue(); see the note in the
  * transmit loop about how an em_xmit() error is reported.
  */
 static int
 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
 {
 	struct adapter  *adapter = txr->adapter;
         struct mbuf     *next;
         int             err = 0, enq = 0;
 
 	/*
 	 * Interface not running, marked OACTIVE, or link down: do not
 	 * touch the hardware, just park the packet on the ring queue.
 	 */
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
 		if (m != NULL)
 			err = drbr_enqueue(ifp, txr->br, m);
 		return (err);
 	}
 
         /* Call cleanup if number of TX descriptors low */
 	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
 		em_txeof(txr);
 
 	/*
 	 * Choose the first packet to send: with no new packet, take one
 	 * from the queue; if the queue requires it, enqueue 'm' first and
 	 * then dequeue; otherwise send 'm' directly.
 	 */
 	enq = 0;
 	if (m == NULL) {
 		next = drbr_dequeue(ifp, txr->br);
 	} else if (drbr_needs_enqueue(ifp, txr->br)) {
 		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
 			return (err);
 		next = drbr_dequeue(ifp, txr->br);
 	} else
 		next = m;
 
 	/* Process the queue */
 	while (next != NULL) {
 		/*
 		 * em_xmit() takes the mbuf by reference and may leave it
 		 * NULL on failure; only a surviving mbuf is put back.
 		 * Note the em_xmit() error is then replaced by the
 		 * result of drbr_enqueue().
 		 */
 		if ((err = em_xmit(txr, &next)) != 0) {
                         if (next != NULL)
                                 err = drbr_enqueue(ifp, txr->br, next);
                         break;
 		}
 		enq++;
 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                         break;
 		/* Too few descriptors left: flag OACTIVE and stop */
 		if (txr->tx_avail < EM_MAX_SCATTER) {
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
 		next = drbr_dequeue(ifp, txr->br);
 	}
 
 	if (enq > 0) {
                 /* Set the watchdog */
                 txr->queue_status = EM_QUEUE_WORKING;
 		txr->watchdog_time = ticks;
 	}
 	return (err);
 }
 
 /*
 ** Multiqueue capable stack interface
 **
 ** Always uses the first TX ring.  If the ring lock cannot be taken
 ** without blocking, the packet is only queued on the ring's buf_ring;
 ** otherwise em_mq_start_locked() runs under the lock.
 */
 static int
 em_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct tx_ring	*ring = sc->tx_rings;
 	int		rc;
 
 	/* Lock is busy: enqueue and let the current holder send it */
 	if (!EM_TX_TRYLOCK(ring))
 		return (drbr_enqueue(ifp, ring->br, m));
 
 	rc = em_mq_start_locked(ifp, ring, m);
 	EM_TX_UNLOCK(ring);
 
 	return (rc);
 }
 
 /*
 ** Flush all ring buffers
 **
 ** Frees every mbuf still queued on each TX ring's buf_ring, taking
 ** the ring's TX lock while draining it, then calls if_qflush().
 */
 static void
 em_qflush(struct ifnet *ifp)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct tx_ring	*ring;
 	struct mbuf	*pkt;
 	int		q;
 
 	for (q = 0; q < sc->num_queues; q++) {
 		ring = &sc->tx_rings[q];
 
 		EM_TX_LOCK(ring);
 		while ((pkt = buf_ring_dequeue_sc(ring->br)) != NULL)
 			m_freem(pkt);
 		EM_TX_UNLOCK(ring);
 	}
 
 	if_qflush(ifp);
 }
 
 #endif /* EM_MULTIQUEUE */
 
 /*
  * Drain the interface send queue (ifp->if_snd) onto one TX ring.
  * The TX lock of 'txr' must be held (asserted below).  Nothing is
  * sent unless the interface is RUNNING, not OACTIVE, and link is up.
  */
 static void
 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct mbuf	*pkt;
 	int		state;
 
 	EM_TX_LOCK_ASSERT(txr);
 
 	state = ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 	if (state != IFF_DRV_RUNNING || !sc->link_active)
 		return;
 
 	/* Reclaim completed descriptors first when running low */
 	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
 		em_txeof(txr);
 
 	for (;;) {
 		if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			break;
 
 		/* Not enough descriptors left: flag OACTIVE and stop */
 		if (txr->tx_avail < EM_MAX_SCATTER) {
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
 
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
 		if (pkt == NULL)
 			break;
 
 		/*
 		 * em_xmit() may replace the mbuf pointer, or set it to
 		 * NULL on failure; a NULL mbuf cannot be requeued.
 		 */
 		if (em_xmit(txr, &pkt) != 0) {
 			if (pkt != NULL) {
 				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
 			}
 			break;
 		}
 
 		/* Hand a copy of the frame to any BPF listener */
 		ETHER_BPF_MTAP(ifp, pkt);
 
 		/* Arm the watchdog in case the hardware stalls */
 		txr->watchdog_time = ticks;
 		txr->queue_status = EM_QUEUE_WORKING;
 	}
 }
 
 /*
  * Transmit kick: when the interface is marked running, take the TX
  * lock of the first ring and run em_start_locked() on it.
  */
 static void
 em_start(struct ifnet *ifp)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct tx_ring	*ring = sc->tx_rings;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	EM_TX_LOCK(ring);
 	em_start_locked(ifp, ring);
 	EM_TX_UNLOCK(ring);
 }
 
 /*********************************************************************
  *  Ioctl entry point
  *
  *  em_ioctl is called when the user wants to configure the
  *  interface.
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	/* The same 'data' pointer is viewed as an ifreq or an ifaddr below. */
 	struct ifreq *ifr = (struct ifreq *)data;
 #ifdef INET
 	struct ifaddr *ifa = (struct ifaddr *)data;
 #endif
 	int error = 0;
 
 	/* When adapter->in_detach is set, do nothing and report success. */
 	if (adapter->in_detach)
 		return (error);
 
 	switch (command) {
 	case SIOCSIFADDR:
 #ifdef INET
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			/*
 			 * XXX
 			 * Since resetting hardware takes a very long time
 			 * and results in link renegotiation we only
 			 * initialize the hardware only when it is absolutely
 			 * required.
 			 */
 			ifp->if_flags |= IFF_UP;
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				EM_CORE_LOCK(adapter);
 				em_init_locked(adapter);
 				EM_CORE_UNLOCK(adapter);
 			}
 			arp_ifinit(ifp, ifa);
 		} else
 #endif
 			/* Non-AF_INET (or no INET support): defer to ether_ioctl(). */
 			error = ether_ioctl(ifp, command, data);
 		break;
 	case SIOCSIFMTU:
 	    {
 		int max_frame_size;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
 
 		EM_CORE_LOCK(adapter);
 		/* Pick the largest frame size allowed for this MAC type. */
 		switch (adapter->hw.mac.type) {
 		case e1000_82571:
 		case e1000_82572:
 		case e1000_ich9lan:
 		case e1000_ich10lan:
 		case e1000_pch2lan:
 		case e1000_82574:
 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
 			max_frame_size = 9234;
 			break;
 		case e1000_pchlan:
 			max_frame_size = 4096;
 			break;
 			/* Adapters that do not support jumbo frames */
 		case e1000_82583:
 		case e1000_ich8lan:
 			max_frame_size = ETHER_MAX_LEN;
 			break;
 		default:
 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
 		}
 		/*
 		 * The requested MTU excludes the Ethernet header and CRC,
 		 * so compare it against the frame limit minus both.
 		 */
 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
 		    ETHER_CRC_LEN) {
 			EM_CORE_UNLOCK(adapter);
 			error = EINVAL;
 			break;
 		}
 
 		/* Accept the MTU, record the new frame size and re-init. */
 		ifp->if_mtu = ifr->ifr_mtu;
 		adapter->max_frame_size =
 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 		em_init_locked(adapter);
 		EM_CORE_UNLOCK(adapter);
 		break;
 	    }
 	case SIOCSIFFLAGS:
 		IOCTL_DEBUGOUT("ioctl rcv'd:\
 		    SIOCSIFFLAGS (Set Interface Flags)");
 		EM_CORE_LOCK(adapter);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				/*
 				 * Already running: only reprogram promiscuous
 				 * state if PROMISC/ALLMULTI changed since the
 				 * flags were last saved in adapter->if_flags.
 				 */
 				if ((ifp->if_flags ^ adapter->if_flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					em_disable_promisc(adapter);
 					em_set_promisc(adapter);
 				}
 			} else
 				em_init_locked(adapter);
 		} else
 			/* Interface marked down: stop it if it was running. */
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				em_stop(adapter);
 		/* Remember the flags for the next change comparison. */
 		adapter->if_flags = ifp->if_flags;
 		EM_CORE_UNLOCK(adapter);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
 		/* The multicast table is only rewritten while running. */
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			EM_CORE_LOCK(adapter);
 			em_disable_intr(adapter);
 			em_set_multi(adapter);
 #ifdef DEVICE_POLLING
 			/* Leave interrupts off when polling is enabled. */
 			if (!(ifp->if_capenable & IFCAP_POLLING))
 #endif
 				em_enable_intr(adapter);
 			EM_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFMEDIA:
 		/*
 		** As the speed/duplex settings are being
 		** changed, we need to reset the PHY.
 		*/
 		adapter->hw.phy.reset_disable = FALSE;
 		/* Check SOL/IDER usage */
 		EM_CORE_LOCK(adapter);
 		if (e1000_check_reset_block(&adapter->hw)) {
 			EM_CORE_UNLOCK(adapter);
 			device_printf(adapter->dev, "Media change is"
 			    " blocked due to SOL/IDER session.\n");
 			/* Note: 'error' is still 0 on this path. */
 			break;
 		}
 		EM_CORE_UNLOCK(adapter);
 		/* falls thru */
 	case SIOCGIFMEDIA:
 		IOCTL_DEBUGOUT("ioctl rcv'd: \
 		    SIOCxIFMEDIA (Get/Set Interface Media)");
 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
 		break;
 	case SIOCSIFCAP:
 	    {
 		int mask, reinit;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
 		reinit = 0;
 		/* Bits set in 'mask' are the capabilities being toggled. */
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 #ifdef DEVICE_POLLING
 		if (mask & IFCAP_POLLING) {
 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 				/* Register first; bail out if that fails. */
 				error = ether_poll_register(em_poll, ifp);
 				if (error)
 					return (error);
 				EM_CORE_LOCK(adapter);
 				em_disable_intr(adapter);
 				ifp->if_capenable |= IFCAP_POLLING;
 				EM_CORE_UNLOCK(adapter);
 			} else {
 				error = ether_poll_deregister(ifp);
 				/* Enable interrupt even in error case */
 				EM_CORE_LOCK(adapter);
 				em_enable_intr(adapter);
 				ifp->if_capenable &= ~IFCAP_POLLING;
 				EM_CORE_UNLOCK(adapter);
 			}
 		}
 #endif
 		/* Each of the next four toggles requests a re-init. */
 		if (mask & IFCAP_HWCSUM) {
 			ifp->if_capenable ^= IFCAP_HWCSUM;
 			reinit = 1;
 		}
 		if (mask & IFCAP_TSO4) {
 			ifp->if_capenable ^= IFCAP_TSO4;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWFILTER) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 			reinit = 1;
 		}
 		/* WOL bits are toggled without requesting a re-init. */
 		if ((mask & IFCAP_WOL) &&
 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
 			if (mask & IFCAP_WOL_MCAST)
 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
 			if (mask & IFCAP_WOL_MAGIC)
 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
 		}
 		/* em_init() takes the core lock itself. */
 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
 			em_init(adapter);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	    }
 
 	default:
 		/* Anything not handled above goes to ether_ioctl(). */
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 
 /*********************************************************************
  *  Init entry point
  *
  *  This routine is used in two ways. It is used by the stack as
  *  init entry point in network interface structure. It is also used
  *  by the driver as a hw/sw initialization routine to get to a
  *  consistent state.
  *
  *  No value is returned; if receive setup fails the adapter is stopped.
  **********************************************************************/
 
 static void
 em_init_locked(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 	u32		pba;
 
 	INIT_DEBUGOUT("em_init: begin");
 
 	/* Caller must already hold the core lock. */
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	/* Mask interrupts and stop the timer callout before reprogramming. */
 	em_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/*
 	 * Packet Buffer Allocation (PBA)
 	 * Writing PBA sets the receive portion of the buffer
 	 * the remainder is used for the transmit buffer.
 	 */
 	switch (adapter->hw.mac.type) {
 	/* Total Packet Buffer on these is 48K */
 	case e1000_82571:
 	case e1000_82572:
 	case e1000_80003es2lan:
 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
 		break;
 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
 		break;
 	case e1000_82574:
 	case e1000_82583:
 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
 		break;
 	case e1000_ich8lan:
 		pba = E1000_PBA_8K;
 		break;
 	case e1000_ich9lan:
 	case e1000_ich10lan:
-	case e1000_pchlan:
 		pba = E1000_PBA_10K;
 		break;
+	case e1000_pchlan:
 	case e1000_pch2lan:
 		pba = E1000_PBA_26K;
 		break;
 	default:
 		/* Other MAC types: choose by the configured frame size. */
 		if (adapter->max_frame_size > 8192)
 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
 		else
 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
 	}
 
 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
 	
 	/* Get the latest mac address, User can use a LAA */
         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
               ETHER_ADDR_LEN);
 
 	/* Put the address into the Receive Address Array */
 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	/*
 	 * With the 82571 adapter, RAR[0] may be overwritten
 	 * when the other port is reset, we make a duplicate
 	 * in RAR[14] for that eventuality, this assures
 	 * the interface continues to function.
 	 */
 	if (adapter->hw.mac.type == e1000_82571) {
 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
 		    E1000_RAR_ENTRIES - 1);
 	}
 
 	/* Initialize the hardware */
 	em_reset(adapter);
 	em_update_link_status(adapter);
 
 	/* Setup VLAN support, basic and offload if available */
 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
 
 	/* Set hardware offload abilities */
 	/* if_hwassist is rebuilt from scratch from the enabled capabilities. */
 	ifp->if_hwassist = 0;
 	if (ifp->if_capenable & IFCAP_TXCSUM)
 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
 	if (ifp->if_capenable & IFCAP_TSO4)
 		ifp->if_hwassist |= CSUM_TSO;
 
 	/* Configure for OS presence */
 	em_init_manageability(adapter);
 
 	/* Prepare transmit descriptors and buffers */
 	em_setup_transmit_structures(adapter);
 	em_initialize_transmit_unit(adapter);
 
 	/* Setup Multicast table */
 	em_set_multi(adapter);
 
 	/*
 	** Figure out the desired mbuf
 	** pool for doing jumbos
 	*/
 	if (adapter->max_frame_size <= 2048)
 		adapter->rx_mbuf_sz = MCLBYTES;
 	else if (adapter->max_frame_size <= 4096)
 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
 	else
 		adapter->rx_mbuf_sz = MJUM9BYTES;
 
 	/* Prepare receive descriptors and buffers */
 	/*
 	 * On failure the adapter is stopped and we return early,
 	 * before IFF_DRV_RUNNING is set or interrupts are re-enabled.
 	 */
 	if (em_setup_receive_structures(adapter)) {
 		device_printf(dev, "Could not setup receive structures\n");
 		em_stop(adapter);
 		return;
 	}
 	em_initialize_receive_unit(adapter);
 
 	/* Use real VLAN Filter support? */
 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 			/* Use real VLAN Filter support */
 			em_setup_vlan_hw_support(adapter);
 		else {
 			/* Tagging without filtering: just set CTRL.VME. */
 			u32 ctrl;
 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
 			ctrl |= E1000_CTRL_VME;
 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
 		}
 	}
 
 	/* Don't lose promiscuous settings */
 	em_set_promisc(adapter);
 
 	/* Mark the interface running and clear any transmit stall flag. */
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	/* Re-arm the timer callout (hz ticks from now). */
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
 
 	/* MSI/X configuration for 82574 */
 	if (adapter->hw.mac.type == e1000_82574) {
 		int tmp;
 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 		tmp |= E1000_CTRL_EXT_PBA_CLR;
 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
 		/* Set the IVAR - interrupt vector routing. */
 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
 	}
 
 #ifdef DEVICE_POLLING
 	/*
 	 * Only enable interrupts if we are not polling, make sure
 	 * they are off otherwise.
 	 */
 	if (ifp->if_capenable & IFCAP_POLLING)
 		em_disable_intr(adapter);
 	else
 #endif /* DEVICE_POLLING */
 		em_enable_intr(adapter);
 
 	/* AMT based hardware can now take control from firmware */
 	if (adapter->has_manage && adapter->has_amt)
 		em_get_hw_control(adapter);
 
 	/* Don't reset the phy next time init gets called */
 	adapter->hw.phy.reset_disable = TRUE;
 }
 
 /*
  * Unlocked wrapper around em_init_locked(): 'arg' is the adapter
  * softc; the core lock is held for the duration of the init.
  */
 static void
 em_init(void *arg)
 {
 	struct adapter	*adapter;
 
 	adapter = arg;
 
 	EM_CORE_LOCK(adapter);
 	em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 }
 
 
 #ifdef DEVICE_POLLING
 /*********************************************************************
  *
  *  Legacy polling routine: note this only works with single queue
  *
  *********************************************************************/
 static int
 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 	u32		icr;
 	int		rx_done;
 
 	EM_CORE_LOCK(adapter);
 
 	/* Nothing to poll while the interface is not running. */
 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 		EM_CORE_UNLOCK(adapter);
 		return (0);
 	}
 
 	/*
 	 * On a status-check poll, look at the interrupt cause register
 	 * and refresh link state if a link event is flagged.  The timer
 	 * callout is stopped around the update and re-armed afterwards.
 	 */
 	if (cmd == POLL_AND_CHECK_STATUS) {
 		icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 		if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) != 0) {
 			callout_stop(&adapter->timer);
 			adapter->hw.mac.get_link_status = 1;
 			em_update_link_status(adapter);
 			callout_reset(&adapter->timer, hz,
 			    em_local_timer, adapter);
 		}
 	}
 
 	EM_CORE_UNLOCK(adapter);
 
 	/* Receive side: process up to 'count' and collect rx_done. */
 	em_rxeof(rxr, count, &rx_done);
 
 	/* Transmit side: clean up, then restart if work is queued. */
 	EM_TX_LOCK(txr);
 	em_txeof(txr);
 #ifdef EM_MULTIQUEUE
 	if (!drbr_empty(ifp, txr->br))
 		em_mq_start_locked(ifp, txr, NULL);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		em_start_locked(ifp, txr);
 #endif
 	EM_TX_UNLOCK(txr);
 
 	return (rx_done);
 }
 #endif /* DEVICE_POLLING */
 
 
 /*********************************************************************
  *
  *  Fast Legacy/MSI Combined Interrupt Service routine  
  *
  *********************************************************************/
 /*
  * Interrupt filter for legacy/MSI interrupts.
  *
  * Reads the interrupt cause register and returns FILTER_STRAY when
  * the interrupt does not appear to be ours; otherwise it masks the
  * device interrupts, queues the deferred que task (and the link task
  * on a link event), counts receive overruns, and returns
  * FILTER_HANDLED.
  */
 static int
 em_irq_fast(void *arg)
 {
 	struct adapter	*adapter = arg;
 	u32		reg_icr;
 
 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	/* Hot eject?  */
 	if (reg_icr == 0xffffffff)
 		return FILTER_STRAY;
 
 	/* Definitely not our interrupt.  */
 	if (reg_icr == 0x0)
 		return FILTER_STRAY;
 
 	/*
 	 * Starting with the 82571 chip, bit 31 should be used to
 	 * determine whether the interrupt belongs to us.
 	 */
 	if (adapter->hw.mac.type >= e1000_82571 &&
 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
 		return FILTER_STRAY;
 
 	/* Mask further interrupts; the que task re-enables them. */
 	em_disable_intr(adapter);
 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
 
 	/* Link status change */
 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
 		adapter->hw.mac.get_link_status = 1;
 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
 	}
 
 	if (reg_icr & E1000_ICR_RXO)
 		adapter->rx_overruns++;
 	return FILTER_HANDLED;
 }
 
 /* Combined RX/TX handler, used by Legacy and MSI */
 static void
 em_handle_que(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 	bool		rx_more;
 
 	/* Not running: nothing to service, just unmask interrupts. */
 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 		em_enable_intr(adapter);
 		return;
 	}
 
 	rx_more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
 
 	/* TX: clean, restart transmission if packets wait, clean again. */
 	EM_TX_LOCK(txr);
 	em_txeof(txr);
 #ifdef EM_MULTIQUEUE
 	if (!drbr_empty(ifp, txr->br))
 		em_mq_start_locked(ifp, txr, NULL);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		em_start_locked(ifp, txr);
 #endif
 	em_txeof(txr);
 	EM_TX_UNLOCK(txr);
 
 	/*
 	 * More RX work pending: reschedule ourselves and leave
 	 * interrupts masked until a later pass finishes.
 	 */
 	if (rx_more) {
 		taskqueue_enqueue(adapter->tq, &adapter->que_task);
 		return;
 	}
 
 	em_enable_intr(adapter);
 }
 
 
 /*********************************************************************
  *
  *  MSIX Interrupt Service Routines
  *
  **********************************************************************/
 static void
 em_msix_tx(void *arg)
 {
 	struct tx_ring	*txr = arg;
 	struct adapter	*adapter = txr->adapter;
 	bool		pending;
 
 	++txr->tx_irq;
 
 	EM_TX_LOCK(txr);
 	pending = em_txeof(txr);
 	EM_TX_UNLOCK(txr);
 
 	if (!pending) {
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
 		return;
 	}
 
 	/* em_txeof() reported more work: defer it to the TX task. */
 	taskqueue_enqueue(txr->tq, &txr->tx_task);
 }
 
 /*********************************************************************
  *
  *  MSIX RX Interrupt Service routine
  *
  **********************************************************************/
 
 static void
 em_msix_rx(void *arg)
 {
 	struct rx_ring	*rxr = arg;
 	struct adapter	*adapter = rxr->adapter;
 	bool		pending;
 
 	++rxr->rx_irq;
 
 	pending = em_rxeof(rxr, adapter->rx_process_limit, NULL);
 	if (!pending) {
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
 		return;
 	}
 
 	/* em_rxeof() reported more work: defer it to the RX task. */
 	taskqueue_enqueue(rxr->tq, &rxr->rx_task);
 }
 
 /*********************************************************************
  *
  *  MSIX Link Fast Interrupt Service routine
  *
  **********************************************************************/
 static void
 em_msix_link(void *arg)
 {
 	struct adapter	*adapter = arg;
 	u32		icr;
 
 	++adapter->link_irq;
 	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) == 0) {
 		/* No link event flagged: just unmask the link interrupt. */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 		    EM_MSIX_LINK | E1000_IMS_LSC);
 		return;
 	}
 
 	/* Link event: request a status refresh and handle it now. */
 	adapter->hw.mac.get_link_status = 1;
 	em_handle_link(adapter, 0);
 }
 
 /*
  * Deferred RX task: keeps rescheduling itself while em_rxeof()
  * reports more work, then unmasks this ring's interrupt.
  */
 static void
 em_handle_rx(void *context, int pending)
 {
 	struct rx_ring	*rxr = context;
 	struct adapter	*adapter = rxr->adapter;
 
 	if (em_rxeof(rxr, adapter->rx_process_limit, NULL)) {
 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
 	} else {
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
 	}
 }
 
 /*
  * Deferred TX task: with the ring's TX lock held, clean completed
  * descriptors, restart transmission if packets are queued, clean
  * again, and unmask this ring's interrupt.
  */
 static void
 em_handle_tx(void *context, int pending)
 {
 	struct tx_ring	*txr;
 	struct adapter	*adapter;
 	struct ifnet	*ifp;
 
 	txr = context;
 	adapter = txr->adapter;
 	ifp = adapter->ifp;
 
 	EM_TX_LOCK(txr);
 
 	em_txeof(txr);
 #ifdef EM_MULTIQUEUE
 	if (!drbr_empty(ifp, txr->br))
 		em_mq_start_locked(ifp, txr, NULL);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		em_start_locked(ifp, txr);
 #endif
 	em_txeof(txr);
 
 	/* The interrupt is re-enabled before the lock is dropped. */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
 
 	EM_TX_UNLOCK(txr);
 }
 
 /*
  * Link task: when the interface is running, refresh link status
  * under the core lock (timer callout stopped around the update and
  * re-armed after) and unmask the link interrupt.
  */
 static void
 em_handle_link(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 	struct ifnet	*ifp = adapter->ifp;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		EM_CORE_LOCK(adapter);
 
 		callout_stop(&adapter->timer);
 		em_update_link_status(adapter);
 		callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 		    EM_MSIX_LINK | E1000_IMS_LSC);
 
 		EM_CORE_UNLOCK(adapter);
 	}
 }
 
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called whenever the user queries the status of
  *  the interface using ifconfig.
  *
  **********************************************************************/
 static void
 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u_char		fiber_media = IFM_1000_SX;
 	int		is_optical;
 
 	INIT_DEBUGOUT("em_media_status: begin");
 
 	EM_CORE_LOCK(adapter);
 	em_update_link_status(adapter);
 
 	/* Status is always valid; media is always Ethernet. */
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	/* Speed/duplex details are only reported while the link is up. */
 	if (adapter->link_active) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 
 		is_optical =
 		    (adapter->hw.phy.media_type == e1000_media_type_fiber) ||
 		    (adapter->hw.phy.media_type ==
 		    e1000_media_type_internal_serdes);
 
 		if (is_optical) {
 			/* Fiber and serdes are reported as 1000SX full duplex. */
 			ifmr->ifm_active |= fiber_media | IFM_FDX;
 		} else {
 			switch (adapter->link_speed) {
 			case 1000:
 				ifmr->ifm_active |= IFM_1000_T;
 				break;
 			case 100:
 				ifmr->ifm_active |= IFM_100_TX;
 				break;
 			case 10:
 				ifmr->ifm_active |= IFM_10_T;
 				break;
 			}
 			ifmr->ifm_active |=
 			    (adapter->link_duplex == FULL_DUPLEX) ?
 			    IFM_FDX : IFM_HDX;
 		}
 	}
 
 	EM_CORE_UNLOCK(adapter);
 }
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called when the user changes speed/duplex using
  *  media/mediaopt option with ifconfig.
  *
  **********************************************************************/
 static int
 em_media_change(struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct ifmedia	*ifm = &adapter->media;
 	int		full_duplex;
 
 	INIT_DEBUGOUT("em_media_change: begin");
 
 	/* Only Ethernet media words are accepted. */
 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	EM_CORE_LOCK(adapter);
 
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 	case IFM_AUTO:
 		/* Autonegotiate with the default advertisement mask. */
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 		break;
 
 	case IFM_1000_LX:
 	case IFM_1000_SX:
 	case IFM_1000_T:
 		/* Gigabit: autonegotiate, advertising 1000 full only. */
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
 		break;
 
 	case IFM_100_TX:
 		/* Forced 100Mb: duplex taken from the media word. */
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		full_duplex = ((ifm->ifm_media & IFM_GMASK) == IFM_FDX);
 		adapter->hw.mac.forced_speed_duplex = full_duplex ?
 		    ADVERTISE_100_FULL : ADVERTISE_100_HALF;
 		break;
 
 	case IFM_10_T:
 		/* Forced 10Mb: duplex taken from the media word. */
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		full_duplex = ((ifm->ifm_media & IFM_GMASK) == IFM_FDX);
 		adapter->hw.mac.forced_speed_duplex = full_duplex ?
 		    ADVERTISE_10_FULL : ADVERTISE_10_HALF;
 		break;
 
 	default:
 		/* Only logged; the re-init below still runs. */
 		device_printf(adapter->dev, "Unsupported media type\n");
 	}
 
 	em_init_locked(adapter);
 
 	EM_CORE_UNLOCK(adapter);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  This routine maps the mbufs to tx descriptors.
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 /*
  * Map one mbuf chain onto TX descriptors of 'txr' and hand it to
  * the hardware by advancing the tail register.
  *
  * m_headp is in/out: the chain may be replaced (m_dup/m_pullup/
  * m_defrag).  On a non-zero return, *m_headp == NULL means the chain
  * has already been freed and must not be requeued; a non-NULL
  * *m_headp (ENOMEM from the DMA load, or ENOBUFS when too few
  * descriptors are free) is still owned by the caller, who may retry.
  */
 static int
 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
 {
 	struct adapter		*adapter = txr->adapter;
 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
 	bus_dmamap_t		map;
 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
 	struct e1000_tx_desc	*ctxd = NULL;
 	struct mbuf		*m_head;
 	struct ether_header	*eh;
 	struct ip		*ip = NULL;
 	struct tcphdr		*tp = NULL;
 	u32			txd_upper, txd_lower, txd_used;
 	int			ip_off, poff;
 	int			nsegs, i, j, first, last = 0;
 	int			error, do_tso, tso_desc = 0;
 
 	m_head = *m_headp;
 	txd_upper = txd_lower = txd_used = 0;
 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
 	ip_off = poff = 0;
 
 	/*
 	 * Intel recommends entire IP/TCP header length reside in a single
 	 * buffer. If multiple descriptors are used to describe the IP and
 	 * TCP header, each descriptor should describe one or more
 	 * complete headers; descriptors referencing only parts of headers
 	 * are not supported. If all layer headers are not coalesced into
 	 * a single buffer, each buffer should not cross a 4KB boundary,
 	 * or be larger than the maximum read request size.
 	 * Controller also requires modifying IP/TCP header to make TSO work
 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
 	 * IP/TCP header into a single buffer to meet the requirement of
 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
 	 * which also has similar restrictions.
 	 */
 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
 		if (do_tso || (m_head->m_next != NULL &&
 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
 			if (M_WRITABLE(*m_headp) == 0) {
 				/* Replace a read-only chain with a copy. */
 				m_head = m_dup(*m_headp, M_DONTWAIT);
 				m_freem(*m_headp);
 				if (m_head == NULL) {
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 				*m_headp = m_head;
 			}
 		}
 		/*
 		 * XXX
 		 * Assume IPv4, we don't have TSO/checksum offload support
 		 * for IPv6 yet.
 		 */
 		ip_off = sizeof(struct ether_header);
 		m_head = m_pullup(m_head, ip_off);
 		if (m_head == NULL) {
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		eh = mtod(m_head, struct ether_header *);
 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 			ip_off = sizeof(struct ether_vlan_header);
 			m_head = m_pullup(m_head, ip_off);
 			if (m_head == NULL) {
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 		}
 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
 		if (m_head == NULL) {
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 		poff = ip_off + (ip->ip_hl << 2);
 		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
 		if (m_head == NULL) {
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		if (do_tso) {
 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
 			/*
 			 * TSO workaround:
 			 *   pull 4 more bytes of data into it.
 			 */
 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
 			if (m_head == NULL) {
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 			/* Re-derive header pointers after each m_pullup(). */
 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 			ip->ip_len = 0;
 			ip->ip_sum = 0;
 			/*
 			 * The pseudo TCP checksum does not include TCP payload
 			 * length so driver should recompute the checksum here
 			 * what hardware expect to see. This is adherence of
 			 * Microsoft's Large Send specification.
 			 */
 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
 			if (m_head == NULL) {
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
 			if (m_head == NULL) {
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 		}
 		*m_headp = m_head;
 	}
 
 	/*
 	 * Map the packet for DMA
 	 *
 	 * Capture the first descriptor index,
 	 * this descriptor will have the index
 	 * of the EOP which is the only one that
 	 * now gets a DONE bit writeback.
 	 */
 	first = txr->next_avail_desc;
 	tx_buffer = &txr->tx_buffers[first];
 	tx_buffer_mapped = tx_buffer;
 	map = tx_buffer->map;
 
 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 	/*
 	 * There are two types of errors we can (try) to handle:
 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
 	 *   out of segments.  Defragment the mbuf chain and try again.
 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
 	 *   at this point in time.  Defer sending and try again later.
 	 * All other errors, in particular EINVAL, are fatal and prevent the
 	 * mbuf chain from ever going through.  Drop it and report error.
 	 */
 	if (error == EFBIG) {
 		struct mbuf *m;
 
 		m = m_defrag(*m_headp, M_DONTWAIT);
 		if (m == NULL) {
 			adapter->mbuf_alloc_failed++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		*m_headp = m;
 
 		/* Try it again */
 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 		if (error) {
 			adapter->no_tx_dma_setup++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (error);
 		}
 	} else if (error == ENOMEM) {
 		/* Transient shortage: leave the chain for the caller to retry. */
 		adapter->no_tx_dma_setup++;
 		return (error);
 	} else if (error != 0) {
 		/*
 		 * Fatal mapping error: this chain can never be sent, so
 		 * drop it here instead of letting the caller requeue it
 		 * and retry forever.
 		 */
 		adapter->no_tx_dma_setup++;
 		m_freem(*m_headp);
 		*m_headp = NULL;
 		return (error);
 	}
 
 	/*
 	 * TSO Hardware workaround, if this packet is not
 	 * TSO, and is only a single descriptor long, and
 	 * it follows a TSO burst, then we need to add a
 	 * sentinel descriptor to prevent premature writeback.
 	 */
 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
 		if (nsegs == 1)
 			tso_desc = TRUE;
 		txr->tx_tso = FALSE;
 	}
 
 	/* Not enough free descriptors: undo the mapping, caller may retry. */
 	if (nsegs > (txr->tx_avail - 2)) {
 		txr->no_desc_avail++;
 		bus_dmamap_unload(txr->txtag, map);
 		return (ENOBUFS);
 	}
 	m_head = *m_headp;
 
 	/* Do hardware assists */
 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 		em_tso_setup(txr, m_head, ip_off, ip, tp,
 		    &txd_upper, &txd_lower);
 		/* we need to make a final sentinel transmit desc */
 		tso_desc = TRUE;
 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
 		em_transmit_checksum_setup(txr, m_head,
 		    ip_off, ip, &txd_upper, &txd_lower);
 
 	i = txr->next_avail_desc;
 
 	/* Set up our transmit descriptors */
 	for (j = 0; j < nsegs; j++) {
 		bus_size_t seg_len;
 		bus_addr_t seg_addr;
 
 		tx_buffer = &txr->tx_buffers[i];
 		ctxd = &txr->tx_base[i];
 		seg_addr = segs[j].ds_addr;
 		seg_len  = segs[j].ds_len;
 		/*
 		** TSO Workaround:
 		** If this is the last descriptor, we want to
 		** split it so we have a small final sentinel
 		*/
 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
 			seg_len -= 4;
 			ctxd->buffer_addr = htole64(seg_addr);
 			ctxd->lower.data = htole32(
 			adapter->txd_cmd | txd_lower | seg_len);
 			ctxd->upper.data =
 			    htole32(txd_upper);
 			if (++i == adapter->num_tx_desc)
 				i = 0;
 			/* Now make the sentinel */
 			++txd_used; /* using an extra txd */
 			ctxd = &txr->tx_base[i];
 			tx_buffer = &txr->tx_buffers[i];
 			ctxd->buffer_addr =
 			    htole64(seg_addr + seg_len);
 			ctxd->lower.data = htole32(
 			adapter->txd_cmd | txd_lower | 4);
 			ctxd->upper.data =
 			    htole32(txd_upper);
 			last = i;
 			if (++i == adapter->num_tx_desc)
 				i = 0;
 		} else {
 			ctxd->buffer_addr = htole64(seg_addr);
 			ctxd->lower.data = htole32(
 			adapter->txd_cmd | txd_lower | seg_len);
 			ctxd->upper.data =
 			    htole32(txd_upper);
 			last = i;
 			if (++i == adapter->num_tx_desc)
 				i = 0;
 		}
 		tx_buffer->m_head = NULL;
 		tx_buffer->next_eop = -1;
 	}
 
 	txr->next_avail_desc = i;
 	txr->tx_avail -= nsegs;
 	if (tso_desc) /* TSO used an extra for sentinel */
 		txr->tx_avail -= txd_used;
 
 	if (m_head->m_flags & M_VLANTAG) {
 		/* Set the vlan id. */
 		ctxd->upper.fields.special =
 		    htole16(m_head->m_pkthdr.ether_vtag);
 		/* Tell hardware to add tag */
 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
 	}
 
 	/*
 	 * The mbuf is recorded on the last buffer used; the loaded DMA
 	 * map is swapped from the first buffer to that last buffer.
 	 */
 	tx_buffer->m_head = m_head;
 	tx_buffer_mapped->map = tx_buffer->map;
 	tx_buffer->map = map;
 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
 
 	/*
 	 * Last Descriptor of Packet
 	 * needs End Of Packet (EOP)
 	 * and Report Status (RS)
 	 */
 	ctxd->lower.data |=
 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
 	/*
 	 * Keep track in the first buffer which
 	 * descriptor will be written back
 	 */
 	tx_buffer = &txr->tx_buffers[first];
 	tx_buffer->next_eop = last;
 	/* Update the watchdog time early and often */
 	txr->watchdog_time = ticks;
 
 	/*
 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
 	 * that this frame is available to transmit.
 	 */
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
 
 	return (0);
 }
 
 /*
  * Program the receive-control promiscuous bits from the interface
  * flags.  The register is only written when IFF_PROMISC or
  * IFF_ALLMULTI is set; otherwise it is left untouched.
  */
 static void
 em_set_promisc(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	u32		rctl;
 
 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 
 	if (ifp->if_flags & IFF_PROMISC) {
 		/* Unicast and multicast promiscuous */
 		rctl |= E1000_RCTL_UPE;
 		rctl |= E1000_RCTL_MPE;
 		/* Turn this on if you want to see bad packets */
 		if (em_debug_sbp)
 			rctl |= E1000_RCTL_SBP;
 	} else if (ifp->if_flags & IFF_ALLMULTI) {
 		/* Multicast promiscuous only */
 		rctl = (rctl | E1000_RCTL_MPE) & ~E1000_RCTL_UPE;
 	} else {
 		return;
 	}
 
 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 }
 
 /*
  * Clear the UPE, MPE and SBP bits in the receive control register.
  */
 static void
 em_disable_promisc(struct adapter *adapter)
 {
 	u32	rctl;
 
 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_SBP);
 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 }
 
 
 /*********************************************************************
  *  Multicast Update
  *
  *  This routine is called whenever multicast address list is updated.
  *
  *  The AF_LINK entries of the interface's multicast list are copied
  *  into adapter->mta and passed to e1000_update_mc_addr_list().  When
  *  MAX_NUM_MULTICAST_ADDRESSES or more entries are collected, the
  *  E1000_RCTL_MPE bit is set in RCTL instead of programming the list.
  *
  **********************************************************************/
 
 static void
 em_set_multi(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	struct ifmultiaddr *ifma;
 	u32 reg_rctl = 0;
 	u8  *mta; /* Multicast array memory */
 	int mcnt = 0;
 
 	IOCTL_DEBUGOUT("em_set_multi: begin");
 
 	/* Start from an all-zero address table */
 	mta = adapter->mta;
 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
 
 	/*
 	 * 82542 rev 2 only: set E1000_RCTL_RST (and clear MWI when the
 	 * saved PCI command word has it enabled) before the update; the
 	 * matching undo is at the bottom of this function.
 	 */
 	if (adapter->hw.mac.type == e1000_82542 && 
 	    adapter->hw.revision_id == E1000_REVISION_2) {
 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
 			e1000_pci_clear_mwi(&adapter->hw);
 		reg_rctl |= E1000_RCTL_RST;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
 		msec_delay(5);
 	}
 
 	/* Walk the multicast list under the address-list lock */
 #if __FreeBSD_version < 800000
 	IF_ADDR_LOCK(ifp);
 #else
 	if_maddr_rlock(ifp);
 #endif
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 
 		/* Table is full: stop copying, mcnt stays at the maximum */
 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
 			break;
 
 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
 		mcnt++;
 	}
 #if __FreeBSD_version < 800000
 	IF_ADDR_UNLOCK(ifp);
 #else
 	if_maddr_runlock(ifp);
 #endif
 	/*
 	 * A full table (mcnt at the maximum) sets E1000_RCTL_MPE rather
 	 * than loading the address list.
 	 */
 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		reg_rctl |= E1000_RCTL_MPE;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
 	} else
 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
 
 	/* 82542 rev 2 only: clear E1000_RCTL_RST again and restore MWI */
 	if (adapter->hw.mac.type == e1000_82542 && 
 	    adapter->hw.revision_id == E1000_REVISION_2) {
 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		reg_rctl &= ~E1000_RCTL_RST;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
 		msec_delay(5);
 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
 			e1000_pci_set_mwi(&adapter->hw);
 	}
 }
 
 
 /*********************************************************************
  *  Timer routine
  *
  *  This routine checks for link status and updates statistics.
  *  It re-arms its own callout for hz ticks later unless a TX queue
  *  is found in the EM_QUEUE_HUNG state, in which case the state is
  *  logged and the adapter is reinitialized via em_init_locked().
  *
  *  Must be called with the CORE lock held (asserted below).
  *
  **********************************************************************/
 
 static void
 em_local_timer(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	em_update_link_status(adapter);
 	em_update_stats_counters(adapter);
 
 	/* Reset LAA into RAR[0] on 82571 */
 	if ((adapter->hw.mac.type == e1000_82571) &&
 	    e1000_get_laa_state_82571(&adapter->hw))
 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	/*
 	** Don't do TX watchdog check if we've been paused
 	*/
 	if (adapter->pause_frames) {
 		adapter->pause_frames = 0;
 		goto out;
 	}
 	/*
 	** Check on the state of the TX queue(s), this
 	** can be done without the lock because its RO
 	** and the HUNG state will be static if set.
 	*/
 	for (int i = 0; i < adapter->num_queues; i++, txr++)
 		if (txr->queue_status == EM_QUEUE_HUNG)
 			goto hung;
 out:
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 	return;
 hung:
 	/* Looks like we're hung; txr points at the hung ring */
 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
 	device_printf(adapter->dev,
 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
 	    "Next TX to Clean = %d\n",
 	    txr->me, txr->tx_avail, txr->next_to_clean);
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->watchdog_events++;
 	/*
 	 * The queue scan above runs without taking the TX lock, so there
 	 * is no TX lock to drop here before reinitializing.
 	 */
 	em_init_locked(adapter);
 }
 
 
 /*
  * Determine the current link state for the adapter's media type and,
  * when it differs from adapter->link_active, update the cached link
  * speed/duplex/baudrate and report the change to the network stack
  * with if_link_state_change().
  */
 static void
 em_update_link_status(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	struct ifnet *ifp = adapter->ifp;
 	device_t dev = adapter->dev;
 	struct tx_ring *txr = adapter->tx_rings;
 	u32 link_check = 0;
 
 	/* Get the cached link value or read phy for real */
 	switch (hw->phy.media_type) {
 	case e1000_media_type_copper:
 		if (hw->mac.get_link_status) {
 			/* Do the work to read phy */
 			e1000_check_for_link(hw);
 			link_check = !hw->mac.get_link_status;
 			if (link_check) /* ESB2 fix */
 				e1000_cfg_on_link_up(hw);
 		} else
 			link_check = TRUE;
 		break;
 	case e1000_media_type_fiber:
 		/* Link is taken from the LU bit of the STATUS register */
 		e1000_check_for_link(hw);
 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                  E1000_STATUS_LU);
 		break;
 	case e1000_media_type_internal_serdes:
 		e1000_check_for_link(hw);
 		link_check = adapter->hw.mac.serdes_has_link;
 		break;
 	default:
 	case e1000_media_type_unknown:
 		/* link_check stays 0 (no link) */
 		break;
 	}
 
 	/* Now check for a transition */
 	if (link_check && (adapter->link_active == 0)) {
 		/* Down -> up */
 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
 		    &adapter->link_duplex);
 		/* Check if we must disable SPEED_MODE bit on PCI-E */
 		if ((adapter->link_speed != SPEED_1000) &&
 		    ((hw->mac.type == e1000_82571) ||
 		    (hw->mac.type == e1000_82572))) {
 			int tarc0;
 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
 			tarc0 &= ~SPEED_MODE_BIT;
 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
 		}
 		if (bootverbose)
 			device_printf(dev, "Link is up %d Mbps %s\n",
 			    adapter->link_speed,
 			    ((adapter->link_duplex == FULL_DUPLEX) ?
 			    "Full Duplex" : "Half Duplex"));
 		adapter->link_active = 1;
 		adapter->smartspeed = 0;
 		/* link_speed is printed above as Mbps; scale to bits/s */
 		ifp->if_baudrate = adapter->link_speed * 1000000;
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else if (!link_check && (adapter->link_active == 1)) {
 		/* Up -> down */
 		ifp->if_baudrate = adapter->link_speed = 0;
 		adapter->link_duplex = 0;
 		if (bootverbose)
 			device_printf(dev, "Link is Down\n");
 		adapter->link_active = 0;
 		/* Link down, disable watchdog */
 		for (int i = 0; i < adapter->num_queues; i++, txr++)
 			txr->queue_status = EM_QUEUE_IDLE;
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 	}
 }
 
 /*********************************************************************
  *
  *  This routine disables all traffic on the adapter by issuing a
  *  global reset on the MAC.  It also disables interrupts, stops the
  *  timer callout, clears IFF_DRV_RUNNING/IFF_DRV_OACTIVE and turns
  *  the LED off.
  *
  *  This routine must be called with the CORE lock held (asserted
  *  below).  Each TX ring lock is taken and released internally while
  *  the ring's queue_status is reset.
  *  NOTE(review): no TX/RX buffer deallocation is visible in this
  *  function itself.
  **********************************************************************/
 
 static void
 em_stop(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	INIT_DEBUGOUT("em_stop: begin");
 
 	em_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/* Tell the stack that the interface is no longer active */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
         /* Unarm watchdog timer. */
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		EM_TX_LOCK(txr);
 		txr->queue_status = EM_QUEUE_IDLE;
 		EM_TX_UNLOCK(txr);
 	}
 
 	/* Reset the MAC and write 0 to the WUC register */
 	e1000_reset_hw(&adapter->hw);
 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
 
 	e1000_led_off(&adapter->hw);
 	e1000_cleanup_led(&adapter->hw);
 }
 
 
 /*********************************************************************
  *
  *  Determine hardware revision.
  *
  **********************************************************************/
 static void
 em_identify_hardware(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	/* Make sure our PCI config space has the necessary stuff set */
 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
 		device_printf(dev, "Memory Access and/or Bus Master bits "
 		    "were not set!\n");
 		adapter->hw.bus.pci_cmd_word |=
 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
 		pci_write_config(dev, PCIR_COMMAND,
 		    adapter->hw.bus.pci_cmd_word, 2);
 	}
 
 	/* Save off the information about this board */
 	adapter->hw.vendor_id = pci_get_vendor(dev);
 	adapter->hw.device_id = pci_get_device(dev);
 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
 	adapter->hw.subsystem_vendor_id =
 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
 	adapter->hw.subsystem_device_id =
 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
 
 	/* Do Shared Code Init and Setup */
 	if (e1000_set_mac_type(&adapter->hw)) {
 		device_printf(dev, "Setup init failure\n");
 		return;
 	}
 }
 
 /*
  * Map BAR 0 as the register window, record its bus tag/handle in
  * adapter->osdep, default to one queue and run em_setup_msix().
  *
  * Returns 0 on success or ENXIO if the memory BAR cannot be allocated.
  */
 static int
 em_allocate_pci_resources(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	int		bar_rid = PCIR_BAR(0);
 
 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &bar_rid, RF_ACTIVE);
 	if (adapter->memory == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: memory\n");
 		return (ENXIO);
 	}
 
 	/* Bus-space accessors for the mapped registers */
 	adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory);
 	adapter->osdep.mem_bus_space_handle =
 	    rman_get_bushandle(adapter->memory);
 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
 
 	/* One queue unless something later says otherwise */
 	adapter->num_queues = 1;
 
 	/* Try MSI/X first, then MSI; the result is the vector count */
 	adapter->msix = em_setup_msix(adapter);
 
 	adapter->hw.back = &adapter->osdep;
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Setup the Legacy or MSI Interrupt handler
  *
  **********************************************************************/
 int
 em_allocate_legacy(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	int error, rid = 0;
 
 	/* Manually turn off all interrupts */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	if (adapter->msix == 1) /* using MSI */
 		rid = 1;
 	/* We allocate a single interrupt resource */
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Allocate a fast interrupt and the associated
 	 * deferred processing contexts.
 	 */
 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
 	    taskqueue_thread_enqueue, &adapter->tq);
 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
 	    device_get_nameunit(adapter->dev));
 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 			    "handler: %d\n", error);
 		taskqueue_free(adapter->tq);
 		adapter->tq = NULL;
 		return (error);
 	}
 	
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Setup the MSIX Interrupt handlers
  *   This is not really Multiqueue, rather
  *   its just multiple interrupt vectors.
  *
  *  For each queue one RX and one TX vector are allocated (in that
  *  order), followed by a single link vector.  The IRQ rid of each
  *  vector is its vector number plus one.  adapter->ivars accumulates
  *  the per-vector values built below.
  *
  *  Returns 0 on success, ENXIO when an IRQ resource cannot be
  *  allocated, or the bus_setup_intr() error.
  *  NOTE(review): the early returns do not release what was already
  *  allocated here; cleanup is presumably left to the caller.
  *
  **********************************************************************/
 int
 em_allocate_msix(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct		tx_ring *txr = adapter->tx_rings;
 	struct		rx_ring *rxr = adapter->rx_rings;
 	int		error, rid, vector = 0;
 
 
 	/* Make sure all interrupts are disabled */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	/* First set up ring resources */
 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
 
 		/* RX ring */
 		rid = vector + 1;
 
 		rxr->res = bus_alloc_resource_any(dev,
 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
 		if (rxr->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
 			    "RX MSIX Interrupt %d\n", i);
 			return (ENXIO);
 		}
 		if ((error = bus_setup_intr(dev, rxr->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
 		    rxr, &rxr->tag)) != 0) {
 			device_printf(dev, "Failed to register RX handler");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
 #endif
 		rxr->msix = vector++; /* NOTE increment vector for TX */
 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &rxr->tq);
 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
 		    device_get_nameunit(adapter->dev));
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 20 and 21
 		** are for RX0 and RX1, note this has
 		** NOTHING to do with the MSIX vector
 		*/
 		rxr->ims = 1 << (20 + i);
 		/* RX entry for queue i: (8 | vector) at bit offset i * 4 */
 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
 
 		/* TX ring */
 		rid = vector + 1;
 		txr->res = bus_alloc_resource_any(dev,
 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
 		if (txr->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
 			    "TX MSIX Interrupt %d\n", i);
 			return (ENXIO);
 		}
 		if ((error = bus_setup_intr(dev, txr->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
 		    txr, &txr->tag)) != 0) {
 			device_printf(dev, "Failed to register TX handler");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
 #endif
 		txr->msix = vector++; /* Increment vector for next pass */
 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &txr->tq);
 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
 		    device_get_nameunit(adapter->dev));
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 22 and 23
 		** are for TX0 and TX1, note this has
 		** NOTHING to do with the MSIX vector
 		*/
 		txr->ims = 1 << (22 + i);
 		/* TX entry for queue i: (8 | vector) at bit offset 8 + i * 4 */
 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
 	}
 
 	/* Link interrupt */
 	/*
 	 * rid still holds the last TX rid from the loop above, so the
 	 * increment yields vector + 1 as long as the loop ran at least once.
 	 */
 	++rid;
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
 	if (!adapter->res) {
 		device_printf(dev,"Unable to allocate "
 		    "bus resource: Link interrupt [%d]\n", rid);
 		return (ENXIO);
         }
 	/* Set the link handler function */
 	error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 	    em_msix_link, adapter, &adapter->tag);
 	if (error) {
 		/*
 		 * NOTE(review): adapter->res is cleared here without a
 		 * visible bus_release_resource() -- confirm this is intended.
 		 */
 		adapter->res = NULL;
 		device_printf(dev, "Failed to register LINK handler");
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->linkvec = vector;
 	/* Link entry: (8 | vector) at bit offset 16, plus bit 31 */
 	adapter->ivars |=  (8 | vector) << 16;
 	adapter->ivars |= 0x80000000;
 
 	return (0);
 }
 
 
 static void
 em_free_pci_resources(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct tx_ring	*txr;
 	struct rx_ring	*rxr;
 	int		rid;
 
 
 	/*
 	** Release all the queue interrupt resources:
 	*/
 	for (int i = 0; i < adapter->num_queues; i++) {
 		txr = &adapter->tx_rings[i];
 		rxr = &adapter->rx_rings[i];
 		/* an early abort? */
 		if ((txr == NULL) || (rxr == NULL))
 			break;
 		rid = txr->msix +1;
 		if (txr->tag != NULL) {
 			bus_teardown_intr(dev, txr->res, txr->tag);
 			txr->tag = NULL;
 		}
 		if (txr->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ,
 			    rid, txr->res);
 		rid = rxr->msix +1;
 		if (rxr->tag != NULL) {
 			bus_teardown_intr(dev, rxr->res, rxr->tag);
 			rxr->tag = NULL;
 		}
 		if (rxr->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ,
 			    rid, rxr->res);
 	}
 
         if (adapter->linkvec) /* we are doing MSIX */
                 rid = adapter->linkvec + 1;
         else
                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
 
 	if (adapter->tag != NULL) {
 		bus_teardown_intr(dev, adapter->res, adapter->tag);
 		adapter->tag = NULL;
 	}
 
 	if (adapter->res != NULL)
 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
 
 
 	if (adapter->msix)
 		pci_release_msi(dev);
 
 	if (adapter->msix_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
 
 	if (adapter->memory != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(0), adapter->memory);
 
 	if (adapter->flash != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    EM_FLASH, adapter->flash);
 }
 
 /*
  * Setup MSI or MSI/X
  *
  * Returns the number of interrupt messages allocated: 3 when MSIX was
  * set up (82574 with em_enable_msix set), 1 when a single MSI message
  * was allocated, or 0 when neither worked and a legacy IRQ must be
  * used.  When MSIX cannot be fully set up, the MSIX table mapping is
  * released again and the function falls back to MSI.
  */
 static int
 em_setup_msix(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	int val = 0;
 
 
 	/*
 	** Setup MSI/X for Hartwell: tests have shown
 	** use of two queues to be unstable, and to
 	** provide no great gain anyway, so we simply
 	** separate the interrupts and use a single queue.
 	*/
 	if ((adapter->hw.mac.type == e1000_82574) &&
 	    (em_enable_msix == TRUE)) {
 		/* Map the MSIX BAR */
 		int rid = PCIR_BAR(EM_MSIX_BAR);
 		adapter->msix_mem = bus_alloc_resource_any(dev,
 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 		if (!adapter->msix_mem) {
 			/* May not be enabled */
 			device_printf(adapter->dev,
 			    "Unable to map MSIX table \n");
 			goto msi;
 		}
 		val = pci_msix_count(dev);
 		if (val < 3) {
 			bus_release_resource(dev, SYS_RES_MEMORY,
 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
 			adapter->msix_mem = NULL;
 			device_printf(adapter->dev,
 			    "MSIX: insufficient vectors, using MSI\n");
 			goto msi;
 		}
 		/* One RX, one TX and one link vector */
 		val = 3;
 		adapter->num_queues = 1;
 		if (pci_alloc_msix(dev, &val) == 0) {
 			if (val == 3) {
 				device_printf(adapter->dev,
 				    "Using MSIX interrupts "
 				    "with %d vectors\n", val);
 				return (val);
 			}
 			/* Granted fewer than the three vectors needed */
 			pci_release_msi(dev);
 		}
 		/*
 		 * MSIX could not be set up: do not report it as in use.
 		 * Drop the table mapping and try plain MSI instead.
 		 */
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
 		adapter->msix_mem = NULL;
 		device_printf(adapter->dev,
 		    "MSIX: allocation failed, using MSI\n");
 	}
 msi:
 	val = pci_msi_count(dev);
 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
 		adapter->msix = 1;
 		device_printf(adapter->dev,"Using an MSI interrupt\n");
 		return (val);
 	}
 	/* Should only happen due to manual configuration */
 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  Initialize the hardware to a configuration
  *  as specified by the adapter structure.
  *
  *  Programs the flow-control parameters in hw->fc, issues a global
  *  reset, then runs e1000_init_hw().  If that fails, a message is
  *  printed and the function returns early; no status is returned to
  *  the caller.
  *
  **********************************************************************/
 static void
 em_reset(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct ifnet	*ifp = adapter->ifp;
 	struct e1000_hw	*hw = &adapter->hw;
 	u16		rx_buffer_size;
 
 	INIT_DEBUGOUT("em_reset: begin");
 
 	/* Set up smart power down as default off on newer adapters. */
 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
 	    hw->mac.type == e1000_82572)) {
 		u16 phy_tmp = 0;
 
 		/* Speed up time to link by disabling smart power down. */
 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
 		phy_tmp &= ~IGP02E1000_PM_SPD;
 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
 	}
 
 	/*
 	 * These parameters control the automatic generation (Tx) and
 	 * response (Rx) to Ethernet PAUSE frames.
 	 * - High water mark should allow for at least two frames to be
 	 *   received after sending an XOFF.
 	 * - Low water mark works best when it is very near the high water mark.
 	 *   This allows the receiver to restart by sending XON when it has
 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
 	 *   restart after one full frame is pulled from the buffer. There
 	 *   could be several smaller frames in the buffer and if so they will
 	 *   not trigger the XON until their total number reduces the buffer
 	 *   by 1500.
 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
 	 */
 	/*
 	 * The low 16 bits of PBA are multiplied by 1024 here.
 	 * NOTE(review): rx_buffer_size is a u16, so a value of 64 or more
 	 * in those bits would not fit after the shift -- confirm the range.
 	 */
 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
 
 	hw->fc.high_water = rx_buffer_size -
 	    roundup2(adapter->max_frame_size, 1024);
 	hw->fc.low_water = hw->fc.high_water - 1500;
 
 	if (hw->mac.type == e1000_80003es2lan)
 		hw->fc.pause_time = 0xFFFF;
 	else
 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
 
 	hw->fc.send_xon = TRUE;
 
         /* Set Flow control, use the tunable location if sane */
 	hw->fc.requested_mode = adapter->fc_setting;
 
 	/* Workaround: no TX flow ctrl for PCH */
 	if (hw->mac.type == e1000_pchlan)
                 hw->fc.requested_mode = e1000_fc_rx_pause;
 
 	/* Override - settings for PCH2LAN, ya its magic :) */
 	if (hw->mac.type == e1000_pch2lan) {
 		hw->fc.high_water = 0x5C20;
 		hw->fc.low_water = 0x5048;
 		hw->fc.pause_time = 0x0650;
 		hw->fc.refresh_time = 0x0400;
 		/* Jumbos need adjusted PBA */
 		if (ifp->if_mtu > ETHERMTU)
 			E1000_WRITE_REG(hw, E1000_PBA, 12);
 		else
 			E1000_WRITE_REG(hw, E1000_PBA, 26);
 	}
 
 	/* Issue a global reset */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
+	em_disable_aspm(adapter);
 
 	if (e1000_init_hw(hw) < 0) {
 		device_printf(dev, "Hardware Initialization Failed\n");
 		return;
 	}
 
 	/* Post-init: program the VLAN ethertype, then query PHY and link */
 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
 	e1000_get_phy_info(hw);
 	e1000_check_for_link(hw);
 	return;
 }
 
 /*********************************************************************
  *
  *  Setup networking device structure and register an interface.
  *
  *  Allocates the ifnet, fills in its callbacks and send-queue limits,
  *  attaches it as an Ethernet interface, advertises the capabilities
  *  set below and registers the supported media types.
  *
  *  Returns 0 on success or -1 if the ifnet cannot be allocated.
  *
  **********************************************************************/
 static int
 em_setup_interface(device_t dev, struct adapter *adapter)
 {
 	struct ifnet   *ifp;
 
 	INIT_DEBUGOUT("em_setup_interface: begin");
 
 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "can not allocate ifnet structure\n");
 		return (-1);
 	}
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_init =  em_init;
 	ifp->if_softc = adapter;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = em_ioctl;
 	ifp->if_start = em_start;
 	/* Send queue depth is one less than the TX descriptor count */
 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	ether_ifattach(ifp, adapter->hw.mac.addr);
 
 	/* Start with no capabilities, then add them below */
 	ifp->if_capabilities = ifp->if_capenable = 0;
 
 #ifdef EM_MULTIQUEUE
 	/* Multiqueue tx functions */
 	ifp->if_transmit = em_mq_start;
 	ifp->if_qflush = em_qflush;
 #endif	
 
 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
 
 	/* Enable TSO by default, can disable with ifconfig */
 	ifp->if_capabilities |= IFCAP_TSO4;
 	ifp->if_capenable |= IFCAP_TSO4;
 
 	/*
 	 * Tell the upper layer(s) we
 	 * support full VLAN capability
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
 
 	/*
 	** Dont turn this on by default, if vlans are
 	** created on another pseudo device (eg. lagg)
 	** then vlan events are not passed thru, breaking
 	** operation, but with HW FILTER off it works. If
 	** using vlans directly on the em driver you can
 	** enable this and get full hardware tag filtering.
 	*/
 	/* Advertised only: not added to if_capenable */
 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 #ifdef DEVICE_POLLING
 	ifp->if_capabilities |= IFCAP_POLLING;
 #endif
 
 	/* Enable only WOL MAGIC by default */
 	if (adapter->wol) {
 		ifp->if_capabilities |= IFCAP_WOL;
 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
 	}
 		
 	/*
 	 * Specify the media types supported by this adapter and register
 	 * callbacks to update media and link information
 	 */
 	ifmedia_init(&adapter->media, IFM_IMASK,
 	    em_media_change, em_media_status);
 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
 		u_char fiber_type = IFM_1000_SX;	/* default type */
 
 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
 			    0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
 	} else {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
 			    0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
 			    0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
 			    0, NULL);
 		/* The 1000baseT entries are skipped for the e1000_phy_ife PHY type */
 		if (adapter->hw.phy.type != e1000_phy_ife) {
 			ifmedia_add(&adapter->media,
 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
 			ifmedia_add(&adapter->media,
 				IFM_ETHER | IFM_1000_T, 0, NULL);
 		}
 	}
 	/* Autoselect is always offered and is the initial selection */
 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
 	return (0);
 }
 
 
 /*
  * Manage DMA'able memory.
  */
 /*
  * Callback handed to bus_dmamap_load(): when no error is reported,
  * store the bus address of the first segment into the bus_addr_t
  * that arg points to.  On error the destination is left untouched.
  */
 static void
 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	bus_addr_t *paddr = arg;
 
 	if (error == 0)
 		*paddr = segs[0].ds_addr;
 }
 
 /*
  * Allocate and load a single-segment DMA area of 'size' bytes.
  *
  * On success dma->dma_tag, dma->dma_map, dma->dma_vaddr and
  * dma->dma_paddr describe the area and 0 is returned.  On failure
  * everything acquired so far is released, dma->dma_tag and
  * dma->dma_map are set to NULL, and a non-zero error is returned.
  */
 static int
 em_dma_malloc(struct adapter *adapter, bus_size_t size,
         struct em_dma_alloc *dma, int mapflags)
 {
 	int error;
 
 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
 				size,			/* maxsize */
 				1,			/* nsegments */
 				size,			/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockarg */
 				&dma->dma_tag);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dma_tag_create failed: %d\n",
 		    __func__, error);
 		goto fail_0;
 	}
 
 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
 		    __func__, (uintmax_t)size, error);
 		/* Nothing was allocated: only the tag has to go */
 		goto fail_1;
 	}
 
 	/* The load callback fills in dma_paddr; 0 means it never did */
 	dma->dma_paddr = 0;
 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
 	if (error || dma->dma_paddr == 0) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamap_load failed: %d\n",
 		    __func__, error);
 		/* Never report success after tearing the area down */
 		if (error == 0)
 			error = ENOMEM;
 		goto fail_3;
 	}
 
 	return (0);
 
 fail_3:
 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 fail_1:
 	bus_dma_tag_destroy(dma->dma_tag);
 fail_0:
 	dma->dma_map = NULL;
 	dma->dma_tag = NULL;
 
 	return (error);
 }
 
 static void
 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
 {
 	if (dma->dma_tag == NULL)
 		return;
 	if (dma->dma_map != NULL) {
 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 		dma->dma_map = NULL;
 	}
 	bus_dma_tag_destroy(dma->dma_tag);
 	dma->dma_tag = NULL;
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for the transmit and receive rings, and then
  *  the descriptors associated with each, called only once at attach.
  *
  *  Returns 0 on success or ENOMEM when any allocation fails; on
  *  failure the descriptor areas, buf rings and ring arrays allocated
  *  here are released before returning.
  *
  **********************************************************************/
 static int
 em_allocate_queues(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct tx_ring		*txr = NULL;
 	struct rx_ring		*rxr = NULL;
 	int rsize, tsize, error = E1000_SUCCESS;
 	int txconf = 0, rxconf = 0;
 
 
 	/* Allocate the TX ring struct memory */
 	if (!(adapter->tx_rings =
 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate TX ring memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	/* Now allocate the RX */
 	if (!(adapter->rx_rings =
 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate RX ring memory\n");
 		error = ENOMEM;
 		goto rx_fail;
 	}
 
 	tsize = roundup2(adapter->num_tx_desc *
 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
 	/*
 	 * Now set up the TX queues, txconf is needed to handle the
 	 * possibility that things fail midcourse and we need to
 	 * undo memory gracefully
 	 */
 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
 		/* Set up some basics */
 		txr = &adapter->tx_rings[i];
 		txr->adapter = adapter;
 		txr->me = i;
 
 		/* Initialize the TX lock */
 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
 		    device_get_nameunit(dev), txr->me);
 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
 
 		if (em_dma_malloc(adapter, tsize,
 			&txr->txdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate TX Descriptor memory\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 		bzero((void *)txr->tx_base, tsize);
 
 		if (em_allocate_transmit_buffers(txr)) {
 			device_printf(dev,
 			    "Critical Failure setting up transmit buffers\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 #if __FreeBSD_version >= 800000
 		/* Allocate a buf ring */
 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
 		    M_WAITOK, &txr->tx_mtx);
 #endif
 	}
 
 	/*
 	 * Next the RX queues...
 	 */
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
 		rxr = &adapter->rx_rings[i];
 		rxr->adapter = adapter;
 		rxr->me = i;
 
 		/* Initialize the RX lock, named after this RX ring's index */
 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
 		    device_get_nameunit(dev), rxr->me);
 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		if (em_dma_malloc(adapter, rsize,
 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate RxDescriptor memory\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
 		bzero((void *)rxr->rx_base, rsize);
 
 		/* Allocate receive buffers for the ring*/
 		if (em_allocate_receive_buffers(rxr)) {
 			device_printf(dev,
 			    "Critical Failure setting up receive buffers\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 	}
 
 	return (0);
 
 err_rx_desc:
 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
 		em_dma_free(adapter, &rxr->rxdma);
 err_tx_desc:
 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
 		em_dma_free(adapter, &txr->txdma);
 	free(adapter->rx_rings, M_DEVBUF);
 rx_fail:
 #if __FreeBSD_version >= 800000
 	/*
 	 * Walk the TX ring array by index instead of using txr: txr is
 	 * NULL when the RX ring allocation failed and points past the
 	 * configured rings after the loop above.  The array was allocated
 	 * with M_ZERO, so rings that never got a buf ring have br == NULL.
 	 */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		txr = &adapter->tx_rings[i];
 		if (txr->br != NULL)
 			buf_ring_free(txr->br, M_DEVBUF);
 	}
 #endif
 	free(adapter->tx_rings, M_DEVBUF);
 fail:
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
  *  the information needed to transmit a packet on the wire. This is
  *  called only once at attach, setup is done every reset.
  *
  **********************************************************************/
 static int
 em_allocate_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	device_t dev = adapter->dev;
 	struct em_buffer *txbuf;
 	int error, i;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 			       1, 0,			/* alignment, bounds */
 			       BUS_SPACE_MAXADDR,	/* lowaddr */
 			       BUS_SPACE_MAXADDR,	/* highaddr */
 			       NULL, NULL,		/* filter, filterarg */
 			       EM_TSO_SIZE,		/* maxsize */
 			       EM_MAX_SCATTER,		/* nsegments */
 			       PAGE_SIZE,		/* maxsegsize */
 			       0,			/* flags */
 			       NULL,			/* lockfunc */
 			       NULL,			/* lockfuncarg */
 			       &txr->txtag))) {
 		device_printf(dev,"Unable to allocate TX DMA tag\n");
 		goto fail;
 	}
 
 	if (!(txr->tx_buffers =
 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
         /* Create the descriptor buffer dma maps */
 	txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
 		if (error != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
 			goto fail;
 		}
 	}
 
 	return 0;
 fail:
 	/* We free all, it handles case where we are in the middle */
 	em_free_transmit_structures(adapter);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize a transmit ring: zero the descriptors, reset the ring
  *  indices, free any mbufs still attached to the TX buffers and clear
  *  the cached checksum-offload context.  Runs under the ring's TX
  *  lock.
  *
  **********************************************************************/
 static void
 em_setup_transmit_ring(struct tx_ring *txr)
 {
 	struct adapter	*adapter = txr->adapter;
 
 	EM_TX_LOCK(txr);
 
 	/* Wipe the descriptor area and start again from slot 0 */
 	bzero((void *)txr->tx_base,
 	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
 	txr->next_avail_desc = 0;
 	txr->next_to_clean = 0;
 
 	/* Drop whatever is still attached to the tx buffers */
 	for (int idx = 0; idx < adapter->num_tx_desc; idx++) {
 		struct em_buffer *buf = &txr->tx_buffers[idx];
 
 		if (buf->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, buf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag, buf->map);
 			m_freem(buf->m_head);
 			buf->m_head = NULL;
 		}
 		/* clear the watch index */
 		buf->next_eop = -1;
 	}
 
 	/* Every descriptor is available again and the queue is idle */
 	txr->tx_avail = adapter->num_tx_desc;
 	txr->queue_status = EM_QUEUE_IDLE;
 
 	/* Forget the previous checksum offload context */
 	txr->last_hw_offload = 0;
 	txr->last_hw_ipcss = 0;
 	txr->last_hw_ipcso = 0;
 	txr->last_hw_tucss = 0;
 	txr->last_hw_tucso = 0;
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	EM_TX_UNLOCK(txr);
 }
 
 /*********************************************************************
  *
  *  Initialize all transmit rings by running
  *  em_setup_transmit_ring() on each of the adapter's num_queues
  *  TX rings.
  *
  **********************************************************************/
 static void
 em_setup_transmit_structures(struct adapter *adapter)
 {
 	for (int q = 0; q < adapter->num_queues; q++)
 		em_setup_transmit_ring(&adapter->tx_rings[q]);
 }
 
 /*********************************************************************
  *
  *  Enable transmit unit.
  *
  **********************************************************************/
 static void
 em_initialize_transmit_unit(struct adapter *adapter)
 {
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct e1000_hw	*hw = &adapter->hw;
 	u32	tctl, tarc, tipg = 0;
 
 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		u64 bus_addr = txr->txdma.dma_paddr;
 		/* Base and Len of TX Ring */
 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
 	    	    (u32)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
 	    	    (u32)bus_addr);
 		/* Init the HEAD/TAIL indices */
 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
 
 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
 
 		txr->queue_status = EM_QUEUE_IDLE;
 	}
 
 	/* Set the default values for the Tx Inter Packet Gap timer */
 	switch (adapter->hw.mac.type) {
 	case e1000_82542:
 		tipg = DEFAULT_82542_TIPG_IPGT;
 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
 		break;
 	case e1000_80003es2lan:
 		tipg = DEFAULT_82543_TIPG_IPGR1;
 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
 		    E1000_TIPG_IPGR2_SHIFT;
 		break;
 	default:
 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
 		    (adapter->hw.phy.media_type ==
 		    e1000_media_type_internal_serdes))
 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
 		else
 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
 	}
 
 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
 
 	if(adapter->hw.mac.type >= e1000_82540)
 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
 		    adapter->tx_abs_int_delay.value);
 
 	if ((adapter->hw.mac.type == e1000_82571) ||
 	    (adapter->hw.mac.type == e1000_82572)) {
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
 		tarc |= SPEED_MODE_BIT;
 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
 		tarc |= 1;
 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
 		tarc |= 1;
 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
 	}
 
 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
 	if (adapter->tx_int_delay.value > 0)
 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
 
 	/* Program the Transmit Control Register */
 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
 	tctl &= ~E1000_TCTL_CT;
 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
 
 	if (adapter->hw.mac.type >= e1000_82571)
 		tctl |= E1000_TCTL_MULR;
 
 	/* This write will effectively turn on the transmit unit. */
 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
 
 }
 
 
 /*********************************************************************
  *
  *  Tear down every transmit ring (buffers, descriptor DMA memory and
  *  the ring lock) and then release the ring array itself.
  *
  **********************************************************************/
 static void
 em_free_transmit_structures(struct adapter *adapter)
 {
 	for (int q = 0; q < adapter->num_queues; q++) {
 		struct tx_ring *ring = &adapter->tx_rings[q];
 
 		/* Buffers and descriptor memory go away under the lock */
 		EM_TX_LOCK(ring);
 		em_free_transmit_buffers(ring);
 		em_dma_free(adapter, &ring->txdma);
 		EM_TX_UNLOCK(ring);
 
 		/* The lock is destroyed only after it has been dropped */
 		EM_TX_LOCK_DESTROY(ring);
 	}
 
 	free(adapter->tx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Free transmit ring related data structures.
  *
  **********************************************************************/
 static void
 em_free_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter		*adapter = txr->adapter;
 	struct em_buffer	*txbuf;
 
 	INIT_DEBUGOUT("free_transmit_ring: begin");
 
 	if (txr->tx_buffers == NULL)
 		return;
 
 	for (int i = 0; i < adapter->num_tx_desc; i++) {
 		txbuf = &txr->tx_buffers[i];
 		if (txbuf->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag,
 			    txbuf->map);
 			m_freem(txbuf->m_head);
 			txbuf->m_head = NULL;
 			if (txbuf->map != NULL) {
 				bus_dmamap_destroy(txr->txtag,
 				    txbuf->map);
 				txbuf->map = NULL;
 			}
 		} else if (txbuf->map != NULL) {
 			bus_dmamap_unload(txr->txtag,
 			    txbuf->map);
 			bus_dmamap_destroy(txr->txtag,
 			    txbuf->map);
 			txbuf->map = NULL;
 		}
 	}
 #if __FreeBSD_version >= 800000
 	if (txr->br != NULL)
 		buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	if (txr->tx_buffers != NULL) {
 		free(txr->tx_buffers, M_DEVBUF);
 		txr->tx_buffers = NULL;
 	}
 	if (txr->txtag != NULL) {
 		bus_dma_tag_destroy(txr->txtag);
 		txr->txtag = NULL;
 	}
 	return;
 }
 
 
 /*********************************************************************
  *  The offload context is protocol specific (TCP/UDP) and thus
  *  only needs to be set when the protocol changes. The occasion
  *  of a context change can be a performance detriment, and
  *  might be better just disabled. The reason arises in the way
  *  in which the controller supports pipelined requests from the
  *  Tx data DMA. Up to four requests can be pipelined, and they may
  *  belong to the same packet or to multiple packets. However all
  *  requests for one packet are issued before a request is issued
  *  for a subsequent packet and if a request for the next packet
  *  requires a context change, that request will be stalled
  *  until the previous request completes. This means setting up
  *  a new context effectively disables pipelined Tx data DMA, which
  *  in turn greatly slows down sending of small sized frames.
  *
  *  txr:        ring that receives the context descriptor
  *  mp:         packet whose m_pkthdr.csum_flags select the offloads
  *  ip_off:     byte offset of the IP header within the frame
  *  ip:         IP header, used only for its header length (ip_hl)
  *  txd_upper:  out, option bits for the following data descriptors
  *  txd_lower:  out, command bits for the following data descriptors
  *
  *  When the requested context matches the one cached in
  *  txr->last_hw_*, the function returns early and no descriptor is
  *  consumed; otherwise one descriptor slot is used for the context.
  **********************************************************************/
 static void
 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
 {
 	struct adapter			*adapter = txr->adapter;
 	struct e1000_context_desc	*TXD = NULL;
 	struct em_buffer		*tx_buffer;
 	int				cur, hdr_len;
 	u32				cmd = 0;
 	u16				offload = 0;
 	u8				ipcso, ipcss, tucso, tucss;
 
 	ipcss = ipcso = tucss = tucso = 0;
 	/* Offset of the first byte after the IP header */
 	hdr_len = ip_off + (ip->ip_hl << 2);
 	cur = txr->next_avail_desc;
 
 	/* Setup of IP header checksum. */
 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
 		offload |= CSUM_IP;
 		ipcss = ip_off;
 		ipcso = ip_off + offsetof(struct ip, ip_sum);
 		/*
 		 * Start offset for header checksum calculation.
 		 * End offset for header checksum calculation.
 		 * Offset of place to put the checksum.
 		 */
 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
 		TXD->lower_setup.ip_fields.ipcss = ipcss;
 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
 		TXD->lower_setup.ip_fields.ipcso = ipcso;
 		cmd |= E1000_TXD_CMD_IP;
 	}
 
 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
  		offload |= CSUM_TCP;
  		tucss = hdr_len;
  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
  		/*
  		 * Setting up a new checksum offload context for every frame
  		 * takes a lot of processing time for hardware. This also
  		 * reduces performance a lot for small sized frames, so avoid
  		 * it if the driver can use the previously configured
  		 * checksum offload context.
  		 */
  		if (txr->last_hw_offload == offload) {
  			if (offload & CSUM_IP) {
  				if (txr->last_hw_ipcss == ipcss &&
  				    txr->last_hw_ipcso == ipcso &&
  				    txr->last_hw_tucss == tucss &&
  				    txr->last_hw_tucso == tucso)
  					return;
  			} else {
  				if (txr->last_hw_tucss == tucss &&
  				    txr->last_hw_tucso == tucso)
  					return;
  			}
   		}
  		/* Context differs: remember the new one before writing it */
  		txr->last_hw_offload = offload;
  		txr->last_hw_tucss = tucss;
  		txr->last_hw_tucso = tucso;
  		/*
  		 * Start offset for payload checksum calculation.
  		 * End offset for payload checksum calculation.
  		 * Offset of place to put the checksum.
  		 */
 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
  		TXD->upper_setup.tcp_fields.tucso = tucso;
  		cmd |= E1000_TXD_CMD_TCP;
  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
  		/*
  		 * NOTE(review): unlike the TCP branch, CSUM_UDP is not
  		 * OR-ed into 'offload' here, so the cached offload word
  		 * does not record UDP; the context comparison below then
  		 * relies on tucss/tucso alone. Confirm this is intended.
  		 */
  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
  		tucss = hdr_len;
  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
  		/*
  		 * Setting up a new checksum offload context for every frame
  		 * takes a lot of processing time for hardware. This also
  		 * reduces performance a lot for small sized frames, so avoid
  		 * it if the driver can use the previously configured
  		 * checksum offload context.
  		 */
  		if (txr->last_hw_offload == offload) {
  			if (offload & CSUM_IP) {
  				if (txr->last_hw_ipcss == ipcss &&
  				    txr->last_hw_ipcso == ipcso &&
  				    txr->last_hw_tucss == tucss &&
  				    txr->last_hw_tucso == tucso)
  					return;
  			} else {
  				if (txr->last_hw_tucss == tucss &&
  				    txr->last_hw_tucso == tucso)
  					return;
  			}
  		}
  		/* Context differs: remember the new one before writing it */
  		txr->last_hw_offload = offload;
  		txr->last_hw_tucss = tucss;
  		txr->last_hw_tucso = tucso;
  		/*
  		 * Start offset for payload checksum calculation.
  		 * End offset for payload checksum calculation.
  		 * Offset of place to put the checksum.
  		 */
 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
  		TXD->upper_setup.tcp_fields.tucss = tucss;
  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
  		TXD->upper_setup.tcp_fields.tucso = tucso;
   	}
   
  	/* The IP part of the cache is refreshed only when IP csum is on */
  	if (offload & CSUM_IP) {
  		txr->last_hw_ipcss = ipcss;
  		txr->last_hw_ipcso = ipcso;
   	}
 
 	/*
 	 * NOTE(review): TXD is still NULL here if none of CSUM_IP,
 	 * CSUM_TCP or CSUM_UDP was set in csum_flags; presumably callers
 	 * only invoke this with at least one of them set - confirm.
 	 */
 	TXD->tcp_seg_setup.data = htole32(0);
 	TXD->cmd_and_length =
 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
 	/* The context slot carries no mbuf and ends no packet */
 	tx_buffer = &txr->tx_buffers[cur];
 	tx_buffer->m_head = NULL;
 	tx_buffer->next_eop = -1;
 
 	/* Advance past the context descriptor, wrapping at ring end */
 	if (++cur == adapter->num_tx_desc)
 		cur = 0;
 
 	txr->tx_avail--;
 	txr->next_avail_desc = cur;
 }
 
 
 /**********************************************************************
  *
  *  Setup work for hardware segmentation offload (TSO).
  *
  *  Writes one TSO context descriptor at the ring's next free slot,
  *  fills in the command/option words the caller must use for the
  *  data descriptors (*txd_lower, *txd_upper) and consumes that slot.
  *
  **********************************************************************/
 static void
 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
 {
 	struct adapter			*adapter = txr->adapter;
 	struct e1000_context_desc	*ctx;
 	struct em_buffer		*ctx_buf;
 	int				slot, ip_end, hdr_len;
 
 	/*
 	 * A TSO context could in theory be reused when the frame type
 	 * (IP/TCP) and the MSS are unchanged, but checking that the
 	 * IP/TCP structure is the same is hard, so a fresh context is
 	 * always established.
 	 */
 	ip_end = ip_off + (ip->ip_hl << 2);	/* first byte after IP hdr */
 	hdr_len = ip_end + (tp->th_off << 2);	/* all headers up to payload */
 
 	/* Extended data descriptors with segmentation enabled */
 	*txd_lower = (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
 	    E1000_TXD_CMD_TSE);
 
 	/* IP and/or TCP header checksum calculation and insertion. */
 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
 
 	slot = txr->next_avail_desc;
 	ctx_buf = &txr->tx_buffers[slot];
 	ctx = (struct e1000_context_desc *) &txr->tx_base[slot];
 
 	/* IP header checksum: start, end (inclusive) and result offset */
 	ctx->lower_setup.ip_fields.ipcss = ip_off;
 	ctx->lower_setup.ip_fields.ipcse = htole16(ip_end - 1);
 	ctx->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
 
 	/* TCP checksum: start, end (0) and result offset */
 	ctx->upper_setup.tcp_fields.tucss = ip_end;
 	ctx->upper_setup.tcp_fields.tucse = 0;
 	ctx->upper_setup.tcp_fields.tucso =
 	    ip_end + offsetof(struct tcphdr, th_sum);
 
 	/* Payload size per segment and total length of the headers */
 	ctx->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
 	ctx->tcp_seg_setup.fields.hdr_len = hdr_len;
 
 	/* Command bits plus the payload length (packet minus headers) */
 	ctx->cmd_and_length = htole32(adapter->txd_cmd |
 	    E1000_TXD_CMD_DEXT |
 	    E1000_TXD_CMD_TSE |
 	    E1000_TXD_CMD_IP |
 	    E1000_TXD_CMD_TCP |
 	    (mp->m_pkthdr.len - (hdr_len)));
 
 	/* The context slot carries no mbuf and ends no packet */
 	ctx_buf->m_head = NULL;
 	ctx_buf->next_eop = -1;
 
 	/* Step past the context descriptor, wrapping at the ring end */
 	slot++;
 	if (slot == adapter->num_tx_desc)
 		slot = 0;
 
 	txr->tx_avail--;
 	txr->next_avail_desc = slot;
 	txr->tx_tso = TRUE;
 }
 
 
 /**********************************************************************
  *
  *  Examine each tx_buffer in the used queue. If the hardware is done
  *  processing the packet then free associated resources. The
  *  tx_buffer is put back on the free queue.
  *
  *  Must be called with the TX lock held (asserted below).
  *  Returns FALSE when the ring is completely clean (every descriptor
  *  available), TRUE when descriptors are still outstanding.
  *
  **********************************************************************/
 static bool
 em_txeof(struct tx_ring *txr)
 {
 	struct adapter	*adapter = txr->adapter;
         int first, last, done, processed;
         struct em_buffer *tx_buffer;
         struct e1000_tx_desc   *tx_desc, *eop_desc;
 	struct ifnet   *ifp = adapter->ifp;
 
 	EM_TX_LOCK_ASSERT(txr);
 
 	/* No work, make sure watchdog is off */
         if (txr->tx_avail == adapter->num_tx_desc) {
 		txr->queue_status = EM_QUEUE_IDLE;
                 return (FALSE);
 	}
 
 	processed = 0;
         first = txr->next_to_clean;
         tx_desc = &txr->tx_base[first];
         tx_buffer = &txr->tx_buffers[first];
 	/*
 	 * NOTE(review): next_eop is used as an index before it is
 	 * compared with -1 (that check only happens inside the loop
 	 * below). This assumes the buffer at next_to_clean always has a
 	 * valid next_eop while descriptors are outstanding - confirm.
 	 */
 	last = tx_buffer->next_eop;
         eop_desc = &txr->tx_base[last];
 
 	/*
 	 * What this does is get the index of the
 	 * first descriptor AFTER the EOP of the 
 	 * first packet, that way we can do the
 	 * simple comparison on the inner while loop.
 	 */
 	if (++last == adapter->num_tx_desc)
  		last = 0;
 	done = last;
 
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_POSTREAD);
 
 	/* Outer loop: one iteration per packet whose EOP has DD set */
         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
 		/* We clean the range of the packet */
 		while (first != done) {
                 	tx_desc->upper.data = 0;
                 	tx_desc->lower.data = 0;
                 	tx_desc->buffer_addr = 0;
                 	++txr->tx_avail;
 			++processed;
 
 			/* Only slots that carry an mbuf have a loaded map */
 			if (tx_buffer->m_head) {
 				bus_dmamap_sync(txr->txtag,
 				    tx_buffer->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(txr->txtag,
 				    tx_buffer->map);
                         	m_freem(tx_buffer->m_head);
                         	tx_buffer->m_head = NULL;
                 	}
 			tx_buffer->next_eop = -1;
 			/* Progress was made: restart the watchdog window */
 			txr->watchdog_time = ticks;
 
 	                if (++first == adapter->num_tx_desc)
 				first = 0;
 
 	                tx_buffer = &txr->tx_buffers[first];
 			tx_desc = &txr->tx_base[first];
 		}
 		/* One whole packet has been reclaimed */
 		++ifp->if_opackets;
 		/* See if we can continue to the next packet */
 		last = tx_buffer->next_eop;
 		if (last != -1) {
         		eop_desc = &txr->tx_base[last];
 			/* Get new done point */
 			if (++last == adapter->num_tx_desc) last = 0;
 			done = last;
 		} else
 			break;
         }
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/* Next call resumes at the first descriptor not yet reclaimed */
         txr->next_to_clean = first;
 
 	/*
 	** Watchdog calculation, we know there's
 	** work outstanding or the first return
 	** would have been taken, so none processed
 	** for too long indicates a hang. local timer
 	** will examine this and do a reset if needed.
 	*/
 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
 		txr->queue_status = EM_QUEUE_HUNG;
 
         /*
          * If we have enough room, clear IFF_DRV_OACTIVE
          * to tell the stack that it is OK to send packets.
          */
         if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {                
                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		/* Disable watchdog if all clean */
                 if (txr->tx_avail == adapter->num_tx_desc) {
 			txr->queue_status = EM_QUEUE_IDLE;
 			return (FALSE);
 		} 
         }
 
 	return (TRUE);
 }
 
 
 /*********************************************************************
  *
  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
  *
  **********************************************************************/
 static void
 em_refresh_mbufs(struct rx_ring *rxr, int limit)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct mbuf		*m;
 	bus_dma_segment_t	segs[1];
 	struct em_buffer	*rxbuf;
 	int			i, error, nsegs, cleaned;
 
 	i = rxr->next_to_refresh;
 	cleaned = -1;
 	while (i != limit) {
 		rxbuf = &rxr->rx_buffers[i];
 		/*
 		** Just skip entries with a buffer,
 		** they can only be due to an error
 		** and are to be reused.
 		*/
 		if (rxbuf->m_head != NULL)
 			goto reuse;
 		m = m_getjcl(M_DONTWAIT, MT_DATA,
 		    M_PKTHDR, adapter->rx_mbuf_sz);
 		/*
 		** If we have a temporary resource shortage
 		** that causes a failure, just abort refresh
 		** for now, we will return to this point when
 		** reinvoked from em_rxeof.
 		*/
 		if (m == NULL)
 			goto update;
 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
 
 		/* Use bus_dma machinery to setup the memory mapping  */
 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			m_free(m);
 			goto update;
 		}
 
 		/* If nsegs is wrong then the stack is corrupt. */
 		KASSERT(nsegs == 1, ("Too many segments returned!"));
 	
 		bus_dmamap_sync(rxr->rxtag,
 		    rxbuf->map, BUS_DMASYNC_PREREAD);
 		rxbuf->m_head = m;
 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
 reuse:
 		cleaned = i;
 		/* Calculate next index */
 		if (++i == adapter->num_rx_desc)
 			i = 0;
 		/* This is the work marker for refresh */
 		rxr->next_to_refresh = i;
 	}
 update:
 	/*
 	** Update the tail pointer only if,
 	** and as far as we have refreshed.
 	*/
 	if (cleaned != -1) /* Update tail index */
 		E1000_WRITE_REG(&adapter->hw,
 		    E1000_RDT(rxr->me), cleaned);
 
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Allocate the rx_buffer array, the RX DMA tag and one DMA map per
  *  receive descriptor for a ring.  One rx_buffer is used per received
  *  packet, so the array is sized to the number of receive descriptors.
  *
  *  Returns 0 on success, ENOMEM when the array cannot be allocated,
  *  or the bus_dma error after releasing the receive structures.
  *
  **********************************************************************/
 static int
 em_allocate_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter	*adapter = rxr->adapter;
 	device_t	dev = adapter->dev;
 	int		error;
 
 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rxr->rx_buffers == NULL) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
 		return (ENOMEM);
 	}
 
 	/* Single-segment tag, large enough for a 9k jumbo cluster */
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
 	    1, 0,			/* alignment, bounds */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    MJUM9BYTES,			/* maxsize */
 	    1,				/* nsegments */
 	    MJUM9BYTES,			/* maxsegsize */
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockarg */
 	    &rxr->rxtag);
 	if (error) {
 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
 		    __func__, error);
 		goto fail;
 	}
 
 	/* One DMA map for each descriptor slot */
 	for (int slot = 0; slot < adapter->num_rx_desc; slot++) {
 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
 		    &rxr->rx_buffers[slot].map);
 		if (error) {
 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
 			    __func__, error);
 			goto fail;
 		}
 	}
 
 	return (0);
 
 fail:
 	em_free_receive_structures(adapter);
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Initialize a receive ring and its buffers.
  *
  **********************************************************************/
 static int
 em_setup_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter = rxr->adapter;
 	struct em_buffer	*rxbuf;
 	bus_dma_segment_t	seg[1];
 	int			rsize, nsegs, error;
 
 
 	/* Clear the ring contents */
 	EM_RX_LOCK(rxr);
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
 	bzero((void *)rxr->rx_base, rsize);
 
 	/*
 	** Free current RX buffer structs and their mbufs
 	*/
 	for (int i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head != NULL) {
 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
 			m_freem(rxbuf->m_head);
 		}
 	}
 
 	/* Now replenish the mbufs */
 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
 
 		rxbuf = &rxr->rx_buffers[j];
 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
 		    M_PKTHDR, adapter->rx_mbuf_sz);
 		if (rxbuf->m_head == NULL)
 			return (ENOBUFS);
 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
 
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
 		    rxbuf->map, rxbuf->m_head, seg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			m_freem(rxbuf->m_head);
 			rxbuf->m_head = NULL;
 			return (error);
 		}
 		bus_dmamap_sync(rxr->rxtag,
 		    rxbuf->map, BUS_DMASYNC_PREREAD);
 
 		/* Update descriptor */
 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
 	}
 
 
 	/* Setup our descriptor indices */
 	rxr->next_to_check = 0;
 	rxr->next_to_refresh = 0;
 
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	EM_RX_UNLOCK(rxr);
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Initialize all receive rings.
  *
  *  Returns 0 when every ring was set up.  If a ring fails, the mbufs
  *  of the rings set up before it are released again and ENOBUFS is
  *  returned; the failing ring itself is not touched here.
  *
  **********************************************************************/
 static int
 em_setup_receive_structures(struct adapter *adapter)
 {
 	int bad;
 
 	/* Stop at the first ring whose setup reports an error */
 	for (bad = 0; bad < adapter->num_queues; bad++) {
 		if (em_setup_receive_ring(&adapter->rx_rings[bad]))
 			break;
 	}
 	if (bad >= adapter->num_queues)
 		return (0);
 
 	/*
 	 * Ring 'bad' failed, so it is the terminus: give back the RX
 	 * buffers of the rings that completed before it.
 	 */
 	for (int q = 0; q < bad; q++) {
 		struct rx_ring *ring = &adapter->rx_rings[q];
 
 		for (int slot = 0; slot < adapter->num_rx_desc; slot++) {
 			struct em_buffer *rxbuf = &ring->rx_buffers[slot];
 
 			if (rxbuf->m_head == NULL)
 				continue;
 			bus_dmamap_sync(ring->rxtag, rxbuf->map,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(ring->rxtag, rxbuf->map);
 			m_freem(rxbuf->m_head);
 			rxbuf->m_head = NULL;
 		}
 	}
 
 	return (ENOBUFS);
 }
 
 /*********************************************************************
  *
  *  Tear down every receive ring (buffers, descriptor DMA memory and
  *  the ring lock) and then release the ring array itself.
  *
  **********************************************************************/
 static void
 em_free_receive_structures(struct adapter *adapter)
 {
 	for (int q = 0; q < adapter->num_queues; q++) {
 		struct rx_ring *ring = &adapter->rx_rings[q];
 
 		em_free_receive_buffers(ring);
 		/* The descriptor ring memory goes as well */
 		em_dma_free(adapter, &ring->rxdma);
 		EM_RX_LOCK_DESTROY(ring);
 	}
 
 	free(adapter->rx_rings, M_DEVBUF);
 }
 
 
 /*********************************************************************
  *
  *  Free receive ring data structures
  *
  **********************************************************************/
 static void
 em_free_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct em_buffer	*rxbuf = NULL;
 
 	INIT_DEBUGOUT("free_receive_buffers: begin");
 
 	if (rxr->rx_buffers != NULL) {
 		for (int i = 0; i < adapter->num_rx_desc; i++) {
 			rxbuf = &rxr->rx_buffers[i];
 			if (rxbuf->map != NULL) {
 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
 			}
 			if (rxbuf->m_head != NULL) {
 				m_freem(rxbuf->m_head);
 				rxbuf->m_head = NULL;
 			}
 		}
 		free(rxr->rx_buffers, M_DEVBUF);
 		rxr->rx_buffers = NULL;
 	}
 
 	if (rxr->rxtag != NULL) {
 		bus_dma_tag_destroy(rxr->rxtag);
 		rxr->rxtag = NULL;
 	}
 
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Enable receive unit.
  *
  **********************************************************************/
 /*
  * Interrupt throttling default: 1 / (MAX_INTS_PER_SEC * 256ns).
  * The expansion is fully parenthesized so that it evaluates as one
  * value wherever it is used in a larger expression.
  */
 #define MAX_INTS_PER_SEC	8000
 #define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))
 
 static void
 em_initialize_receive_unit(struct adapter *adapter)
 {
 	struct rx_ring	*rxr = adapter->rx_rings;
 	struct ifnet	*ifp = adapter->ifp;
 	struct e1000_hw	*hw = &adapter->hw;
 	u64	bus_addr;
 	u32	rctl, rxcsum;
 
 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
 
 	/*
 	 * Make sure receives are disabled while setting
 	 * up the descriptor ring
 	 */
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
 
 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
 	    adapter->rx_abs_int_delay.value);
 	/*
 	 * Set the interrupt throttling rate. Value is calculated
 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
 	 */
 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
 
 	/*
 	** When using MSIX interrupts we need to throttle
 	** using the EITR register (82574 only)
 	*/
 	if (hw->mac.type == e1000_82574)
 		for (int i = 0; i < 4; i++)
 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
 			    DEFAULT_ITR);
 
 	/* Disable accelerated ackknowledge */
 	if (adapter->hw.mac.type == e1000_82574)
 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
 
 	if (ifp->if_capenable & IFCAP_RXCSUM) {
 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
 	}
 
 	/*
 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
 	** long latencies are observed, like Lenovo X60. This
 	** change eliminates the problem, but since having positive
 	** values in RDTR is a known source of problems on other
 	** platforms another solution is being sought.
 	*/
 	if (hw->mac.type == e1000_82573)
 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
 
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		/* Setup the Base and Length of the Rx Descriptor Ring */
 		bus_addr = rxr->rxdma.dma_paddr;
 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
 		/* Setup the Head and Tail Descriptor Pointers */
 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
 	}
 
 	/* Set early receive threshold on appropriate hw */
 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
 	    (adapter->hw.mac.type == e1000_pch2lan) ||
 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
 	    (ifp->if_mtu > ETHERMTU)) {
 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
 		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
 	}
 		
 	if (adapter->hw.mac.type == e1000_pch2lan) {
 		if (ifp->if_mtu > ETHERMTU)
 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
 		else
 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
 	}
 
 	/* Setup the Receive Control Register */
 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
 
         /* Strip the CRC */
         rctl |= E1000_RCTL_SECRC;
 
         /* Make sure VLAN Filters are off */
         rctl &= ~E1000_RCTL_VFE;
 	rctl &= ~E1000_RCTL_SBP;
 
 	if (adapter->rx_mbuf_sz == MCLBYTES)
 		rctl |= E1000_RCTL_SZ_2048;
 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
 
 	if (ifp->if_mtu > ETHERMTU)
 		rctl |= E1000_RCTL_LPE;
 	else
 		rctl &= ~E1000_RCTL_LPE;
 
 	/* Write out the settings */
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 
 	return;
 }
 
 
 /*********************************************************************
  *
  *  This routine executes in interrupt context. It replenishes
  *  the mbufs in the descriptor and sends data which has been
  *  dma'ed into host memory to upper layer.
  *
  *  We loop at most count times if count is > 0, or until done if
  *  count < 0.
  *  
  *  For polling we also now return the number of cleaned packets
  *********************************************************************/
 static bool
 em_rxeof(struct rx_ring *rxr, int count, int *done)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct ifnet		*ifp = adapter->ifp;
 	struct mbuf		*mp, *sendmp;
 	u8			status = 0;
 	u16 			len;
 	int			i, processed, rxdone = 0;
 	bool			eop;
 	struct e1000_rx_desc	*cur;
 
 	EM_RX_LOCK(rxr);
 
 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
 
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 		cur = &rxr->rx_base[i];
 		status = cur->status;
 		mp = sendmp = NULL;
 
 		if ((status & E1000_RXD_STAT_DD) == 0)
 			break;
 
 		len = le16toh(cur->length);
 		eop = (status & E1000_RXD_STAT_EOP) != 0;
-		count--;
 
-		if (((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) &&
-		    (rxr->discard == FALSE)) {
+		if ((rxr->discard == TRUE) || (cur->errors &
+		    E1000_RXD_ERR_FRAME_ERR_MASK)) {
+			ifp->if_ierrors++;
+			++rxr->rx_discarded;
+			if (!eop) /* Catch subsequent segs */
+				rxr->discard = TRUE;
+			else
+				rxr->discard = FALSE;
+			em_rx_discard(rxr, i);
+			goto next_desc;
+		}
 
-			/* Assign correct length to the current fragment */
-			mp = rxr->rx_buffers[i].m_head;
-			mp->m_len = len;
+		/* Assign correct length to the current fragment */
+		mp = rxr->rx_buffers[i].m_head;
+		mp->m_len = len;
 
-			/* Trigger for refresh */
-			rxr->rx_buffers[i].m_head = NULL;
+		/* Trigger for refresh */
+		rxr->rx_buffers[i].m_head = NULL;
 
-			if (rxr->fmp == NULL) {
-				mp->m_pkthdr.len = len;
-				rxr->fmp = mp; /* Store the first mbuf */
-				rxr->lmp = mp;
-			} else {
-				/* Chain mbuf's together */
-				mp->m_flags &= ~M_PKTHDR;
-				rxr->lmp->m_next = mp;
-				rxr->lmp = rxr->lmp->m_next;
-				rxr->fmp->m_pkthdr.len += len;
-			}
+		/* First segment? */
+		if (rxr->fmp == NULL) {
+			mp->m_pkthdr.len = len;
+			rxr->fmp = rxr->lmp = mp;
+		} else {
+			/* Chain mbuf's together */
+			mp->m_flags &= ~M_PKTHDR;
+			rxr->lmp->m_next = mp;
+			rxr->lmp = mp;
+			rxr->fmp->m_pkthdr.len += len;
+		}
 
-			if (eop) {
-				rxr->fmp->m_pkthdr.rcvif = ifp;
-				ifp->if_ipackets++;
-				em_receive_checksum(cur, rxr->fmp);
+		if (eop) {
+			--count;
+			sendmp = rxr->fmp;
+			sendmp->m_pkthdr.rcvif = ifp;
+			ifp->if_ipackets++;
+			em_receive_checksum(cur, sendmp);
 #ifndef __NO_STRICT_ALIGNMENT
-				if (adapter->max_frame_size >
-				    (MCLBYTES - ETHER_ALIGN) &&
-				    em_fixup_rx(rxr) != 0)
-					goto skip;
+			if (adapter->max_frame_size >
+			    (MCLBYTES - ETHER_ALIGN) &&
+			    em_fixup_rx(rxr) != 0)
+				goto skip;
 #endif
-				if (status & E1000_RXD_STAT_VP) {
-					rxr->fmp->m_pkthdr.ether_vtag =
-					    (le16toh(cur->special) &
-					    E1000_RXD_SPC_VLAN_MASK);
-					rxr->fmp->m_flags |= M_VLANTAG;
-				}
+			if (status & E1000_RXD_STAT_VP) {
+				sendmp->m_pkthdr.ether_vtag =
+				    (le16toh(cur->special) &
+				    E1000_RXD_SPC_VLAN_MASK);
+				sendmp->m_flags |= M_VLANTAG;
+			}
 #ifdef EM_MULTIQUEUE
-				rxr->fmp->m_pkthdr.flowid = rxr->msix;
-				rxr->fmp->m_flags |= M_FLOWID;
+			sendmp->m_pkthdr.flowid = rxr->msix;
+			sendmp->m_flags |= M_FLOWID;
 #endif
 #ifndef __NO_STRICT_ALIGNMENT
 skip:
 #endif
-				sendmp = rxr->fmp;
-				rxr->fmp = NULL;
-				rxr->lmp = NULL;
-			}
-		} else {
-			ifp->if_ierrors++;
-			++rxr->rx_discarded;
-			if (!eop) /* Catch subsequent segs */
-				rxr->discard = TRUE;
-			else
-				rxr->discard = FALSE;
-			em_rx_discard(rxr, i);
-			sendmp = NULL;
+			rxr->fmp = rxr->lmp = NULL;
 		}
-
+next_desc:
 		/* Zero out the receive descriptors status. */
 		cur->status = 0;
 		++rxdone;	/* cumulative for POLL */
 		++processed;
 
 		/* Advance our pointers to the next descriptor. */
 		if (++i == adapter->num_rx_desc)
 			i = 0;
 
 		/* Send to the stack */
 		if (sendmp != NULL) {
 			rxr->next_to_check = i;
 			EM_RX_UNLOCK(rxr);
 			(*ifp->if_input)(ifp, sendmp);
 			EM_RX_LOCK(rxr);
 			i = rxr->next_to_check;
 		}
 
 		/* Only refresh mbufs every 8 descriptors */
 		if (processed == 8) {
 			em_refresh_mbufs(rxr, i);
 			processed = 0;
 		}
 	}
 
 	/* Catch any remaining refresh work */
-	if (processed != 0) {
-		em_refresh_mbufs(rxr, i);
-		processed = 0;
-	}
+	em_refresh_mbufs(rxr, i);
 
 	rxr->next_to_check = i;
 	if (done != NULL)
 		*done = rxdone;
 	EM_RX_UNLOCK(rxr);
 
 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
 }
 
 /*
  * Throw away the frame fragment sitting in RX slot 'i'.
  *
  * Any partially assembled chain hanging off the ring (fmp/lmp) is
  * freed, and the mbuf still attached to the slot is put back into
  * its initial state so the already loaded DMA map can be reused.
  */
 static __inline void
 em_rx_discard(struct rx_ring *rxr, int i)
 {
 	struct mbuf	*partial = rxr->fmp;
 	struct mbuf	*slot_mbuf;
 
 	/* Release whatever had been chained for this packet so far */
 	if (partial != NULL) {
 		partial->m_flags |= M_PKTHDR;
 		m_freem(partial);
 		rxr->fmp = rxr->lmp = NULL;
 	}
 
 	/* Restore the slot's mbuf to a pristine, full-sized header mbuf */
 	slot_mbuf = rxr->rx_buffers[i].m_head;
 	slot_mbuf->m_pkthdr.len = rxr->adapter->rx_mbuf_sz;
 	slot_mbuf->m_len = slot_mbuf->m_pkthdr.len;
 	slot_mbuf->m_flags |= M_PKTHDR;
 	slot_mbuf->m_data = slot_mbuf->m_ext.ext_buf;
 	slot_mbuf->m_next = NULL;
 }
 
 #ifndef __NO_STRICT_ALIGNMENT
 /*
  * With jumbo frames enabled, the payload has to be realigned on
  * architectures with strict alignment requirements. The 8254x only
  * accepts RX buffer sizes of 2048/4096/8192/16384, whereas what is
  * really wanted is 2048 - ETHER_ALIGN so the payload ends up aligned;
  * this is a serious design mistake of the 8254x as it nullifies DMA
  * operations. Even on architectures without strict alignment the
  * unaligned accesses cost performance.
  *
  * Two strategies are used on the first mbuf of the ring's chain:
  *  - if the data fits, it is shifted forward by ETHER_HDR_LEN in place;
  *  - otherwise a new header mbuf is allocated, the ethernet header is
  *    copied into it, and it is prepended to the existing chain. This
  *    avoids copying the entire frame.
  *
  * Returns 0 on success, or ENOMEM when the header mbuf cannot be
  * allocated; in that case the chain is freed and rxr->fmp is cleared.
  *
  * Be aware that the best performance of the 8254x is achieved only when
  * jumbo frames are not used at all on strict-alignment architectures.
  */
 static int
 em_fixup_rx(struct rx_ring *rxr)
 {
 	struct mbuf *first = rxr->fmp;
 	struct mbuf *hdr;
 
 	/* Small enough: slide the data forward inside the same buffer */
 	if (first->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
 		bcopy(first->m_data, first->m_data + ETHER_HDR_LEN,
 		    first->m_len);
 		first->m_data += ETHER_HDR_LEN;
 		return (0);
 	}
 
 	MGETHDR(hdr, M_DONTWAIT, MT_DATA);
 	if (hdr == NULL) {
 		/* No mbuf available: count the drop and discard the frame */
 		rxr->adapter->dropped_pkts++;
 		m_freem(rxr->fmp);
 		rxr->fmp = NULL;
 		return (ENOMEM);
 	}
 
 	/* Move the ethernet header into its own mbuf at the chain head */
 	bcopy(first->m_data, hdr->m_data, ETHER_HDR_LEN);
 	first->m_data += ETHER_HDR_LEN;
 	first->m_len -= ETHER_HDR_LEN;
 	hdr->m_len = ETHER_HDR_LEN;
 	M_MOVE_PKTHDR(hdr, first);
 	hdr->m_next = first;
 	rxr->fmp = hdr;
 
 	return (0);
 }
 #endif
 
 /*********************************************************************
  *
  *  Translate the checksum results reported in an RX descriptor into
  *  mbuf csum_flags, so the stack can skip verifying checksums the
  *  hardware has already validated.
  *
  *********************************************************************/
 static void
 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
 {
 	/* Hardware says to ignore its checksum indication entirely */
 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
 		mp->m_pkthdr.csum_flags = 0;
 		return;
 	}
 
 	/* IP header checksum was computed: good result or nothing */
 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
 		if (rx_desc->errors & E1000_RXD_ERR_IPE) {
 			mp->m_pkthdr.csum_flags = 0;
 		} else {
 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
 		}
 	}
 
 	/* TCP/UDP checksum was computed and did not fail */
 	if ((rx_desc->status & E1000_RXD_STAT_TCPCS) &&
 	    !(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
 		mp->m_pkthdr.csum_flags |=
 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		mp->m_pkthdr.csum_data = htons(0xffff);
 	}
 }
 
 /*
  * VLAN config event handler: record a newly configured VLAN ID.
  *
  *   arg  - value registered with the event; an event whose
  *          ifp->if_softc differs from it is not ours and is ignored.
  *   ifp  - interface the event refers to.
  *   vtag - VLAN ID; 0 and anything above 4095 is rejected.
  *
  * Under the core lock, the tag's bit is set in the shadow VFTA and
  * num_vlans is incremented. When IFCAP_VLAN_HWFILTER is enabled the
  * adapter is re-initialized to load the change.
  */
 static void
 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u32		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
 		return;
 
 	EM_CORE_LOCK(adapter);
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/* bit can be 31: shift an unsigned 1 to stay out of int's sign bit */
 	adapter->shadow_vfta[index] |= (1U << bit);
 	++adapter->num_vlans;
 	/* Re-init to load the changes */
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 		em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 }
 
 /*
  * VLAN unconfig event handler: forget a VLAN ID.
  *
  *   arg  - value registered with the event; an event whose
  *          ifp->if_softc differs from it is not ours and is ignored.
  *   ifp  - interface the event refers to.
  *   vtag - VLAN ID; 0 and anything above 4095 is rejected.
  *
  * Under the core lock, the tag's bit is cleared in the shadow VFTA and
  * num_vlans is decremented. When IFCAP_VLAN_HWFILTER is enabled the
  * adapter is re-initialized to load the change.
  */
 static void
 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u32		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
 		return;
 
 	EM_CORE_LOCK(adapter);
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/* bit can be 31: shift an unsigned 1 to stay out of int's sign bit */
 	adapter->shadow_vfta[index] &= ~(1U << bit);
 	--adapter->num_vlans;
 	/* Re-init to load the changes */
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 		em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 }
 
 /*
  * Reload the hardware VLAN filter table from the driver's shadow copy
  * and switch on VLAN handling in CTRL and RCTL.
  */
 static void
 em_setup_vlan_hw_support(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32             ctrl, rctl;
 
 	/*
 	** This is reached via init_locked, i.e. after a
 	** soft reset which has already cleared the VFTA
 	** and other state; with no vlans registered there
 	** is nothing to restore.
 	*/
 	if (adapter->num_vlans == 0)
 		return;
 
 	/*
 	** The soft reset zeroed the VFTA, so write back
 	** every non-empty shadow entry.
 	*/
 	for (int idx = 0; idx < EM_VFTA_SIZE; idx++) {
 		if (adapter->shadow_vfta[idx] == 0)
 			continue;
 		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
 		    idx, adapter->shadow_vfta[idx]);
 	}
 
 	ctrl = E1000_READ_REG(hw, E1000_CTRL);
 	ctrl |= E1000_CTRL_VME;
 	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
 
 	/* Enable the Filter Table */
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	rctl = (rctl & ~E1000_RCTL_CFIEN) | E1000_RCTL_VFE;
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
 /*
  * Unmask interrupts by writing IMS. On the 82574 the MSI-X mask is
  * also written to EIAC and included in the IMS value.
  */
 static void
 em_enable_intr(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 enable_bits;
 
 	if (hw->mac.type != e1000_82574) {
 		enable_bits = IMS_ENABLE_MASK;
 	} else {
 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
 		enable_bits = IMS_ENABLE_MASK | EM_MSIX_MASK;
 	}
 	E1000_WRITE_REG(hw, E1000_IMS, enable_bits);
 }
 
 /*
  * Mask every interrupt source by writing all ones to IMC. On the
  * 82574, EIAC is zeroed first.
  */
 static void
 em_disable_intr(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	const int is_82574 = (hw->mac.type == e1000_82574);
 
 	if (is_82574) {
 		E1000_WRITE_REG(hw, EM_EIAC, 0);
 	}
 	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
 }
 
 /*
  * Despite the name, this turns special hardware management features
  * off so that the OS manages the system: ARP interception by the
  * hardware is disabled and management packets are forwarded to the
  * host. Does nothing when the adapter has no manageability.
  */
 static void
 em_init_manageability(struct adapter *adapter)
 {
 	/* A shared code workaround */
 #define E1000_82542_MANC2H E1000_MANC2H
 #define E1000_MNG2HOST_PORT_623 (1 << 5)
 #define E1000_MNG2HOST_PORT_664 (1 << 6)
 	struct e1000_hw *hw = &adapter->hw;
 	int manc2h, manc;
 
 	if (!adapter->has_manage)
 		return;
 
 	manc2h = E1000_READ_REG(hw, E1000_MANC2H);
 	manc = E1000_READ_REG(hw, E1000_MANC);
 
 	/* stop the hardware from intercepting ARP */
 	manc &= ~(E1000_MANC_ARP_EN);
 
 	/* let management packets through to the host */
 	manc |= E1000_MANC_EN_MNG2HOST;
 	manc2h |= (E1000_MNG2HOST_PORT_623 | E1000_MNG2HOST_PORT_664);
 
 	E1000_WRITE_REG(hw, E1000_MANC2H, manc2h);
 	E1000_WRITE_REG(hw, E1000_MANC, manc);
 }
 
 /*
  * Return control to the hardware management controller, if the
  * adapter has one: ARP interception is switched back on and
  * forwarding of management packets to the host is switched off.
  */
 static void
 em_release_manageability(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	int manc;
 
 	if (!adapter->has_manage)
 		return;
 
 	manc = E1000_READ_REG(hw, E1000_MANC);
 
 	/* hardware intercepts ARP again, host no longer gets mng packets */
 	manc |= E1000_MANC_ARP_EN;
 	manc &= ~E1000_MANC_EN_MNG2HOST;
 
 	E1000_WRITE_REG(hw, E1000_MANC, manc);
 }
 
 /*
  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means
  * that the driver is loaded. For AMT version type f/w
  * this means that the network i/f is open.
  */
 static void
 em_get_hw_control(struct adapter *adapter)
 {
 	u32 ctrl_ext, swsm;
 
 	if (adapter->hw.mac.type == e1000_82573) {
 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
 		    swsm | E1000_SWSM_DRV_LOAD);
 		return;
 	}
 	/* else */
 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
 	return;
 }
 
 /*
  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means that
  * the driver is no longer loaded. For AMT versions of the
  * f/w this means that the network i/f is closed.
  */
 static void
 em_release_hw_control(struct adapter *adapter)
 {
 	u32 ctrl_ext, swsm;
 
 	if (!adapter->has_manage)
 		return;
 
 	if (adapter->hw.mac.type == e1000_82573) {
 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
 		    swsm & ~E1000_SWSM_DRV_LOAD);
 		return;
 	}
 	/* else */
 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
 	return;
 }
 
 /*
  * Sanity-check an ethernet address. Returns FALSE when the low bit
  * of the first octet is set or when the address is all zeroes,
  * TRUE otherwise.
  */
 static int
 em_is_valid_ether_addr(u8 *addr)
 {
 	char blank_addr[6] = { 0, 0, 0, 0, 0, 0 };
 
 	/* Low bit of the first octet set */
 	if (addr[0] & 1)
 		return (FALSE);
 
 	/* All-zero address */
 	if (bcmp(addr, blank_addr, ETHER_ADDR_LEN) == 0)
 		return (FALSE);
 
 	return (TRUE);
 }
 
 /*
 ** Work out what the interface is capable of with regard to
 ** system management and wake-on-lan, and stash the result in
 ** the adapter (has_manage, has_amt, wol) for later use.
 */
 static void
 em_get_wakeup(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct e1000_hw	*hw = &adapter->hw;
 	u16		eeprom_data = 0;
 	u16		apme_mask = EM_EEPROM_APME;
 	u16		nvm_word, pci_devid;
 
 	adapter->has_manage = e1000_enable_mng_pass_thru(hw);
 
 	switch (hw->mac.type) {
 	case e1000_82573:
 	case e1000_82583:
 		adapter->has_amt = TRUE;
 		/* Falls thru */
 	case e1000_82571:
 	case e1000_82572:
 	case e1000_80003es2lan:
 		/* Function 1 reads the port B word, anything else port A */
 		nvm_word = (hw->bus.func == 1) ?
 		    NVM_INIT_CONTROL3_PORT_B : NVM_INIT_CONTROL3_PORT_A;
 		e1000_read_nvm(hw, nvm_word, 1, &eeprom_data);
 		break;
 	case e1000_ich8lan:
 	case e1000_ich9lan:
 	case e1000_ich10lan:
 	case e1000_pchlan:
 	case e1000_pch2lan:
 		/* These take the setting from the WUC register instead */
 		apme_mask = E1000_WUC_APME;
 		adapter->has_amt = TRUE;
 		eeprom_data = E1000_READ_REG(hw, E1000_WUC);
 		break;
 	default:
 		e1000_read_nvm(hw,
 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
 		break;
 	}
 
 	if (eeprom_data & apme_mask)
 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
 
 	/*
 	 * The eeprom settings are known now; apply the special cases
 	 * where the eeprom may be wrong or the board won't support
 	 * wake on lan on a particular port.
 	 */
 	pci_devid = pci_get_device(dev);
 	switch (pci_devid) {
 	case E1000_DEV_ID_82571EB_FIBER:
 		/*
 		 * Dual fiber: wake events are only supported on port A,
 		 * whatever the eeprom says.
 		 */
 		if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
 			adapter->wol = 0;
 		break;
 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
 		/* Quad port adapter: WoL stays enabled on port A only */
 		if (global_quad_port_a != 0)
 			adapter->wol = 0;
 		/* Wrap the counter so further quad port adapters work too */
 		global_quad_port_a++;
 		if (global_quad_port_a == 4)
 			global_quad_port_a = 0;
 		break;
 	default:
 		break;
 	}
 }
 
 
 /*
  * Enable PCI Wake On Lan capability
  *
  * Programs the adapter's wakeup registers from adapter->wol and the
  * interface's enabled WOL capabilities, then sets or clears the PME
  * bits in the PCI power-management status register. Returns early,
  * without requesting PME, when the device has no power-management
  * capability or when em_enable_phy_wakeup() reports a failure.
  */
 static void
 em_enable_wakeup(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct ifnet	*ifp = adapter->ifp;
 	u32		pmc, ctrl, ctrl_ext, rctl;
 	u16     	status;
 
 	/* pmc receives the offset of the power-management capability */
 	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
 		return;
 
 	/* Advertise the wakeup capability */
 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 
 	/* ICH8/9/10 and PCH parts get two shared-code workarounds */
 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
 	    (adapter->hw.mac.type == e1000_pchlan) ||
 	    (adapter->hw.mac.type == e1000_ich9lan) ||
 	    (adapter->hw.mac.type == e1000_ich10lan)) {
 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
 		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
 	}
 
 	/* Keep the laser running on Fiber adapters */
 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
 	}
 
 	/*
 	** Determine type of Wakeup: note that wol
 	** is set with all bits on by default.
 	*/
 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
 		adapter->wol &= ~E1000_WUFC_MAG;
 
 	/* Multicast wakeup also sets RCTL.MPE; otherwise drop the MC bit */
 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
 		adapter->wol &= ~E1000_WUFC_MC;
 	else {
 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		rctl |= E1000_RCTL_MPE;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 	}
 
 	/*
 	 * pchlan/pch2lan program the wakeup through the PHY; a non-zero
 	 * return abandons the remaining setup. Every other MAC type
 	 * writes WUC and WUFC directly.
 	 */
 	if ((adapter->hw.mac.type == e1000_pchlan) ||
 	    (adapter->hw.mac.type == e1000_pch2lan)) {
 		if (em_enable_phy_wakeup(adapter))
 			return;
 	} else {
 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
 	}
 
 	if (adapter->hw.phy.type == e1000_phy_igp_3)
 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
 
         /* Request PME */
         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
 	/* Clear both PME bits, then set them again only if WOL is enabled */
 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
 	if (ifp->if_capenable & IFCAP_WOL)
 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
 
 	return;
 }
 
 /*
 ** WOL in the newer chipset interfaces (pchlan)
 ** requires things to be copied into the phy
 **
 ** Mirrors the MAC receive-address and multicast-table registers and
 ** the relevant RCTL/CTRL bits into the PHY, writes the wakeup
 ** control/filter registers in both MAC and PHY, then sets the enable
 ** and host-wakeup bits in the PHY's wakeup-enable register.
 **
 ** Returns 0 on success, otherwise the non-zero value returned by the
 ** failing PHY acquire, read or write. Results of the earlier
 ** e1000_write_phy_reg()/e1000_read_phy_reg() calls are not checked.
 */
 static int
 em_enable_phy_wakeup(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 mreg, ret = 0;
 	u16 preg;
 
 	/* copy MAC RARs to PHY RARs */
 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
 
 	/* copy MAC MTA to PHY MTA */
 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
 		/* each 32-bit MTA entry is written as two 16-bit halves */
 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
 		    (u16)((mreg >> 16) & 0xFFFF));
 	}
 
 	/* configure PHY Rx Control register */
 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
 	mreg = E1000_READ_REG(hw, E1000_RCTL);
 	if (mreg & E1000_RCTL_UPE)
 		preg |= BM_RCTL_UPE;
 	if (mreg & E1000_RCTL_MPE)
 		preg |= BM_RCTL_MPE;
 	/* replace the PHY's MO field with the MAC's */
 	preg &= ~(BM_RCTL_MO_MASK);
 	if (mreg & E1000_RCTL_MO_3)
 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
 				<< BM_RCTL_MO_SHIFT);
 	if (mreg & E1000_RCTL_BAM)
 		preg |= BM_RCTL_BAM;
 	if (mreg & E1000_RCTL_PMCF)
 		preg |= BM_RCTL_PMCF;
 	/* RFCE comes from CTRL rather than RCTL */
 	mreg = E1000_READ_REG(hw, E1000_CTRL);
 	if (mreg & E1000_CTRL_RFCE)
 		preg |= BM_RCTL_RFCE;
 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
 
 	/* enable PHY wakeup in MAC register */
 	E1000_WRITE_REG(hw, E1000_WUC,
 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
 
 	/* configure and enable PHY wakeup in PHY registers */
 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
 
 	/* activate PHY wakeup */
 	ret = hw->phy.ops.acquire(hw);
 	if (ret) {
 		/* nothing was acquired, so there is nothing to release */
 		printf("Could not acquire PHY\n");
 		return ret;
 	}
 	/* select the wakeup-enable page, then read-modify-write its register */
 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
 	if (ret) {
 		printf("Could not read PHY page 769\n");
 		goto out;
 	}
 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
 	if (ret)
 		printf("Could not set PHY Host Wakeup bit\n");
 out:
 	/* release pairs with the successful acquire above */
 	hw->phy.ops.release(hw);
 
 	return ret;
 }
 
 /*
  * LED on/off callback.
  *
  *   arg   - the adapter (struct adapter *).
  *   onoff - non-zero sets up and lights the LED; zero turns it off
  *           and undoes the LED setup.
  *
  * The hardware calls are made with the core lock held.
  */
 static void
 em_led_func(void *arg, int onoff)
 {
 	struct adapter	*adapter = arg;
  
 	EM_CORE_LOCK(adapter);
 	if (onoff) {
 		e1000_setup_led(&adapter->hw);
 		e1000_led_on(&adapter->hw);
 	} else {
 		e1000_led_off(&adapter->hw);
 		e1000_cleanup_led(&adapter->hw);
 	}
 	EM_CORE_UNLOCK(adapter);
+}
+
+/*
+** Disable the L0S and L1 LINK states
+**
+** Only acts on 82573, 82574 and 82583 parts; any other MAC type
+** returns immediately. Also returns without changes when the PCI
+** Express capability cannot be found or when the Link Capabilities
+** register reports no ASPM support. Otherwise the two low-order
+** bits of the Link Control register are cleared.
+*/
+static void
+em_disable_aspm(struct adapter *adapter)
+{
+	int		base, reg;
+	u16		link_cap,link_ctrl;
+	device_t	dev = adapter->dev;
+
+	switch (adapter->hw.mac.type) {
+		case e1000_82573:
+		case e1000_82574:
+		case e1000_82583:
+			break;
+		default:
+			return;
+	}
+	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
+		return;
+	reg = base + PCIR_EXPRESS_LINK_CAP;
+	link_cap = pci_read_config(dev, reg, 2);
+	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
+		return;
+	reg = base + PCIR_EXPRESS_LINK_CTL;
+	link_ctrl = pci_read_config(dev, reg, 2);
+	link_ctrl &= 0xFFFC; /* clear bits 0 and 1 (the two low-order bits) */
+	pci_write_config(dev, reg, link_ctrl, 2);
+	return;
 }
 
 /**********************************************************************
  *
  *  Update the board statistics counters.
  *
  *  Each hardware statistics register is read once and added to the
  *  matching field of adapter->stats; the interface's collision and
  *  error counters are then recomputed from those accumulated totals.
  *
  **********************************************************************/
 static void
 em_update_stats_counters(struct adapter *adapter)
 {
 	struct ifnet   *ifp;
 
 	/* Symbol/sequence errors are only sampled on copper or with link up */
 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
 	}
 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
 
 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
 	/*
 	** For watchdog management we need to know if we have been
 	** paused during the last interval, so capture that here.
 	*/
 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
 	adapter->stats.xoffrxc += adapter->pause_frames;
 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
 
 	/* For the 64-bit byte counters the low dword must be read first. */
 	/* Both registers clear on the read of the high dword */
 
 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
 
 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
 
 	/* NOTE(review): only the high dwords (TORH/TOTH) are read here */
 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
 
 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
 
 	/* Interrupt Counts */
 
 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
 
 	/* These counters are only read on 82543 and later MAC types */
 	if (adapter->hw.mac.type >= e1000_82543) {
 		adapter->stats.algnerrc += 
 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
 		adapter->stats.rxerrc += 
 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
 		adapter->stats.tncrs += 
 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
 		adapter->stats.cexterr += 
 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
 		adapter->stats.tsctc += 
 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
 		adapter->stats.tsctfc += 
 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
 	}
 	ifp = adapter->ifp;
 
 	/* The ifnet counters below are assigned, not incremented */
 	ifp->if_collisions = adapter->stats.colc;
 
 	/* Rx Errors */
 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
 	    adapter->stats.ruc + adapter->stats.roc +
 	    adapter->stats.mpc + adapter->stats.cexterr;
 
 	/* Tx Errors */
 	ifp->if_oerrors = adapter->stats.ecol +
 	    adapter->stats.latecol + adapter->watchdog_events;
 }
 
 /*
  * Sysctl handler exposing one 32-bit device register read-only.
  * oid_arg1 carries the adapter and oid_arg2 the register to read;
  * the value is fetched on every request and handed to
  * sysctl_handle_int().
  */
 static int
 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = oidp->oid_arg1;
 	u_int reg_value;
 
 	reg_value = E1000_READ_REG(&sc->hw, oidp->oid_arg2);
 
 	return (sysctl_handle_int(oidp, &reg_value, 0, req));
 }
 
 /*
  * Add sysctl variables, one per statistic, to the system.
  */
 static void
 em_add_hw_stats(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
 	struct e1000_hw_stats *stats = &adapter->stats;
 
 	struct sysctl_oid *stat_node, *queue_node, *int_node;
 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
 
 #define QUEUE_NAME_LEN 32
 	char namebuf[QUEUE_NAME_LEN];
 	
 	/* Driver Statistics */
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
 			CTLFLAG_RD, &adapter->link_irq, 0,
 			"Link MSIX IRQ Handled");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
 			 "Std mbuf failed");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
 			 "Std mbuf cluster failed");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
 			CTLFLAG_RD, &adapter->dropped_pkts,
 			"Driver dropped packets");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
 			"Driver tx dma failure in xmit");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
 			CTLFLAG_RD, &adapter->rx_overruns,
 			"RX overruns");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
 			CTLFLAG_RD, &adapter->watchdog_events,
 			"Watchdog timeouts");
 	
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
 			CTLFLAG_RD, adapter, E1000_CTRL,
 			em_sysctl_reg_handler, "IU",
 			"Device Control Register");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
 			CTLFLAG_RD, adapter, E1000_RCTL,
 			em_sysctl_reg_handler, "IU",
 			"Receiver Control Register");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
 			"Flow Control High Watermark");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
 			"Flow Control Low Watermark");
 
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
 				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 				em_sysctl_reg_handler, "IU",
  				"Transmit Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
 				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
 				em_sysctl_reg_handler, "IU",
  				"Transmit Descriptor Tail");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
 				CTLFLAG_RD, &txr->tx_irq,
 				"Queue MSI-X Transmit Interrupts");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
 				CTLFLAG_RD, &txr->no_desc_avail,
 				"Queue No Descriptor Available");
 		
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
 				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
 				em_sysctl_reg_handler, "IU",
 				"Receive Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
 				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
 				em_sysctl_reg_handler, "IU",
 				"Receive Descriptor Tail");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
 				CTLFLAG_RD, &rxr->rx_irq,
 				"Queue MSI-X Receive Interrupts");
 	}
 
 	/* MAC stats get their own sub node */
 
 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
 				    CTLFLAG_RD, NULL, "Statistics");
 	stat_list = SYSCTL_CHILDREN(stat_node);
 
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
 			CTLFLAG_RD, &stats->ecol,
 			"Excessive collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
 			CTLFLAG_RD, &stats->scc,
 			"Single collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
 			CTLFLAG_RD, &stats->mcc,
 			"Multiple collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
 			CTLFLAG_RD, &stats->latecol,
 			"Late collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
 			CTLFLAG_RD, &stats->colc,
 			"Collision Count");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
 			CTLFLAG_RD, &adapter->stats.symerrs,
 			"Symbol Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
 			CTLFLAG_RD, &adapter->stats.sec,
 			"Sequence Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
 			CTLFLAG_RD, &adapter->stats.dc,
 			"Defer Count");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
 			CTLFLAG_RD, &adapter->stats.mpc,
 			"Missed Packets");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
 			CTLFLAG_RD, &adapter->stats.rnbc,
 			"Receive No Buffers");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
 			CTLFLAG_RD, &adapter->stats.ruc,
 			"Receive Undersize");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
 			CTLFLAG_RD, &adapter->stats.rfc,
 			"Fragmented Packets Received ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
 			CTLFLAG_RD, &adapter->stats.roc,
 			"Oversized Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
 			CTLFLAG_RD, &adapter->stats.rjc,
 			"Recevied Jabber");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
 			CTLFLAG_RD, &adapter->stats.rxerrc,
 			"Receive Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
 			CTLFLAG_RD, &adapter->stats.crcerrs,
 			"CRC errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
 			CTLFLAG_RD, &adapter->stats.algnerrc,
 			"Alignment Errors");
 	/* On 82575 these are collision counts */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
 			CTLFLAG_RD, &adapter->stats.cexterr,
 			"Collision/Carrier extension errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
 			CTLFLAG_RD, &adapter->stats.xonrxc,
 			"XON Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
 			CTLFLAG_RD, &adapter->stats.xontxc,
 			"XON Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
 			CTLFLAG_RD, &adapter->stats.xoffrxc,
 			"XOFF Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
 			CTLFLAG_RD, &adapter->stats.xofftxc,
 			"XOFF Transmitted");
 
 	/* Packet Reception Stats */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.tpr,
 			"Total Packets Received ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.gprc,
 			"Good Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.bprc,
 			"Broadcast Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.mprc,
 			"Multicast Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
 			CTLFLAG_RD, &adapter->stats.prc64,
 			"64 byte frames received ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
 			CTLFLAG_RD, &adapter->stats.prc127,
 			"65-127 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
 			CTLFLAG_RD, &adapter->stats.prc255,
 			"128-255 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
 			CTLFLAG_RD, &adapter->stats.prc511,
 			"256-511 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
 			CTLFLAG_RD, &adapter->stats.prc1023,
 			"512-1023 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
 			CTLFLAG_RD, &adapter->stats.prc1522,
 			"1023-1522 byte frames received");
  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
  			CTLFLAG_RD, &adapter->stats.gorc, 
  			"Good Octets Received"); 
 
 	/* Packet Transmission Stats */
  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
  			CTLFLAG_RD, &adapter->stats.gotc, 
  			"Good Octets Transmitted"); 
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.tpt,
 			"Total Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.gptc,
 			"Good Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.bptc,
 			"Broadcast Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.mptc,
 			"Multicast Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
 			CTLFLAG_RD, &adapter->stats.ptc64,
 			"64 byte frames transmitted ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
 			CTLFLAG_RD, &adapter->stats.ptc127,
 			"65-127 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
 			CTLFLAG_RD, &adapter->stats.ptc255,
 			"128-255 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
 			CTLFLAG_RD, &adapter->stats.ptc511,
 			"256-511 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
 			CTLFLAG_RD, &adapter->stats.ptc1023,
 			"512-1023 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
 			CTLFLAG_RD, &adapter->stats.ptc1522,
 			"1024-1522 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
 			CTLFLAG_RD, &adapter->stats.tsctc,
 			"TSO Contexts Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
 			CTLFLAG_RD, &adapter->stats.tsctfc,
 			"TSO Contexts Failed");
 
 
 	/* Interrupt Stats */
 
 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
 	int_list = SYSCTL_CHILDREN(int_node);
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
 			CTLFLAG_RD, &adapter->stats.iac,
 			"Interrupt Assertion Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
 			CTLFLAG_RD, &adapter->stats.icrxptc,
 			"Interrupt Cause Rx Pkt Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
 			CTLFLAG_RD, &adapter->stats.icrxatc,
 			"Interrupt Cause Rx Abs Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
 			CTLFLAG_RD, &adapter->stats.ictxptc,
 			"Interrupt Cause Tx Pkt Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
 			CTLFLAG_RD, &adapter->stats.ictxatc,
 			"Interrupt Cause Tx Abs Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
 			CTLFLAG_RD, &adapter->stats.ictxqec,
 			"Interrupt Cause Tx Queue Empty Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
 			"Interrupt Cause Tx Queue Min Thresh Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
 			"Interrupt Cause Rx Desc Min Thresh Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
 			CTLFLAG_RD, &adapter->stats.icrxoc,
 			"Interrupt Cause Receiver Overrun Count");
 }
 
 /**********************************************************************
  *
  *  This routine provides a way to dump out the adapter eeprom,
  *  often a useful debug/service tool. This only dumps the first
  *  32 words, stuff that matters is in that extent.
  *
  **********************************************************************/
 static int
 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	int error;
 	int result;
 
 	result = -1;
 	error = sysctl_handle_int(oidp, &result, 0, req);
 
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * This value will cause a hex dump of the
 	 * first 32 16-bit words of the EEPROM to
 	 * the screen.
 	 */
 	if (result == 1) {
 		adapter = (struct adapter *)arg1;
 		em_print_nvm_info(adapter);
         }
 
 	return (error);
 }
 
 /*
  * Print the first 32 16-bit NVM words as a hex table, eight words per
  * row, each row prefixed with its offset label.  A word that cannot be
  * read is shown as "????" instead of whatever happened to be left in
  * the local buffer.
  */
 static void
 em_print_nvm_info(struct adapter *adapter)
 {
 	u16	eeprom_data;
 	int	i;
 
 	/* Its a bit crude, but it gets the job done */
 	printf("\nInterface EEPROM Dump:\n");
 	printf("Offset\n0x0000  ");
 	for (i = 0; i < 32; i++) {
 		/* Every eighth word starts a new row with its offset label */
 		if (i != 0 && (i % 8) == 0)
 			printf("\n0x00%x0  ", i / 8);
 		/* Non-zero return means the word was not read */
 		if (e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data) != 0)
 			printf("???? ");
 		else
 			printf("%04x ", eeprom_data);
 	}
 	printf("\n");
 }
 
 static int
 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
 {
 	struct em_int_delay_info *info;
 	struct adapter *adapter;
 	u32 regval;
 	int error, usecs, ticks;
 
 	info = (struct em_int_delay_info *)arg1;
 	usecs = info->value;
 	error = sysctl_handle_int(oidp, &usecs, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
 		return (EINVAL);
 	info->value = usecs;
 	ticks = EM_USECS_TO_TICKS(usecs);
 
 	adapter = info->adapter;
 	
 	EM_CORE_LOCK(adapter);
 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
 	regval = (regval & ~0xffff) | (ticks & 0xffff);
 	/* Handle a few special cases. */
 	switch (info->offset) {
 	case E1000_RDTR:
 		break;
 	case E1000_TIDV:
 		if (ticks == 0) {
 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
 			/* Don't write 0 into the TIDV register. */
 			regval++;
 		} else
 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
 		break;
 	}
 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
 	EM_CORE_UNLOCK(adapter);
 	return (0);
 }
 
 /*
  * Fill in an interrupt-delay descriptor and publish it as a read/write
  * integer sysctl under the device's sysctl tree, serviced by
  * em_sysctl_int_delay().
  */
 static void
 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
 	const char *description, struct em_int_delay_info *info,
 	int offset, int value)
 {
 	struct sysctl_ctx_list	*sctx;
 	struct sysctl_oid_list	*parent;
 
 	info->value = value;
 	info->offset = offset;
 	info->adapter = adapter;
 
 	sctx = device_get_sysctl_ctx(adapter->dev);
 	parent = SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev));
 	SYSCTL_ADD_PROC(sctx, parent, OID_AUTO, name,
 	    CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I",
 	    description);
 }
 
 /*
  * Seed *limit with its default and expose it as a read/write integer
  * sysctl under the device's sysctl tree.
  */
 static void
 em_add_rx_process_limit(struct adapter *adapter, const char *name,
 	const char *description, int *limit, int value)
 {
 	struct sysctl_ctx_list	*sctx;
 	struct sysctl_oid_list	*parent;
 
 	*limit = value;
 
 	sctx = device_get_sysctl_ctx(adapter->dev);
 	parent = SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev));
 	SYSCTL_ADD_INT(sctx, parent, OID_AUTO, name,
 	    CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
 }
 
 static void
 em_set_flow_cntrl(struct adapter *adapter, const char *name,
 	const char *description, int *limit, int value)
 {
 	*limit = value;
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
 }
 
 static int
 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	int error;
 	int result;
 
 	result = -1;
 	error = sysctl_handle_int(oidp, &result, 0, req);
 
 	if (error || !req->newptr)
 		return (error);
 
 	if (result == 1) {
 		adapter = (struct adapter *)arg1;
 		em_print_debug_info(adapter);
         }
 
 	return (error);
 }
 
 /*
 ** This routine is meant to be fluid, add whatever is
 ** needed for debugging a problem.  -jfv
 **
 ** Prints the interface run state, the hardware head/tail
 ** pointers of TX/RX queue 0 and a few software counters
 ** of the first TX and RX ring.
 */
 static void
 em_print_debug_info(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	/* Both halves form one status line, so neither ends it early */
 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
 		printf("Interface is RUNNING ");
 	else
 		printf("Interface is NOT RUNNING ");
 	/*
 	 * IFF_DRV_OACTIVE is set while the hardware output queue is
 	 * full, i.e. the transmitter cannot take more work, so that
 	 * is the INACTIVE case.
 	 */
 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
 		printf("and INACTIVE\n");
 	else
 		printf("and ACTIVE\n");
 
 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
 	device_printf(dev, "TX descriptors avail = %d\n",
 	    txr->tx_avail);
 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
 	    txr->no_desc_avail);
 	device_printf(dev, "RX discarded packets = %ld\n",
 	    rxr->rx_discarded);
 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
 }
Index: projects/binutils-2.17/sys/dev/e1000/if_igb.c
===================================================================
--- projects/binutils-2.17/sys/dev/e1000/if_igb.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/e1000/if_igb.c	(revision 215830)
@@ -1,5439 +1,5501 @@
 /******************************************************************************
 
   Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_device_polling.h"
 #include "opt_inet.h"
 #include "opt_altq.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #if __FreeBSD_version >= 800000
 #include <sys/buf_ring.h>
 #endif
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 #include <sys/pcpu.h>
 #include <sys/smp.h>
 #include <machine/smp.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_lro.h>
 #include <netinet/udp.h>
 
 #include <machine/in_cksum.h>
 #include <dev/led/led.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include "e1000_api.h"
 #include "e1000_82575.h"
 #include "if_igb.h"
 
 /*********************************************************************
  *  Set this to one to display debug statistics
  *********************************************************************/
 int	igb_display_debug_stats = 0;
 
 /*********************************************************************
  *  Driver version:
  *********************************************************************/
-char igb_driver_version[] = "version - 2.0.4";
+char igb_driver_version[] = "version - 2.0.7";
 
 
 /*********************************************************************
  *  PCI Device ID Table
  *
  *  Used by probe to select devices to load on
  *  Last field stores an index into e1000_strings
  *  Last entry must be all 0s
  *
  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
  *********************************************************************/
 
 /*
  * Every entry below matches on vendor 0x8086 plus a device ID, wildcards
  * the subvendor/subdevice with PCI_ANY_ID and uses string index 0.
  * igb_probe() walks the table until it reaches the all-zero terminator.
  */
 static igb_vendor_info_t igb_vendor_info_array[] =
 {
 	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
+						PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{ 0, 0, 0, 0, 0}
 };
 
 /*********************************************************************
  *  Table of branding strings for all supported NICs.
  *********************************************************************/
 
 /* Indexed by the last field of an igb_vendor_info_array entry. */
 static char *igb_strings[] = {
 	"Intel(R) PRO/1000 Network Connection"
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 /* Newbus device methods, wired up in igb_methods[] */
 static int	igb_probe(device_t);
 static int	igb_attach(device_t);
 static int	igb_detach(device_t);
 static int	igb_shutdown(device_t);
 static int	igb_suspend(device_t);
 static int	igb_resume(device_t);
 static void	igb_start(struct ifnet *);
 static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
 /* These transmit entry points are only built for FreeBSD 8.0 and later */
 #if __FreeBSD_version >= 800000
 static int	igb_mq_start(struct ifnet *, struct mbuf *);
 static int	igb_mq_start_locked(struct ifnet *,
 		    struct tx_ring *, struct mbuf *);
 static void	igb_qflush(struct ifnet *);
 #endif
 static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
 static void	igb_init(void *);
 static void	igb_init_locked(struct adapter *);
 static void	igb_stop(void *);
 static void	igb_media_status(struct ifnet *, struct ifmediareq *);
 static int	igb_media_change(struct ifnet *);
 static void	igb_identify_hardware(struct adapter *);
 static int	igb_allocate_pci_resources(struct adapter *);
 static int	igb_allocate_msix(struct adapter *);
 static int	igb_allocate_legacy(struct adapter *);
 static int	igb_setup_msix(struct adapter *);
 static void	igb_free_pci_resources(struct adapter *);
 static void	igb_local_timer(void *);
 static void	igb_reset(struct adapter *);
 static int	igb_setup_interface(device_t, struct adapter *);
 static int	igb_allocate_queues(struct adapter *);
 static void	igb_configure_queues(struct adapter *);
 
 /* Transmit ring setup and teardown */
 static int	igb_allocate_transmit_buffers(struct tx_ring *);
 static void	igb_setup_transmit_structures(struct adapter *);
 static void	igb_setup_transmit_ring(struct tx_ring *);
 static void	igb_initialize_transmit_units(struct adapter *);
 static void	igb_free_transmit_structures(struct adapter *);
 static void	igb_free_transmit_buffers(struct tx_ring *);
 
 /* Receive ring setup and teardown */
 static int	igb_allocate_receive_buffers(struct rx_ring *);
 static int	igb_setup_receive_structures(struct adapter *);
 static int	igb_setup_receive_ring(struct rx_ring *);
 static void	igb_initialize_receive_units(struct adapter *);
 static void	igb_free_receive_structures(struct adapter *);
 static void	igb_free_receive_buffers(struct rx_ring *);
 static void	igb_free_receive_ring(struct rx_ring *);
 
 static void	igb_enable_intr(struct adapter *);
 static void	igb_disable_intr(struct adapter *);
 static void	igb_update_stats_counters(struct adapter *);
 static bool	igb_txeof(struct tx_ring *);
 
 static __inline	void igb_rx_discard(struct rx_ring *, int);
 static __inline void igb_rx_input(struct rx_ring *,
 		    struct ifnet *, struct mbuf *, u32);
 
 static bool	igb_rxeof(struct igb_queue *, int, int *);
 static void	igb_rx_checksum(u32, struct mbuf *, u32);
 static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
 static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
 static void	igb_set_promisc(struct adapter *);
 static void	igb_disable_promisc(struct adapter *);
 static void	igb_set_multi(struct adapter *);
 static void	igb_update_link_status(struct adapter *);
 static void	igb_refresh_mbufs(struct rx_ring *, int);
 
 /* VLAN event handlers (registered in igb_attach) and VLAN hardware setup */
 static void	igb_register_vlan(void *, struct ifnet *, u16);
 static void	igb_unregister_vlan(void *, struct ifnet *, u16);
 static void	igb_setup_vlan_hw_support(struct adapter *);
 
 static int	igb_xmit(struct tx_ring *, struct mbuf **);
 static int	igb_dma_malloc(struct adapter *, bus_size_t,
 		    struct igb_dma_alloc *, int);
 static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
 static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
 static void	igb_print_nvm_info(struct adapter *);
 static int 	igb_is_valid_ether_addr(u8 *);
 static void     igb_add_hw_stats(struct adapter *);
 
 /* Statistics handling for the VF (e1000_vfadapt) case */
 static void	igb_vf_init_stats(struct adapter *);
 static void	igb_update_vf_stats_counters(struct adapter *);
 
 /* Management and WOL Support */
 static void	igb_init_manageability(struct adapter *);
 static void	igb_release_manageability(struct adapter *);
 static void     igb_get_hw_control(struct adapter *);
 static void     igb_release_hw_control(struct adapter *);
 static void     igb_enable_wakeup(device_t);
 static void     igb_led_func(void *, int);
 
 static int	igb_irq_fast(void *);
 static void	igb_add_rx_process_limit(struct adapter *, const char *,
 		    const char *, int *, int);
 static void	igb_handle_que(void *context, int pending);
 static void	igb_handle_link(void *context, int pending);
 
 /* These are MSIX only irq handlers */
 static void	igb_msix_que(void *);
 static void	igb_msix_link(void *);
 
 #ifdef DEVICE_POLLING
 static poll_handler_t igb_poll;
 #endif /* DEVICE_POLLING */
 
 /*********************************************************************
  *  FreeBSD Device Interface Entry Points
  *********************************************************************/
 
 /*
  * Device method table: binds each newbus device method to the driver
  * function implementing it.  The table ends with a {0, 0} entry.
  */
 static device_method_t igb_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, igb_probe),
 	DEVMETHOD(device_attach, igb_attach),
 	DEVMETHOD(device_detach, igb_detach),
 	DEVMETHOD(device_shutdown, igb_shutdown),
 	DEVMETHOD(device_suspend, igb_suspend),
 	DEVMETHOD(device_resume, igb_resume),
 	{0, 0}
 };
 
 /*
  * Driver description: name "igb", the method table above, and a softc
  * sized for one struct adapter per device instance.
  */
 static driver_t igb_driver = {
 	"igb", igb_methods, sizeof(struct adapter),
 };
 
 /* Register the driver on the pci bus and declare its module dependencies */
 static devclass_t igb_devclass;
 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
 MODULE_DEPEND(igb, pci, 1, 1, 1);
 MODULE_DEPEND(igb, ether, 1, 1, 1);
 
 /*********************************************************************
  *  Tunable default values.
  *********************************************************************/
 
 /* Descriptor defaults */
 /*
  * Requested RX/TX descriptor counts (hw.igb.rxd / hw.igb.txd).
  * igb_attach() validates them against the IGB_MIN_xXD/IGB_MAX_xXD
  * bounds and the IGB_DBA_ALIGN alignment and falls back to the
  * defaults when they do not pass.
  */
 static int igb_rxd = IGB_DEFAULT_RXD;
 static int igb_txd = IGB_DEFAULT_TXD;
 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
 TUNABLE_INT("hw.igb.txd", &igb_txd);
 
 /*
 ** AIM: Adaptive Interrupt Moderation
 ** which means that the interrupt rate
 ** is varied over time based on the
 ** traffic for that interrupt vector.
 ** Also exported read/write by igb_attach()
 ** as the per-device "enable_aim" sysctl.
 */
 static int igb_enable_aim = TRUE;
 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
 
 /*
  * MSIX should be the default for best performance,
  * but this allows it to be forced off for testing.
  * igb_attach() only takes the MSIX path when this is
  * non-zero and adapter->msix is greater than 1.
  */
 static int igb_enable_msix = 1;
 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
 
 /*
- * Header split has seemed to be beneficial in
- * many circumstances tested, however there have
- * been some stability issues, so the default is
- * off. 
- */
+** Tunable interrupt rate (hw.igb.max_interrupt_rate), default 8000.
+*/
+static int igb_max_interrupt_rate = 8000;
+TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
+
+/*
+** Header split causes the packet header to
+** be DMA'd to a separate mbuf from the payload.
+** This can have memory alignment benefits. But
+** another plus is that small packets often fit
+** into the header and thus use no cluster. It's
+** a very workload-dependent type of feature.
+*/
 /*
  * Header split enable (hw.igb.hdr_split), off by default.  This has to
  * be an int, not a bool: TUNABLE_INT stores a full int through the
  * pointer it is given, which would write past a narrower object.
  */
 static int igb_header_split = FALSE;
 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
 
 /*
 ** This will autoconfigure based on
 ** the number of CPUs if left at 0.
 */
 static int igb_num_queues = 0;
 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
 
 /*
  * How many packets rxeof tries to clean at a time.  igb_attach() uses
  * this as the initial value of the per-adapter "rx_processing_limit"
  * sysctl.
  */
 static int igb_rx_process_limit = 100;
 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
 
 /*
  * Flow control setting - default to FULL.  igb_attach() also exports
  * this variable read/write as the "flow_control" sysctl; it is a single
  * driver-wide variable, not per-adapter state.
  */
 static int igb_fc_setting = e1000_fc_full;
 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
 
-/*
-** Shadow VFTA table, this is needed because
-** the real filter table gets cleared during
-** a soft reset and the driver needs to be able
-** to repopulate it.
-*/
-static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
-
-
 /*********************************************************************
  *  Device identification routine
  *
  *  igb_probe determines if the driver should be loaded on
  *  adapter based on PCI vendor/device id of the adapter.  On a
  *  match the device description is set to the branding string
  *  followed by the driver version.
  *
  *  return BUS_PROBE_DEFAULT on success, positive on failure
  *********************************************************************/
 
 static int
 igb_probe(device_t dev)
 {
 	char		adapter_name[60];
 	uint16_t	pci_vendor_id = 0;
 	uint16_t	pci_device_id = 0;
 	uint16_t	pci_subvendor_id = 0;
 	uint16_t	pci_subdevice_id = 0;
 	igb_vendor_info_t *ent;
 
 	INIT_DEBUGOUT("igb_probe: begin");
 
 	/* Cheap reject before walking the table */
 	pci_vendor_id = pci_get_vendor(dev);
 	if (pci_vendor_id != IGB_VENDOR_ID)
 		return (ENXIO);
 
 	pci_device_id = pci_get_device(dev);
 	pci_subvendor_id = pci_get_subvendor(dev);
 	pci_subdevice_id = pci_get_subdevice(dev);
 
 	/* The table is terminated by an entry whose vendor_id is 0 */
 	ent = igb_vendor_info_array;
 	while (ent->vendor_id != 0) {
 		if ((pci_vendor_id == ent->vendor_id) &&
 		    (pci_device_id == ent->device_id) &&
 
 		    ((pci_subvendor_id == ent->subvendor_id) ||
 		    (ent->subvendor_id == PCI_ANY_ID)) &&
 
 		    ((pci_subdevice_id == ent->subdevice_id) ||
 		    (ent->subdevice_id == PCI_ANY_ID))) {
 			/*
 			 * Bounded format: a longer branding or version
 			 * string is truncated instead of overrunning
 			 * the stack buffer.
 			 */
 			snprintf(adapter_name, sizeof(adapter_name), "%s %s",
 				igb_strings[ent->index],
 				igb_driver_version);
 			device_set_desc_copy(dev, adapter_name);
 			return (BUS_PROBE_DEFAULT);
 		}
 		ent++;
 	}
 
 	return (ENXIO);
 }
 
 /*********************************************************************
  *  Device initialization routine
  *
  *  The attach entry point is called when the driver is being loaded.
  *  This routine identifies the type of hardware, allocates all resources
  *  and initializes the hardware.
  *
  *  On any failure the resources acquired so far are released through
  *  the err_late/err_pci labels and a non-zero errno is returned.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 igb_attach(device_t dev)
 {
 	struct adapter	*adapter;
 	int		error = 0;
 	u16		eeprom_data;
 
 	INIT_DEBUGOUT("igb_attach: begin");
 
 	adapter = device_get_softc(dev);
 	adapter->dev = adapter->osdep.dev = dev;
 	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* SYSCTL stuff */
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    igb_sysctl_nvm_info, "I", "NVM Information");
 
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
 	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
 	    &igb_fc_setting, 0, "Flow Control");
 
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
 	    &igb_enable_aim, 1, "Interrupt Moderation");
 
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
 	igb_identify_hardware(adapter);
 
 	/* Setup PCI resources */
 	if (igb_allocate_pci_resources(adapter)) {
 		device_printf(dev, "Allocation of PCI resources failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	/* Do Shared Code initialization */
 	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
 		device_printf(dev, "Setup of Shared code failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	e1000_get_bus_info(&adapter->hw);
 
 	/* Sysctls for limiting the amount of work done in the taskqueue */
 	igb_add_rx_process_limit(adapter, "rx_processing_limit",
 	    "max number of rx packets to process", &adapter->rx_process_limit,
 	    igb_rx_process_limit);
 
 	/*
 	 * Validate number of transmit and receive descriptors. It
 	 * must not exceed hardware maximum, and must be multiple
 	 * of E1000_DBA_ALIGN.
 	 */
 	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
 	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
 		    IGB_DEFAULT_TXD, igb_txd);
 		adapter->num_tx_desc = IGB_DEFAULT_TXD;
 	} else
 		adapter->num_tx_desc = igb_txd;
 	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
 	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
 		    IGB_DEFAULT_RXD, igb_rxd);
 		adapter->num_rx_desc = IGB_DEFAULT_RXD;
 	} else
 		adapter->num_rx_desc = igb_rxd;
 
 	adapter->hw.mac.autoneg = DO_AUTO_NEG;
 	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
 	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 
 	/* Copper options */
 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
 		adapter->hw.phy.mdix = AUTO_ALL_MODES;
 		adapter->hw.phy.disable_polarity_correction = FALSE;
 		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
 	}
 
 	/*
 	 * Set the frame limits assuming
 	 * standard ethernet sized frames.
 	 */
 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
 	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
 
 	/*
 	** Allocate and Setup Queues
 	*/
 	if (igb_allocate_queues(adapter)) {
 		error = ENOMEM;
 		goto err_pci;
 	}
 
 	/*
 	 * Allocate the appropriate stats memory.  The VF baseline is
 	 * only initialized once the allocation is known to have
 	 * succeeded.
 	 */
 	if (adapter->hw.mac.type == e1000_vfadapt)
 		adapter->stats = malloc(sizeof(struct e1000_vf_stats),
 		    M_DEVBUF, M_NOWAIT | M_ZERO);
 	else
 		adapter->stats = malloc(sizeof(struct e1000_hw_stats),
 		    M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (adapter->stats == NULL) {
 		device_printf(dev, "Can not allocate stats memory\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 	if (adapter->hw.mac.type == e1000_vfadapt)
 		igb_vf_init_stats(adapter);
 
 	/* Allocate multicast array memory. */
 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
 	if (adapter->mta == NULL) {
 		device_printf(dev, "Can not allocate multicast setup array\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/*
 	** Start from a known state, this is
 	** important in reading the nvm and
 	** mac from that.
 	*/
 	e1000_reset_hw(&adapter->hw);
 
 	/* Make sure we have a good EEPROM before we read from it */
 	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
 		/*
 		** Some PCI-E parts fail the first check due to
 		** the link being in sleep state, call it again,
 		** if it fails a second time its a real issue.
 		*/
 		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
 			device_printf(dev,
 			    "The EEPROM Checksum Is Not Valid\n");
 			error = EIO;
 			goto err_late;
 		}
 	}
 
 	/*
 	** Copy the permanent MAC address out of the EEPROM
 	*/
 	if (e1000_read_mac_addr(&adapter->hw) < 0) {
 		device_printf(dev, "EEPROM read error while reading MAC"
 		    " address\n");
 		error = EIO;
 		goto err_late;
 	}
 	/* Check its sanity */
 	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
 		device_printf(dev, "Invalid MAC address\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/* 
 	** Configure Interrupts
 	*/
 	if ((adapter->msix > 1) && (igb_enable_msix))
 		error = igb_allocate_msix(adapter);
 	else /* MSI or Legacy */
 		error = igb_allocate_legacy(adapter);
 	if (error)
 		goto err_late;
 
 	/*
 	 * Setup OS specific network interface.  'error' is still 0
 	 * here, so it must be set explicitly; otherwise attach would
 	 * report success after tearing everything down.
 	 */
 	if (igb_setup_interface(dev, adapter) != 0) {
 		error = ENXIO;
 		goto err_late;
 	}
 
 	/* Now get a good starting state */
 	igb_reset(adapter);
 
 	/* Initialize statistics */
 	igb_update_stats_counters(adapter);
 
 	adapter->hw.mac.get_link_status = 1;
 	igb_update_link_status(adapter);
 
 	/* Indicate SOL/IDER usage */
 	if (e1000_check_reset_block(&adapter->hw))
 		device_printf(dev,
 		    "PHY reset is blocked due to SOL/IDER session.\n");
 
 	/* Determine if we have to control management hardware */
 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
 
 	/*
 	 * Setup Wake-on-Lan
 	 */
 	/* APME bit in EEPROM is mapped to WUC.APME */
 	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
 	if (eeprom_data)
 		adapter->wol = E1000_WUFC_MAG;
 
 	/* Register for VLAN events */
 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 
 	igb_add_hw_stats(adapter);
 
 	/* Tell the stack that the interface is not active */
 	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	adapter->led_dev = led_create(igb_led_func, adapter,
 	    device_get_nameunit(dev));
 
 	INIT_DEBUGOUT("igb_attach: end");
 
 	return (0);
 
 err_late:
 	igb_free_transmit_structures(adapter);
 	igb_free_receive_structures(adapter);
 	igb_release_hw_control(adapter);
 	if (adapter->ifp != NULL)
 		if_free(adapter->ifp);
 err_pci:
 	igb_free_pci_resources(adapter);
 	/*
 	 * Either buffer may still be unallocated (NULL) when we get
 	 * here; free(9) accepts that.  No stats sysctls have been
 	 * registered yet on any path that reaches this label, so
 	 * nothing else refers to the stats block.
 	 */
 	free(adapter->mta, M_DEVBUF);
 	free(adapter->stats, M_DEVBUF);
 	adapter->stats = NULL;
 	IGB_CORE_LOCK_DESTROY(adapter);
 
 	return (error);
 }
 
 /*********************************************************************
  *  Device removal routine
  *
  *  The detach entry point is called when the driver is being removed.
  *  This routine stops the adapter and deallocates all the resources
  *  that were allocated for driver operation.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 igb_detach(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct ifnet	*ifp = adapter->ifp;
 
 	INIT_DEBUGOUT("igb_detach: begin");
 
 	/* Make sure VLANS are not using driver */
 	if (adapter->ifp->if_vlantrunk != NULL) {
 		device_printf(dev,"Vlan in use, detach first\n");
 		return (EBUSY);
 	}
 
 	if (adapter->led_dev != NULL)
 		led_destroy(adapter->led_dev);
 
 #ifdef DEVICE_POLLING
 	if (ifp->if_capenable & IFCAP_POLLING)
 		ether_poll_deregister(ifp);
 #endif
 
 	IGB_CORE_LOCK(adapter);
 	adapter->in_detach = 1;
 	igb_stop(adapter);
 	IGB_CORE_UNLOCK(adapter);
 
 	e1000_phy_hw_reset(&adapter->hw);
 
 	/* Give control back to firmware */
 	igb_release_manageability(adapter);
 	igb_release_hw_control(adapter);
 
 	if (adapter->wol) {
 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
 		igb_enable_wakeup(dev);
 	}
 
 	/* Unregister VLAN events */
 	if (adapter->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
 	if (adapter->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
 
 	ether_ifdetach(adapter->ifp);
 
 	callout_drain(&adapter->timer);
 
 	igb_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	if_free(ifp);
 
 	igb_free_transmit_structures(adapter);
 	igb_free_receive_structures(adapter);
 	free(adapter->mta, M_DEVBUF);
 
 	IGB_CORE_LOCK_DESTROY(adapter);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Shutdown entry point
  *
  **********************************************************************/
 
 static int
 igb_shutdown(device_t dev)
 {
 	return igb_suspend(dev);
 }
 
 /*
  * Suspend/resume device methods.
  */
 static int
 igb_suspend(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
 
 	IGB_CORE_LOCK(adapter);
 
 	igb_stop(adapter);
 
         igb_release_manageability(adapter);
 	igb_release_hw_control(adapter);
 
         if (adapter->wol) {
                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                 igb_enable_wakeup(dev);
         }
 
 	IGB_CORE_UNLOCK(adapter);
 
 	return bus_generic_suspend(dev);
 }
 
 static int
 igb_resume(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
 	struct ifnet *ifp = adapter->ifp;
 
 	IGB_CORE_LOCK(adapter);
 	igb_init_locked(adapter);
 	igb_init_manageability(adapter);
 
 	if ((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
 		igb_start(ifp);
 
 	IGB_CORE_UNLOCK(adapter);
 
 	return bus_generic_resume(dev);
 }
 
 
 /*********************************************************************
  *  Transmit entry point
  *
  *  igb_start is called by the stack to initiate a transmit.
  *  The driver will remain in this routine as long as there are
  *  packets to transmit and transmit resources are available.
  *  In case resources are not available stack is notified and
  *  the packet is requeued.
  **********************************************************************/
 
 static void
 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct mbuf	*m_head;
 
 	IGB_TX_LOCK_ASSERT(txr);
 
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return;
 	if (!adapter->link_active)
 		return;
 
 	/* Call cleanup if number of TX descriptors low */
 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
 		igb_txeof(txr);
 
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
 		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
 		if (m_head == NULL)
 			break;
 		/*
 		 *  Encapsulation can modify our pointer, and or make it
 		 *  NULL on failure.  In that event, we can't requeue.
 		 */
 		if (igb_xmit(txr, &m_head)) {
 			if (m_head == NULL)
 				break;
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
 			break;
 		}
 
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, m_head);
 
 		/* Set watchdog on */
 		txr->watchdog_time = ticks;
-		txr->watchdog_check = TRUE;
+		txr->queue_status = IGB_QUEUE_WORKING;
 	}
 }
  
 /*
  * Legacy TX driver routine, called from the
  * stack, always uses tx[0], and spins for it.
  * Should not be used with multiqueue tx
  */
 static void
 igb_start(struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct tx_ring	*txr = adapter->tx_rings;
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		IGB_TX_LOCK(txr);
 		igb_start_locked(txr, ifp);
 		IGB_TX_UNLOCK(txr);
 	}
 	return;
 }
 
 #if __FreeBSD_version >= 800000
 /*
 ** Multiqueue Transmit driver
 **
 */
 static int
 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct adapter		*adapter = ifp->if_softc;
 	struct igb_queue	*que;
 	struct tx_ring		*txr;
 	int 			i = 0, err = 0;
 
 	/* Which queue to use */
 	if ((m->m_flags & M_FLOWID) != 0)
 		i = m->m_pkthdr.flowid % adapter->num_queues;
 
 	txr = &adapter->tx_rings[i];
 	que = &adapter->queues[i];
 
 	if (IGB_TX_TRYLOCK(txr)) {
 		err = igb_mq_start_locked(ifp, txr, m);
 		IGB_TX_UNLOCK(txr);
 	} else {
 		err = drbr_enqueue(ifp, txr->br, m);
 		taskqueue_enqueue(que->tq, &que->que_task);
 	}
 
 	return (err);
 }
 
 static int
 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
 {
 	struct adapter  *adapter = txr->adapter;
         struct mbuf     *next;
         int             err = 0, enq;
 
 	IGB_TX_LOCK_ASSERT(txr);
 
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
 		if (m != NULL)
 			err = drbr_enqueue(ifp, txr->br, m);
 		return (err);
 	}
 
 	/* Call cleanup if number of TX descriptors low */
 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
 		igb_txeof(txr);
 
 	enq = 0;
 	if (m == NULL) {
 		next = drbr_dequeue(ifp, txr->br);
 	} else if (drbr_needs_enqueue(ifp, txr->br)) {
 		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
 			return (err);
 		next = drbr_dequeue(ifp, txr->br);
 	} else
 		next = m;
 
 	/* Process the queue */
 	while (next != NULL) {
 		if ((err = igb_xmit(txr, &next)) != 0) {
 			if (next != NULL)
 				err = drbr_enqueue(ifp, txr->br, next);
 			break;
 		}
 		enq++;
 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
 		next = drbr_dequeue(ifp, txr->br);
 	}
 	if (enq > 0) {
 		/* Set the watchdog */
-		txr->watchdog_check = TRUE;
+		txr->queue_status = IGB_QUEUE_WORKING;
 		txr->watchdog_time = ticks;
 	}
 	return (err);
 }
 
 /*
 ** Flush all ring buffers
 */
 static void
 igb_qflush(struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct mbuf	*m;
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		IGB_TX_LOCK(txr);
 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
 			m_freem(m);
 		IGB_TX_UNLOCK(txr);
 	}
 	if_qflush(ifp);
 }
 #endif /* __FreeBSD_version >= 800000 */
 
 /*********************************************************************
  *  Ioctl entry point
  *
  *  igb_ioctl is called when the user wants to configure the
  *  interface.
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 #ifdef INET
 	struct ifaddr *ifa = (struct ifaddr *)data;
 #endif
 	int error = 0;
 
 	if (adapter->in_detach)
 		return (error);
 
 	switch (command) {
 	case SIOCSIFADDR:
 #ifdef INET
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			/*
 			 * XXX
 			 * Since resetting hardware takes a very long time
 			 * and results in link renegotiation we only
 			 * initialize the hardware only when it is absolutely
 			 * required.
 			 */
 			ifp->if_flags |= IFF_UP;
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				IGB_CORE_LOCK(adapter);
 				igb_init_locked(adapter);
 				IGB_CORE_UNLOCK(adapter);
 			}
 			if (!(ifp->if_flags & IFF_NOARP))
 				arp_ifinit(ifp, ifa);
 		} else
 #endif
 			error = ether_ioctl(ifp, command, data);
 		break;
 	case SIOCSIFMTU:
 	    {
 		int max_frame_size;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
 
 		IGB_CORE_LOCK(adapter);
 		max_frame_size = 9234;
 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
 		    ETHER_CRC_LEN) {
 			IGB_CORE_UNLOCK(adapter);
 			error = EINVAL;
 			break;
 		}
 
 		ifp->if_mtu = ifr->ifr_mtu;
 		adapter->max_frame_size =
 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 		igb_init_locked(adapter);
 		IGB_CORE_UNLOCK(adapter);
 		break;
 	    }
 	case SIOCSIFFLAGS:
 		IOCTL_DEBUGOUT("ioctl rcv'd:\
 		    SIOCSIFFLAGS (Set Interface Flags)");
 		IGB_CORE_LOCK(adapter);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				if ((ifp->if_flags ^ adapter->if_flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					igb_disable_promisc(adapter);
 					igb_set_promisc(adapter);
 				}
 			} else
 				igb_init_locked(adapter);
 		} else
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				igb_stop(adapter);
 		adapter->if_flags = ifp->if_flags;
 		IGB_CORE_UNLOCK(adapter);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			IGB_CORE_LOCK(adapter);
 			igb_disable_intr(adapter);
 			igb_set_multi(adapter);
 #ifdef DEVICE_POLLING
 			if (!(ifp->if_capenable & IFCAP_POLLING))
 #endif
 				igb_enable_intr(adapter);
 			IGB_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFMEDIA:
+		/*
+		** As the speed/duplex settings are being
+		** changed, we need to reset the PHY.
+		*/
+		adapter->hw.phy.reset_disable = FALSE;
 		/* Check SOL/IDER usage */
 		IGB_CORE_LOCK(adapter);
 		if (e1000_check_reset_block(&adapter->hw)) {
 			IGB_CORE_UNLOCK(adapter);
 			device_printf(adapter->dev, "Media change is"
 			    " blocked due to SOL/IDER session.\n");
 			break;
 		}
 		IGB_CORE_UNLOCK(adapter);
 	case SIOCGIFMEDIA:
 		IOCTL_DEBUGOUT("ioctl rcv'd: \
 		    SIOCxIFMEDIA (Get/Set Interface Media)");
 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
 		break;
 	case SIOCSIFCAP:
 	    {
 		int mask, reinit;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
 		reinit = 0;
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 #ifdef DEVICE_POLLING
 		if (mask & IFCAP_POLLING) {
 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 				error = ether_poll_register(igb_poll, ifp);
 				if (error)
 					return (error);
 				IGB_CORE_LOCK(adapter);
 				igb_disable_intr(adapter);
 				ifp->if_capenable |= IFCAP_POLLING;
 				IGB_CORE_UNLOCK(adapter);
 			} else {
 				error = ether_poll_deregister(ifp);
 				/* Enable interrupt even in error case */
 				IGB_CORE_LOCK(adapter);
 				igb_enable_intr(adapter);
 				ifp->if_capenable &= ~IFCAP_POLLING;
 				IGB_CORE_UNLOCK(adapter);
 			}
 		}
 #endif
 		if (mask & IFCAP_HWCSUM) {
 			ifp->if_capenable ^= IFCAP_HWCSUM;
 			reinit = 1;
 		}
 		if (mask & IFCAP_TSO4) {
 			ifp->if_capenable ^= IFCAP_TSO4;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWFILTER) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 			reinit = 1;
 		}
 		if (mask & IFCAP_LRO) {
 			ifp->if_capenable ^= IFCAP_LRO;
 			reinit = 1;
 		}
 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
 			igb_init(adapter);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	    }
 
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 
 /*********************************************************************
  *  Init entry point
  *
  *  This routine is used in two ways. It is used by the stack as
  *  init entry point in network interface structure. It is also used
  *  by the driver as a hw/sw initialization routine to get to a
  *  consistent state.
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static void
 igb_init_locked(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 
 	INIT_DEBUGOUT("igb_init: begin");
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	igb_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/* Get the latest mac address, User can use a LAA */
         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
               ETHER_ADDR_LEN);
 
 	/* Put the address into the Receive Address Array */
 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	igb_reset(adapter);
 	igb_update_link_status(adapter);
 
 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
 
-        /* Use real VLAN Filter support? */
-	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
-		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
-			/* Use real VLAN Filter support */
-			igb_setup_vlan_hw_support(adapter);
-		else {
-			u32 ctrl;
-			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
-			ctrl |= E1000_CTRL_VME;
-			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
-		}
-	}
-                                
 	/* Set hardware offload abilities */
 	ifp->if_hwassist = 0;
 	if (ifp->if_capenable & IFCAP_TXCSUM) {
 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
 #if __FreeBSD_version >= 800000
 		if (adapter->hw.mac.type == e1000_82576)
 			ifp->if_hwassist |= CSUM_SCTP;
 #endif
 	}
 
 	if (ifp->if_capenable & IFCAP_TSO4)
 		ifp->if_hwassist |= CSUM_TSO;
 
 	/* Configure for OS presence */
 	igb_init_manageability(adapter);
 
 	/* Prepare transmit descriptors and buffers */
 	igb_setup_transmit_structures(adapter);
 	igb_initialize_transmit_units(adapter);
 
 	/* Setup Multicast table */
 	igb_set_multi(adapter);
 
 	/*
 	** Figure out the desired mbuf pool
 	** for doing jumbo/packetsplit
 	*/
-	if (ifp->if_mtu > ETHERMTU)
+	if (adapter->max_frame_size <= 2048)
+		adapter->rx_mbuf_sz = MCLBYTES;
+	else if (adapter->max_frame_size <= 4096)
 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
 	else
-		adapter->rx_mbuf_sz = MCLBYTES;
+		adapter->rx_mbuf_sz = MJUM9BYTES;
 
 	/* Prepare receive descriptors and buffers */
 	if (igb_setup_receive_structures(adapter)) {
 		device_printf(dev, "Could not setup receive structures\n");
 		return;
 	}
 	igb_initialize_receive_units(adapter);
 
+        /* Use real VLAN Filter support? */
+	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
+		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+			/* Use real VLAN Filter support */
+			igb_setup_vlan_hw_support(adapter);
+		else {
+			u32 ctrl;
+			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
+			ctrl |= E1000_CTRL_VME;
+			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
+		}
+	}
+                                
 	/* Don't lose promiscuous settings */
 	igb_set_promisc(adapter);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
 
 	if (adapter->msix > 1) /* Set up queue routing */
 		igb_configure_queues(adapter);
 
-	/* Set up VLAN tag offload and filter */
-	igb_setup_vlan_hw_support(adapter);
-
 	/* this clears any pending interrupts */
 	E1000_READ_REG(&adapter->hw, E1000_ICR);
 #ifdef DEVICE_POLLING
 	/*
 	 * Only enable interrupts if we are not polling, make sure
 	 * they are off otherwise.
 	 */
 	if (ifp->if_capenable & IFCAP_POLLING)
 		igb_disable_intr(adapter);
 	else
 #endif /* DEVICE_POLLING */
 	{
 	igb_enable_intr(adapter);
 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
 	}
 
 	/* Don't reset the phy next time init gets called */
 	adapter->hw.phy.reset_disable = TRUE;
 }
 
 static void
 igb_init(void *arg)
 {
 	struct adapter *adapter = arg;
 
 	IGB_CORE_LOCK(adapter);
 	igb_init_locked(adapter);
 	IGB_CORE_UNLOCK(adapter);
 }
 
 
 static void
 igb_handle_que(void *context, int pending)
 {
 	struct igb_queue *que = context;
 	struct adapter *adapter = que->adapter;
 	struct tx_ring *txr = que->txr;
 	struct ifnet	*ifp = adapter->ifp;
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		bool	more;
 
 		more = igb_rxeof(que, -1, NULL);
 
 		IGB_TX_LOCK(txr);
 		if (igb_txeof(txr))
 			more = TRUE;
 #if __FreeBSD_version >= 800000
 		if (!drbr_empty(ifp, txr->br))
 			igb_mq_start_locked(ifp, txr, NULL);
 #else
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			igb_start_locked(txr, ifp);
 #endif
 		IGB_TX_UNLOCK(txr);
 		if (more) {
 			taskqueue_enqueue(que->tq, &que->que_task);
 			return;
 		}
 	}
 
 #ifdef DEVICE_POLLING
 	if (ifp->if_capenable & IFCAP_POLLING)
 		return;
 #endif
 	/* Reenable this interrupt */
 	if (que->eims)
 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
 	else
 		igb_enable_intr(adapter);
 }
 
 /* Deal with link in a sleepable context */
 static void
 igb_handle_link(void *context, int pending)
 {
 	struct adapter *adapter = context;
 
 	adapter->hw.mac.get_link_status = 1;
 	igb_update_link_status(adapter);
 }
 
 /*********************************************************************
  *
  *  MSI/Legacy Deferred
  *  Interrupt Service routine  
  *
  *********************************************************************/
 static int
 igb_irq_fast(void *arg)
 {
 	struct adapter		*adapter = arg;
 	struct igb_queue	*que = adapter->queues;
 	u32			reg_icr;
 
 
 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	/* Hot eject?  */
 	if (reg_icr == 0xffffffff)
 		return FILTER_STRAY;
 
 	/* Definitely not our interrupt.  */
 	if (reg_icr == 0x0)
 		return FILTER_STRAY;
 
 	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
 		return FILTER_STRAY;
 
 	/*
 	 * Mask interrupts until the taskqueue is finished running.  This is
 	 * cheap, just assume that it is needed.  This also works around the
 	 * MSI message reordering errata on certain systems.
 	 */
 	igb_disable_intr(adapter);
 	taskqueue_enqueue(que->tq, &que->que_task);
 
 	/* Link status change */
 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
 		taskqueue_enqueue(que->tq, &adapter->link_task);
 
 	if (reg_icr & E1000_ICR_RXO)
 		adapter->rx_overruns++;
 	return FILTER_HANDLED;
 }
 
 #ifdef DEVICE_POLLING
 /*********************************************************************
  *
  *  Legacy polling routine : if using this code you MUST be sure that
  *  multiqueue is not defined, ie, set igb_num_queues to 1.
  *
  *********************************************************************/
 #if __FreeBSD_version >= 800000
 #define POLL_RETURN_COUNT(a) (a)
 static int
 #else
 #define POLL_RETURN_COUNT(a)
 static void
 #endif
 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 {
 	struct adapter		*adapter = ifp->if_softc;
 	struct igb_queue	*que = adapter->queues;
 	struct tx_ring		*txr = adapter->tx_rings;
 	u32			reg_icr, rx_done = 0;
 	u32			loop = IGB_MAX_LOOP;
 	bool			more;
 
 	IGB_CORE_LOCK(adapter);
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		IGB_CORE_UNLOCK(adapter);
 		return POLL_RETURN_COUNT(rx_done);
 	}
 
 	if (cmd == POLL_AND_CHECK_STATUS) {
 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 		/* Link status change */
 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
 			igb_handle_link(adapter, 0);
 
 		if (reg_icr & E1000_ICR_RXO)
 			adapter->rx_overruns++;
 	}
 	IGB_CORE_UNLOCK(adapter);
 
 	igb_rxeof(que, count, &rx_done);
 
 	IGB_TX_LOCK(txr);
 	do {
 		more = igb_txeof(txr);
 	} while (loop-- && more);
 #if __FreeBSD_version >= 800000
 	if (!drbr_empty(ifp, txr->br))
 		igb_mq_start_locked(ifp, txr, NULL);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		igb_start_locked(txr, ifp);
 #endif
 	IGB_TX_UNLOCK(txr);
 	return POLL_RETURN_COUNT(rx_done);
 }
 #endif /* DEVICE_POLLING */
 
 /*********************************************************************
  *
  *  MSIX TX Interrupt Service routine
  *
  **********************************************************************/
 static void
 igb_msix_que(void *arg)
 {
 	struct igb_queue *que = arg;
 	struct adapter *adapter = que->adapter;
 	struct tx_ring *txr = que->txr;
 	struct rx_ring *rxr = que->rxr;
 	u32		newitr = 0;
 	bool		more_tx, more_rx;
 
 	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
 	++que->irqs;
 
 	IGB_TX_LOCK(txr);
 	more_tx = igb_txeof(txr);
 	IGB_TX_UNLOCK(txr);
 
 	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
 
 	if (igb_enable_aim == FALSE)
 		goto no_calc;
 	/*
 	** Do Adaptive Interrupt Moderation:
         **  - Write out last calculated setting
 	**  - Calculate based on average size over
 	**    the last interval.
 	*/
         if (que->eitr_setting)
                 E1000_WRITE_REG(&adapter->hw,
                     E1000_EITR(que->msix), que->eitr_setting);
  
         que->eitr_setting = 0;
 
         /* Idle, do nothing */
         if ((txr->bytes == 0) && (rxr->bytes == 0))
                 goto no_calc;
                                 
         /* Used half Default if sub-gig */
         if (adapter->link_speed != 1000)
                 newitr = IGB_DEFAULT_ITR / 2;
         else {
 		if ((txr->bytes) && (txr->packets))
                 	newitr = txr->bytes/txr->packets;
 		if ((rxr->bytes) && (rxr->packets))
 			newitr = max(newitr,
 			    (rxr->bytes / rxr->packets));
                 newitr += 24; /* account for hardware frame, crc */
 		/* set an upper boundary */
 		newitr = min(newitr, 3000);
 		/* Be nice to the mid range */
                 if ((newitr > 300) && (newitr < 1200))
                         newitr = (newitr / 3);
                 else
                         newitr = (newitr / 2);
         }
         newitr &= 0x7FFC;  /* Mask invalid bits */
         if (adapter->hw.mac.type == e1000_82575)
                 newitr |= newitr << 16;
         else
                 newitr |= E1000_EITR_CNT_IGNR;
                  
         /* save for next interrupt */
         que->eitr_setting = newitr;
 
         /* Reset state */
         txr->bytes = 0;
         txr->packets = 0;
         rxr->bytes = 0;
         rxr->packets = 0;
 
 no_calc:
 	/* Schedule a clean task if needed*/
 	if (more_tx || more_rx) 
 		taskqueue_enqueue(que->tq, &que->que_task);
 	else
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
 	return;
 }
 
 
 /*********************************************************************
  *
  *  MSIX Link Interrupt Service routine
  *
  **********************************************************************/
 
 static void
 igb_msix_link(void *arg)
 {
 	struct adapter	*adapter = arg;
 	u32       	icr;
 
 	++adapter->link_irq;
 	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 	if (!(icr & E1000_ICR_LSC))
 		goto spurious;
 	igb_handle_link(adapter, 0);
 
 spurious:
 	/* Rearm */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
 	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called whenever the user queries the status of
  *  the interface using ifconfig.
  *
  **********************************************************************/
 static void
 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct adapter *adapter = ifp->if_softc;
 	u_char fiber_type = IFM_1000_SX;
 
 	INIT_DEBUGOUT("igb_media_status: begin");
 
 	IGB_CORE_LOCK(adapter);
 	igb_update_link_status(adapter);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (!adapter->link_active) {
 		IGB_CORE_UNLOCK(adapter);
 		return;
 	}
 
 	ifmr->ifm_status |= IFM_ACTIVE;
 
 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
 		ifmr->ifm_active |= fiber_type | IFM_FDX;
 	else {
 		switch (adapter->link_speed) {
 		case 10:
 			ifmr->ifm_active |= IFM_10_T;
 			break;
 		case 100:
 			ifmr->ifm_active |= IFM_100_TX;
 			break;
 		case 1000:
 			ifmr->ifm_active |= IFM_1000_T;
 			break;
 		}
 		if (adapter->link_duplex == FULL_DUPLEX)
 			ifmr->ifm_active |= IFM_FDX;
 		else
 			ifmr->ifm_active |= IFM_HDX;
 	}
 	IGB_CORE_UNLOCK(adapter);
 }
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called when the user changes speed/duplex using
  *  media/mediopt option with ifconfig.
  *
  **********************************************************************/
 static int
 igb_media_change(struct ifnet *ifp)
 {
 	struct adapter *adapter = ifp->if_softc;
 	struct ifmedia  *ifm = &adapter->media;
 
 	INIT_DEBUGOUT("igb_media_change: begin");
 
 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	IGB_CORE_LOCK(adapter);
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 	case IFM_AUTO:
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 		break;
 	case IFM_1000_LX:
 	case IFM_1000_SX:
 	case IFM_1000_T:
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
 		break;
 	case IFM_100_TX:
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
 		else
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
 		break;
 	case IFM_10_T:
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
 		else
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
 		break;
 	default:
 		device_printf(adapter->dev, "Unsupported media type\n");
 	}
 
-	/* As the speed/duplex settings my have changed we need to
-	 * reset the PHY.
-	 */
-	adapter->hw.phy.reset_disable = FALSE;
-
 	igb_init_locked(adapter);
 	IGB_CORE_UNLOCK(adapter);
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  This routine maps the mbufs to Advanced TX descriptors.
  *  used by the 82575 adapter.
  *  
  **********************************************************************/
 
 static int
 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
 {
 	struct adapter		*adapter = txr->adapter;
 	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
 	bus_dmamap_t		map;
 	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
 	union e1000_adv_tx_desc	*txd = NULL;
 	struct mbuf		*m_head;
 	u32			olinfo_status = 0, cmd_type_len = 0;
 	int			nsegs, i, j, error, first, last = 0;
 	u32			hdrlen = 0;
 
 	m_head = *m_headp;
 
 
 	/* Set basic descriptor constants */
 	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
 	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
 	if (m_head->m_flags & M_VLANTAG)
 		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
 
         /*
          * Force a cleanup if number of TX descriptors
          * available hits the threshold
          */
 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
 		igb_txeof(txr);
 		/* Now do we at least have a minimal? */
 		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
 			txr->no_desc_avail++;
 			return (ENOBUFS);
 		}
 	}
 
 	/*
          * Map the packet for DMA.
 	 *
 	 * Capture the first descriptor index,
 	 * this descriptor will have the index
 	 * of the EOP which is the only one that
 	 * now gets a DONE bit writeback.
 	 */
 	first = txr->next_avail_desc;
 	tx_buffer = &txr->tx_buffers[first];
 	tx_buffer_mapped = tx_buffer;
 	map = tx_buffer->map;
 
 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 	if (error == EFBIG) {
 		struct mbuf *m;
 
 		m = m_defrag(*m_headp, M_DONTWAIT);
 		if (m == NULL) {
 			adapter->mbuf_defrag_failed++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		*m_headp = m;
 
 		/* Try it again */
 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 		if (error == ENOMEM) {
 			adapter->no_tx_dma_setup++;
 			return (error);
 		} else if (error != 0) {
 			adapter->no_tx_dma_setup++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (error);
 		}
 	} else if (error == ENOMEM) {
 		adapter->no_tx_dma_setup++;
 		return (error);
 	} else if (error != 0) {
 		adapter->no_tx_dma_setup++;
 		m_freem(*m_headp);
 		*m_headp = NULL;
 		return (error);
 	}
 
 	/* Check again to be sure we have enough descriptors */
         if (nsegs > (txr->tx_avail - 2)) {
                 txr->no_desc_avail++;
 		bus_dmamap_unload(txr->txtag, map);
 		return (ENOBUFS);
         }
 	m_head = *m_headp;
 
         /*
          * Set up the context descriptor:
          * used when any hardware offload is done.
 	 * This includes CSUM, VLAN, and TSO. It
 	 * will use the first descriptor.
          */
         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 		if (igb_tso_setup(txr, m_head, &hdrlen)) {
 			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
 			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
 			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
 		} else
 			return (ENXIO); 
 	} else if (igb_tx_ctx_setup(txr, m_head))
 		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
 
 	/* Calculate payload length */
 	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
 	    << E1000_ADVTXD_PAYLEN_SHIFT);
 
 	/* 82575 needs the queue index added */
 	if (adapter->hw.mac.type == e1000_82575)
 		olinfo_status |= txr->me << 4;
 
 	/* Set up our transmit descriptors */
 	i = txr->next_avail_desc;
 	for (j = 0; j < nsegs; j++) {
 		bus_size_t seg_len;
 		bus_addr_t seg_addr;
 
 		tx_buffer = &txr->tx_buffers[i];
 		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
 		seg_addr = segs[j].ds_addr;
 		seg_len  = segs[j].ds_len;
 
 		txd->read.buffer_addr = htole64(seg_addr);
 		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
 		txd->read.olinfo_status = htole32(olinfo_status);
 		last = i;
 		if (++i == adapter->num_tx_desc)
 			i = 0;
 		tx_buffer->m_head = NULL;
 		tx_buffer->next_eop = -1;
 	}
 
 	txr->next_avail_desc = i;
 	txr->tx_avail -= nsegs;
 
         tx_buffer->m_head = m_head;
 	tx_buffer_mapped->map = tx_buffer->map;
 	tx_buffer->map = map;
         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
 
         /*
          * Last Descriptor of Packet
 	 * needs End Of Packet (EOP)
 	 * and Report Status (RS)
          */
         txd->read.cmd_type_len |=
 	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
 	/*
 	 * Keep track in the first buffer which
 	 * descriptor will be written back
 	 */
 	tx_buffer = &txr->tx_buffers[first];
 	tx_buffer->next_eop = last;
 	txr->watchdog_time = ticks;
 
 	/*
 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
 	 * that this frame is available to transmit.
 	 */
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
 	++txr->tx_packets;
 
 	return (0);
 
 }
 
 /*
  * Apply the interface's IFF_PROMISC / IFF_ALLMULTI flags to the
  * receive control register (RCTL).  VF adapters are handled through
  * e1000_promisc_set_vf() and never touch RCTL here.  When neither
  * flag is set RCTL is read but not written back.
  */
 static void
 igb_set_promisc(struct adapter *adapter)
 {
 	struct e1000_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	u32		rctl;
 
 	if (hw->mac.type == e1000_vfadapt) {
 		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
 		return;
 	}
 
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 
 	if (ifp->if_flags & IFF_PROMISC) {
 		/* Unicast and multicast promiscuous */
 		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
 	} else if (ifp->if_flags & IFF_ALLMULTI) {
 		/* Multicast promiscuous only */
 		rctl |= E1000_RCTL_MPE;
 		rctl &= ~E1000_RCTL_UPE;
 	} else {
 		/* Neither flag requested: leave the register alone */
 		return;
 	}
 
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
 /*
  * Clear both promiscuous bits (UPE and MPE) in RCTL, or, on a VF
  * adapter, request e1000_promisc_disabled via e1000_promisc_set_vf().
  */
 static void
 igb_disable_promisc(struct adapter *adapter)
 {
 	struct e1000_hw	*hw = &adapter->hw;
 	u32		rctl;
 
 	if (hw->mac.type == e1000_vfadapt) {
 		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
 		return;
 	}
 
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	rctl &= ~E1000_RCTL_UPE;
 	rctl &= ~E1000_RCTL_MPE;
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
 
 /*********************************************************************
  *  Multicast Update
  *
  *  Rebuilds adapter->mta from the interface's AF_LINK multicast
  *  memberships.  If the table fills up (MAX_NUM_MULTICAST_ADDRESSES
  *  entries) the MPE bit is set in RCTL instead; otherwise the list
  *  is handed to e1000_update_mc_addr_list().
  *
  **********************************************************************/
 
 static void
 igb_set_multi(struct adapter *adapter)
 {
 	struct ifnet		*ifp = adapter->ifp;
 	struct ifmultiaddr	*ifma;
 	u8			*mta;
 	u32			rctl;
 	int			naddr = 0;
 
 	IOCTL_DEBUGOUT("igb_set_multi: begin");
 
 	/* Start from an all-zero table */
 	mta = adapter->mta;
 	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
 	    MAX_NUM_MULTICAST_ADDRESSES);
 
 	/* The membership list is walked under the address lock */
 #if __FreeBSD_version < 800000
 	IF_ADDR_LOCK(ifp);
 #else
 	if_maddr_rlock(ifp);
 #endif
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 
 		/* Table full: stop collecting */
 		if (naddr == MAX_NUM_MULTICAST_ADDRESSES)
 			break;
 
 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 		    mta + (naddr * ETH_ADDR_LEN), ETH_ADDR_LEN);
 		naddr++;
 	}
 #if __FreeBSD_version < 800000
 	IF_ADDR_UNLOCK(ifp);
 #else
 	if_maddr_runlock(ifp);
 #endif
 
 	if (naddr < MAX_NUM_MULTICAST_ADDRESSES) {
 		e1000_update_mc_addr_list(&adapter->hw, mta, naddr);
 		return;
 	}
 
 	/* Too many groups for the table: accept all multicast */
 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 	rctl |= E1000_RCTL_MPE;
 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 }
 
 
 /*********************************************************************
  *  Timer routine:
  *  	This routine checks for link status,
  *	updates statistics, and does the watchdog.
  *
  *  arg is the struct adapter.  The core lock is asserted on entry.
  *  On the normal path the callout re-arms itself hz ticks out.  On a
  *  watchdog hit it logs the state of the offending TX ring, clears
  *  IFF_DRV_RUNNING, counts the event and calls igb_init_locked();
  *  the callout is not re-armed by this function on that path.
  *
  **********************************************************************/
 
 static void
 igb_local_timer(void *arg)
 {
 	struct adapter		*adapter = arg;
 	device_t		dev = adapter->dev;
 	struct tx_ring		*txr = adapter->tx_rings;
 
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	igb_update_link_status(adapter);
 	igb_update_stats_counters(adapter);
 
 	/* 
 	** If flow control has paused us since last checking
 	** it invalidates the watchdog timing, so dont run it.
 	*/
 	if (adapter->pause_frames) {
 		adapter->pause_frames = 0;
 		goto out;
 	}
 
         /*
         ** Watchdog: check for time since any descriptor was cleaned
         **
         ** The removed ('-') revision compared ticks against
         ** txr->watchdog_time under the TX lock; the added ('+')
         ** revision only tests txr->queue_status for IGB_QUEUE_HUNG
         ** and takes no TX lock.  On the jump to timeout, txr is left
         ** pointing at the ring that tripped the check.
         */
-	for (int i = 0; i < adapter->num_queues; i++, txr++) {
-		IGB_TX_LOCK(txr);
-		if ((txr->watchdog_check == FALSE) ||
-		    (txr->tx_avail == adapter->num_tx_desc)) {
-			IGB_TX_UNLOCK(txr);
-			continue;
-		}
-		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
+	for (int i = 0; i < adapter->num_queues; i++, txr++)
+		if (txr->queue_status == IGB_QUEUE_HUNG) 
 			goto timeout;
-		IGB_TX_UNLOCK(txr);
-	}
-
 out:
 	/* Re-arm for another run in hz ticks */
 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
 	return;
 
 timeout:
 	/* Report head/tail registers and ring counters for the hung queue */
 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
 	device_printf(dev,"TX(%d) desc avail = %d,"
             "Next TX to Clean = %d\n",
             txr->me, txr->tx_avail, txr->next_to_clean);
 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->watchdog_events++;
-	IGB_TX_UNLOCK(txr);
 	igb_init_locked(adapter);
 }
 
 /*
  * Determine the current link state for the adapter's media type and,
  * if it differs from adapter->link_active, record the transition:
  * speed/duplex and if_baudrate are updated, the stack is notified via
  * if_link_state_change(), and on link-down every TX ring's watchdog
  * state is reset.
  */
 static void
 igb_update_link_status(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	struct ifnet *ifp = adapter->ifp;
 	device_t dev = adapter->dev;
 	struct tx_ring *txr = adapter->tx_rings;
 	u32 link_check = 0;
 
 	/* Get the cached link value or read for real */
         switch (hw->phy.media_type) {
         case e1000_media_type_copper:
                 if (hw->mac.get_link_status) {
 			/* Do the work to read phy */
                         e1000_check_for_link(hw);
                         link_check = !hw->mac.get_link_status;
                 } else
                         link_check = TRUE;
                 break;
         case e1000_media_type_fiber:
                 e1000_check_for_link(hw);
                 /* Link state comes from the LU bit of the STATUS register */
                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                  E1000_STATUS_LU);
                 break;
         case e1000_media_type_internal_serdes:
                 e1000_check_for_link(hw);
                 link_check = adapter->hw.mac.serdes_has_link;
                 break;
 	/* VF device is type_unknown */
         case e1000_media_type_unknown:
                 e1000_check_for_link(hw);
 		link_check = !hw->mac.get_link_status;
 		/* Fall thru */
         default:
                 break;
         }
 
 	/* Now we check if a transition has happened */
 	if (link_check && (adapter->link_active == 0)) {
 		/* Down -> up */
 		e1000_get_speed_and_duplex(&adapter->hw, 
 		    &adapter->link_speed, &adapter->link_duplex);
 		if (bootverbose)
 			device_printf(dev, "Link is up %d Mbps %s\n",
 			    adapter->link_speed,
 			    ((adapter->link_duplex == FULL_DUPLEX) ?
 			    "Full Duplex" : "Half Duplex"));
 		adapter->link_active = 1;
 		/* link_speed is printed as Mbps above; scale by 1000000 */
 		ifp->if_baudrate = adapter->link_speed * 1000000;
 		/* This can sleep */
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else if (!link_check && (adapter->link_active == 1)) {
 		/* Up -> down */
 		ifp->if_baudrate = adapter->link_speed = 0;
 		adapter->link_duplex = 0;
 		if (bootverbose)
 			device_printf(dev, "Link is Down\n");
 		adapter->link_active = 0;
 		/* This can sleep */
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 		/* Turn off watchdogs */
 		for (int i = 0; i < adapter->num_queues; i++, txr++)
-			txr->watchdog_check = FALSE;
+			txr->queue_status = IGB_QUEUE_IDLE;
 	}
 }
 
 /*********************************************************************
  *
  *  This routine disables all traffic on the adapter by issuing a
  *  global reset on the MAC and deallocates TX/RX buffers.
  *
  *  arg is the struct adapter; the core lock is asserted on entry.
  *  NOTE(review): no buffer deallocation is visible in this function
  *  body -- confirm whether that part of the description is current.
  *
  **********************************************************************/
 
 static void
 igb_stop(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring *txr = adapter->tx_rings;
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	INIT_DEBUGOUT("igb_stop: begin");
 
 	/* Quiesce interrupts and the periodic timer before the reset */
 	igb_disable_intr(adapter);
 
 	callout_stop(&adapter->timer);
 
 	/* Tell the stack that the interface is no longer active */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	/* Unarm watchdog timer. */
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		IGB_TX_LOCK(txr);
-		txr->watchdog_check = FALSE;
+		txr->queue_status = IGB_QUEUE_IDLE;
 		IGB_TX_UNLOCK(txr);
 	}
 
 	/* Reset the MAC and write 0 to the WUC register */
 	e1000_reset_hw(&adapter->hw);
 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
 
 	e1000_led_off(&adapter->hw);
 	e1000_cleanup_led(&adapter->hw);
 }
 
 
 /*********************************************************************
  *
  *  Determine hardware revision.
  *
  **********************************************************************/
 static void
 igb_identify_hardware(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	/* Make sure our PCI config space has the necessary stuff set */
 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
 		INIT_DEBUGOUT("Memory Access and/or Bus Master "
 		    "bits were not set!\n");
 		adapter->hw.bus.pci_cmd_word |=
 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
 		pci_write_config(dev, PCIR_COMMAND,
 		    adapter->hw.bus.pci_cmd_word, 2);
 	}
 
 	/* Save off the information about this board */
 	adapter->hw.vendor_id = pci_get_vendor(dev);
 	adapter->hw.device_id = pci_get_device(dev);
 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
 	adapter->hw.subsystem_vendor_id =
 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
 	adapter->hw.subsystem_device_id =
 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
 
 	/* Set MAC type early for PCI setup */
 	e1000_set_mac_type(&adapter->hw);
 }
 
 /*
  * Map BAR 0 and publish its bus tag/handle through adapter->osdep,
  * then pick the interrupt scheme via igb_setup_msix().
  *
  * Returns 0 on success, ENXIO if the memory BAR cannot be allocated.
  */
 static int
 igb_allocate_pci_resources(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	int		rid = PCIR_BAR(0);
 
 	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &rid, RF_ACTIVE);
 	if (adapter->pci_mem == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: memory\n");
 		return (ENXIO);
 	}
 
 	/* hw_addr points at the handle kept in osdep */
 	adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem);
 	adapter->osdep.mem_bus_space_handle =
 	    rman_get_bushandle(adapter->pci_mem);
 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
 
 	/* Defaults for Legacy or MSI */
 	adapter->num_queues = 1;
 
 	/* This will setup either MSI/X or MSI */
 	adapter->msix = igb_setup_msix(adapter);
 	adapter->hw.back = &adapter->osdep;
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Setup the Legacy or MSI Interrupt handler
  *
  **********************************************************************/
 static int
 igb_allocate_legacy(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct igb_queue	*que = adapter->queues;
 	int			error, rid = 0;
 
 	/* Turn off all interrupts */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	/* MSI RID is 1 */
 	if (adapter->msix == 1)
 		rid = 1;
 
 	/* We allocate a single interrupt resource */
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Try allocating a fast interrupt and the associated deferred
 	 * processing contexts.
 	 */
 	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
 	/* Make tasklet for deferred link handling */
 	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
 	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
 	    taskqueue_thread_enqueue, &que->tq);
 	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
 	    device_get_nameunit(adapter->dev));
 	if ((error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
 	    adapter, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 			    "handler: %d\n", error);
 		taskqueue_free(que->tq);
 		que->tq = NULL;
 		return (error);
 	}
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  Setup the MSIX Queue Interrupt handlers: 
  *
  **********************************************************************/
 static int
 igb_allocate_msix(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct igb_queue	*que = adapter->queues;
 	int			error, rid, vector = 0;
 
 
 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
 		rid = vector +1;
 		que->res = bus_alloc_resource_any(dev,
 		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 		if (que->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
 			    "MSIX Queue Interrupt\n");
 			return (ENXIO);
 		}
 		error = bus_setup_intr(dev, que->res,
 	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 		    igb_msix_que, que, &que->tag);
 		if (error) {
 			que->res = NULL;
 			device_printf(dev, "Failed to register Queue handler");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
 #endif
 		que->msix = vector;
 		if (adapter->hw.mac.type == e1000_82575)
 			que->eims = E1000_EICR_TX_QUEUE0 << i;
 		else
 			que->eims = 1 << vector;
 		/*
 		** Bind the msix vector, and thus the
 		** rings to the corresponding cpu.
 		*/
 		if (adapter->num_queues > 1)
 			bus_bind_intr(dev, que->res, i);
 		/* Make tasklet for deferred handling */
 		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
 		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
 		    taskqueue_thread_enqueue, &que->tq);
 		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
 		    device_get_nameunit(adapter->dev));
 	}
 
 	/* And Link */
 	rid = vector + 1;
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev,
 		    "Unable to allocate bus resource: "
 		    "MSIX Link Interrupt\n");
 		return (ENXIO);
 	}
 	if ((error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 	    igb_msix_link, adapter, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register Link handler");
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->linkvec = vector;
 
 	return (0);
 }
 
 
 /*
  * Program the MSI-X vector routing for every queue plus the link
  * vector, accumulate adapter->eims_mask / adapter->link_mask, and
  * write the starting interrupt throttle value into each queue's EITR
  * register.  The routing layout differs per MAC type, hence the
  * switch.
  */
 static void
 igb_configure_queues(struct adapter *adapter)
 {
 	struct	e1000_hw	*hw = &adapter->hw;
 	struct	igb_queue	*que;
-	u32			tmp, ivar = 0;
-	u32			newitr = IGB_DEFAULT_ITR;
+	u32			tmp, ivar = 0, newitr = 0;
 
 	/* First turn on RSS capability */
 	if (adapter->hw.mac.type > e1000_82575)
 		E1000_WRITE_REG(hw, E1000_GPIE,
 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
 
 	/* Turn on MSIX */
 	switch (adapter->hw.mac.type) {
 	case e1000_82580:
 	case e1000_vfadapt:
 		/*
 		 * Queue i uses IVAR register i >> 1; the odd/even bit of
 		 * i selects which byte pair inside that register.
 		 */
 		/* RX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i >> 1;
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i & 1) {
 				ivar &= 0xFF00FFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
 			} else {
 				ivar &= 0xFFFFFF00;
 				ivar |= que->msix | E1000_IVAR_VALID;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 		}
 		/* TX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i >> 1;
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i & 1) {
 				ivar &= 0x00FFFFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
 			} else {
 				ivar &= 0xFFFF00FF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 			adapter->eims_mask |= que->eims;
 		}
 
 		/* And for the link interrupt */
 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
 		adapter->link_mask = 1 << adapter->linkvec;
 		adapter->eims_mask |= adapter->link_mask;
 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
 		break;
 	case e1000_82576:
 		/*
 		 * Queue i uses IVAR register i & 0x7; queues below 8 take
 		 * the low byte pair, queues 8 and up the high byte pair.
 		 */
 		/* RX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i & 0x7; /* Each IVAR has two entries */
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i < 8) {
 				ivar &= 0xFFFFFF00;
 				ivar |= que->msix | E1000_IVAR_VALID;
 			} else {
 				ivar &= 0xFF00FFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 			adapter->eims_mask |= que->eims;
 		}
 		/* TX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i & 0x7; /* Each IVAR has two entries */
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i < 8) {
 				ivar &= 0xFFFF00FF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
 			} else {
 				ivar &= 0x00FFFFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 			adapter->eims_mask |= que->eims;
 		}
 
 		/* And for the link interrupt */
 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
 		adapter->link_mask = 1 << adapter->linkvec;
 		adapter->eims_mask |= adapter->link_mask;
 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
 		break;
 
 	case e1000_82575:
                 /* enable MSI-X support*/
 		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
                 tmp |= E1000_CTRL_EXT_PBA_CLR;
                 /* Auto-Mask interrupts upon ICR read. */
                 tmp |= E1000_CTRL_EXT_EIAME;
                 tmp |= E1000_CTRL_EXT_IRCA;
                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
 
 		/* Queues */
 		/* que->eims is overwritten here with the RX|TX bits for queue i */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			que = &adapter->queues[i];
 			tmp = E1000_EICR_RX_QUEUE0 << i;
 			tmp |= E1000_EICR_TX_QUEUE0 << i;
 			que->eims = tmp;
 			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
 			    i, que->eims);
 			adapter->eims_mask |= que->eims;
 		}
 
 		/* Link */
 		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
 		    E1000_EIMS_OTHER);
 		adapter->link_mask |= E1000_EIMS_OTHER;
 		adapter->eims_mask |= adapter->link_mask;
 		/* No break: falls into default, which only breaks */
 	default:
 		break;
 	}
 
 	/* Set the starting interrupt rate */
 	/*
 	 * In the added ('+') revision newitr starts at 0 and is derived
 	 * from igb_max_interrupt_rate only when that tunable is positive.
 	 */
+	if (igb_max_interrupt_rate > 0)
+		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
+
         if (hw->mac.type == e1000_82575)
                 newitr |= newitr << 16;
         else
                 newitr |= E1000_EITR_CNT_IGNR;
 
 	/* Same throttle value for every queue vector */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		que = &adapter->queues[i];
 		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
 	}
 
 	return;
 }
 
 
 /*
  * Tear down interrupt handlers and release IRQ, MSI/MSI-X and memory
  * BAR resources.  Written to tolerate a partially completed attach.
  */
 static void
 igb_free_pci_resources(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct igb_queue	*que;
 	int			rid;
 
 	/*
 	** There is a slight possibility of a failure mode
 	** in attach that will result in entering this function
 	** before interrupt resources have been initialized, and
 	** in that case we do not want to execute the loops below
 	** We can detect this reliably by the state of the adapter
 	** res pointer.
 	*/
 	if (adapter->res != NULL) {
 		/* First release all the per-queue interrupt resources */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			que = &adapter->queues[i];
 			rid = que->msix + 1;
 			if (que->tag != NULL) {
 				bus_teardown_intr(dev, que->res, que->tag);
 				que->tag = NULL;
 			}
 			if (que->res != NULL)
 				bus_release_resource(dev,
 				    SYS_RES_IRQ, rid, que->res);
 		}
 
 		/* Clean the Legacy or Link interrupt last */
 		if (adapter->linkvec)		/* we are doing MSIX */
 			rid = adapter->linkvec + 1;
 		else if (adapter->msix != 0)
 			rid = 1;
 		else
 			rid = 0;
 
 		if (adapter->tag != NULL) {
 			bus_teardown_intr(dev, adapter->res, adapter->tag);
 			adapter->tag = NULL;
 		}
 		if (adapter->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ, rid,
 			    adapter->res);
 	}
 
 	/* Message vectors, then the MSI-X table BAR, then BAR 0 */
 	if (adapter->msix)
 		pci_release_msi(dev);
 
 	if (adapter->msix_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
 
 	if (adapter->pci_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(0), adapter->pci_mem);
 }
 
 /*
  * Setup Either MSI/X or MSI
  *
  * Tries MSI-X first (unless igb_enable_msix is 0), sizing the queue
  * count from mp_ncpus, the available message count and the
  * igb_num_queues override, then falls back to MSI.  On the MSI-X
  * success path adapter->num_queues is set and the number of vectors
  * allocated is returned; on the MSI path the MSI message count is
  * returned.
  *
  * NOTE(review): the "MSIX Configuration Problem" path returns ENXIO
  * from a function whose other returns are vector counts; the caller
  * visible in this file stores the result in adapter->msix.  That
  * path also returns without releasing adapter->msix_mem here.
  * Confirm both are intended.
  */
 static int
 igb_setup_msix(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	int rid, want, queues, msgs;
 
 	/* tuneable override */
 	if (igb_enable_msix == 0)
 		goto msi;
 
 	/* First try MSI/X */
 	rid = PCIR_BAR(IGB_MSIX_BAR);
 	adapter->msix_mem = bus_alloc_resource_any(dev,
 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
        	if (!adapter->msix_mem) {
 		/* May not be enabled */
 		device_printf(adapter->dev,
 		    "Unable to map MSIX table \n");
 		goto msi;
 	}
 
 	msgs = pci_msix_count(dev); 
 	if (msgs == 0) { /* system has msix disabled */
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
 		adapter->msix_mem = NULL;
 		goto msi;
 	}
 
 	/* Figure out a reasonable auto config value */
 	/* i.e. min(mp_ncpus, msgs - 1): one message is kept for link */
 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
 
 	/* Manual override */
 	if (igb_num_queues != 0)
 		queues = igb_num_queues;
+	if (queues > 8)  /* max queues */
+		queues = 8;
 
 	/* Can have max of 4 queues on 82575 */
 	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
 		queues = 4;
 
 	/* Limit the VF adapter to one queue */
 	if (adapter->hw.mac.type == e1000_vfadapt)
 		queues = 1;
 
 	/*
 	** One vector (RX/TX pair) per queue
 	** plus an additional for Link interrupt
 	*/
 	want = queues + 1;
 	if (msgs >= want)
 		msgs = want;
 	else {
                	device_printf(adapter->dev,
 		    "MSIX Configuration Problem, "
 		    "%d vectors configured, but %d queues wanted!\n",
 		    msgs, want);
 		return (ENXIO);
 	}
 	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
                	device_printf(adapter->dev,
 		    "Using MSIX interrupts with %d vectors\n", msgs);
 		adapter->num_queues = queues;
 		return (msgs);
 	}
 	/* MSI-X allocation failed or was skipped: fall back to MSI */
 msi:
        	msgs = pci_msi_count(dev);
        	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
                	device_printf(adapter->dev,"Using MSI interrupt\n");
 	return (msgs);
 }
 
 /*********************************************************************
  *
  *  Set up a fresh starting state
  *
  *  Takes hardware control, chooses the packet buffer allocation for
  *  the MAC type, derives the flow-control water marks from it,
  *  resets and re-initializes the hardware, applies the 82580-only
  *  DMA coalescing setup, and finally refreshes PHY info and link.
  *
  **********************************************************************/
 static void
 igb_reset(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct e1000_hw *hw = &adapter->hw;
 	struct e1000_fc_info *fc = &hw->fc;
 	struct ifnet	*ifp = adapter->ifp;
 	u32		pba = 0;
 	u16		hwm;
 
 	INIT_DEBUGOUT("igb_reset: begin");
 
 	/* Let the firmware know the OS is in control */
 	igb_get_hw_control(adapter);
 
 	/*
 	 * Packet Buffer Allocation (PBA)
 	 * Writing PBA sets the receive portion of the buffer
 	 * the remainder is used for the transmit buffer.
 	 */
 	switch (hw->mac.type) {
 	case e1000_82575:
 		pba = E1000_PBA_32K;
 		break;
 	case e1000_82576:
 	case e1000_vfadapt:
 		pba = E1000_PBA_64K;
 		break;
 	case e1000_82580:
 		pba = E1000_PBA_35K;
 		/* No break: falls into default, which only breaks */
 	default:
 		break;
 	}
 
 	/* Special needs in case of Jumbo frames */
 	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
 		u32 tx_space, min_tx, min_rx;
 		/* Upper 16 bits of the register: TX space; lower 16: RX */
 		pba = E1000_READ_REG(hw, E1000_PBA);
 		tx_space = pba >> 16;
 		pba &= 0xffff;
 		/* Both minimums are rounded up to 1024 and expressed in KB */
 		min_tx = (adapter->max_frame_size +
 		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
 		min_tx = roundup2(min_tx, 1024);
 		min_tx >>= 10;
                 min_rx = adapter->max_frame_size;
                 min_rx = roundup2(min_rx, 1024);
                 min_rx >>= 10;
 		if (tx_space < min_tx &&
 		    ((min_tx - tx_space) < pba)) {
 			pba = pba - (min_tx - tx_space);
 			/*
                          * if short on rx space, rx wins
                          * and must trump tx adjustment
 			 */
                         if (pba < min_rx)
                                 pba = min_rx;
 		}
 		E1000_WRITE_REG(hw, E1000_PBA, pba);
 	}
 
 	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
 
 	/*
 	 * These parameters control the automatic generation (Tx) and
 	 * response (Rx) to Ethernet PAUSE frames.
 	 * - High water mark should allow for at least two frames to be
 	 *   received after sending an XOFF.
 	 * - Low water mark works best when it is very near the high water mark.
 	 *   This allows the receiver to restart by sending XON when it has
 	 *   drained a bit.
 	 */
 	/*
 	 * NOTE(review): hwm is a u16 while (pba << 10) is a byte count;
 	 * confirm the result always fits for the largest PBA value.
 	 */
 	hwm = min(((pba << 10) * 9 / 10),
 	    ((pba << 10) - 2 * adapter->max_frame_size));
 
 	if (hw->mac.type < e1000_82576) {
 		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
 		fc->low_water = fc->high_water - 8;
 	} else {
 		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
 		fc->low_water = fc->high_water - 16;
 	}
 
 	fc->pause_time = IGB_FC_PAUSE_TIME;
 	fc->send_xon = TRUE;
 
 	/* Set Flow control, use the tunable location if sane */
 	/*
 	 * The removed ('-') test joined the two bounds with ||, which is
 	 * true for every value; the added ('+') test uses && so values
 	 * outside 0..3 select e1000_fc_none.
 	 */
-	if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
+	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
 		fc->requested_mode = igb_fc_setting;
 	else
 		fc->requested_mode = e1000_fc_none;
 
 	fc->current_mode = fc->requested_mode;
 
 	/* Issue a global reset */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
 
 	/* A failed init is only logged; the function carries on */
 	if (e1000_init_hw(hw) < 0)
 		device_printf(dev, "Hardware Initialization Failed\n");
 
 	if (hw->mac.type == e1000_82580) {
 		u32 reg;
 
 		hwm = (pba << 10) - (2 * adapter->max_frame_size);
 		/*
 		 * 0x80000000 - enable DMA COAL
 		 * 0x10000000 - use L0s as low power
 		 * 0x20000000 - use L1 as low power
 		 * X << 16 - exit dma coal when rx data exceeds X kB
 		 * Y - upper limit to stay in dma coal in units of 32usecs
 		 */
 		E1000_WRITE_REG(hw, E1000_DMACR,
 		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
 
 		/* set hwm to PBA -  2 * max frame size */
 		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
 		/*
 		 * This sets the time to wait before requesting transition to
 		 * low power state to number of usecs needed to receive 1 512
 		 * byte frame at gigabit line rate
 		 */
 		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
 
 		/* free space in tx packet buffer to wake from DMA coal */
 		E1000_WRITE_REG(hw, E1000_DMCTXTH,
 		    (20480 - (2 * adapter->max_frame_size)) >> 6);
 
 		/* make low power state decision controlled by DMA coal */
 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
 		E1000_WRITE_REG(hw, E1000_PCIEMISC,
 		    reg | E1000_PCIEMISC_LX_DECISION);
 	}
 
 	/* Write the VLAN ethertype into VET, then refresh PHY info and link */
 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
 	e1000_get_phy_info(hw);
 	e1000_check_for_link(hw);
 	return;
 }
 
 /*********************************************************************
  *
  *  Setup networking device structure and register an interface.
  *
  *  Allocates the ifnet, fills in its methods and queue limits,
  *  attaches it as an Ethernet interface, advertises the capability
  *  set and registers the supported media types.
  *
  *  Returns 0 on success, -1 if the ifnet cannot be allocated.
  *
  **********************************************************************/
 static int
 igb_setup_interface(device_t dev, struct adapter *adapter)
 {
 	struct ifnet   *ifp;
 
 	INIT_DEBUGOUT("igb_setup_interface: begin");
 
 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "can not allocate ifnet structure\n");
 		return (-1);
 	}
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_init =  igb_init;
 	ifp->if_softc = adapter;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = igb_ioctl;
 	ifp->if_start = igb_start;
 #if __FreeBSD_version >= 800000
 	ifp->if_transmit = igb_mq_start;
 	ifp->if_qflush = igb_qflush;
 #endif
 	/* Send queue depth is one less than the TX descriptor count */
 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	ether_ifattach(ifp, adapter->hw.mac.addr);
 
 	ifp->if_capabilities = ifp->if_capenable = 0;
 
 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
 	ifp->if_capabilities |= IFCAP_TSO4;
 	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
-	if (igb_header_split)
-		ifp->if_capabilities |= IFCAP_LRO;
-
 	/*
 	 * if_capenable is a snapshot of the capabilities set so far;
 	 * bits OR-ed into if_capabilities after this line are advertised
 	 * but start out disabled unless also added to if_capenable.
 	 */
 	ifp->if_capenable = ifp->if_capabilities;
+
+	/* Don't enable LRO by default */
+	ifp->if_capabilities |= IFCAP_LRO;
+
 #ifdef DEVICE_POLLING
 	ifp->if_capabilities |= IFCAP_POLLING;
 #endif
 
 	/*
 	 * Tell the upper layer(s) we
 	 * support full VLAN capability.
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
 
 	/*
 	** Dont turn this on by default, if vlans are
 	** created on another pseudo device (eg. lagg)
 	** then vlan events are not passed thru, breaking
 	** operation, but with HW FILTER off it works. If
 	** using vlans directly on the em driver you can
 	** enable this and get full hardware tag filtering.
 	*/
 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 	/*
 	 * Specify the media types supported by this adapter and register
 	 * callbacks to update media and link information
 	 */
 	ifmedia_init(&adapter->media, IFM_IMASK,
 	    igb_media_change, igb_media_status);
 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
 			    0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
 	} else {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
 			    0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
 			    0, NULL);
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
 			    0, NULL);
 		/* 1000BASE-T entries are skipped for the e1000_phy_ife PHY type */
 		if (adapter->hw.phy.type != e1000_phy_ife) {
 			ifmedia_add(&adapter->media,
 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
 			ifmedia_add(&adapter->media,
 				IFM_ETHER | IFM_1000_T, 0, NULL);
 		}
 	}
 	/* Autoselect is always offered and is the initial selection */
 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
 	return (0);
 }
 
 
 /*
  * Manage DMA'able memory.
  */
 /*
  * bus_dmamap_load() callback: on success store the bus address of the
  * first segment into the bus_addr_t that arg points to.  On error the
  * destination is left untouched.
  */
 static void
 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	bus_addr_t *paddr = arg;
 
 	if (error == 0)
 		*paddr = segs[0].ds_addr;
 }
 
 /*
  * Allocate a single-segment DMA region of `size' bytes aligned to
  * IGB_DBA_ALIGN and load it, recording tag, map, kernel virtual
  * address and bus address in *dma.
  *
  * Returns 0 on success or an errno value on failure.  On failure
  * every step that did succeed is undone and dma->dma_tag /
  * dma->dma_map are set to NULL, so igb_dma_free() on the same
  * structure is a no-op.
  */
 static int
 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
         struct igb_dma_alloc *dma, int mapflags)
 {
 	int error;
 
 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
 				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
 				size,			/* maxsize */
 				1,			/* nsegments */
 				size,			/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockarg */
 				&dma->dma_tag);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dma_tag_create failed: %d\n",
 		    __func__, error);
 		goto fail_0;
 	}
 
 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
 	    BUS_DMA_NOWAIT, &dma->dma_map);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
 		    __func__, (uintmax_t)size, error);
 		/* Nothing was allocated: only the tag needs undoing */
 		goto fail_1;
 	}
 
 	/* The callback fills dma_paddr; 0 afterwards means it never did */
 	dma->dma_paddr = 0;
 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
 	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
 	if (error || dma->dma_paddr == 0) {
 		/*
 		 * A load that reports success but yields no address is
 		 * still a failure; never hand 0 back to the caller after
 		 * tearing the allocation down.
 		 */
 		if (error == 0)
 			error = ENOMEM;
 		device_printf(adapter->dev,
 		    "%s: bus_dmamap_load failed: %d\n",
 		    __func__, error);
 		goto fail_3;
 	}
 
 	return (0);
 
 fail_3:
 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 fail_1:
 	bus_dma_tag_destroy(dma->dma_tag);
 fail_0:
 	dma->dma_map = NULL;
 	dma->dma_tag = NULL;
 
 	return (error);
 }
 
 static void
 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
 {
 	if (dma->dma_tag == NULL)
 		return;
 	if (dma->dma_map != NULL) {
 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 		dma->dma_map = NULL;
 	}
 	bus_dma_tag_destroy(dma->dma_tag);
 	dma->dma_tag = NULL;
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for the transmit and receive rings, and then
  *  the descriptors associated with each, called only once at attach.
  *
  **********************************************************************/
 static int
 igb_allocate_queues(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	struct igb_queue	*que = NULL;
 	struct tx_ring		*txr = NULL;
 	struct rx_ring		*rxr = NULL;
 	int rsize, tsize, error = E1000_SUCCESS;
 	int txconf = 0, rxconf = 0;
 
 	/* First allocate the top level queue structs */
 	if (!(adapter->queues =
 	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate queue memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	/* Next allocate the TX ring struct memory */
 	if (!(adapter->tx_rings =
 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate TX ring memory\n");
 		error = ENOMEM;
 		goto tx_fail;
 	}
 
 	/* Now allocate the RX */
 	if (!(adapter->rx_rings =
 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate RX ring memory\n");
 		error = ENOMEM;
 		goto rx_fail;
 	}
 
 	tsize = roundup2(adapter->num_tx_desc *
 	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
 	/*
 	 * Now set up the TX queues, txconf is needed to handle the
 	 * possibility that things fail midcourse and we need to
 	 * undo memory gracefully
 	 */ 
 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
 		/* Set up some basics */
 		txr = &adapter->tx_rings[i];
 		txr->adapter = adapter;
 		txr->me = i;
 
 		/* Initialize the TX lock */
 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
 		    device_get_nameunit(dev), txr->me);
 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
 
 		if (igb_dma_malloc(adapter, tsize,
 			&txr->txdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate TX Descriptor memory\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 		bzero((void *)txr->tx_base, tsize);
 
         	/* Now allocate transmit buffers for the ring */
         	if (igb_allocate_transmit_buffers(txr)) {
 			device_printf(dev,
 			    "Critical Failure setting up transmit buffers\n");
 			error = ENOMEM;
 			goto err_tx_desc;
         	}
 #if __FreeBSD_version >= 800000
 		/* Allocate a buf ring */
 		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
 		    M_WAITOK, &txr->tx_mtx);
 #endif
 	}
 
 	/*
 	 * Next the RX queues...
 	 */ 
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
 		rxr = &adapter->rx_rings[i];
 		rxr->adapter = adapter;
 		rxr->me = i;
 
 		/* Initialize the RX lock */
 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
 		    device_get_nameunit(dev), txr->me);
 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		if (igb_dma_malloc(adapter, rsize,
 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate RxDescriptor memory\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
 		bzero((void *)rxr->rx_base, rsize);
 
         	/* Allocate receive buffers for the ring*/
 		if (igb_allocate_receive_buffers(rxr)) {
 			device_printf(dev,
 			    "Critical Failure setting up receive buffers\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 	}
 
 	/*
 	** Finally set up the queue holding structs
 	*/
 	for (int i = 0; i < adapter->num_queues; i++) {
 		que = &adapter->queues[i];
 		que->adapter = adapter;
 		que->txr = &adapter->tx_rings[i];
 		que->rxr = &adapter->rx_rings[i];
 	}
 
 	return (0);
 
 err_rx_desc:
 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
 		igb_dma_free(adapter, &rxr->rxdma);
 err_tx_desc:
 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
 		igb_dma_free(adapter, &txr->txdma);
 	free(adapter->rx_rings, M_DEVBUF);
 rx_fail:
 #if __FreeBSD_version >= 800000
 	buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	free(adapter->tx_rings, M_DEVBUF);
 tx_fail:
 	free(adapter->queues, M_DEVBUF);
 fail:
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
  *  the information needed to transmit a packet on the wire. This is
  *  called only once at attach, setup is done every reset.
  *
  **********************************************************************/
 static int
 igb_allocate_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	device_t dev = adapter->dev;
 	struct igb_tx_buffer *txbuf;
 	int error, i;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 			       1, 0,			/* alignment, bounds */
 			       BUS_SPACE_MAXADDR,	/* lowaddr */
 			       BUS_SPACE_MAXADDR,	/* highaddr */
 			       NULL, NULL,		/* filter, filterarg */
 			       IGB_TSO_SIZE,		/* maxsize */
 			       IGB_MAX_SCATTER,		/* nsegments */
 			       PAGE_SIZE,		/* maxsegsize */
 			       0,			/* flags */
 			       NULL,			/* lockfunc */
 			       NULL,			/* lockfuncarg */
 			       &txr->txtag))) {
 		device_printf(dev,"Unable to allocate TX DMA tag\n");
 		goto fail;
 	}
 
 	if (!(txr->tx_buffers =
 	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
         /* Create the descriptor buffer dma maps */
 	txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
 		if (error != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
 			goto fail;
 		}
 	}
 
 	return 0;
 fail:
 	/* We free all, it handles case where we are in the middle */
 	igb_free_transmit_structures(adapter);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize a transmit ring.
  *
  **********************************************************************/
 static void
 igb_setup_transmit_ring(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	struct igb_tx_buffer *txbuf;
 	int i;
 
 	/* Clear the old descriptor contents */
 	IGB_TX_LOCK(txr);
 	bzero((void *)txr->tx_base,
 	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
 	/* Reset indices */
 	txr->next_avail_desc = 0;
 	txr->next_to_clean = 0;
 
 	/* Free any existing tx buffers. */
         txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		if (txbuf->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag, txbuf->map);
 			m_freem(txbuf->m_head);
 			txbuf->m_head = NULL;
 		}
 		/* clear the watch index */
 		txbuf->next_eop = -1;
         }
 
 	/* Set number of descriptors available */
 	txr->tx_avail = adapter->num_tx_desc;
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	IGB_TX_UNLOCK(txr);
 }
 
 /*********************************************************************
  *
  *  Initialize all transmit rings.
  *
  **********************************************************************/
 static void
 igb_setup_transmit_structures(struct adapter *adapter)
 {
 	struct tx_ring *txr = adapter->tx_rings;
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++)
 		igb_setup_transmit_ring(txr);
 
 	return;
 }
 
 /*********************************************************************
  *
  *  Enable transmit unit.
  *
  **********************************************************************/
 static void
 igb_initialize_transmit_units(struct adapter *adapter)
 {
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct e1000_hw *hw = &adapter->hw;
 	u32		tctl, txdctl;
 
 	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
 	tctl = txdctl = 0;
 
 	/* Setup the Tx Descriptor Rings */
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		u64 bus_addr = txr->txdma.dma_paddr;
 
 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
 		    (uint32_t)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
 		    (uint32_t)bus_addr);
 
 		/* Setup the HW Tx Head and Tail descriptor pointers */
 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
 
 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
 		    E1000_READ_REG(hw, E1000_TDBAL(i)),
 		    E1000_READ_REG(hw, E1000_TDLEN(i)));
 
-		txr->watchdog_check = FALSE;
+		txr->queue_status = IGB_QUEUE_IDLE;
 
 		txdctl |= IGB_TX_PTHRESH;
 		txdctl |= IGB_TX_HTHRESH << 8;
 		txdctl |= IGB_TX_WTHRESH << 16;
 		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
 	}
 
 	if (adapter->hw.mac.type == e1000_vfadapt)
 		return;
 
+	e1000_config_collision_dist(hw);
+
 	/* Program the Transmit Control Register */
 	tctl = E1000_READ_REG(hw, E1000_TCTL);
 	tctl &= ~E1000_TCTL_CT;
 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
 
-	e1000_config_collision_dist(hw);
-
 	/* This write will effectively turn on the transmit unit. */
 	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
 }
 
 /*********************************************************************
  *
  *  Free all transmit rings.
  *
  **********************************************************************/
 static void
 igb_free_transmit_structures(struct adapter *adapter)
 {
 	struct tx_ring *txr = adapter->tx_rings;
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		IGB_TX_LOCK(txr);
 		igb_free_transmit_buffers(txr);
 		igb_dma_free(adapter, &txr->txdma);
 		IGB_TX_UNLOCK(txr);
 		IGB_TX_LOCK_DESTROY(txr);
 	}
 	free(adapter->tx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Free transmit ring related data structures.
  *
  **********************************************************************/
 static void
 igb_free_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	struct igb_tx_buffer *tx_buffer;
 	int             i;
 
 	INIT_DEBUGOUT("free_transmit_ring: begin");
 
 	if (txr->tx_buffers == NULL)
 		return;
 
 	tx_buffer = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
 		if (tx_buffer->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag,
 			    tx_buffer->map);
 			m_freem(tx_buffer->m_head);
 			tx_buffer->m_head = NULL;
 			if (tx_buffer->map != NULL) {
 				bus_dmamap_destroy(txr->txtag,
 				    tx_buffer->map);
 				tx_buffer->map = NULL;
 			}
 		} else if (tx_buffer->map != NULL) {
 			bus_dmamap_unload(txr->txtag,
 			    tx_buffer->map);
 			bus_dmamap_destroy(txr->txtag,
 			    tx_buffer->map);
 			tx_buffer->map = NULL;
 		}
 	}
 #if __FreeBSD_version >= 800000
 	if (txr->br != NULL)
 		buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	if (txr->tx_buffers != NULL) {
 		free(txr->tx_buffers, M_DEVBUF);
 		txr->tx_buffers = NULL;
 	}
 	if (txr->txtag != NULL) {
 		bus_dma_tag_destroy(txr->txtag);
 		txr->txtag = NULL;
 	}
 	return;
 }
 
 /**********************************************************************
  *
  *  Setup work for hardware segmentation offload (TSO)
  *
  **********************************************************************/
 static boolean_t
 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
 {
 	struct adapter *adapter = txr->adapter;
 	struct e1000_adv_tx_context_desc *TXD;
 	struct igb_tx_buffer        *tx_buffer;
 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
 	u32 mss_l4len_idx = 0;
 	u16 vtag = 0;
 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
 	struct ether_vlan_header *eh;
 	struct ip *ip;
 	struct tcphdr *th;
 
 
 	/*
 	 * Determine where frame payload starts.
 	 * Jump over vlan headers if already present
 	 */
 	eh = mtod(mp, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	else
 		ehdrlen = ETHER_HDR_LEN;
 
 	/* Ensure we have at least the IP+TCP header in the first mbuf. */
 	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
 		return FALSE;
 
 	/* Only supports IPV4 for now */
 	ctxd = txr->next_avail_desc;
 	tx_buffer = &txr->tx_buffers[ctxd];
 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
 
 	ip = (struct ip *)(mp->m_data + ehdrlen);
 	if (ip->ip_p != IPPROTO_TCP)
                 return FALSE;   /* 0 */
 	ip->ip_sum = 0;
 	ip_hlen = ip->ip_hl << 2;
 	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
 	th->th_sum = in_pseudo(ip->ip_src.s_addr,
 	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 	tcp_hlen = th->th_off << 2;
 	/*
 	 * Calculate header length, this is used
 	 * in the transmit desc in igb_xmit
 	 */
 	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
 
 	/* VLAN MACLEN IPLEN */
 	if (mp->m_flags & M_VLANTAG) {
 		vtag = htole16(mp->m_pkthdr.ether_vtag);
 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
 	}
 
 	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
 	vlan_macip_lens |= ip_hlen;
 	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
 
 	/* ADV DTYPE TUCMD */
 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
 	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
 
 	/* MSS L4LEN IDX */
 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
 	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
 	/* 82575 needs the queue index added */
 	if (adapter->hw.mac.type == e1000_82575)
 		mss_l4len_idx |= txr->me << 4;
 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
 
 	TXD->seqnum_seed = htole32(0);
 	tx_buffer->m_head = NULL;
 	tx_buffer->next_eop = -1;
 
 	if (++ctxd == adapter->num_tx_desc)
 		ctxd = 0;
 
 	txr->tx_avail--;
 	txr->next_avail_desc = ctxd;
 	return TRUE;
 }
 
 
 /*********************************************************************
  *
  *  Context Descriptor setup for VLAN or CSUM
  *
  **********************************************************************/
 
 static bool
 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
 {
 	struct adapter *adapter = txr->adapter;
 	struct e1000_adv_tx_context_desc *TXD;
 	struct igb_tx_buffer        *tx_buffer;
 	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
 	struct ether_vlan_header *eh;
 	struct ip *ip = NULL;
 	struct ip6_hdr *ip6;
 	int  ehdrlen, ctxd, ip_hlen = 0;
 	u16	etype, vtag = 0;
 	u8	ipproto = 0;
 	bool	offload = TRUE;
 
 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
 		offload = FALSE;
 
 	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
 	ctxd = txr->next_avail_desc;
 	tx_buffer = &txr->tx_buffers[ctxd];
 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
 
 	/*
 	** In advanced descriptors the vlan tag must 
 	** be placed into the context descriptor, thus
 	** we need to be here just for that setup.
 	*/
 	if (mp->m_flags & M_VLANTAG) {
 		vtag = htole16(mp->m_pkthdr.ether_vtag);
 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
 	} else if (offload == FALSE)
 		return FALSE;
 
 	/*
 	 * Determine where frame payload starts.
 	 * Jump over vlan headers if already present,
 	 * helpful for QinQ too.
 	 */
 	eh = mtod(mp, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		etype = ntohs(eh->evl_proto);
 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	} else {
 		etype = ntohs(eh->evl_encap_proto);
 		ehdrlen = ETHER_HDR_LEN;
 	}
 
 	/* Set the ether header length */
 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
 
 	switch (etype) {
 		case ETHERTYPE_IP:
 			ip = (struct ip *)(mp->m_data + ehdrlen);
 			ip_hlen = ip->ip_hl << 2;
 			if (mp->m_len < ehdrlen + ip_hlen) {
 				offload = FALSE;
 				break;
 			}
 			ipproto = ip->ip_p;
 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
 			break;
 		case ETHERTYPE_IPV6:
 			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
 			ip_hlen = sizeof(struct ip6_hdr);
 			if (mp->m_len < ehdrlen + ip_hlen)
 				return (FALSE);
 			ipproto = ip6->ip6_nxt;
 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
 			break;
 		default:
 			offload = FALSE;
 			break;
 	}
 
 	vlan_macip_lens |= ip_hlen;
 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
 
 	switch (ipproto) {
 		case IPPROTO_TCP:
 			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
 			break;
 		case IPPROTO_UDP:
 			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
 			break;
 #if __FreeBSD_version >= 800000
 		case IPPROTO_SCTP:
 			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
 				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
 			break;
 #endif
 		default:
 			offload = FALSE;
 			break;
 	}
 
 	/* 82575 needs the queue index added */
 	if (adapter->hw.mac.type == e1000_82575)
 		mss_l4len_idx = txr->me << 4;
 
 	/* Now copy bits into descriptor */
 	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
 	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
 	TXD->seqnum_seed = htole32(0);
 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
 
 	tx_buffer->m_head = NULL;
 	tx_buffer->next_eop = -1;
 
 	/* We've consumed the first desc, adjust counters */
 	if (++ctxd == adapter->num_tx_desc)
 		ctxd = 0;
 	txr->next_avail_desc = ctxd;
 	--txr->tx_avail;
 
         return (offload);
 }
 
 
 /**********************************************************************
  *
  *  Examine each tx_buffer in the used queue. If the hardware is done
  *  processing the packet then free associated resources. The
  *  tx_buffer is put back on the free queue.
  *
  *  TRUE return means there's work in the ring to clean, FALSE its empty.
  **********************************************************************/
 static bool
 igb_txeof(struct tx_ring *txr)
 {
 	struct adapter	*adapter = txr->adapter;
-        int first, last, done;
+        int first, last, done, processed;
         struct igb_tx_buffer *tx_buffer;
         struct e1000_tx_desc   *tx_desc, *eop_desc;
 	struct ifnet   *ifp = adapter->ifp;
 
 	IGB_TX_LOCK_ASSERT(txr);
 
-        if (txr->tx_avail == adapter->num_tx_desc)
+        if (txr->tx_avail == adapter->num_tx_desc) {
+		txr->queue_status = IGB_QUEUE_IDLE;
                 return FALSE;
+	}
 
+	processed = 0;
         first = txr->next_to_clean;
         tx_desc = &txr->tx_base[first];
         tx_buffer = &txr->tx_buffers[first];
 	last = tx_buffer->next_eop;
         eop_desc = &txr->tx_base[last];
 
 	/*
 	 * What this does is get the index of the
 	 * first descriptor AFTER the EOP of the 
 	 * first packet, that way we can do the
 	 * simple comparison on the inner while loop.
 	 */
 	if (++last == adapter->num_tx_desc)
  		last = 0;
 	done = last;
 
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
 		/* We clean the range of the packet */
 		while (first != done) {
                 	tx_desc->upper.data = 0;
                 	tx_desc->lower.data = 0;
                 	tx_desc->buffer_addr = 0;
                 	++txr->tx_avail;
+			++processed;
 
 			if (tx_buffer->m_head) {
 				txr->bytes +=
 				    tx_buffer->m_head->m_pkthdr.len;
 				bus_dmamap_sync(txr->txtag,
 				    tx_buffer->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(txr->txtag,
 				    tx_buffer->map);
 
                         	m_freem(tx_buffer->m_head);
                         	tx_buffer->m_head = NULL;
                 	}
 			tx_buffer->next_eop = -1;
 			txr->watchdog_time = ticks;
 
 	                if (++first == adapter->num_tx_desc)
 				first = 0;
 
 	                tx_buffer = &txr->tx_buffers[first];
 			tx_desc = &txr->tx_base[first];
 		}
 		++txr->packets;
 		++ifp->if_opackets;
 		/* See if we can continue to the next packet */
 		last = tx_buffer->next_eop;
 		if (last != -1) {
         		eop_desc = &txr->tx_base[last];
 			/* Get new done point */
 			if (++last == adapter->num_tx_desc) last = 0;
 			done = last;
 		} else
 			break;
         }
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
         txr->next_to_clean = first;
 
+	/*
+	** Watchdog calculation, we know there's
+	** work outstanding or the first return
+	** would have been taken, so none processed
+	** for too long indicates a hang.
+	*/
+	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
+		txr->queue_status = IGB_QUEUE_HUNG;
+
         /*
          * If we have enough room, clear IFF_DRV_OACTIVE
          * to tell the stack that it is OK to send packets.
          */
         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		/* All clean, turn off the watchdog */
                 if (txr->tx_avail == adapter->num_tx_desc) {
-			txr->watchdog_check = FALSE;
+			txr->queue_status = IGB_QUEUE_IDLE;
 			return (FALSE);
 		}
         }
 
 	return (TRUE);
 }
 
 
 /*********************************************************************
  *
  *  Refresh mbuf buffers for RX descriptor rings
  *   - now keeps its own state so discards due to resource
  *     exhaustion are unnecessary, if an mbuf cannot be obtained
  *     it just returns, keeping its placeholder, thus it can simply
  *     be recalled to try again.
  *
  **********************************************************************/
 static void
 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
 {
 	struct adapter		*adapter = rxr->adapter;
 	bus_dma_segment_t	hseg[1];
 	bus_dma_segment_t	pseg[1];
 	struct igb_rx_buf	*rxbuf;
 	struct mbuf		*mh, *mp;
 	int			i, nsegs, error, cleaned;
 
 	i = rxr->next_to_refresh;
 	cleaned = -1; /* Signify no completions */
 	while (i != limit) {
 		rxbuf = &rxr->rx_buffers[i];
-		if ((rxbuf->m_head == NULL) && (rxr->hdr_split)) {
+		/* No hdr mbuf used with header split off */
+		if (rxr->hdr_split == FALSE)
+			goto no_split;
+		if (rxbuf->m_head == NULL) {
 			mh = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (mh == NULL)
 				goto update;
-			mh->m_pkthdr.len = mh->m_len = MHLEN;
-			mh->m_len = MHLEN;
-			mh->m_flags |= M_PKTHDR;
-			m_adj(mh, ETHER_ALIGN);
-			/* Get the memory mapping */
-			error = bus_dmamap_load_mbuf_sg(rxr->htag,
-			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
-			if (error != 0) {
-				printf("GET BUF: dmamap load"
-				    " failure - %d\n", error);
-				m_free(mh);
-				goto update;
-			}
-			rxbuf->m_head = mh;
-			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
-			    BUS_DMASYNC_PREREAD);
-			rxr->rx_base[i].read.hdr_addr =
-			    htole64(hseg[0].ds_addr);
-		}
+		} else
+			mh = rxbuf->m_head;
 
+		mh->m_pkthdr.len = mh->m_len = MHLEN;
+		mh->m_len = MHLEN;
+		mh->m_flags |= M_PKTHDR;
+		/* Get the memory mapping */
+		error = bus_dmamap_load_mbuf_sg(rxr->htag,
+		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
+		if (error != 0) {
+			printf("Refresh mbufs: hdr dmamap load"
+			    " failure - %d\n", error);
+			m_free(mh);
+			rxbuf->m_head = NULL;
+			goto update;
+		}
+		rxbuf->m_head = mh;
+		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
+		    BUS_DMASYNC_PREREAD);
+		rxr->rx_base[i].read.hdr_addr =
+		    htole64(hseg[0].ds_addr);
+no_split:
 		if (rxbuf->m_pack == NULL) {
 			mp = m_getjcl(M_DONTWAIT, MT_DATA,
 			    M_PKTHDR, adapter->rx_mbuf_sz);
 			if (mp == NULL)
 				goto update;
-			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
-			/* Get the memory mapping */
-			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
-			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
-			if (error != 0) {
-				printf("GET BUF: dmamap load"
-				    " failure - %d\n", error);
-				m_free(mp);
-				goto update;
-			}
-			rxbuf->m_pack = mp;
-			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
-			    BUS_DMASYNC_PREREAD);
-			rxr->rx_base[i].read.pkt_addr =
-			    htole64(pseg[0].ds_addr);
+		} else
+			mp = rxbuf->m_pack;
+
+		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
+		/* Get the memory mapping */
+		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
+		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
+		if (error != 0) {
+			printf("Refresh mbufs: payload dmamap load"
+			    " failure - %d\n", error);
+			m_free(mp);
+			rxbuf->m_pack = NULL;
+			goto update;
 		}
+		rxbuf->m_pack = mp;
+		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
+		    BUS_DMASYNC_PREREAD);
+		rxr->rx_base[i].read.pkt_addr =
+		    htole64(pseg[0].ds_addr);
 
 		cleaned = i;
 		/* Calculate next index */
 		if (++i == adapter->num_rx_desc)
 			i = 0;
 		/* This is the work marker for refresh */
 		rxr->next_to_refresh = i;
 	}
 update:
 	if (cleaned != -1) /* If we refreshed some, bump tail */
 		E1000_WRITE_REG(&adapter->hw,
 		    E1000_RDT(rxr->me), cleaned);
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for rx_buffer structures. Since we use one
  *  rx_buffer per received packet, the maximum number of rx_buffer's
  *  that we'll need is equal to the number of receive descriptors
  *  that we've allocated.
  *
  **********************************************************************/
 static int
 igb_allocate_receive_buffers(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter = rxr->adapter;
 	device_t 		dev = adapter->dev;
 	struct igb_rx_buf	*rxbuf;
 	int             	i, bsize, error;
 
 	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
 	if (!(rxr->rx_buffers =
 	    (struct igb_rx_buf *) malloc(bsize,
 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 				   1, 0,		/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
 				   MSIZE,		/* maxsize */
 				   1,			/* nsegments */
 				   MSIZE,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->htag))) {
 		device_printf(dev, "Unable to create RX DMA tag\n");
 		goto fail;
 	}
 
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 				   1, 0,		/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
-				   MJUMPAGESIZE,	/* maxsize */
+				   MJUM9BYTES,		/* maxsize */
 				   1,			/* nsegments */
-				   MJUMPAGESIZE,	/* maxsegsize */
+				   MJUM9BYTES,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->ptag))) {
 		device_printf(dev, "Unable to create RX payload DMA tag\n");
 		goto fail;
 	}
 
 	for (i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		error = bus_dmamap_create(rxr->htag,
 		    BUS_DMA_NOWAIT, &rxbuf->hmap);
 		if (error) {
 			device_printf(dev,
 			    "Unable to create RX head DMA maps\n");
 			goto fail;
 		}
 		error = bus_dmamap_create(rxr->ptag,
 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
 		if (error) {
 			device_printf(dev,
 			    "Unable to create RX packet DMA maps\n");
 			goto fail;
 		}
 	}
 
 	return (0);
 
 fail:
 	/* Frees all, but can handle partial completion */
 	igb_free_receive_structures(adapter);
 	return (error);
 }
 
 
 static void
 igb_free_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter		*adapter;
 	struct igb_rx_buf	*rxbuf;
 	int i;
 
 	adapter = rxr->adapter;
 	for (i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head != NULL) {
 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
 			rxbuf->m_head->m_flags |= M_PKTHDR;
 			m_freem(rxbuf->m_head);
 		}
 		if (rxbuf->m_pack != NULL) {
 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
 			rxbuf->m_pack->m_flags |= M_PKTHDR;
 			m_freem(rxbuf->m_pack);
 		}
 		rxbuf->m_head = NULL;
 		rxbuf->m_pack = NULL;
 	}
 }
 
 
 /*********************************************************************
  *
  *  Initialize a receive ring and its buffers.
  *
  **********************************************************************/
 static int
 igb_setup_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter		*adapter;
 	struct  ifnet		*ifp;
 	device_t		dev;
 	struct igb_rx_buf	*rxbuf;
 	bus_dma_segment_t	pseg[1], hseg[1];
 	struct lro_ctrl		*lro = &rxr->lro;
 	int			rsize, nsegs, error = 0;
 
 	adapter = rxr->adapter;
 	dev = adapter->dev;
 	ifp = adapter->ifp;
 
 	/* Clear the ring contents */
 	IGB_RX_LOCK(rxr);
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
 	bzero((void *)rxr->rx_base, rsize);
 
 	/*
 	** Free current RX buffer structures and their mbufs
 	*/
 	igb_free_receive_ring(rxr);
 
 	/* Configure for header split? */
 	if (igb_header_split)
 		rxr->hdr_split = TRUE;
 
         /* Now replenish the ring mbufs */
-	for (int j = 0; j != adapter->num_rx_desc; ++j) {
+	for (int j = 0; j < adapter->num_rx_desc; ++j) {
 		struct mbuf	*mh, *mp;
 
 		rxbuf = &rxr->rx_buffers[j];
 		if (rxr->hdr_split == FALSE)
 			goto skip_head;
 
 		/* First the header */
 		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
 		if (rxbuf->m_head == NULL) {
 			error = ENOBUFS;
                         goto fail;
 		}
 		m_adj(rxbuf->m_head, ETHER_ALIGN);
 		mh = rxbuf->m_head;
 		mh->m_len = mh->m_pkthdr.len = MHLEN;
 		mh->m_flags |= M_PKTHDR;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
 		    rxbuf->hmap, rxbuf->m_head, hseg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) /* Nothing elegant to do here */
                         goto fail;
 		bus_dmamap_sync(rxr->htag,
 		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
 		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
 
 skip_head:
 		/* Now the payload cluster */
 		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
 		    M_PKTHDR, adapter->rx_mbuf_sz);
 		if (rxbuf->m_pack == NULL) {
 			error = ENOBUFS;
                         goto fail;
 		}
 		mp = rxbuf->m_pack;
 		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 		    rxbuf->pmap, mp, pseg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0)
                         goto fail;
 		bus_dmamap_sync(rxr->ptag,
 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
 		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
         }
 
 	/* Setup our descriptor indices */
 	rxr->next_to_check = 0;
 	rxr->next_to_refresh = 0;
 	rxr->lro_enabled = FALSE;
 	rxr->rx_split_packets = 0;
 	rxr->rx_bytes = 0;
 
 	rxr->fmp = NULL;
 	rxr->lmp = NULL;
 	rxr->discard = FALSE;
 
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/*
 	** Now set up the LRO interface, we
 	** also only do head split when LRO
 	** is enabled, since so often they
 	** are undesireable in similar setups.
 	*/
 	if (ifp->if_capenable & IFCAP_LRO) {
-		int err = tcp_lro_init(lro);
-		if (err) {
+		error = tcp_lro_init(lro);
+		if (error) {
 			device_printf(dev, "LRO Initialization failed!\n");
 			goto fail;
 		}
 		INIT_DEBUGOUT("RX LRO Initialized\n");
 		rxr->lro_enabled = TRUE;
 		lro->ifp = adapter->ifp;
 	}
 
 	IGB_RX_UNLOCK(rxr);
 	return (0);
 
 fail:
 	igb_free_receive_ring(rxr);
 	IGB_RX_UNLOCK(rxr);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize all receive rings.
  *
  **********************************************************************/
 static int
 igb_setup_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring *rxr = adapter->rx_rings;
 	int i;
 
 	for (i = 0; i < adapter->num_queues; i++, rxr++)
 		if (igb_setup_receive_ring(rxr))
 			goto fail;
 
 	return (0);
 fail:
 	/*
 	 * Free RX buffers allocated so far, we will only handle
 	 * the rings that completed, the failing case will have
 	 * cleaned up for itself. 'i' is the endpoint.
 	 */
 	for (int j = 0; j > i; ++j) {
 		rxr = &adapter->rx_rings[i];
+		IGB_RX_LOCK(rxr);
 		igb_free_receive_ring(rxr);
+		IGB_RX_UNLOCK(rxr);
 	}
 
 	return (ENOBUFS);
 }
 
 /*********************************************************************
  *
  *  Enable receive unit.
  *
  **********************************************************************/
 static void
 igb_initialize_receive_units(struct adapter *adapter)
 {
 	struct rx_ring	*rxr = adapter->rx_rings;
 	struct ifnet	*ifp = adapter->ifp;
 	struct e1000_hw *hw = &adapter->hw;
 	u32		rctl, rxcsum, psize, srrctl = 0;
 
 	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
 
 	/*
 	 * Make sure receives are disabled while setting
 	 * up the descriptor ring
 	 */
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
 
 	/*
 	** Set up for header split
 	*/
 	if (rxr->hdr_split) {
 		/* Use a standard mbuf for the header */
 		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
 		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 	} else
 		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
 	/*
 	** Set up for jumbo frames
 	*/
 	if (ifp->if_mtu > ETHERMTU) {
 		rctl |= E1000_RCTL_LPE;
-		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
-		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
-
+		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
+			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
+		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
+			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
+		}
 		/* Set maximum packet len */
 		psize = adapter->max_frame_size;
 		/* are we on a vlan? */
 		if (adapter->ifp->if_vlantrunk != NULL)
 			psize += VLAN_TAG_SIZE;
 		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
 	} else {
 		rctl &= ~E1000_RCTL_LPE;
 		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 		rctl |= E1000_RCTL_SZ_2048;
 	}
 
 	/* Setup the Base and Length of the Rx Descriptor Rings */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		u64 bus_addr = rxr->rxdma.dma_paddr;
 		u32 rxdctl;
 
 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
 		E1000_WRITE_REG(hw, E1000_RDBAH(i),
 		    (uint32_t)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_RDBAL(i),
 		    (uint32_t)bus_addr);
 		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
 		/* Enable this Queue */
 		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
 		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
 		rxdctl &= 0xFFF00000;
 		rxdctl |= IGB_RX_PTHRESH;
 		rxdctl |= IGB_RX_HTHRESH << 8;
 		rxdctl |= IGB_RX_WTHRESH << 16;
 		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
 	}
 
 	/*
 	** Setup for RX MultiQueue
 	*/
 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
 	if (adapter->num_queues >1) {
 		u32 random[10], mrqc, shift = 0;
 		union igb_reta {
 			u32 dword;
 			u8  bytes[4];
 		} reta;
 
 		arc4rand(&random, sizeof(random), 0);
 		if (adapter->hw.mac.type == e1000_82575)
 			shift = 6;
 		/* Warning FM follows */
 		for (int i = 0; i < 128; i++) {
 			reta.bytes[i & 3] =
 			    (i % adapter->num_queues) << shift;
 			if ((i & 3) == 3)
 				E1000_WRITE_REG(hw,
 				    E1000_RETA(i >> 2), reta.dword);
 		}
 		/* Now fill in hash table */
 		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
 		for (int i = 0; i < 10; i++)
 			E1000_WRITE_REG_ARRAY(hw,
 			    E1000_RSSRK(0), i, random[i]);
 
 		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
 		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
 		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
 		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
 		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
 		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
 		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
 		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
 
 		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
 
 		/*
 		** NOTE: Receive Full-Packet Checksum Offload 
 		** is mutually exclusive with Multiqueue. However
 		** this is not the same as TCP/IP checksums which
 		** still work.
 		*/
 		rxcsum |= E1000_RXCSUM_PCSD;
 #if __FreeBSD_version >= 800000
 		/* For SCTP Offload */
 		if ((hw->mac.type == e1000_82576)
 		    && (ifp->if_capenable & IFCAP_RXCSUM))
 			rxcsum |= E1000_RXCSUM_CRCOFL;
 #endif
 	} else {
 		/* Non RSS setup */
 		if (ifp->if_capenable & IFCAP_RXCSUM) {
 			rxcsum |= E1000_RXCSUM_IPPCSE;
 #if __FreeBSD_version >= 800000
 			if (adapter->hw.mac.type == e1000_82576)
 				rxcsum |= E1000_RXCSUM_CRCOFL;
 #endif
 		} else
 			rxcsum &= ~E1000_RXCSUM_TUOFL;
 	}
 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
 
 	/* Setup the Receive Control Register */
 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
 		   E1000_RCTL_RDMTS_HALF |
 		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
 	/* Strip CRC bytes. */
 	rctl |= E1000_RCTL_SECRC;
 	/* Make sure VLAN Filters are off */
 	rctl &= ~E1000_RCTL_VFE;
 	/* Don't store bad packets */
 	rctl &= ~E1000_RCTL_SBP;
 
 	/* Enable Receives */
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 
 	/*
 	 * Setup the HW Rx Head and Tail Descriptor Pointers
 	 *   - needs to be after enable
 	 */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
 		E1000_WRITE_REG(hw, E1000_RDT(i),
 		     adapter->num_rx_desc - 1);
 	}
 	return;
 }
 
 /*********************************************************************
  *
  *  Free receive rings.
  *
  **********************************************************************/
 static void
 igb_free_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		struct lro_ctrl	*lro = &rxr->lro;
 		igb_free_receive_buffers(rxr);
 		tcp_lro_free(lro);
 		igb_dma_free(adapter, &rxr->rxdma);
 	}
 
 	free(adapter->rx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Free receive ring data structures.
  *
  **********************************************************************/
 static void
 igb_free_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct igb_rx_buf	*rxbuf;
 	int i;
 
 	INIT_DEBUGOUT("free_receive_structures: begin");
 
 	/* Cleanup any existing buffers */
 	if (rxr->rx_buffers != NULL) {
 		for (i = 0; i < adapter->num_rx_desc; i++) {
 			rxbuf = &rxr->rx_buffers[i];
 			if (rxbuf->m_head != NULL) {
 				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
 				rxbuf->m_head->m_flags |= M_PKTHDR;
 				m_freem(rxbuf->m_head);
 			}
 			if (rxbuf->m_pack != NULL) {
 				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
 				rxbuf->m_pack->m_flags |= M_PKTHDR;
 				m_freem(rxbuf->m_pack);
 			}
 			rxbuf->m_head = NULL;
 			rxbuf->m_pack = NULL;
 			if (rxbuf->hmap != NULL) {
 				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
 				rxbuf->hmap = NULL;
 			}
 			if (rxbuf->pmap != NULL) {
 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
 				rxbuf->pmap = NULL;
 			}
 		}
 		if (rxr->rx_buffers != NULL) {
 			free(rxr->rx_buffers, M_DEVBUF);
 			rxr->rx_buffers = NULL;
 		}
 	}
 
 	if (rxr->htag != NULL) {
 		bus_dma_tag_destroy(rxr->htag);
 		rxr->htag = NULL;
 	}
 	if (rxr->ptag != NULL) {
 		bus_dma_tag_destroy(rxr->ptag);
 		rxr->ptag = NULL;
 	}
 }
 
 static __inline void
 igb_rx_discard(struct rx_ring *rxr, int i)
 {
-	struct adapter		*adapter = rxr->adapter;
 	struct igb_rx_buf	*rbuf;
-	struct mbuf             *mh, *mp;
 
 	rbuf = &rxr->rx_buffers[i];
+
+	/* Partially received? Free the chain */
 	if (rxr->fmp != NULL) {
 		rxr->fmp->m_flags |= M_PKTHDR;
 		m_freem(rxr->fmp);
 		rxr->fmp = NULL;
 		rxr->lmp = NULL;
 	}
 
-	mh = rbuf->m_head;
-	mp = rbuf->m_pack;
+	/*
+	** With advanced descriptors the writeback
+	** clobbers the buffer addrs, so it's easier
+	** to just free the existing mbufs and take
+	** the normal refresh path to get new buffers
+	** and mapping.
+	*/
+	if (rbuf->m_head) {
+		m_free(rbuf->m_head);
+		rbuf->m_head = NULL;
+	}
 
-	/* Reuse loaded DMA map and just update mbuf chain */
-	if (mh) {	/* with no hdr split would be null */
-		mh->m_len = MHLEN;
-		mh->m_flags |= M_PKTHDR;
-		mh->m_next = NULL;
+	if (rbuf->m_pack) {
+		m_free(rbuf->m_pack);
+		rbuf->m_pack = NULL;
 	}
 
-	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
-	mp->m_data = mp->m_ext.ext_buf;
-	mp->m_next = NULL;
 	return;
 }
 
 static __inline void
 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
 {
 
 	/*
 	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
 	 * should be computed by hardware. Also it should not have VLAN tag in
 	 * ethernet header.
 	 */
 	if (rxr->lro_enabled &&
 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
 	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
 	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
 	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
 		/*
 		 * Send to the stack if:
 		 **  - LRO not enabled, or
 		 **  - no LRO resources, or
 		 **  - lro enqueue fails
 		 */
 		if (rxr->lro.lro_cnt != 0)
 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
 				return;
 	}
 	IGB_RX_UNLOCK(rxr);
 	(*ifp->if_input)(ifp, m);
 	IGB_RX_LOCK(rxr);
 }
 
 /*********************************************************************
  *
  *  This routine executes in interrupt context. It replenishes
  *  the mbufs in the descriptor and sends data which has been
  *  dma'ed into host memory to upper layer.
  *
  *  We loop at most count times if count is > 0, or until done if
  *  count < 0.
  *
  *  Return TRUE if more to clean, FALSE otherwise
  *********************************************************************/
 static bool
 igb_rxeof(struct igb_queue *que, int count, int *done)
 {
 	struct adapter		*adapter = que->adapter;
 	struct rx_ring		*rxr = que->rxr;
 	struct ifnet		*ifp = adapter->ifp;
 	struct lro_ctrl		*lro = &rxr->lro;
 	struct lro_entry	*queued;
 	int			i, processed = 0, rxdone = 0;
 	u32			ptype, staterr = 0;
 	union e1000_adv_rx_desc	*cur;
 
 	IGB_RX_LOCK(rxr);
 	/* Sync the ring. */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	/* Main clean loop */
 	for (i = rxr->next_to_check; count != 0;) {
 		struct mbuf		*sendmp, *mh, *mp;
 		struct igb_rx_buf	*rxbuf;
 		u16			hlen, plen, hdr, vtag;
 		bool			eop = FALSE;
  
 		cur = &rxr->rx_base[i];
 		staterr = le32toh(cur->wb.upper.status_error);
 		if ((staterr & E1000_RXD_STAT_DD) == 0)
 			break;
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 		count--;
 		sendmp = mh = mp = NULL;
 		cur->wb.upper.status_error = 0;
 		rxbuf = &rxr->rx_buffers[i];
 		plen = le16toh(cur->wb.upper.length);
 		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
 		vtag = le16toh(cur->wb.upper.vlan);
 		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
 		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
 
 		/* Make sure all segments of a bad packet are discarded */
 		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
 		    (rxr->discard)) {
 			ifp->if_ierrors++;
 			++rxr->rx_discarded;
 			if (!eop) /* Catch subsequent segs */
 				rxr->discard = TRUE;
 			else
 				rxr->discard = FALSE;
 			igb_rx_discard(rxr, i);
 			goto next_desc;
 		}
 
 		/*
 		** The way the hardware is configured to
 		** split, it will ONLY use the header buffer
 		** when header split is enabled, otherwise we
 		** get normal behavior, ie, both header and
 		** payload are DMA'd into the payload buffer.
 		**
 		** The fmp test is to catch the case where a
 		** packet spans multiple descriptors, in that
 		** case only the first header is valid.
 		*/
 		if (rxr->hdr_split && rxr->fmp == NULL) {
 			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
 			    E1000_RXDADV_HDRBUFLEN_SHIFT;
 			if (hlen > IGB_HDR_BUF)
 				hlen = IGB_HDR_BUF;
-			/* Handle the header mbuf */
 			mh = rxr->rx_buffers[i].m_head;
 			mh->m_len = hlen;
-			/* clear buf info for refresh */
+			/* clear buf pointer for refresh */
 			rxbuf->m_head = NULL;
 			/*
 			** Get the payload length, this
 			** could be zero if its a small
 			** packet.
 			*/
 			if (plen > 0) {
 				mp = rxr->rx_buffers[i].m_pack;
 				mp->m_len = plen;
 				mh->m_next = mp;
-				/* clear buf info for refresh */
+				/* clear buf pointer */
 				rxbuf->m_pack = NULL;
 				rxr->rx_split_packets++;
 			}
 		} else {
 			/*
 			** Either no header split, or a
 			** secondary piece of a fragmented
 			** split packet.
 			*/
 			mh = rxr->rx_buffers[i].m_pack;
 			mh->m_len = plen;
 			/* clear buf info for refresh */
 			rxbuf->m_pack = NULL;
 		}
 
 		++processed; /* So we know when to refresh */
 
 		/* Initial frame - setup */
 		if (rxr->fmp == NULL) {
 			mh->m_pkthdr.len = mh->m_len;
-			/* Store the first mbuf */
+			/* Save the head of the chain */
 			rxr->fmp = mh;
 			rxr->lmp = mh;
 			if (mp != NULL) {
 				/* Add payload if split */
 				mh->m_pkthdr.len += mp->m_len;
 				rxr->lmp = mh->m_next;
 			}
 		} else {
 			/* Chain mbuf's together */
 			rxr->lmp->m_next = mh;
 			rxr->lmp = rxr->lmp->m_next;
 			rxr->fmp->m_pkthdr.len += mh->m_len;
 		}
 
 		if (eop) {
 			rxr->fmp->m_pkthdr.rcvif = ifp;
 			ifp->if_ipackets++;
 			rxr->rx_packets++;
 			/* capture data for AIM */
 			rxr->packets++;
 			rxr->bytes += rxr->fmp->m_pkthdr.len;
 			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
 
 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 				igb_rx_checksum(staterr, rxr->fmp, ptype);
 
 			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
 			    (staterr & E1000_RXD_STAT_VP) != 0) {
 				rxr->fmp->m_pkthdr.ether_vtag = vtag;
 				rxr->fmp->m_flags |= M_VLANTAG;
 			}
 #if __FreeBSD_version >= 800000
 			rxr->fmp->m_pkthdr.flowid = que->msix;
 			rxr->fmp->m_flags |= M_FLOWID;
 #endif
 			sendmp = rxr->fmp;
 			/* Make sure to set M_PKTHDR. */
 			sendmp->m_flags |= M_PKTHDR;
 			rxr->fmp = NULL;
 			rxr->lmp = NULL;
 		}
 
 next_desc:
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 		/* Advance our pointers to the next descriptor. */
 		if (++i == adapter->num_rx_desc)
 			i = 0;
 		/*
 		** Send to the stack or LRO
 		*/
 		if (sendmp != NULL) {
 			rxr->next_to_check = i;
 			igb_rx_input(rxr, ifp, sendmp, ptype);
 			i = rxr->next_to_check;
 			rxdone++;
 		}
 
 		/* Every 8 descriptors we go to refresh mbufs */
 		if (processed == 8) {
                         igb_refresh_mbufs(rxr, i);
                         processed = 0;
 		}
 	}
 
 	/* Catch any remainders */
 	if (processed != 0) {
 		igb_refresh_mbufs(rxr, i);
 		processed = 0;
 	}
 
 	rxr->next_to_check = i;
 
 	/*
 	 * Flush any outstanding LRO work
 	 */
 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
 		tcp_lro_flush(lro, queued);
 	}
 
 	IGB_RX_UNLOCK(rxr);
 
 	if (done != NULL)
 		*done = rxdone;
 
 	/*
 	** We still have cleaning to do?
 	** Schedule another interrupt if so.
 	*/
 	if ((staterr & E1000_RXD_STAT_DD) != 0)
 		return (TRUE);
 
 	return (FALSE);
 }
 
 /*********************************************************************
  *
  *  Verify that the hardware indicated that the checksum is valid.
  *  Inform the stack about the status of checksum so that stack
  *  doesn't spend time verifying the checksum.
  *
  *********************************************************************/
 static void
 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
 {
 	u16 status = (u16)staterr;
 	u8  errors = (u8) (staterr >> 24);
 	int sctp;
 
 	/* Ignore Checksum bit is set */
 	if (status & E1000_RXD_STAT_IXSM) {
 		mp->m_pkthdr.csum_flags = 0;
 		return;
 	}
 
 	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
 	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
 		sctp = 1;
 	else
 		sctp = 0;
 	if (status & E1000_RXD_STAT_IPCS) {
 		/* Did it pass? */
 		if (!(errors & E1000_RXD_ERR_IPE)) {
 			/* IP Checksum Good */
 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
 		} else
 			mp->m_pkthdr.csum_flags = 0;
 	}
 
 	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
 		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 #if __FreeBSD_version >= 800000
 		if (sctp) /* reassign */
 			type = CSUM_SCTP_VALID;
 #endif
 		/* Did it pass? */
 		if (!(errors & E1000_RXD_ERR_TCPE)) {
 			mp->m_pkthdr.csum_flags |= type;
 			if (sctp == 0)
 				mp->m_pkthdr.csum_data = htons(0xffff);
 		}
 	}
 	return;
 }
 
 /*
  * This routine is run via an vlan
  * config EVENT
  */
 static void
 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u32		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
                 return;
 
+	IGB_CORE_LOCK(adapter);
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
-	igb_shadow_vfta[index] |= (1 << bit);
+	adapter->shadow_vfta[index] |= (1 << bit);
 	++adapter->num_vlans;
 	/* Re-init to load the changes */
-	igb_init(adapter);
+	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+		igb_init_locked(adapter);
+	IGB_CORE_UNLOCK(adapter);
 }
 
 /*
  * This routine is run via an vlan
  * unconfig EVENT
  */
 static void
 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u32		index, bit;
 
 	if (ifp->if_softc !=  arg)
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
                 return;
 
+	IGB_CORE_LOCK(adapter);
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
-	igb_shadow_vfta[index] &= ~(1 << bit);
+	adapter->shadow_vfta[index] &= ~(1 << bit);
 	--adapter->num_vlans;
 	/* Re-init to load the changes */
-	igb_init(adapter);
+	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+		igb_init_locked(adapter);
+	IGB_CORE_UNLOCK(adapter);
 }
 
 static void
 igb_setup_vlan_hw_support(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32             reg;
 
 	/*
 	** We get here thru init_locked, meaning
 	** a soft reset, this has already cleared
 	** the VFTA and other state, so if there
 	** have been no vlan's registered do nothing.
 	*/
 	if (adapter->num_vlans == 0)
                 return;
 
 	/*
 	** A soft reset zero's out the VFTA, so
 	** we need to repopulate it now.
 	*/
 	for (int i = 0; i < IGB_VFTA_SIZE; i++)
-                if (igb_shadow_vfta[i] != 0) {
+                if (adapter->shadow_vfta[i] != 0) {
 			if (hw->mac.type == e1000_vfadapt)
-				e1000_vfta_set_vf(hw, igb_shadow_vfta[i], TRUE);
+				e1000_vfta_set_vf(hw,
+				    adapter->shadow_vfta[i], TRUE);
 			else
 				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
-                           	 i, igb_shadow_vfta[i]);
+                           	 i, adapter->shadow_vfta[i]);
 		}
 
 	if (hw->mac.type == e1000_vfadapt)
 		e1000_rlpml_set_vf(hw,
 		    adapter->max_frame_size + VLAN_TAG_SIZE);
 	else {
 		reg = E1000_READ_REG(hw, E1000_CTRL);
 		reg |= E1000_CTRL_VME;
 		E1000_WRITE_REG(hw, E1000_CTRL, reg);
 
 		/* Enable the Filter Table */
 		reg = E1000_READ_REG(hw, E1000_RCTL);
 		reg &= ~E1000_RCTL_CFIEN;
 		reg |= E1000_RCTL_VFE;
 		E1000_WRITE_REG(hw, E1000_RCTL, reg);
 
 		/* Update the frame size */
 		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
 		    adapter->max_frame_size + VLAN_TAG_SIZE);
 	}
 }
 
 static void
 igb_enable_intr(struct adapter *adapter)
 {
 	/* With RSS set up what to auto clear */
 	if (adapter->msix_mem) {
 		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
 		    adapter->eims_mask);
 		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
 		    adapter->eims_mask);
 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
 		    adapter->eims_mask);
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 		    E1000_IMS_LSC);
 	} else {
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 		    IMS_ENABLE_MASK);
 	}
 	E1000_WRITE_FLUSH(&adapter->hw);
 
 	return;
 }
 
 static void
 igb_disable_intr(struct adapter *adapter)
 {
 	if (adapter->msix_mem) {
 		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
 		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
 	} 
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
 	E1000_WRITE_FLUSH(&adapter->hw);
 	return;
 }
 
 /*
  * Bit of a misnomer, what this really means is
  * to enable OS management of the system... aka
  * to disable special hardware management features 
  */
 static void
 igb_init_manageability(struct adapter *adapter)
 {
 	if (adapter->has_manage) {
 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
 
 		/* disable hardware interception of ARP */
 		manc &= ~(E1000_MANC_ARP_EN);
 
                 /* enable receiving management packets to the host */
 		manc |= E1000_MANC_EN_MNG2HOST;
 		manc2h |= 1 << 5;  /* Mng Port 623 */
 		manc2h |= 1 << 6;  /* Mng Port 664 */
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
 	}
 }
 
 /*
  * Give control back to hardware management
  * controller if there is one.
  */
 static void
 igb_release_manageability(struct adapter *adapter)
 {
 	if (adapter->has_manage) {
 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
 
 		/* re-enable hardware interception of ARP */
 		manc |= E1000_MANC_ARP_EN;
 		manc &= ~E1000_MANC_EN_MNG2HOST;
 
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
 	}
 }
 
 /*
  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means that
  * the driver is loaded. 
  *
  */
 static void
 igb_get_hw_control(struct adapter *adapter)
 {
 	u32 ctrl_ext;
 
 	if (adapter->hw.mac.type == e1000_vfadapt)
 		return;
 
 	/* Let firmware know the driver has taken over */
 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
 }
 
 /*
  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means that the
  * driver is no longer loaded.
  *
  */
 static void
 igb_release_hw_control(struct adapter *adapter)
 {
 	u32 ctrl_ext;
 
 	if (adapter->hw.mac.type == e1000_vfadapt)
 		return;
 
 	/* Let firmware taken over control of h/w */
 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
 }
 
 static int
 igb_is_valid_ether_addr(uint8_t *addr)
 {
 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
 
 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
 		return (FALSE);
 	}
 
 	return (TRUE);
 }
 
 
 /*
  * Enable PCI Wake On Lan capability
  */
 static void
 igb_enable_wakeup(device_t dev)
 {
 	u16     cap, status;
 	u8      id;
 
 	/* First find the capabilities pointer*/
 	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
 	/* Read the PM Capabilities */
 	id = pci_read_config(dev, cap, 1);
 	if (id != PCIY_PMG)     /* Something wrong */
 		return;
 	/* OK, we have the power capabilities, so
 	   now get the status register */
 	cap += PCIR_POWER_STATUS;
 	status = pci_read_config(dev, cap, 2);
 	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
 	pci_write_config(dev, cap, status, 2);
 	return;
 }
 
 static void
 igb_led_func(void *arg, int onoff)
 {
 	struct adapter	*adapter = arg;
 
 	IGB_CORE_LOCK(adapter);
 	if (onoff) {
 		e1000_setup_led(&adapter->hw);
 		e1000_led_on(&adapter->hw);
 	} else {
 		e1000_led_off(&adapter->hw);
 		e1000_cleanup_led(&adapter->hw);
 	}
 	IGB_CORE_UNLOCK(adapter);
 }
 
 /**********************************************************************
  *
  *  Update the board statistics counters.
  *
  **********************************************************************/
 static void
 igb_update_stats_counters(struct adapter *adapter)
 {
 	struct ifnet		*ifp;
         struct e1000_hw		*hw = &adapter->hw;
 	struct e1000_hw_stats	*stats;
 
 	/* 
 	** The virtual function adapter has only a
 	** small controlled set of stats, do only 
 	** those and return.
 	*/
 	if (adapter->hw.mac.type == e1000_vfadapt) {
 		igb_update_vf_stats_counters(adapter);
 		return;
 	}
 
 	stats = (struct e1000_hw_stats	*)adapter->stats;
 
 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
 	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
 		stats->symerrs +=
 		    E1000_READ_REG(hw,E1000_SYMERRS);
 		stats->sec += E1000_READ_REG(hw, E1000_SEC);
 	}
 
 	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
 	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
 	stats->scc += E1000_READ_REG(hw, E1000_SCC);
 	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
 
 	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
 	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
 	stats->colc += E1000_READ_REG(hw, E1000_COLC);
 	stats->dc += E1000_READ_REG(hw, E1000_DC);
 	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
 	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
 	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
 	/*
 	** For watchdog management we need to know if we have been
 	** paused during the last interval, so capture that here.
 	*/ 
         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
         stats->xoffrxc += adapter->pause_frames;
 	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
 	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
 	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
 	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
 	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
 	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
 	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
 	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
 	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
 	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
 	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
 	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
 
 	/* For the 64-bit byte counters the low dword must be read first. */
 	/* Both registers clear on the read of the high dword */
 
 	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
 	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
 
 	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
 	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
 	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
 	stats->roc += E1000_READ_REG(hw, E1000_ROC);
 	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
 
 	stats->tor += E1000_READ_REG(hw, E1000_TORH);
 	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
 
 	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
 	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
 	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
 	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
 	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
 	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
 	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
 	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
 	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
 	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
 
 	/* Interrupt Counts */
 
 	stats->iac += E1000_READ_REG(hw, E1000_IAC);
 	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
 	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
 	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
 	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
 	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
 	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
 	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
 	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
 
 	/* Host to Card Statistics */
 
 	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
 	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
 	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
 	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
 	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
 	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
 	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
 	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
 	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
 	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
 	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
 	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
 
 	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
 	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
 	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
 	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
 	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
 	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
 
 	ifp = adapter->ifp;
 	ifp->if_collisions = stats->colc;
 
 	/* Rx Errors */
 	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
 	    stats->crcerrs + stats->algnerrc +
 	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
 
 	/* Tx Errors */
 	ifp->if_oerrors = stats->ecol +
 	    stats->latecol + adapter->watchdog_events;
 
 	/* Driver specific counters */
 	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
 	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
 	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
 	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
 	adapter->packet_buf_alloc_tx =
 	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
 	adapter->packet_buf_alloc_rx =
 	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
 }
 
 
 /**********************************************************************
  *
  *  Initialize the VF board statistics counters.
  *
  **********************************************************************/
 static void
 igb_vf_init_stats(struct adapter *adapter)
 {
         struct e1000_hw *hw = &adapter->hw;
 	struct e1000_vf_stats	*stats;
 
 	stats = (struct e1000_vf_stats	*)adapter->stats;
 	if (stats == NULL)
 		return;
         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
 }
  
 /**********************************************************************
  *
  *  Update the VF board statistics counters.
  *
  **********************************************************************/
 static void
 igb_update_vf_stats_counters(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	struct e1000_vf_stats	*stats;
 
 	if (adapter->link_speed == 0)
 		return;
 
 	stats = (struct e1000_vf_stats	*)adapter->stats;
 
 	UPDATE_VF_REG(E1000_VFGPRC,
 	    stats->last_gprc, stats->gprc);
 	UPDATE_VF_REG(E1000_VFGORC,
 	    stats->last_gorc, stats->gorc);
 	UPDATE_VF_REG(E1000_VFGPTC,
 	    stats->last_gptc, stats->gptc);
 	UPDATE_VF_REG(E1000_VFGOTC,
 	    stats->last_gotc, stats->gotc);
 	UPDATE_VF_REG(E1000_VFMPRC,
 	    stats->last_mprc, stats->mprc);
 }
 
 /* Export a single 32-bit register via a read-only sysctl. */
 static int
 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	u_int val;
 
 	adapter = oidp->oid_arg1;
 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
 	return (sysctl_handle_int(oidp, &val, 0, req));
 }
 
 /*
+**  Tunable interrupt rate handler
+*/
+static int
+igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
+	int			error;
+	u32			reg, usec, rate;
+                        
+	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
+	usec = ((reg & 0x7FFC) >> 2);
+	if (usec > 0)
+		rate = 1000000 / usec;
+	else
+		rate = 0;
+	error = sysctl_handle_int(oidp, &rate, 0, req);
+	if (error || !req->newptr)
+		return error;
+	return 0;
+}
+
+/*
  * Add sysctl variables, one per statistic, to the system.
  */
 static void
 igb_add_hw_stats(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;

 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;

 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
 	struct e1000_hw_stats *stats = adapter->stats;

 	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
 	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;

 #define QUEUE_NAME_LEN 32
 	char namebuf[QUEUE_NAME_LEN];

 	/* Driver Statistics */
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
 			CTLFLAG_RD, &adapter->link_irq, 0,
 			"Link MSIX IRQ Handled");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
 			CTLFLAG_RD, &adapter->dropped_pkts,
 			"Driver dropped packets");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
 			"Driver tx dma failure in xmit");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
 			CTLFLAG_RD, &adapter->rx_overruns,
 			"RX overruns");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
 			CTLFLAG_RD, &adapter->watchdog_events,
 			"Watchdog timeouts");

 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
 			CTLFLAG_RD, &adapter->device_control,
 			"Device Control Register");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
 			CTLFLAG_RD, &adapter->rx_control,
 			"Receiver Control Register");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
 			CTLFLAG_RD, &adapter->int_mask,
 			"Interrupt Mask");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
 			CTLFLAG_RD, &adapter->eint_mask,
 			"Extended Interrupt Mask");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
 			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
 			"Transmit Buffer Packet Allocation");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
 			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
 			"Receive Buffer Packet Allocation");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
 			"Flow Control High Watermark");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
 			"Flow Control Low Watermark");

 	/* One "queueN" sub node per queue, walking the tx/rx rings in step */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		struct lro_ctrl *lro = &rxr->lro;

 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
+
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
+				CTLFLAG_RD, &adapter->queues[i],
+				sizeof(&adapter->queues[i]),
+				igb_sysctl_interrupt_rate_handler,
+				"IU", "Interrupt Rate");
 
 		/* Descriptor head/tail are read live from the hardware */
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Transmit Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
 				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Transmit Descriptor Tail");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
 				CTLFLAG_RD, &txr->no_desc_avail,
 				"Queue No Descriptor Available");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
 				CTLFLAG_RD, &txr->tx_packets,
 				"Queue Packets Transmitted");

 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
 				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Receive Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
 				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Receive Descriptor Tail");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
 				CTLFLAG_RD, &rxr->rx_packets,
 				"Queue Packets Received");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
 				CTLFLAG_RD, &rxr->rx_bytes,
 				"Queue Bytes Received");
 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
 				CTLFLAG_RD, &lro->lro_queued, 0,
 				"LRO Queued");
 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
 				CTLFLAG_RD, &lro->lro_flushed, 0,
 				"LRO Flushed");
 	}

 	/* MAC stats get their own sub node */

 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
 				    CTLFLAG_RD, NULL, "MAC Statistics");
 	stat_list = SYSCTL_CHILDREN(stat_node);

 	/*
 	** VF adapter has a very limited set of stats
 	** since it's not managing the metal, so to speak.
 	*/
 	if (adapter->hw.mac.type == e1000_vfadapt) {
 		/*
 		** igb_update_vf_stats_counters() maintains these counters
 		** through a struct e1000_vf_stats view of adapter->stats,
 		** so the exported addresses must be taken through that
 		** same type rather than through struct e1000_hw_stats.
 		*/
 		struct e1000_vf_stats *vfstats =
 		    (struct e1000_vf_stats *)adapter->stats;

 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
 				CTLFLAG_RD, &vfstats->gprc,
 				"Good Packets Received");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 				CTLFLAG_RD, &vfstats->gptc,
 				"Good Packets Transmitted");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
 				CTLFLAG_RD, &vfstats->gorc,
 				"Good Octets Received");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
 				CTLFLAG_RD, &vfstats->gotc,
 				"Good Octets Transmitted");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
 				CTLFLAG_RD, &vfstats->mprc,
 				"Multicast Packets Received");
 		/* None of the remaining nodes are registered for a VF */
 		return;
 	}

 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
 			CTLFLAG_RD, &stats->ecol,
 			"Excessive collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
 			CTLFLAG_RD, &stats->scc,
 			"Single collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
 			CTLFLAG_RD, &stats->mcc,
 			"Multiple collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
 			CTLFLAG_RD, &stats->latecol,
 			"Late collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
 			CTLFLAG_RD, &stats->colc,
 			"Collision Count");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
 			CTLFLAG_RD, &stats->symerrs,
 			"Symbol Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
 			CTLFLAG_RD, &stats->sec,
 			"Sequence Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
 			CTLFLAG_RD, &stats->dc,
 			"Defer Count");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
 			CTLFLAG_RD, &stats->mpc,
 			"Missed Packets");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
 			CTLFLAG_RD, &stats->rnbc,
 			"Receive No Buffers");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
 			CTLFLAG_RD, &stats->ruc,
 			"Receive Undersize");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
 			CTLFLAG_RD, &stats->rfc,
 			"Fragmented Packets Received ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
 			CTLFLAG_RD, &stats->roc,
 			"Oversized Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
 			CTLFLAG_RD, &stats->rjc,
 			"Received Jabber");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
 			CTLFLAG_RD, &stats->rxerrc,
 			"Receive Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
 			CTLFLAG_RD, &stats->crcerrs,
 			"CRC errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
 			CTLFLAG_RD, &stats->algnerrc,
 			"Alignment Errors");
 	/* On 82575 these are collision counts */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
 			CTLFLAG_RD, &stats->cexterr,
 			"Collision/Carrier extension errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
 			CTLFLAG_RD, &stats->xonrxc,
 			"XON Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
 			CTLFLAG_RD, &stats->xontxc,
 			"XON Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
 			CTLFLAG_RD, &stats->xoffrxc,
 			"XOFF Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
 			CTLFLAG_RD, &stats->xofftxc,
 			"XOFF Transmitted");
 	/* Packet Reception Stats */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
 			CTLFLAG_RD, &stats->tpr,
 			"Total Packets Received ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
 			CTLFLAG_RD, &stats->gprc,
 			"Good Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
 			CTLFLAG_RD, &stats->bprc,
 			"Broadcast Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
 			CTLFLAG_RD, &stats->mprc,
 			"Multicast Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
 			CTLFLAG_RD, &stats->prc64,
 			"64 byte frames received ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
 			CTLFLAG_RD, &stats->prc127,
 			"65-127 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
 			CTLFLAG_RD, &stats->prc255,
 			"128-255 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
 			CTLFLAG_RD, &stats->prc511,
 			"256-511 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
 			CTLFLAG_RD, &stats->prc1023,
 			"512-1023 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
 			CTLFLAG_RD, &stats->prc1522,
 			"1024-1522 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
 			CTLFLAG_RD, &stats->gorc,
 			"Good Octets Received");

 	/* Packet Transmission Stats */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
 			CTLFLAG_RD, &stats->gotc,
 			"Good Octets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
 			CTLFLAG_RD, &stats->tpt,
 			"Total Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 			CTLFLAG_RD, &stats->gptc,
 			"Good Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
 			CTLFLAG_RD, &stats->bptc,
 			"Broadcast Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
 			CTLFLAG_RD, &stats->mptc,
 			"Multicast Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
 			CTLFLAG_RD, &stats->ptc64,
 			"64 byte frames transmitted ");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
 			CTLFLAG_RD, &stats->ptc127,
 			"65-127 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
 			CTLFLAG_RD, &stats->ptc255,
 			"128-255 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
 			CTLFLAG_RD, &stats->ptc511,
 			"256-511 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
 			CTLFLAG_RD, &stats->ptc1023,
 			"512-1023 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
 			CTLFLAG_RD, &stats->ptc1522,
 			"1024-1522 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
 			CTLFLAG_RD, &stats->tsctc,
 			"TSO Contexts Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
 			CTLFLAG_RD, &stats->tsctfc,
 			"TSO Contexts Failed");


 	/* Interrupt Stats */

 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
 	int_list = SYSCTL_CHILDREN(int_node);

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
 			CTLFLAG_RD, &stats->iac,
 			"Interrupt Assertion Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
 			CTLFLAG_RD, &stats->icrxptc,
 			"Interrupt Cause Rx Pkt Timer Expire Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
 			CTLFLAG_RD, &stats->icrxatc,
 			"Interrupt Cause Rx Abs Timer Expire Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
 			CTLFLAG_RD, &stats->ictxptc,
 			"Interrupt Cause Tx Pkt Timer Expire Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
 			CTLFLAG_RD, &stats->ictxatc,
 			"Interrupt Cause Tx Abs Timer Expire Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
 			CTLFLAG_RD, &stats->ictxqec,
 			"Interrupt Cause Tx Queue Empty Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
 			CTLFLAG_RD, &stats->ictxqmtc,
 			"Interrupt Cause Tx Queue Min Thresh Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
 			CTLFLAG_RD, &stats->icrxdmtc,
 			"Interrupt Cause Rx Desc Min Thresh Count");

 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
 			CTLFLAG_RD, &stats->icrxoc,
 			"Interrupt Cause Receiver Overrun Count");

 	/* Host to Card Stats */

 	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
 				    CTLFLAG_RD, NULL,
 				    "Host to Card Statistics");

 	host_list = SYSCTL_CHILDREN(host_node);

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
 			CTLFLAG_RD, &stats->cbtmpc,
 			"Circuit Breaker Tx Packet Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
 			CTLFLAG_RD, &stats->htdpmc,
 			"Host Transmit Discarded Packets");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
 			CTLFLAG_RD, &stats->rpthc,
 			"Rx Packets To Host");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
 			CTLFLAG_RD, &stats->cbrmpc,
 			"Circuit Breaker Rx Packet Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
 			CTLFLAG_RD, &stats->cbrdpc,
 			"Circuit Breaker Rx Dropped Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
 			CTLFLAG_RD, &stats->hgptc,
 			"Host Good Packets Tx Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
 			CTLFLAG_RD, &stats->htcbdpc,
 			"Host Tx Circuit Breaker Dropped Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
 			CTLFLAG_RD, &stats->hgorc,
 			"Host Good Octets Received Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
 			CTLFLAG_RD, &stats->hgotc,
 			"Host Good Octets Transmit Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
 			CTLFLAG_RD, &stats->lenerrs,
 			"Length Errors");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
 			CTLFLAG_RD, &stats->scvpc,
 			"SerDes/SGMII Code Violation Pkt Count");

 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
 			CTLFLAG_RD, &stats->hrmpc,
 			"Header Redirection Missed Packet Count");
 }
 
 
 /**********************************************************************
  *
  *  This routine provides a way to dump out the adapter eeprom,
  *  often a useful debug/service tool. This only dumps the first
  *  32 words, stuff that matters is in that extent.
  *
  **********************************************************************/
 static int
 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	int error;
 	int result;
 
 	result = -1;
 	error = sysctl_handle_int(oidp, &result, 0, req);
 
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * This value will cause a hex dump of the
 	 * first 32 16-bit words of the EEPROM to
 	 * the screen.
 	 */
 	if (result == 1) {
 		adapter = (struct adapter *)arg1;
 		igb_print_nvm_info(adapter);
         }
 
 	return (error);
 }
 
 /*
  * Print the first 32 EEPROM words as a hex table, eight words per row,
  * each row prefixed with its offset.  Words that cannot be read are shown
  * as "????" instead of printing whatever the local buffer happened to hold.
  */
 static void
 igb_print_nvm_info(struct adapter *adapter)
 {
 	u16	eeprom_data;
 	int	i, j, row = 0;

 	/* It's a bit crude, but it gets the job done */
 	printf("\nInterface EEPROM Dump:\n");
 	printf("Offset\n0x0000  ");
 	for (i = 0, j = 0; i < 32; i++, j++) {
 		if (j == 8) { /* Make the offset block */
 			j = 0; ++row;
 			printf("\n0x00%x0  ",row);
 		}
 		/* Non-zero means the read failed; eeprom_data is not valid. */
 		if (e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data) != 0) {
 			printf("???? ");
 			continue;
 		}
 		printf("%04x ", eeprom_data);
 	}
 	printf("\n");
 }
 
 /*
  * Seed *limit with the given default and publish it as a read/write
  * integer sysctl, under the device's sysctl tree, with the given name
  * and description.
  */
 static void
 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
 	const char *description, int *limit, int value)
 {
 	struct sysctl_ctx_list *sctx = device_get_sysctl_ctx(adapter->dev);
 	struct sysctl_oid *root = device_get_sysctl_tree(adapter->dev);

 	*limit = value;
 	SYSCTL_ADD_INT(sctx, SYSCTL_CHILDREN(root), OID_AUTO, name,
 	    CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
 }
Index: projects/binutils-2.17/sys/dev/e1000/if_igb.h
===================================================================
--- projects/binutils-2.17/sys/dev/e1000/if_igb.h	(revision 215829)
+++ projects/binutils-2.17/sys/dev/e1000/if_igb.h	(revision 215830)
@@ -1,515 +1,526 @@
 /******************************************************************************
 
   Copyright (c) 2001-2010, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 #ifndef _IGB_H_DEFINED_
 #define _IGB_H_DEFINED_
 
 /* Tunables */
 
 /*
  * IGB_TXD: Maximum number of Transmit Descriptors
  *
  *   This value is the number of transmit descriptors allocated by the driver.
  *   Increasing this value allows the driver to queue more transmits. Each
  *   descriptor is 16 bytes.
  *   Since TDLEN should be multiple of 128bytes, the number of transmit
  *   descriptors should meet the following condition.
  *      (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0
  */
 #define IGB_MIN_TXD		256
 #define IGB_DEFAULT_TXD		1024
 #define IGB_MAX_TXD		4096
 
 /*
  * IGB_RXD: Maximum number of Receive Descriptors
  *
  *   This value is the number of receive descriptors allocated by the driver.
  *   Increasing this value allows the driver to buffer more incoming packets.
  *   Each descriptor is 16 bytes.  A receive buffer is also allocated for each
  *   descriptor. The maximum MTU size is 16110.
  *   Since RDLEN should be a multiple of 128 bytes, the number of receive
  *   descriptors should meet the following condition.
  *      (num_rx_desc * sizeof(union e1000_adv_rx_desc)) % 128 == 0
  */
 #define IGB_MIN_RXD		256
 #define IGB_DEFAULT_RXD		1024
 #define IGB_MAX_RXD		4096
 
 /*
  * IGB_TIDV - Transmit Interrupt Delay Value
  * Valid Range: 0-65535 (0=off)
  * Default Value: 64
  *   This value delays the generation of transmit interrupts in units of
  *   1.024 microseconds. Transmit interrupt reduction can improve CPU
  *   efficiency if properly tuned for specific network traffic. If the
  *   system is reporting dropped transmits, this value may be set too high
  *   causing the driver to run out of available transmit descriptors.
  */
 #define IGB_TIDV                         64
 
 /*
  * IGB_TADV - Transmit Absolute Interrupt Delay Value
  * Valid Range: 0-65535 (0=off)
  * Default Value: 64
  *   This value, in units of 1.024 microseconds, limits the delay in which a
  *   transmit interrupt is generated. Useful only if IGB_TIDV is non-zero,
  *   this value ensures that an interrupt is generated after the initial
  *   packet is sent on the wire within the set amount of time.  Proper tuning,
  *   along with IGB_TIDV, may improve traffic throughput in specific
  *   network conditions.
  */
 #define IGB_TADV                         64
 
 /*
  * IGB_RDTR - Receive Interrupt Delay Timer (Packet Timer)
  * Valid Range: 0-65535 (0=off)
  * Default Value: 0
  *   This value delays the generation of receive interrupts in units of 1.024
  *   microseconds.  Receive interrupt reduction can improve CPU efficiency if
  *   properly tuned for specific network traffic. Increasing this value adds
  *   extra latency to frame reception and can end up decreasing the throughput
  *   of TCP traffic. If the system is reporting dropped receives, this value
  *   may be set too high, causing the driver to run out of available receive
  *   descriptors.
  *
  *   CAUTION: When setting IGB_RDTR to a value other than 0, adapters
  *            may hang (stop transmitting) under certain network conditions.
  *            If this occurs a WATCHDOG message is logged in the system
  *            event log. In addition, the controller is automatically reset,
  *            restoring the network connection. To eliminate the potential
  *            for the hang ensure that IGB_RDTR is set to 0.
  */
 #define IGB_RDTR                         0
 
 /*
  * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544)
  * Valid Range: 0-65535 (0=off)
  * Default Value: 64
  *   This value, in units of 1.024 microseconds, limits the delay in which a
  *   receive interrupt is generated. Useful only if IGB_RDTR is non-zero,
  *   this value ensures that an interrupt is generated after the initial
  *   packet is received within the set amount of time.  Proper tuning,
  *   along with IGB_RDTR, may improve traffic throughput in specific network
  *   conditions.
  */
 #define IGB_RADV                         64
 
 /*
  * This parameter controls the duration of transmit watchdog timer.
  */
 #define IGB_WATCHDOG                   (10 * hz)
 
 /*
  * This parameter controls when the driver calls the routine to reclaim
  * transmit descriptors.
  */
 #define IGB_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 8)
 #define IGB_TX_OP_THRESHOLD	(adapter->num_tx_desc / 32)
 
 /*
  * This parameter controls whether or not autonegotiation is enabled.
  *              0 - Disable autonegotiation
  *              1 - Enable  autonegotiation
  */
 #define DO_AUTO_NEG                     1
 
 /*
  * This parameter controls whether or not the driver will wait for
  * autonegotiation to complete.
  *              1 - Wait for autonegotiation to complete
  *              0 - Don't wait for autonegotiation to complete
  */
 #define WAIT_FOR_AUTO_NEG_DEFAULT       0
 
 /* Tunables -- End */
 
 #define AUTONEG_ADV_DEFAULT	(ADVERTISE_10_HALF | ADVERTISE_10_FULL | \
 				ADVERTISE_100_HALF | ADVERTISE_100_FULL | \
 				ADVERTISE_1000_FULL)
 
 #define AUTO_ALL_MODES		0
 
 /* PHY master/slave setting */
 #define IGB_MASTER_SLAVE		e1000_ms_hw_default
 
 /*
  * Miscellaneous constants
  */
 #define IGB_VENDOR_ID			0x8086
 
 #define IGB_JUMBO_PBA			0x00000028
 #define IGB_DEFAULT_PBA			0x00000030
 #define IGB_SMARTSPEED_DOWNSHIFT	3
 #define IGB_SMARTSPEED_MAX		15
 #define IGB_MAX_LOOP			10
 
 #define IGB_RX_PTHRESH			(hw->mac.type <= e1000_82576 ? 16 : 8)
 #define IGB_RX_HTHRESH			8
 #define IGB_RX_WTHRESH			1
 
 #define IGB_TX_PTHRESH			8
 #define IGB_TX_HTHRESH			1
 #define IGB_TX_WTHRESH			(((hw->mac.type == e1000_82576 || \
 					  hw->mac.type == e1000_vfadapt) && \
                                           adapter->msix_mem) ? 1 : 16)
 
 #define MAX_NUM_MULTICAST_ADDRESSES     128
 #define PCI_ANY_ID                      (~0U)
 #define ETHER_ALIGN                     2
 #define IGB_TX_BUFFER_SIZE		((uint32_t) 1514)
 #define IGB_FC_PAUSE_TIME		0x0680
 #define IGB_EEPROM_APME			0x400
+#define IGB_QUEUE_IDLE			0
+#define IGB_QUEUE_WORKING		1
+#define IGB_QUEUE_HUNG			2
 
 /*
  * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
  * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
  * also optimize cache line size effect. H/W supports up to cache line size 128.
  */
 #define IGB_DBA_ALIGN			128
 
 #define SPEED_MODE_BIT (1<<21)		/* On PCI-E MACs only */
 
 /* PCI Config defines */
 #define IGB_MSIX_BAR		3
 
 /* Defines for printing debug information */
 #define DEBUG_INIT  0
 #define DEBUG_IOCTL 0
 #define DEBUG_HW    0
 
 #define INIT_DEBUGOUT(S)            if (DEBUG_INIT)  printf(S "\n")
 #define INIT_DEBUGOUT1(S, A)        if (DEBUG_INIT)  printf(S "\n", A)
 #define INIT_DEBUGOUT2(S, A, B)     if (DEBUG_INIT)  printf(S "\n", A, B)
 #define IOCTL_DEBUGOUT(S)           if (DEBUG_IOCTL) printf(S "\n")
 #define IOCTL_DEBUGOUT1(S, A)       if (DEBUG_IOCTL) printf(S "\n", A)
 #define IOCTL_DEBUGOUT2(S, A, B)    if (DEBUG_IOCTL) printf(S "\n", A, B)
 #define HW_DEBUGOUT(S)              if (DEBUG_HW) printf(S "\n")
 #define HW_DEBUGOUT1(S, A)          if (DEBUG_HW) printf(S "\n", A)
 #define HW_DEBUGOUT2(S, A, B)       if (DEBUG_HW) printf(S "\n", A, B)
 
 #define IGB_MAX_SCATTER		64
 #define IGB_VFTA_SIZE		128
 #define IGB_BR_SIZE		4096	/* ring buf size */
 #define IGB_TSO_SIZE		(65535 + sizeof(struct ether_vlan_header))
 #define IGB_TSO_SEG_SIZE	4096	/* Max dma segment size */
 #define IGB_HDR_BUF		128
 #define IGB_PKTTYPE_MASK	0x0000FFF0
 #define ETH_ZLEN		60
 #define ETH_ADDR_LEN		6
 
 /* Offload bits in mbuf flag */
 #if __FreeBSD_version >= 800000
 #define CSUM_OFFLOAD		(CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP)
 #else
 #define CSUM_OFFLOAD		(CSUM_IP|CSUM_TCP|CSUM_UDP)
 #endif
 
 /* Define the starting Interrupt rate per Queue */
 #define IGB_INTS_PER_SEC        8000
-#define IGB_DEFAULT_ITR          1000000000/(IGB_INTS_PER_SEC * 256)
+#define IGB_DEFAULT_ITR         ((1000000/IGB_INTS_PER_SEC) << 2)
 
 #define IGB_LINK_ITR            2000
 
 /* Precision Time Sync (IEEE 1588) defines */
 #define ETHERTYPE_IEEE1588	0x88F7
 #define PICOSECS_PER_TICK	20833
 #define TSYNC_PORT		319 /* UDP port for the protocol */
 
 /*
  * Bus dma allocation structure used by
  * e1000_dma_malloc and e1000_dma_free.
  */
 struct igb_dma_alloc {
         bus_addr_t              dma_paddr;
         caddr_t                 dma_vaddr;
         bus_dma_tag_t           dma_tag;
         bus_dmamap_t            dma_map;
         bus_dma_segment_t       dma_seg;
         int                     dma_nseg;
 };
 
 
 /*
 ** Driver queue struct: this is the interrupt container
 **  for the associated tx and rx ring.
 */
 struct igb_queue {
 	struct adapter		*adapter;
 	u32			msix;		/* This queue's MSIX vector */
 	u32			eims;		/* This queue's EIMS bit */
 	u32			eitr_setting;
 	struct resource		*res;
 	void			*tag;
 	struct tx_ring		*txr;
 	struct rx_ring		*rxr;
 	struct task		que_task;
 	struct taskqueue	*tq;
 	u64			irqs;
 };
 
 /*
  * Transmit ring: one per queue
  */
 struct tx_ring {
 	struct adapter		*adapter;
 	u32			me;		/* index passed to E1000_TDH()/E1000_TDT() */
 	struct mtx		tx_mtx;		/* taken via the IGB_TX_LOCK* macros */
 	char			mtx_name[16];
 	struct igb_dma_alloc	txdma;
 	struct e1000_tx_desc	*tx_base;
 	u32			next_avail_desc;
 	u32			next_to_clean;
 	volatile u16		tx_avail;
 	struct igb_tx_buffer	*tx_buffers;
 #if __FreeBSD_version >= 800000
 	struct buf_ring		*br;
 #endif
 	bus_dma_tag_t		txtag;
 
 	u32			bytes;
 	u32			packets;
 
-	bool			watchdog_check;
+	int			queue_status;	/* presumably an IGB_QUEUE_* value -- confirm */
 	int			watchdog_time;
 	int			tdt;
 	int			tdh;
 	u64			no_desc_avail;	/* exported as sysctl "no_desc_avail" */
 	u64			tx_packets;	/* exported as sysctl "tx_packets" */
 };
 
 /*
  * Receive ring: one per queue
  */
 struct rx_ring {
 	struct adapter		*adapter;
 	u32			me;
 	struct igb_dma_alloc	rxdma;
 	union e1000_adv_rx_desc	*rx_base;
 	struct lro_ctrl		lro;
 	bool			lro_enabled;
 	bool			hdr_split;
 	bool			discard;
 	struct mtx		rx_mtx;
 	char			mtx_name[16];
 	u32			next_to_refresh;
 	u32			next_to_check;
 	struct igb_rx_buf	*rx_buffers;
 	bus_dma_tag_t		htag;		/* dma tag for rx head */
 	bus_dma_tag_t		ptag;		/* dma tag for rx packet */
 	/*
 	 * First/last mbuf pointers, for
 	 * collecting multisegment RX packets.
 	 */
 	struct mbuf	       *fmp;
 	struct mbuf	       *lmp;
 
 	u32			bytes;
 	u32			packets;
 	int			rdt;
 	int			rdh;
 
 	/* Soft stats */
 	u64			rx_split_packets;
 	u64			rx_discarded;
 	u64			rx_packets;
 	u64			rx_bytes;
 };
 
 struct adapter {
 	struct ifnet	*ifp;
 	struct e1000_hw	hw;
 
 	struct e1000_osdep osdep;
 	struct device	*dev;
 	struct cdev	*led_dev;
 
 	struct resource *pci_mem;
 	struct resource *msix_mem;
 	struct resource	*res;
 	void		*tag;
 	u32		eims_mask;
 
 	int		linkvec;
 	int		link_mask;
 	struct task	link_task;
 	int		link_irq;
 
 	struct ifmedia	media;
 	struct callout	timer;
 	int		msix;	/* total vectors allocated */
 	int		if_flags;
 	int		max_frame_size;
 	int		min_frame_size;
 	int		pause_frames;
 	struct mtx	core_mtx;
 	int		igb_insert_vlan_header;
         u16		num_queues;
 
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 	u32		num_vlans;
 
 	/* Management and WOL features */
 	int		wol;
 	int		has_manage;
 
-	/* Info about the board itself */
+	/*
+	** Shadow VFTA table, this is needed because
+	** the real vlan filter table gets cleared during
+	** a soft reset and the driver needs to be able
+	** to repopulate it.
+	*/
+	u32		shadow_vfta[IGB_VFTA_SIZE];
+
+	/* Info about the interface */
 	u8		link_active;
 	u16		link_speed;
 	u16		link_duplex;
 	u32		smartspeed;
 
 	/* Interface queues */
 	struct igb_queue	*queues;
 
 	/*
 	 * Transmit rings
 	 */
 	struct tx_ring		*tx_rings;
         u16			num_tx_desc;
 
 	/* Multicast array pointer */
 	u8			*mta;
 
 	/* 
 	 * Receive rings
 	 */
 	struct rx_ring		*rx_rings;
 	bool			rx_hdr_split;
         u16			num_rx_desc;
 	int			rx_process_limit;
 	u32			rx_mbuf_sz;
 	u32			rx_mask;
 
 	/* Misc stats maintained by the driver */
 	unsigned long	dropped_pkts;
 	unsigned long	mbuf_defrag_failed;
 	unsigned long	mbuf_header_failed;
 	unsigned long	mbuf_packet_failed;
 	unsigned long	no_tx_map_avail;
         unsigned long	no_tx_dma_setup;
 	unsigned long	watchdog_events;
 	unsigned long	rx_overruns;
 	unsigned long	device_control;
 	unsigned long	rx_control;
 	unsigned long	int_mask;
 	unsigned long	eint_mask;
 	unsigned long	packet_buf_alloc_rx;
 	unsigned long	packet_buf_alloc_tx;
 
 	boolean_t       in_detach;
 
 #ifdef IGB_IEEE1588
 	/* IEEE 1588 precision time support */
 	struct cyclecounter     cycles;
 	struct nettimer         clock;
 	struct nettime_compare  compare;
 	struct hwtstamp_ctrl    hwtstamp;
 #endif
 
 	void 			*stats;
 };
 
 /* ******************************************************************************
  * vendor_info_array
  *
  * This array contains the list of Subvendor/Subdevice IDs on which the driver
  * should load.
  *
  * ******************************************************************************/
 typedef struct _igb_vendor_info_t {
 	unsigned int vendor_id;
 	unsigned int device_id;
 	unsigned int subvendor_id;
 	unsigned int subdevice_id;
 	unsigned int index;
 } igb_vendor_info_t;
 
 
 struct igb_tx_buffer {
 	int		next_eop;  /* Index of the desc to watch */
         struct mbuf    *m_head;
         bus_dmamap_t    map;         /* bus_dma map for packet */
 };
 
 struct igb_rx_buf {
         struct mbuf    *m_head;
         struct mbuf    *m_pack;
 	bus_dmamap_t	hmap;	/* bus_dma map for header */
 	bus_dmamap_t	pmap;	/* bus_dma map for packet */
 };
 
 #define	IGB_CORE_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF)
 #define	IGB_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)
 #define	IGB_CORE_LOCK(_sc)		mtx_lock(&(_sc)->core_mtx)
 #define	IGB_CORE_UNLOCK(_sc)		mtx_unlock(&(_sc)->core_mtx)
 #define	IGB_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->core_mtx, MA_OWNED)
 
 #define	IGB_TX_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->tx_mtx)
 #define	IGB_TX_LOCK(_sc)		mtx_lock(&(_sc)->tx_mtx)
 #define	IGB_TX_UNLOCK(_sc)		mtx_unlock(&(_sc)->tx_mtx)
 #define	IGB_TX_TRYLOCK(_sc)		mtx_trylock(&(_sc)->tx_mtx)
 #define	IGB_TX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->tx_mtx, MA_OWNED)
 
 #define	IGB_RX_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->rx_mtx)
 #define	IGB_RX_LOCK(_sc)		mtx_lock(&(_sc)->rx_mtx)
 #define	IGB_RX_UNLOCK(_sc)		mtx_unlock(&(_sc)->rx_mtx)
 #define	IGB_RX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->rx_mtx, MA_OWNED)
 
 /*
  * Fold a 32-bit hardware counter into a 64-bit running total.
  *
  *   reg  - register to read (through a variable named "hw" that must be
  *          in scope at the point of use)
  *   last - lvalue holding the previous raw reading; updated
  *   cur  - lvalue holding the 64-bit total; its low 32 bits are replaced
  *          by the new reading
  *
  * A reading smaller than the previous one is taken as a 32-bit
  * wrap-around and carries one into the upper half of the total.
  * Wrapped in do { } while (0) so it behaves as a single statement,
  * including in an unbraced if/else.
  */
 #define UPDATE_VF_REG(reg, last, cur)		\
 do {						\
 	u32 new = E1000_READ_REG(hw, (reg));	\
 	if (new < (last))			\
 		(cur) += 0x100000000LL;		\
 	(last) = new;				\
 	(cur) &= 0xFFFFFFFF00000000LL;		\
 	(cur) |= new;				\
 } while (0)
 
 #if __FreeBSD_version < 800504
 /*
  * Returns 1 when a packet has to go through the queue: either ALTQ is
  * enabled on the interface send queue (ALTQ kernels only) or the buf_ring
  * already holds packets.  Returns 0 otherwise.
  */
 static __inline int
 drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
 {
 #ifdef ALTQ
 	return (ALTQ_IS_ENABLED(&ifp->if_snd) || !buf_ring_empty(br));
 #else
 	return (!buf_ring_empty(br));
 #endif
 }
 #endif
 
 #endif /* _IGB_H_DEFINED_ */
 
 
Index: projects/binutils-2.17/sys/dev/fxp/if_fxp.c
===================================================================
--- projects/binutils-2.17/sys/dev/fxp/if_fxp.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/fxp/if_fxp.c	(revision 215830)
@@ -1,3187 +1,3175 @@
 /*-
  * Copyright (c) 1995, David Greenman
  * Copyright (c) 2001 Jonathan Lemon <jlemon@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Intel EtherExpress Pro/100B PCI Fast Ethernet driver
  */
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_device_polling.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <machine/bus.h>
 #include <machine/in_cksum.h>
 #include <machine/resource.h>
 
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>		/* for PCIM_CMD_xxx */
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
 #include <dev/fxp/if_fxpreg.h>
 #include <dev/fxp/if_fxpvar.h>
 #include <dev/fxp/rcvbundl.h>
 
 MODULE_DEPEND(fxp, pci, 1, 1, 1);
 MODULE_DEPEND(fxp, ether, 1, 1, 1);
 MODULE_DEPEND(fxp, miibus, 1, 1, 1);
 #include "miibus_if.h"
 
 /*
  * NOTE!  On the Alpha, we have an alignment constraint.  The
  * card DMAs the packet immediately following the RFA.  However,
  * the first thing in the packet is a 14-byte Ethernet header.
  * This means that the packet is misaligned.  To compensate,
  * we actually offset the RFA 2 bytes into the cluster.  This
  * aligns the packet after the Ethernet header at a 32-bit
  * boundary.  HOWEVER!  This means that the RFA is misaligned!
  */
 #define	RFA_ALIGNMENT_FUDGE	2
 
 /*
  * Set initial transmit threshold at 64 (512 bytes). This is
  * increased by 64 (512 bytes) at a time, to maximum of 192
  * (1536 bytes), if an underrun occurs.
  */
 static int tx_threshold = 64;
 
 /*
  * The configuration byte map has several undefined fields which
  * must be one or must be zero.  Set up a template for these bits.
- * The actual configuration is performed in fxp_init.
+ * The actual configuration is performed in fxp_init_body.
  *
  * See struct fxp_cb_config for the bit definitions.
  *
  * The first eight bytes are the command block header (status, command
  * and link address); the numbered entries that follow are configuration
  * bytes 0 through 31.  The array is read-only.
  */
-static u_char fxp_cb_config_template[] = {
+static const u_char fxp_cb_config_template[] = {
 	0x0, 0x0,		/* cb_status */
 	0x0, 0x0,		/* cb_command */
 	0x0, 0x0, 0x0, 0x0,	/* link_addr */
 	0x0,	/*  0 */
 	0x0,	/*  1 */
 	0x0,	/*  2 */
 	0x0,	/*  3 */
 	0x0,	/*  4 */
 	0x0,	/*  5 */
 	0x32,	/*  6 */
 	0x0,	/*  7 */
 	0x0,	/*  8 */
 	0x0,	/*  9 */
 	0x6,	/* 10 */
 	0x0,	/* 11 */
 	0x0,	/* 12 */
 	0x0,	/* 13 */
 	0xf2,	/* 14 */
 	0x48,	/* 15 */
 	0x0,	/* 16 */
 	0x40,	/* 17 */
 	0xf0,	/* 18 */
 	0x0,	/* 19 */
 	0x3f,	/* 20 */
 	0x5,	/* 21 */
 	0x0,	/* 22 */
 	0x0,	/* 23 */
 	0x0,	/* 24 */
 	0x0,	/* 25 */
 	0x0,	/* 26 */
 	0x0,	/* 27 */
 	0x0,	/* 28 */
 	0x0,	/* 29 */
 	0x0,	/* 30 */
 	0x0	/* 31 */
 };
 
 /*
  * Claim various Intel PCI device identifiers for this driver.  The
  * sub-vendor and sub-device field are extensively used to identify
  * particular variants, but we don't currently differentiate between
  * them.
  *
  * fxp_find_ident() walks this table: an entry matches when its device
  * id equals the PCI device id and its revision id is either -1 (any
  * revision) or equal to the PCI revision id.  The entry with a NULL
  * name terminates the table.  The initializers appear to be in the
  * order { devid, revid, ich, name } - confirm against struct fxp_ident.
  */
-static struct fxp_ident fxp_ident_table[] = {
+static const struct fxp_ident fxp_ident_table[] = {
     { 0x1029,	-1,	0, "Intel 82559 PCI/CardBus Pro/100" },
     { 0x1030,	-1,	0, "Intel 82559 Pro/100 Ethernet" },
     { 0x1031,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" },
     { 0x1032,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" },
     { 0x1033,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
     { 0x1034,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
     { 0x1035,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
     { 0x1036,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
     { 0x1037,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
     { 0x1038,	-1,	3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
     { 0x1039,	-1,	4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" },
     { 0x103A,	-1,	4, "Intel 82801DB (ICH4) Pro/100 Ethernet" },
     { 0x103B,	-1,	4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" },
     { 0x103C,	-1,	4, "Intel 82801DB (ICH4) Pro/100 Ethernet" },
     { 0x103D,	-1,	4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" },
     { 0x103E,	-1,	4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" },
     { 0x1050,	-1,	5, "Intel 82801BA (D865) Pro/100 VE Ethernet" },
     { 0x1051,	-1,	5, "Intel 82562ET (ICH5/ICH5R) Pro/100 VE Ethernet" },
     { 0x1059,	-1,	0, "Intel 82551QM Pro/100 M Mobile Connection" },
     { 0x1064,	-1,	6, "Intel 82562EZ (ICH6)" },
     { 0x1065,	-1,	6, "Intel 82562ET/EZ/GT/GZ PRO/100 VE Ethernet" },
     { 0x1068,	-1,	6, "Intel 82801FBM (ICH6-M) Pro/100 VE Ethernet" },
     { 0x1069,	-1,	6, "Intel 82562EM/EX/GX Pro/100 Ethernet" },
     { 0x1091,	-1,	7, "Intel 82562GX Pro/100 Ethernet" },
     { 0x1092,	-1,	7, "Intel Pro/100 VE Network Connection" },
     { 0x1093,	-1,	7, "Intel Pro/100 VM Network Connection" },
     { 0x1094,	-1,	7, "Intel Pro/100 946GZ (ICH7) Network Connection" },
     { 0x1209,	-1,	0, "Intel 82559ER Embedded 10/100 Ethernet" },
     { 0x1229,	0x01,	0, "Intel 82557 Pro/100 Ethernet" },
     { 0x1229,	0x02,	0, "Intel 82557 Pro/100 Ethernet" },
     { 0x1229,	0x03,	0, "Intel 82557 Pro/100 Ethernet" },
     { 0x1229,	0x04,	0, "Intel 82558 Pro/100 Ethernet" },
     { 0x1229,	0x05,	0, "Intel 82558 Pro/100 Ethernet" },
     { 0x1229,	0x06,	0, "Intel 82559 Pro/100 Ethernet" },
     { 0x1229,	0x07,	0, "Intel 82559 Pro/100 Ethernet" },
     { 0x1229,	0x08,	0, "Intel 82559 Pro/100 Ethernet" },
     { 0x1229,	0x09,	0, "Intel 82559ER Pro/100 Ethernet" },
     { 0x1229,	0x0c,	0, "Intel 82550 Pro/100 Ethernet" },
     { 0x1229,	0x0d,	0, "Intel 82550 Pro/100 Ethernet" },
     { 0x1229,	0x0e,	0, "Intel 82550 Pro/100 Ethernet" },
     { 0x1229,	0x0f,	0, "Intel 82551 Pro/100 Ethernet" },
     { 0x1229,	0x10,	0, "Intel 82551 Pro/100 Ethernet" },
     { 0x1229,	-1,	0, "Intel 82557/8/9 Pro/100 Ethernet" },
     { 0x2449,	-1,	2, "Intel 82801BA/CAM (ICH2/3) Pro/100 Ethernet" },
     { 0x27dc,	-1,	7, "Intel 82801GB (ICH7) 10/100 Ethernet" },
     { 0,	-1,	0, NULL },
 };
 
 /*
  * Checksum offload bits advertised through if_hwassist in fxp_attach().
  * CSUM_IP is only included when FXP_IP_CSUM_WAR is defined.
  */
 #ifdef FXP_IP_CSUM_WAR
 #define FXP_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
 #else
 #define FXP_CSUM_FEATURES    (CSUM_TCP | CSUM_UDP)
 #endif
 
 static int		fxp_probe(device_t dev);
 static int		fxp_attach(device_t dev);
 static int		fxp_detach(device_t dev);
 static int		fxp_shutdown(device_t dev);
 static int		fxp_suspend(device_t dev);
 static int		fxp_resume(device_t dev);
 
-static struct fxp_ident	*fxp_find_ident(device_t dev);
+static const struct fxp_ident *fxp_find_ident(device_t dev);
 static void		fxp_intr(void *xsc);
 static void		fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp,
 			    struct mbuf *m, uint16_t status, int pos);
 static int		fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp,
 			    uint8_t statack, int count);
 static void 		fxp_init(void *xsc);
 static void 		fxp_init_body(struct fxp_softc *sc);
 static void 		fxp_tick(void *xsc);
 static void 		fxp_start(struct ifnet *ifp);
 static void 		fxp_start_body(struct ifnet *ifp);
 static int		fxp_encap(struct fxp_softc *sc, struct mbuf **m_head);
 static void		fxp_txeof(struct fxp_softc *sc);
 static void		fxp_stop(struct fxp_softc *sc);
 static void 		fxp_release(struct fxp_softc *sc);
 static int		fxp_ioctl(struct ifnet *ifp, u_long command,
 			    caddr_t data);
 static void 		fxp_watchdog(struct fxp_softc *sc);
 static void		fxp_add_rfabuf(struct fxp_softc *sc,
     			    struct fxp_rx *rxp);
 static void		fxp_discard_rfabuf(struct fxp_softc *sc,
     			    struct fxp_rx *rxp);
 static int		fxp_new_rfabuf(struct fxp_softc *sc,
     			    struct fxp_rx *rxp);
 static int		fxp_mc_addrs(struct fxp_softc *sc);
 static void		fxp_mc_setup(struct fxp_softc *sc);
 static uint16_t		fxp_eeprom_getword(struct fxp_softc *sc, int offset,
 			    int autosize);
 static void 		fxp_eeprom_putword(struct fxp_softc *sc, int offset,
 			    uint16_t data);
 static void		fxp_autosize_eeprom(struct fxp_softc *sc);
 static void		fxp_read_eeprom(struct fxp_softc *sc, u_short *data,
 			    int offset, int words);
 static void		fxp_write_eeprom(struct fxp_softc *sc, u_short *data,
 			    int offset, int words);
 static int		fxp_ifmedia_upd(struct ifnet *ifp);
 static void		fxp_ifmedia_sts(struct ifnet *ifp,
 			    struct ifmediareq *ifmr);
 static int		fxp_serial_ifmedia_upd(struct ifnet *ifp);
 static void		fxp_serial_ifmedia_sts(struct ifnet *ifp,
 			    struct ifmediareq *ifmr);
 static int		fxp_miibus_readreg(device_t dev, int phy, int reg);
 static int		fxp_miibus_writereg(device_t dev, int phy, int reg,
 			    int value);
 static void		fxp_load_ucode(struct fxp_softc *sc);
 static void		fxp_update_stats(struct fxp_softc *sc);
 static void		fxp_sysctl_node(struct fxp_softc *sc);
 static int		sysctl_int_range(SYSCTL_HANDLER_ARGS,
 			    int low, int high);
 static int		sysctl_hw_fxp_bundle_max(SYSCTL_HANDLER_ARGS);
 static int		sysctl_hw_fxp_int_delay(SYSCTL_HANDLER_ARGS);
 static void 		fxp_scb_wait(struct fxp_softc *sc);
 static void		fxp_scb_cmd(struct fxp_softc *sc, int cmd);
 static void		fxp_dma_wait(struct fxp_softc *sc,
     			    volatile uint16_t *status, bus_dma_tag_t dmat,
 			    bus_dmamap_t map);
 
 /*
  * Method table binding the device and MII bus interfaces to this
  * driver's implementations.  The { 0, 0 } entry ends the table.
  */
 static device_method_t fxp_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		fxp_probe),
 	DEVMETHOD(device_attach,	fxp_attach),
 	DEVMETHOD(device_detach,	fxp_detach),
 	DEVMETHOD(device_shutdown,	fxp_shutdown),
 	DEVMETHOD(device_suspend,	fxp_suspend),
 	DEVMETHOD(device_resume,	fxp_resume),
 
 	/* MII interface */
 	DEVMETHOD(miibus_readreg,	fxp_miibus_readreg),
 	DEVMETHOD(miibus_writereg,	fxp_miibus_writereg),
 
 	{ 0, 0 }
 };
 
 /* Driver description: name, method table and per-device softc size. */
 static driver_t fxp_driver = {
 	"fxp",
 	fxp_methods,
 	sizeof(struct fxp_softc),
 };
 
 static devclass_t fxp_devclass;
 
 /* Register fxp on the pci bus, and miibus as a child bus of fxp. */
 DRIVER_MODULE(fxp, pci, fxp_driver, fxp_devclass, 0, 0);
 DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, 0, 0);
 
 /*
  * Resource lists handed to bus_alloc_resources() in fxp_attach(): the
  * register window (memory mapped here) followed by the shareable IRQ.
  * The { -1, 0 } entry ends the list.
  */
 static struct resource_spec fxp_res_spec_mem[] = {
 	{ SYS_RES_MEMORY,	FXP_PCI_MMBA,	RF_ACTIVE },
 	{ SYS_RES_IRQ,		0,		RF_ACTIVE | RF_SHAREABLE },
 	{ -1, 0 }
 };
 
 /* Same layout as above, but with the register window in I/O port space. */
 static struct resource_spec fxp_res_spec_io[] = {
 	{ SYS_RES_IOPORT,	FXP_PCI_IOBA,	RF_ACTIVE },
 	{ SYS_RES_IRQ,		0,		RF_ACTIVE | RF_SHAREABLE },
 	{ -1, 0 }
 };
 
 /*
  * Poll the SCB command byte until it reads back as zero, i.e. until the
  * previous command has been accepted (acceptance does not imply that the
  * command has completed).  The register is sampled at most 10000 times
  * with a 2us delay between samples; if it never clears, a diagnostic
  * containing the SCB command, STATACK, RUSCUS and flow control
  * registers is printed.
  */
 static void
 fxp_scb_wait(struct fxp_softc *sc)
 {
 	union {
 		uint16_t w;
 		uint8_t b[2];
 	} flowctl;
 	int tries;
 
 	for (tries = 10000; ; ) {
 		if (CSR_READ_1(sc, FXP_CSR_SCB_COMMAND) == 0)
 			return;
 		if (--tries == 0)
 			break;
 		DELAY(2);
 	}
 
 	/* Timed out: assemble the 16-bit flow control word byte by byte. */
 	flowctl.b[0] = CSR_READ_1(sc, FXP_CSR_FLOWCONTROL);
 	flowctl.b[1] = CSR_READ_1(sc, FXP_CSR_FLOWCONTROL + 1);
 	device_printf(sc->dev, "SCB timeout: 0x%x 0x%x 0x%x 0x%x\n",
 	    CSR_READ_1(sc, FXP_CSR_SCB_COMMAND),
 	    CSR_READ_1(sc, FXP_CSR_SCB_STATACK),
 	    CSR_READ_1(sc, FXP_CSR_SCB_RUSCUS), flowctl.w);
 }
 
 /*
  * Write 'cmd' to the SCB command register.  When the command is
  * CU_RESUME and the CU resume workaround is active, a NOP is written
  * first and its acceptance is awaited before the real command goes out.
  *
  * NOTE(review): fxp_attach() records the workaround as
  * FXP_FLAG_CU_RESUME_BUG in sc->flags, whereas this test reads
  * sc->cu_resume_bug; confirm the two are kept in sync.
  */
 static void
 fxp_scb_cmd(struct fxp_softc *sc, int cmd)
 {
 	int need_nop;
 
 	need_nop = (cmd == FXP_SCB_COMMAND_CU_RESUME && sc->cu_resume_bug);
 	if (need_nop) {
 		CSR_WRITE_1(sc, FXP_CSR_SCB_COMMAND, FXP_CB_COMMAND_NOP);
 		fxp_scb_wait(sc);
 	}
 	CSR_WRITE_1(sc, FXP_CSR_SCB_COMMAND, cmd);
 }
 
 /*
  * Busy-wait until the FXP_CB_STATUS_C bit shows up in the little-endian
  * status word at *status.  Each of the (at most) 10000 rounds delays
  * 2us and syncs the DMA map before sampling the word.  "DMA timeout" is
  * printed if the bit never appears.
  */
 static void
 fxp_dma_wait(struct fxp_softc *sc, volatile uint16_t *status,
     bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	int remaining;
 
 	remaining = 10000;
 	while (remaining > 0) {
 		DELAY(2);
 		bus_dmamap_sync(dmat, map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 		if ((le16toh(*status) & FXP_CB_STATUS_C) != 0)
 			return;
 		remaining--;
 	}
 	device_printf(sc->dev, "DMA timeout\n");
 }
 
 /*
  * Look up the device in fxp_ident_table.
  *
  * Only devices whose PCI vendor id is FXP_VENDORID_INTEL are
  * considered.  The table is scanned in order and the first entry whose
  * device id matches, and whose revision id is either equal to the PCI
  * revision id or -1, is returned.  Returns NULL when nothing matches.
  */
-static struct fxp_ident *
+static const struct fxp_ident *
 fxp_find_ident(device_t dev)
 {
 	uint16_t devid;
 	uint8_t revid;
-	struct fxp_ident *ident;
+	const struct fxp_ident *ident;
 
 	if (pci_get_vendor(dev) == FXP_VENDORID_INTEL) {
 		devid = pci_get_device(dev);
 		revid = pci_get_revid(dev);
 		/* The table ends at the entry whose name is NULL. */
 		for (ident = fxp_ident_table; ident->name != NULL; ident++) {
 			if (ident->devid == devid &&
 			    (ident->revid == revid || ident->revid == -1)) {
 				return (ident);
 			}
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Return identification string if this device is ours.
  *
  * When fxp_find_ident() recognises the device, its name is installed as
  * the device description and BUS_PROBE_DEFAULT is returned; otherwise
  * ENXIO is returned.
  */
 static int
 fxp_probe(device_t dev)
 {
-	struct fxp_ident *ident;
+	const struct fxp_ident *ident;
 
 	ident = fxp_find_ident(dev);
 	if (ident != NULL) {
 		device_set_desc(dev, ident->name);
 		return (BUS_PROBE_DEFAULT);
 	}
 	return (ENXIO);
 }
 
 /*
  * Callback passed to bus_dmamap_load() by fxp_attach().  On success the
  * bus address of the first segment is stored in the uint32_t that 'arg'
  * points to; on error nothing is written.  Exactly one segment is
  * expected, which is asserted.
  */
 static void
 fxp_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	uint32_t *busaddrp;
 
 	if (error == 0) {
 		KASSERT(nseg == 1,
 		    ("too many DMA segments, %d should be 1", nseg));
 		busaddrp = arg;
 		*busaddrp = segs->ds_addr;
 	}
 }
 
 /*
  * Device attach method (registered as device_attach in fxp_methods).
  *
  * In order: initializes the softc mutex, callout and media list,
  * allocates the ifnet, maps the registers and IRQ, resets the chip,
  * reads the EEPROM to determine revision and workarounds, creates the
  * DMA tags/maps/buffers, sets up the media (serial or MII), fills in
  * the ifnet and attaches it, and finally hooks the interrupt.
  *
  * Returns 0 on success.  On any failure control reaches the 'fail'
  * label, fxp_release() undoes whatever was set up and the error code
  * is returned.
  */
 static int
 fxp_attach(device_t dev)
 {
 	struct fxp_softc *sc;
 	struct fxp_cb_tx *tcbp;
 	struct fxp_tx *txp;
 	struct fxp_rx *rxp;
 	struct ifnet *ifp;
 	uint32_t val;
 	uint16_t data, myea[ETHER_ADDR_LEN / 2];
 	u_char eaddr[ETHER_ADDR_LEN];
 	int i, pmc, prefer_iomap;
 	int error;
 
 	error = 0;
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
 	    MTX_DEF);
 	callout_init_mtx(&sc->stat_ch, &sc->sc_mtx, 0);
 	ifmedia_init(&sc->sc_media, 0, fxp_serial_ifmedia_upd,
 	    fxp_serial_ifmedia_sts);
 
 	ifp = sc->ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "can not if_alloc()\n");
 		error = ENOSPC;
 		goto fail;
 	}
 
 	/*
 	 * Enable bus mastering.
 	 */
 	pci_enable_busmaster(dev);
 	/* Keep the PCI command word; it is tested for MWI further down. */
 	val = pci_read_config(dev, PCIR_COMMAND, 2);
 
 	/*
 	 * Figure out which we should try first - memory mapping or i/o mapping?
 	 * We default to memory mapping. Then we accept an override from the
 	 * command line. Then we check to see which one is enabled.
 	 */
 	prefer_iomap = 0;
 	resource_int_value(device_get_name(dev), device_get_unit(dev),
 	    "prefer_iomap", &prefer_iomap);
 	if (prefer_iomap)
 		sc->fxp_spec = fxp_res_spec_io;
 	else
 		sc->fxp_spec = fxp_res_spec_mem;
 
 	error = bus_alloc_resources(dev, sc->fxp_spec, sc->fxp_res);
 	if (error) {
 		/* The preferred mapping failed; retry with the other one. */
 		if (sc->fxp_spec == fxp_res_spec_mem)
 			sc->fxp_spec = fxp_res_spec_io;
 		else
 			sc->fxp_spec = fxp_res_spec_mem;
 		error = bus_alloc_resources(dev, sc->fxp_spec, sc->fxp_res);
 	}
 	if (error) {
 		device_printf(dev, "could not allocate resources\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	if (bootverbose) {
 		device_printf(dev, "using %s space register mapping\n",
 		   sc->fxp_spec == fxp_res_spec_mem ? "memory" : "I/O");
 	}
 
 	/*
 	 * Put CU/RU idle state and prepare full reset.
 	 */
 	CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SELECTIVE_RESET);
 	DELAY(10);
 	/* Full reset and disable interrupts. */
 	CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SOFTWARE_RESET);
 	DELAY(10);
 	CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE);
 
 	/*
 	 * Find out how large of an SEEPROM we have.
 	 */
 	fxp_autosize_eeprom(sc);
 
 	/*
 	 * Find out the chip revision; lump all 82557 revs together.
 	 *
 	 * NOTE(review): fxp_find_ident() can return NULL; the dereference
 	 * below relies on fxp_probe() having matched this device already.
 	 */
 	sc->ident = fxp_find_ident(dev);
 	if (sc->ident->ich > 0) {
 		/* Assume ICH controllers are 82559. */
 		sc->revision = FXP_REV_82559_A0;
 	} else {
 		fxp_read_eeprom(sc, &data, 5, 1);
 		if ((data >> 8) == 1)
 			sc->revision = FXP_REV_82557;
 		else
 			sc->revision = pci_get_revid(dev);
 	}
 
 	/*
 	 * Check availability of WOL. 82559ER does not support WOL.
 	 */
 	if (sc->revision >= FXP_REV_82558_A4 &&
 	    sc->revision != FXP_REV_82559S_A) {
 		fxp_read_eeprom(sc, &data, 10, 1);
 		if ((data & 0x20) != 0 &&
 		    pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0)
 			sc->flags |= FXP_FLAG_WOLCAP;
 	}
 
 	/* Receiver lock-up workaround detection. */
 	fxp_read_eeprom(sc, &data, 3, 1);
 	if ((data & 0x03) != 0x03) {
 		sc->flags |= FXP_FLAG_RXBUG;
 		device_printf(dev, "Enabling Rx lock-up workaround\n");
 	}
 
 	/*
 	 * Determine whether we must use the 503 serial interface.
 	 */
 	fxp_read_eeprom(sc, &data, 6, 1);
 	if (sc->revision == FXP_REV_82557 && (data & FXP_PHY_DEVICE_MASK) != 0
 	    && (data & FXP_PHY_SERIAL_ONLY))
 		sc->flags |= FXP_FLAG_SERIAL_MEDIA;
 
 	fxp_sysctl_node(sc);
 	/*
 	 * Enable workarounds for certain chip revision deficiencies.
 	 *
 	 * Systems based on the ICH2/ICH2-M chip from Intel, and possibly
 	 * some systems based on a normal 82559 design, have a defect where
 	 * the chip can cause a PCI protocol violation if it receives
 	 * a CU_RESUME command when it is entering the IDLE state.  The
 	 * workaround is to disable Dynamic Standby Mode, so the chip never
 	 * deasserts CLKRUN#, and always remains in an active state.
 	 *
 	 * See Intel 82801BA/82801BAM Specification Update, Errata #30.
 	 */
 	if ((sc->ident->ich >= 2 && sc->ident->ich <= 3) ||
 	    (sc->ident->ich == 0 && sc->revision >= FXP_REV_82559_A0)) {
 		fxp_read_eeprom(sc, &data, 10, 1);
 		if (data & 0x02) {			/* STB enable */
 			uint16_t cksum;
 			/* NOTE(review): shadows the function-scope 'i'. */
 			int i;
 
 			device_printf(dev,
 			    "Disabling dynamic standby mode in EEPROM\n");
 			data &= ~0x02;
 			fxp_write_eeprom(sc, &data, 10, 1);
 			device_printf(dev, "New EEPROM ID: 0x%x\n", data);
 			/*
 			 * Recompute the checksum: sum every word except the
 			 * last one, then store 0xBABA minus that sum in the
 			 * last word.
 			 */
 			cksum = 0;
 			for (i = 0; i < (1 << sc->eeprom_size) - 1; i++) {
 				fxp_read_eeprom(sc, &data, i, 1);
 				cksum += data;
 			}
 			i = (1 << sc->eeprom_size) - 1;
 			cksum = 0xBABA - cksum;
 			/* Read the old checksum only to report it below. */
 			fxp_read_eeprom(sc, &data, i, 1);
 			fxp_write_eeprom(sc, &cksum, i, 1);
 			device_printf(dev,
 			    "EEPROM checksum @ 0x%x: 0x%x -> 0x%x\n",
 			    i, data, cksum);
 #if 1
 			/*
 			 * If the user elects to continue, try the software
 			 * workaround, as it is better than nothing.
 			 */
 			sc->flags |= FXP_FLAG_CU_RESUME_BUG;
 #endif
 		}
 	}
 
 	/*
 	 * If we are not a 82557 chip, we can enable extended features.
 	 */
 	if (sc->revision != FXP_REV_82557) {
 		/*
 		 * If MWI is enabled in the PCI configuration, and there
 		 * is a valid cacheline size (8 or 16 dwords), then tell
 		 * the board to turn on MWI.
 		 */
 		if (val & PCIM_CMD_MWRICEN &&
 		    pci_read_config(dev, PCIR_CACHELNSZ, 1) != 0)
 			sc->flags |= FXP_FLAG_MWI_ENABLE;
 
 		/* turn on the extended TxCB feature */
 		sc->flags |= FXP_FLAG_EXT_TXCB;
 
 		/* enable reception of long frames for VLAN */
 		sc->flags |= FXP_FLAG_LONG_PKT_EN;
 	} else {
 		/* a hack to get long VLAN frames on a 82557 */
 		sc->flags |= FXP_FLAG_SAVE_BAD;
 	}
 
 	/* For 82559 or later chips, Rx checksum offload is supported. */
 	if (sc->revision >= FXP_REV_82559_A0) {
 		/* 82559ER does not support Rx checksum offloading. */
 		if (sc->ident->devid != 0x1209)
 			sc->flags |= FXP_FLAG_82559_RXCSUM;
 	}
 	/*
 	 * Enable use of extended RFDs and TCBs for 82550
 	 * and later chips. Note: we need extended TXCB support
 	 * too, but that's already enabled by the code above.
 	 * Be careful to do this only on the right devices.
 	 */
 	if (sc->revision == FXP_REV_82550 || sc->revision == FXP_REV_82550_C ||
 	    sc->revision == FXP_REV_82551_E || sc->revision == FXP_REV_82551_F
 	    || sc->revision == FXP_REV_82551_10) {
 		sc->rfa_size = sizeof (struct fxp_rfa);
 		sc->tx_cmd = FXP_CB_COMMAND_IPCBXMIT;
 		sc->flags |= FXP_FLAG_EXT_RFA;
 		/* Use extended RFA instead of 82559 checksum mode. */
 		sc->flags &= ~FXP_FLAG_82559_RXCSUM;
 	} else {
 		sc->rfa_size = sizeof (struct fxp_rfa) - FXP_RFAX_LEN;
 		sc->tx_cmd = FXP_CB_COMMAND_XMIT;
 	}
 
 	/*
 	 * Allocate DMA tags and DMA safe memory.
 	 */
 	sc->maxtxseg = FXP_NTXSEG;
 	sc->maxsegsize = MCLBYTES;
 	if (sc->flags & FXP_FLAG_EXT_RFA) {
 		sc->maxtxseg--;
 		sc->maxsegsize = FXP_TSO_SEGSIZE;
 	}
 	/* Tag for transmit mbufs. */
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    sc->maxsegsize * sc->maxtxseg + sizeof(struct ether_vlan_header),
 	    sc->maxtxseg, sc->maxsegsize, 0,
 	    busdma_lock_mutex, &Giant, &sc->fxp_txmtag);
 	if (error) {
 		device_printf(dev, "could not create TX DMA tag\n");
 		goto fail;
 	}
 
 	/* Tag for receive mbufs: a single segment of at most MCLBYTES. */
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    MCLBYTES, 1, MCLBYTES, 0,
 	    busdma_lock_mutex, &Giant, &sc->fxp_rxmtag);
 	if (error) {
 		device_printf(dev, "could not create RX DMA tag\n");
 		goto fail;
 	}
 
 	/* Tag, memory and mapping for the statistics block. */
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    sizeof(struct fxp_stats), 1, sizeof(struct fxp_stats), 0,
 	    busdma_lock_mutex, &Giant, &sc->fxp_stag);
 	if (error) {
 		device_printf(dev, "could not create stats DMA tag\n");
 		goto fail;
 	}
 
 	error = bus_dmamem_alloc(sc->fxp_stag, (void **)&sc->fxp_stats,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->fxp_smap);
 	if (error) {
 		device_printf(dev, "could not allocate stats DMA memory\n");
 		goto fail;
 	}
 	error = bus_dmamap_load(sc->fxp_stag, sc->fxp_smap, sc->fxp_stats,
 	    sizeof(struct fxp_stats), fxp_dma_map_addr, &sc->stats_addr, 0);
 	if (error) {
 		device_printf(dev, "could not load the stats DMA buffer\n");
 		goto fail;
 	}
 
 	/* Tag, memory and mapping for the transmit command block list. */
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    FXP_TXCB_SZ, 1, FXP_TXCB_SZ, 0,
 	    busdma_lock_mutex, &Giant, &sc->cbl_tag);
 	if (error) {
 		device_printf(dev, "could not create TxCB DMA tag\n");
 		goto fail;
 	}
 
 	error = bus_dmamem_alloc(sc->cbl_tag, (void **)&sc->fxp_desc.cbl_list,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->cbl_map);
 	if (error) {
 		device_printf(dev, "could not allocate TxCB DMA memory\n");
 		goto fail;
 	}
 
 	error = bus_dmamap_load(sc->cbl_tag, sc->cbl_map,
 	    sc->fxp_desc.cbl_list, FXP_TXCB_SZ, fxp_dma_map_addr,
 	    &sc->fxp_desc.cbl_addr, 0);
 	if (error) {
 		device_printf(dev, "could not load TxCB DMA buffer\n");
 		goto fail;
 	}
 
 	/* Tag, memory and mapping for the multicast setup command block. */
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    sizeof(struct fxp_cb_mcs), 1, sizeof(struct fxp_cb_mcs), 0,
 	    busdma_lock_mutex, &Giant, &sc->mcs_tag);
 	if (error) {
 		device_printf(dev,
 		    "could not create multicast setup DMA tag\n");
 		goto fail;
 	}
 
 	error = bus_dmamem_alloc(sc->mcs_tag, (void **)&sc->mcsp,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->mcs_map);
 	if (error) {
 		device_printf(dev,
 		    "could not allocate multicast setup DMA memory\n");
 		goto fail;
 	}
 	error = bus_dmamap_load(sc->mcs_tag, sc->mcs_map, sc->mcsp,
 	    sizeof(struct fxp_cb_mcs), fxp_dma_map_addr, &sc->mcs_addr, 0);
 	if (error) {
 		device_printf(dev,
 		    "can't load the multicast setup DMA buffer\n");
 		goto fail;
 	}
 
 	/*
 	 * Pre-allocate the TX DMA maps and setup the pointers to
 	 * the TX command blocks.
 	 */
 	txp = sc->fxp_desc.tx_list;
 	tcbp = sc->fxp_desc.cbl_list;
 	for (i = 0; i < FXP_NTXCB; i++) {
 		txp[i].tx_cb = tcbp + i;
 		error = bus_dmamap_create(sc->fxp_txmtag, 0, &txp[i].tx_map);
 		if (error) {
 			device_printf(dev, "can't create DMA map for TX\n");
 			goto fail;
 		}
 	}
 	error = bus_dmamap_create(sc->fxp_rxmtag, 0, &sc->spare_map);
 	if (error) {
 		device_printf(dev, "can't create spare DMA map\n");
 		goto fail;
 	}
 
 	/*
 	 * Pre-allocate our receive buffers.
 	 */
 	sc->fxp_desc.rx_head = sc->fxp_desc.rx_tail = NULL;
 	for (i = 0; i < FXP_NRFABUFS; i++) {
 		rxp = &sc->fxp_desc.rx_list[i];
 		error = bus_dmamap_create(sc->fxp_rxmtag, 0, &rxp->rx_map);
 		if (error) {
 			device_printf(dev, "can't create DMA map for RX\n");
 			goto fail;
 		}
 		if (fxp_new_rfabuf(sc, rxp) != 0) {
 			error = ENOMEM;
 			goto fail;
 		}
 		fxp_add_rfabuf(sc, rxp);
 	}
 
 	/*
 	 * Read MAC address.  The EEPROM supplies three 16-bit words; the
 	 * low byte of each word comes first in the address.
 	 */
 	fxp_read_eeprom(sc, myea, 0, 3);
 	eaddr[0] = myea[0] & 0xff;
 	eaddr[1] = myea[0] >> 8;
 	eaddr[2] = myea[1] & 0xff;
 	eaddr[3] = myea[1] >> 8;
 	eaddr[4] = myea[2] & 0xff;
 	eaddr[5] = myea[2] >> 8;
 	if (bootverbose) {
 		device_printf(dev, "PCI IDs: %04x %04x %04x %04x %04x\n",
 		    pci_get_vendor(dev), pci_get_device(dev),
 		    pci_get_subvendor(dev), pci_get_subdevice(dev),
 		    pci_get_revid(dev));
 		fxp_read_eeprom(sc, &data, 10, 1);
 		device_printf(dev, "Dynamic Standby mode is %s\n",
 		    data & 0x02 ? "enabled" : "disabled");
 	}
 
 	/*
 	 * If this is only a 10Mbps device, then there is no MII, and
 	 * the PHY will use a serial interface instead.
 	 *
 	 * The Seeq 80c24 AutoDUPLEX(tm) Ethernet Interface Adapter
 	 * doesn't have a programming interface of any sort.  The
 	 * media is sensed automatically based on how the link partner
 	 * is configured.  This is, in essence, manual configuration.
 	 */
 	if (sc->flags & FXP_FLAG_SERIAL_MEDIA) {
 		ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL);
 		ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_MANUAL);
 	} else {
 		/*
 		 * i82557 chips wedge when all of their PHYs are isolated,
 		 * hence MIIF_NOISOLATE.
 		 */
 		error = mii_attach(dev, &sc->miibus, ifp, fxp_ifmedia_upd,
 		    fxp_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY,
 		    MII_OFFSET_ANY, MIIF_NOISOLATE);
 		if (error != 0) {
 	                device_printf(dev, "attaching PHYs failed\n");
 			goto fail;
 		}
 	}
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_init = fxp_init;
 	ifp->if_softc = sc;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = fxp_ioctl;
 	ifp->if_start = fxp_start;
 
 	ifp->if_capabilities = ifp->if_capenable = 0;
 
 	/* Enable checksum offload/TSO for 82550 or better chips */
 	if (sc->flags & FXP_FLAG_EXT_RFA) {
 		ifp->if_hwassist = FXP_CSUM_FEATURES | CSUM_TSO;
 		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4;
 		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_TSO4;
 	}
 
 	if (sc->flags & FXP_FLAG_82559_RXCSUM) {
 		ifp->if_capabilities |= IFCAP_RXCSUM;
 		ifp->if_capenable |= IFCAP_RXCSUM;
 	}
 
 	if (sc->flags & FXP_FLAG_WOLCAP) {
 		ifp->if_capabilities |= IFCAP_WOL_MAGIC;
 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
 	}
 
 #ifdef DEVICE_POLLING
 	/* Inform the world we support polling. */
 	ifp->if_capabilities |= IFCAP_POLLING;
 #endif
 
 	/*
 	 * Attach the interface.
 	 */
 	ether_ifattach(ifp, eaddr);
 
 	/*
 	 * Tell the upper layer(s) we support long frames.
 	 * Must appear after the call to ether_ifattach() because
 	 * ether_ifattach() sets ifi_hdrlen to the default value.
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |= IFCAP_VLAN_MTU;
 	ifp->if_capenable |= IFCAP_VLAN_MTU; /* the hw bits already set */
 	if ((sc->flags & FXP_FLAG_EXT_RFA) != 0) {
 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING |
 		    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO;
 		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING |
 		    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO;
 	}
 
 	/*
 	 * Let the system queue as many packets as we have available
 	 * TX descriptors.
 	 */
 	IFQ_SET_MAXLEN(&ifp->if_snd, FXP_NTXCB - 1);
 	ifp->if_snd.ifq_drv_maxlen = FXP_NTXCB - 1;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/*
 	 * Hook our interrupt after all initialization is complete.
 	 */
 	error = bus_setup_intr(dev, sc->fxp_res[1], INTR_TYPE_NET | INTR_MPSAFE,
 			       NULL, fxp_intr, sc, &sc->ih);
 	if (error) {
 		device_printf(dev, "could not setup irq\n");
 		/* Undo ether_ifattach() before the common cleanup runs. */
 		ether_ifdetach(sc->ifp);
 		goto fail;
 	}
 
 	/*
 	 * Configure hardware to reject magic frames otherwise
 	 * system will hang on receipt of magic frames.
 	 */
 	if ((sc->flags & FXP_FLAG_WOLCAP) != 0) {
 		FXP_LOCK(sc);
 		/* Clear wakeup events. */
 		CSR_WRITE_1(sc, FXP_CSR_PMDR, CSR_READ_1(sc, FXP_CSR_PMDR));
 		fxp_init_body(sc);
 		fxp_stop(sc);
 		FXP_UNLOCK(sc);
 	}
 
 	/* Common exit path: the success case falls through with error == 0. */
 fail:
 	if (error)
 		fxp_release(sc);
 	return (error);
 }
 
 /*
  * Release all resources.  The softc lock should not be held and the
  * interrupt should already be torn down.
  *
  * Called from the failure path of fxp_attach() and from fxp_detach().
  * Most pieces are checked for non-NULL before being freed, so the
  * function copes with a partially set-up softc.  The softc mutex is
  * destroyed last.
  */
 static void
 fxp_release(struct fxp_softc *sc)
 {
 	struct fxp_rx *rxp;
 	struct fxp_tx *txp;
 	int i;
 
 	FXP_LOCK_ASSERT(sc, MA_NOTOWNED);
 	KASSERT(sc->ih == NULL,
 	    ("fxp_release() called with intr handle still active"));
 	/* Detach the MII bus child (if any) and the media list. */
 	if (sc->miibus)
 		device_delete_child(sc->dev, sc->miibus);
 	bus_generic_detach(sc->dev);
 	ifmedia_removeall(&sc->sc_media);
 	/* Unload and free the DMA memory areas that were allocated. */
 	if (sc->fxp_desc.cbl_list) {
 		bus_dmamap_unload(sc->cbl_tag, sc->cbl_map);
 		bus_dmamem_free(sc->cbl_tag, sc->fxp_desc.cbl_list,
 		    sc->cbl_map);
 	}
 	if (sc->fxp_stats) {
 		bus_dmamap_unload(sc->fxp_stag, sc->fxp_smap);
 		bus_dmamem_free(sc->fxp_stag, sc->fxp_stats, sc->fxp_smap);
 	}
 	if (sc->mcsp) {
 		bus_dmamap_unload(sc->mcs_tag, sc->mcs_map);
 		bus_dmamem_free(sc->mcs_tag, sc->mcsp, sc->mcs_map);
 	}
 	bus_release_resources(sc->dev, sc->fxp_spec, sc->fxp_res);
 	/* Receive side: free any loaded mbufs, then the maps and the tag. */
 	if (sc->fxp_rxmtag) {
 		for (i = 0; i < FXP_NRFABUFS; i++) {
 			rxp = &sc->fxp_desc.rx_list[i];
 			if (rxp->rx_mbuf != NULL) {
 				bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(sc->fxp_rxmtag, rxp->rx_map);
 				m_freem(rxp->rx_mbuf);
 			}
 			bus_dmamap_destroy(sc->fxp_rxmtag, rxp->rx_map);
 		}
 		bus_dmamap_destroy(sc->fxp_rxmtag, sc->spare_map);
 		bus_dma_tag_destroy(sc->fxp_rxmtag);
 	}
 	/* Transmit side: same sequence for every TX slot. */
 	if (sc->fxp_txmtag) {
 		for (i = 0; i < FXP_NTXCB; i++) {
 			txp = &sc->fxp_desc.tx_list[i];
 			if (txp->tx_mbuf != NULL) {
 				bus_dmamap_sync(sc->fxp_txmtag, txp->tx_map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(sc->fxp_txmtag, txp->tx_map);
 				m_freem(txp->tx_mbuf);
 			}
 			bus_dmamap_destroy(sc->fxp_txmtag, txp->tx_map);
 		}
 		bus_dma_tag_destroy(sc->fxp_txmtag);
 	}
 	/* Remaining DMA tags, then the ifnet itself. */
 	if (sc->fxp_stag)
 		bus_dma_tag_destroy(sc->fxp_stag);
 	if (sc->cbl_tag)
 		bus_dma_tag_destroy(sc->cbl_tag);
 	if (sc->mcs_tag)
 		bus_dma_tag_destroy(sc->mcs_tag);
 	if (sc->ifp)
 		if_free(sc->ifp);
 
 	mtx_destroy(&sc->sc_mtx);
 }
 
 /*
  * Detach interface.
  *
  * Deregisters polling if it is enabled, disables interrupts and stops
  * the chip under the softc lock, drains the statistics callout,
  * detaches the ifnet, tears down the interrupt handler and finally
  * releases all resources.  Always returns 0.
  */
 static int
 fxp_detach(device_t dev)
 {
 	struct fxp_softc *sc = device_get_softc(dev);
 
 #ifdef DEVICE_POLLING
 	if (sc->ifp->if_capenable & IFCAP_POLLING)
 		ether_poll_deregister(sc->ifp);
 #endif
 
 	FXP_LOCK(sc);
 	/*
 	 * Stop DMA and drop transmit queue, but disable interrupts first.
 	 */
 	CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE);
 	fxp_stop(sc);
 	FXP_UNLOCK(sc);
 	/* Drained with the lock released. */
 	callout_drain(&sc->stat_ch);
 
 	/*
 	 * Close down routes etc.
 	 */
 	ether_ifdetach(sc->ifp);
 
 	/*
 	 * Unhook interrupt before dropping lock. This is to prevent
 	 * races with fxp_intr().
 	 *
 	 * NOTE(review): the softc lock was already released above, so the
 	 * "before dropping lock" wording looks stale - confirm.
 	 */
 	bus_teardown_intr(sc->dev, sc->fxp_res[1], sc->ih);
 	/* fxp_release() asserts that the handle is NULL. */
 	sc->ih = NULL;
 
 	/* Release our allocated resources. */
 	fxp_release(sc);
 	return (0);
 }
 
 /*
  * Device shutdown method, invoked at system shutdown after sync.  Its
  * job is to switch off receiver DMA so that the card cannot clobber
  * kernel memory during a warm boot.
  */
 static int
 fxp_shutdown(device_t dev)
 {
 	int error;
 
 	/*
 	 * DMA must be disabled prior to reboot.  Not doing so could allow
 	 * DMA to corrupt kernel memory during the reboot, before the
 	 * driver initializes again.  The suspend path does exactly that.
 	 */
 	error = fxp_suspend(dev);
 	return (error);
 }
 
 /*
  * Device suspend routine.  Stop the interface and save some PCI
  * settings in case the BIOS doesn't restore them properly on
  * resume.
  *
  * NOTE(review): no PCI settings are saved in this body; what it does
  * is program the power-management status register (arming PME when
  * magic-packet wake-on-LAN is enabled), stop the interface and set
  * sc->suspended. The "save some PCI settings" wording may be stale.
  *
  * Always returns 0.
  */
 static int
 fxp_suspend(device_t dev)
 {
 	struct fxp_softc *sc = device_get_softc(dev);
 	struct ifnet *ifp;
 	int pmc;
 	uint16_t pmstat;
 
 	FXP_LOCK(sc);
 
 	ifp = sc->ifp;
 	/* Only touch PME bits if a power-management capability is found. */
 	if (pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) {
 		pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
 		/* Start from a value with PME and PME-enable masked off. */
 		pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
 		if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) {
 			/* Request PME. */
 			pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
 			sc->flags |= FXP_FLAG_WOL;
 			/* Reconfigure hardware to accept magic frames. */
 			fxp_init_body(sc);
 		}
 		pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
 	}
 	fxp_stop(sc);
 
 	/* fxp_intr() checks this flag and returns early while it is set. */
 	sc->suspended = 1;
 
 	FXP_UNLOCK(sc);
 	return (0);
 }
 
 /*
  * Device resume routine. re-enable busmastering, and restart the interface if
  * appropriate.
  *
  * NOTE(review): nothing in this body touches bus mastering explicitly;
  * it clears the wake-on-LAN state, issues a selective reset and
  * re-initializes the interface when it is marked up. Confirm whether
  * the "re-enable busmastering" wording is still accurate.
  *
  * Always returns 0.
  */
 static int
 fxp_resume(device_t dev)
 {
 	struct fxp_softc *sc = device_get_softc(dev);
 	struct ifnet *ifp = sc->ifp;
 	int pmc;
 	uint16_t pmstat;
 
 	FXP_LOCK(sc);
 
 	if (pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) {
 		/* Undo the WOL flag that fxp_suspend() may have set. */
 		sc->flags &= ~FXP_FLAG_WOL;
 		pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
 		/* Disable PME and clear PME status. */
 		pmstat &= ~PCIM_PSTAT_PMEENABLE;
 		pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
 		/*
 		 * Write the PMDR register back with the value just read.
 		 * NOTE(review): presumably this acknowledges pending
 		 * wake-up status bits; confirm against the datasheet.
 		 */
 		if ((sc->flags & FXP_FLAG_WOLCAP) != 0)
 			CSR_WRITE_1(sc, FXP_CSR_PMDR,
 			    CSR_READ_1(sc, FXP_CSR_PMDR));
 	}
 
 	/* Selective reset, followed by a short settle delay. */
 	CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SELECTIVE_RESET);
 	DELAY(10);
 
 	/* reinitialize interface if necessary */
 	if (ifp->if_flags & IFF_UP)
 		fxp_init_body(sc);
 
 	/* Let fxp_intr() process interrupts again. */
 	sc->suspended = 0;
 
 	FXP_UNLOCK(sc);
 	return (0);
 }
 
 /*
  * Clock the low `length' bits of `data' into the serial EEPROM, most
  * significant bit first. Chip select stays asserted for every write;
  * each bit is presented on EEDI and latched with one EESK pulse.
  */
 static void
 fxp_eeprom_shiftin(struct fxp_softc *sc, int data, int length)
 {
 	uint16_t ctl;
 	int mask;
 
 	mask = 1 << (length - 1);
 	while (mask != 0) {
 		/* Chip select always on, data-in only for a 1 bit. */
 		ctl = FXP_EEPROM_EECS;
 		if ((data & mask) != 0)
 			ctl |= FXP_EEPROM_EEDI;
 
 		/* Set up the data line, then pulse the clock high and low. */
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, ctl);
 		DELAY(1);
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, ctl | FXP_EEPROM_EESK);
 		DELAY(1);
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, ctl);
 		DELAY(1);
 
 		mask >>= 1;
 	}
 }
 
 /*
  * Read from the serial EEPROM. Basically, you manually shift in
  * the read opcode (one bit at a time) and then shift in the address,
  * and then you shift out the data (all of this one bit at a time).
  * The word size is 16 bits, so you have to provide the address for
  * every 16 bits of data.
  *
  * sc:       softc; sc->eeprom_size gives the number of address bits.
  * offset:   word address to read.
  * autosize: when non-zero, stop shifting address bits as soon as the
  *           EEPROM drives data-out low and store the number of bits
  *           shifted so far in sc->eeprom_size.
  *
  * Returns the 16-bit word that was shifted out.
  */
 static uint16_t
 fxp_eeprom_getword(struct fxp_softc *sc, int offset, int autosize)
 {
 	uint16_t reg, data;
 	int x;
 
 	/* Assert chip select to begin the transaction. */
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS);
 	/*
 	 * Shift in read opcode.
 	 */
 	fxp_eeprom_shiftin(sc, FXP_EEPROM_OPC_READ, 3);
 	/*
 	 * Shift in address. During this loop `data' is used as a
 	 * counter of the address bits shifted in so far.
 	 */
 	data = 0;
 	for (x = 1 << (sc->eeprom_size - 1); x; x >>= 1) {
 		if (offset & x)
 			reg = FXP_EEPROM_EECS | FXP_EEPROM_EEDI;
 		else
 			reg = FXP_EEPROM_EECS;
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg);
 		DELAY(1);
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg | FXP_EEPROM_EESK);
 		DELAY(1);
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg);
 		DELAY(1);
 		/* Sample data-out right after the bit has been clocked in. */
 		reg = CSR_READ_2(sc, FXP_CSR_EEPROMCONTROL) & FXP_EEPROM_EEDO;
 		data++;
 		if (autosize && reg == 0) {
 			/* Data-out went low: record the address width. */
 			sc->eeprom_size = data;
 			break;
 		}
 	}
 	/*
 	 * Shift out data, most significant bit first.
 	 */
 	data = 0;
 	reg = FXP_EEPROM_EECS;
 	for (x = 1 << 15; x; x >>= 1) {
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg | FXP_EEPROM_EESK);
 		DELAY(1);
 		if (CSR_READ_2(sc, FXP_CSR_EEPROMCONTROL) & FXP_EEPROM_EEDO)
 			data |= x;
 		CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg);
 		DELAY(1);
 	}
 	/* Deassert chip select to end the transaction. */
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0);
 	DELAY(1);
 
 	return (data);
 }
 
 /*
  * Write one 16-bit word to the serial EEPROM at word address `offset'.
  *
  * The sequence is: erase/write enable, write command (opcode, address,
  * data), poll data-out until it goes high or the poll budget runs out,
  * then erase/write disable. Chip select is dropped between the steps.
  * No status is returned; a wait that never sees data-out go high is
  * not reported to the caller.
  */
 static void
 fxp_eeprom_putword(struct fxp_softc *sc, int offset, uint16_t data)
 {
 	int i;
 
 	/*
 	 * Erase/write enable: opcode 0x4 followed by an address field
 	 * whose two most significant bits are set.
 	 */
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS);
 	fxp_eeprom_shiftin(sc, 0x4, 3);
 	fxp_eeprom_shiftin(sc, 0x03 << (sc->eeprom_size - 2), sc->eeprom_size);
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0);
 	DELAY(1);
 	/*
 	 * Shift in write opcode, address, data.
 	 */
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS);
 	fxp_eeprom_shiftin(sc, FXP_EEPROM_OPC_WRITE, 3);
 	fxp_eeprom_shiftin(sc, offset, sc->eeprom_size);
 	fxp_eeprom_shiftin(sc, data, 16);
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0);
 	DELAY(1);
 	/*
 	 * Wait for EEPROM to finish up: poll data-out up to 1000 times
 	 * with a 50 microsecond delay between polls.
 	 */
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS);
 	DELAY(1);
 	for (i = 0; i < 1000; i++) {
 		if (CSR_READ_2(sc, FXP_CSR_EEPROMCONTROL) & FXP_EEPROM_EEDO)
 			break;
 		DELAY(50);
 	}
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0);
 	DELAY(1);
 	/*
 	 * Erase/write disable: opcode 0x4 followed by an all-zero
 	 * address field.
 	 */
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS);
 	fxp_eeprom_shiftin(sc, 0x4, 3);
 	fxp_eeprom_shiftin(sc, 0, sc->eeprom_size);
 	CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0);
 	DELAY(1);
 }
 
 /*
  * From NetBSD:
  *
  * Figure out EEPROM size.
  *
  * 559's can have either 64-word or 256-word EEPROMs, the 558
  * datasheet only talks about 64-word EEPROMs, and the 557 datasheet
  * talks about the existence of 16 to 256 word EEPROMs.
  *
  * The only known sizes are 64 and 256, where the 256 version is used
  * by CardBus cards to store CIS information.
  *
  * The address is shifted in msb-to-lsb, and after the last
  * address-bit the EEPROM is supposed to output a `dummy zero' bit,
  * after which follows the actual data. We try to detect this zero, by
  * probing the data-out bit in the EEPROM control register just after
  * having shifted in a bit. If the bit is zero, we assume we've
  * shifted enough address bits. The data-out should be tri-state,
  * before this, which should translate to a logical one.
  */
 static void
 fxp_autosize_eeprom(struct fxp_softc *sc)
 {
 
 	/* Start from the largest supported width: 8 bits, 256 words. */
 	sc->eeprom_size = 8;
 
 	/*
 	 * A read of word 0 in autosize mode rewrites sc->eeprom_size
 	 * with the detected width; the word itself is not needed.
 	 */
 	(void)fxp_eeprom_getword(sc, 0, 1);
 }
 
 /*
  * Read `words' consecutive 16-bit words from the EEPROM, starting at
  * word address `offset', into data[0 .. words - 1].
  */
 static void
 fxp_read_eeprom(struct fxp_softc *sc, u_short *data, int offset, int words)
 {
 	int n;
 
 	n = 0;
 	while (n < words) {
 		data[n] = fxp_eeprom_getword(sc, offset + n, 0);
 		n++;
 	}
 }
 
 /*
  * Write data[0 .. words - 1] to `words' consecutive EEPROM word
  * addresses starting at `offset'.
  */
 static void
 fxp_write_eeprom(struct fxp_softc *sc, u_short *data, int offset, int words)
 {
 	int n;
 
 	n = 0;
 	while (n < words) {
 		fxp_eeprom_putword(sc, offset + n, data[n]);
 		n++;
 	}
 }
 
 /*
  * Locked wrapper around fxp_start_body(): take the softc lock, run the
  * real transmit-start routine, and drop the lock again.
  */
 static void
 fxp_start(struct ifnet *ifp)
 {
 	struct fxp_softc *sc;
 
 	sc = ifp->if_softc;
 
 	FXP_LOCK(sc);
 	fxp_start_body(ifp);
 	FXP_UNLOCK(sc);
 }
 
 /*
  * Start packet transmission on the interface.
  * This routine must be called with the softc lock held, and is an
  * internal entry point only.
  *
  * Packets are dequeued from ifp->if_snd and handed to fxp_encap() until
  * the send queue is empty, the TxCB ring is full (minus one reserved
  * entry), or encapsulation fails. If at least one packet was queued,
  * the command unit is resumed and the watchdog timer is armed.
  */
 static void
 fxp_start_body(struct ifnet *ifp)
 {
 	struct fxp_softc *sc = ifp->if_softc;
 	struct mbuf *mb_head;
 	int txqueued;
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Nothing to do unless running and not already marked busy. */
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return;
 
 	/* Reclaim completed TxCBs once the ring is getting full. */
 	if (sc->tx_queued > FXP_NTXCB_HIWAT)
 		fxp_txeof(sc);
 	/*
 	 * We're finished if there is nothing more to add to the list or if
 	 * we're all filled up with buffers to transmit.
 	 * NOTE: One TxCB is reserved to guarantee that fxp_mc_setup() can add
 	 *       a NOP command when needed.
 	 */
 	txqueued = 0;
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
 	    sc->tx_queued < FXP_NTXCB - 1) {
 
 		/*
 		 * Grab a packet to transmit.
 		 */
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, mb_head);
 		if (mb_head == NULL)
 			break;
 
 		if (fxp_encap(sc, &mb_head)) {
 			/* fxp_encap() already freed the packet. */
 			if (mb_head == NULL)
 				break;
 			/*
 			 * The packet could not be queued but is still
 			 * intact: put it back at the head of the send
 			 * queue and stop. Without the break the loop
 			 * would count and BPF-tap a packet that was
 			 * never handed to the hardware, and then
 			 * immediately dequeue the same packet again.
 			 */
 			IFQ_DRV_PREPEND(&ifp->if_snd, mb_head);
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
 		txqueued++;
 		/*
 		 * Pass packet to bpf if there is a listener.
 		 */
 		BPF_MTAP(ifp, mb_head);
 	}
 
 	/*
 	 * We're finished. If we added to the list, issue a RESUME to get DMA
 	 * going again if suspended.
 	 */
 	if (txqueued > 0) {
 		bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		fxp_scb_wait(sc);
 		fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_RESUME);
 		/*
 		 * Set a 5 second timer just in case we don't hear
 		 * from the card again.
 		 */
 		sc->watchdog_timer = 5;
 	}
 }
 
 /*
  * Map one outgoing packet for DMA and fill in the next free TxCB.
  *
  * sc:     softc; the softc lock must be held.
  * m_head: in/out pointer to the packet. It may be replaced by a
  *         duplicated, pulled-up or collapsed chain.
  *
  * Returns 0 on success. On failure an errno value is returned and
  * *m_head is NULL when the packet has been freed; when the initial DMA
  * load fails with anything other than EFBIG the packet is left intact
  * in *m_head so the caller can requeue it.
  */
 static int
 fxp_encap(struct fxp_softc *sc, struct mbuf **m_head)
 {
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct fxp_tx *txp;
 	struct fxp_cb_tx *cbp;
 	struct tcphdr *tcp;
 	bus_dma_segment_t segs[FXP_NTXSEG];
 	int error, i, nseg, tcp_payload;
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 	ifp = sc->ifp;
 
 	tcp_payload = 0;
 	tcp = NULL;
 	/*
 	 * Get pointer to next available tx desc.
 	 */
 	txp = sc->fxp_desc.tx_last->tx_next;
 
 	/*
 	 * A note in Appendix B of the Intel 8255x 10/100 Mbps
 	 * Ethernet Controller Family Open Source Software
 	 * Developer Manual says:
 	 *   Using software parsing is only allowed with legal
 	 *   TCP/IP or UDP/IP packets.
 	 *   ...
 	 *   For all other datagrams, hardware parsing must
 	 *   be used.
 	 * Software parsing appears to truncate ICMP and
 	 * fragmented UDP packets that contain one to three
 	 * bytes in the second (and final) mbuf of the packet.
 	 */
 	if (sc->flags & FXP_FLAG_EXT_RFA)
 		txp->tx_cb->ipcb_ip_activation_high =
 		    FXP_IPCB_HARDWAREPARSING_ENABLE;
 
 	m = *m_head;
 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
 		/*
 		 * 82550/82551 requires ethernet/IP/TCP headers must be
 		 * contained in the first active transmit buffer.
 		 */
 		struct ether_header *eh;
 		struct ip *ip;
 		uint32_t ip_off, poff;
 
 		if (M_WRITABLE(*m_head) == 0) {
 			/* Get a writable copy. */
 			m = m_dup(*m_head, M_DONTWAIT);
 			m_freem(*m_head);
 			if (m == NULL) {
 				*m_head = NULL;
 				return (ENOBUFS);
 			}
 			*m_head = m;
 		}
 		/* m_pullup() frees the chain itself when it fails. */
 		ip_off = sizeof(struct ether_header);
 		m = m_pullup(*m_head, ip_off);
 		if (m == NULL) {
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		eh = mtod(m, struct ether_header *);
 		/* Check the existence of VLAN tag. */
 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 			ip_off = sizeof(struct ether_vlan_header);
 			m = m_pullup(m, ip_off);
 			if (m == NULL) {
 				*m_head = NULL;
 				return (ENOBUFS);
 			}
 		}
 		m = m_pullup(m, ip_off + sizeof(struct ip));
 		if (m == NULL) {
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		ip = (struct ip *)(mtod(m, char *) + ip_off);
 		poff = ip_off + (ip->ip_hl << 2);
 		m = m_pullup(m, poff + sizeof(struct tcphdr));
 		if (m == NULL) {
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		tcp = (struct tcphdr *)(mtod(m, char *) + poff);
 		/*
 		 * Pull up the complete TCP header including options.
 		 * th_off counts 32-bit words, so it has to be scaled to
 		 * bytes; adding the raw word count to
 		 * sizeof(struct tcphdr) would leave part of the options
 		 * outside the first buffer.
 		 */
 		m = m_pullup(m, poff + (tcp->th_off << 2));
 		if (m == NULL) {
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 
 		/*
 		 * Since 82550/82551 doesn't modify IP length and pseudo
 		 * checksum in the first frame driver should compute it.
 		 */
 		ip = (struct ip *)(mtod(m, char *) + ip_off);
 		tcp = (struct tcphdr *)(mtod(m, char *) + poff);
 		ip->ip_sum = 0;
 		ip->ip_len = htons(m->m_pkthdr.tso_segsz + (ip->ip_hl << 2) +
 		    (tcp->th_off << 2));
 		tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(IPPROTO_TCP + (tcp->th_off << 2) +
 		    m->m_pkthdr.tso_segsz));
 		/* Compute total TCP payload. */
 		tcp_payload = m->m_pkthdr.len - ip_off - (ip->ip_hl << 2);
 		tcp_payload -= tcp->th_off << 2;
 		*m_head = m;
 	} else if (m->m_pkthdr.csum_flags & FXP_CSUM_FEATURES) {
 		/*
 		 * Deal with TCP/IP checksum offload. Note that
 		 * in order for TCP checksum offload to work,
 		 * the pseudo header checksum must have already
 		 * been computed and stored in the checksum field
 		 * in the TCP header. The stack should have
 		 * already done this for us.
 		 */
 		txp->tx_cb->ipcb_ip_schedule = FXP_IPCB_TCPUDP_CHECKSUM_ENABLE;
 		if (m->m_pkthdr.csum_flags & CSUM_TCP)
 			txp->tx_cb->ipcb_ip_schedule |= FXP_IPCB_TCP_PACKET;
 
 #ifdef FXP_IP_CSUM_WAR
 		/*
 		 * XXX The 82550 chip appears to have trouble
 		 * dealing with IP header checksums in very small
 		 * datagrams, namely fragments from 1 to 3 bytes
 		 * in size. For example, say you want to transmit
 		 * a UDP packet of 1473 bytes. The packet will be
 		 * fragmented over two IP datagrams, the latter
 		 * containing only one byte of data. The 82550 will
 		 * botch the header checksum on the 1-byte fragment.
 		 * As long as the datagram contains 4 or more bytes
 		 * of data, you're ok.
 		 *
 		 * The following code attempts to work around this
 		 * problem: if the datagram is less than 38 bytes
 		 * in size (14 bytes ether header, 20 bytes IP header,
 		 * plus 4 bytes of data), we punt and compute the IP
 		 * header checksum by hand. This workaround doesn't
 		 * work very well, however, since it can be fooled
 		 * by things like VLAN tags and IP options that make
 		 * the header sizes/offsets vary.
 		 */
 
 		if (m->m_pkthdr.csum_flags & CSUM_IP) {
 			if (m->m_pkthdr.len < 38) {
 				struct ip *ip;
 				m->m_data += ETHER_HDR_LEN;
 				ip = mtod(m, struct ip *);
 				ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
 				m->m_data -= ETHER_HDR_LEN;
 				m->m_pkthdr.csum_flags &= ~CSUM_IP;
 			} else {
 				txp->tx_cb->ipcb_ip_activation_high =
 				    FXP_IPCB_HARDWAREPARSING_ENABLE;
 				txp->tx_cb->ipcb_ip_schedule |=
 				    FXP_IPCB_IP_CHECKSUM_ENABLE;
 			}
 		}
 #endif
 	}
 
 	error = bus_dmamap_load_mbuf_sg(sc->fxp_txmtag, txp->tx_map, *m_head,
 	    segs, &nseg, 0);
 	if (error == EFBIG) {
 		/* Too many segments: compact the chain and retry once. */
 		m = m_collapse(*m_head, M_DONTWAIT, sc->maxtxseg);
 		if (m == NULL) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (ENOMEM);
 		}
 		*m_head = m;
 		error = bus_dmamap_load_mbuf_sg(sc->fxp_txmtag, txp->tx_map,
 		    *m_head, segs, &nseg, 0);
 		if (error != 0) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (ENOMEM);
 		}
 	} else if (error != 0)
 		return (error);	/* packet is left in *m_head */
 	if (nseg == 0) {
 		m_freem(*m_head);
 		*m_head = NULL;
 		return (EIO);
 	}
 
 	KASSERT(nseg <= sc->maxtxseg, ("too many DMA segments"));
 	bus_dmamap_sync(sc->fxp_txmtag, txp->tx_map, BUS_DMASYNC_PREWRITE);
 
 	cbp = txp->tx_cb;
 	for (i = 0; i < nseg; i++) {
 		/*
 		 * If this is an 82550/82551, then we're using extended
 		 * TxCBs _and_ we're using checksum offload. This means
 		 * that the TxCB is really an IPCB. One major difference
 		 * between the two is that with plain extended TxCBs,
 		 * the bottom half of the TxCB contains two entries from
 		 * the TBD array, whereas IPCBs contain just one entry:
 		 * one entry (8 bytes) has been sacrificed for the TCP/IP
 		 * checksum offload control bits. So to make things work
 		 * right, we have to start filling in the TBD array
 		 * starting from a different place depending on whether
 		 * the chip is an 82550/82551 or not.
 		 */
 		if (sc->flags & FXP_FLAG_EXT_RFA) {
 			cbp->tbd[i + 1].tb_addr = htole32(segs[i].ds_addr);
 			cbp->tbd[i + 1].tb_size = htole32(segs[i].ds_len);
 		} else {
 			cbp->tbd[i].tb_addr = htole32(segs[i].ds_addr);
 			cbp->tbd[i].tb_size = htole32(segs[i].ds_len);
 		}
 	}
 	if (sc->flags & FXP_FLAG_EXT_RFA) {
 		/* Configure dynamic TBD for 82550/82551. */
 		cbp->tbd_number = 0xFF;
 		cbp->tbd[nseg].tb_size |= htole32(0x8000);
 	} else
 		cbp->tbd_number = nseg;
 	/* Configure TSO. */
 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
 		cbp->tbd[-1].tb_size = htole32(m->m_pkthdr.tso_segsz << 16);
 		cbp->tbd[1].tb_size |= htole32(tcp_payload << 16);
 		cbp->ipcb_ip_schedule |= FXP_IPCB_LARGESEND_ENABLE |
 		    FXP_IPCB_IP_CHECKSUM_ENABLE |
 		    FXP_IPCB_TCP_PACKET |
 		    FXP_IPCB_TCPUDP_CHECKSUM_ENABLE;
 	}
 	/* Configure VLAN hardware tag insertion. */
 	if ((m->m_flags & M_VLANTAG) != 0) {
 		cbp->ipcb_vlan_id = htons(m->m_pkthdr.ether_vtag);
 		txp->tx_cb->ipcb_ip_activation_high |=
 		    FXP_IPCB_INSERTVLAN_ENABLE;
 	}
 
 	txp->tx_mbuf = m;
 	txp->tx_cb->cb_status = 0;
 	txp->tx_cb->byte_count = 0;
 	/* Add the I bit to the command on one TxCB per threshold. */
 	if (sc->tx_queued != FXP_CXINT_THRESH - 1)
 		txp->tx_cb->cb_command =
 		    htole16(sc->tx_cmd | FXP_CB_COMMAND_SF |
 		    FXP_CB_COMMAND_S);
 	else
 		txp->tx_cb->cb_command =
 		    htole16(sc->tx_cmd | FXP_CB_COMMAND_SF |
 		    FXP_CB_COMMAND_S | FXP_CB_COMMAND_I);
 	if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0)
 		txp->tx_cb->tx_threshold = tx_threshold;
 
 	/*
 	 * Advance the end of list forward: clear the S bit on the
 	 * previous last TxCB so the new one becomes the end of the list.
 	 */
-
-#ifdef __alpha__
-	/*
-	 * On platforms which can't access memory in 16-bit
-	 * granularities, we must prevent the card from DMA'ing
-	 * up the status while we update the command field.
-	 * This could cause us to overwrite the completion status.
-	 * XXX This is probably bogus and we're _not_ looking
-	 * for atomicity here.
-	 */
-	atomic_clear_16(&sc->fxp_desc.tx_last->tx_cb->cb_command,
-	    htole16(FXP_CB_COMMAND_S));
-#else
 	sc->fxp_desc.tx_last->tx_cb->cb_command &= htole16(~FXP_CB_COMMAND_S);
-#endif /*__alpha__*/
 	sc->fxp_desc.tx_last = txp;
 
 	/*
 	 * Advance the beginning of the list forward if there are
 	 * no other packets queued (when nothing is queued, tx_first
 	 * sits on the last TxCB that was sent out).
 	 */
 	if (sc->tx_queued == 0)
 		sc->fxp_desc.tx_first = txp;
 
 	sc->tx_queued++;
 
 	return (0);
 }
 
 #ifdef DEVICE_POLLING
 static poll_handler_t fxp_poll;
 
 /*
  * Polling entry point. Runs the common interrupt body under the softc
  * lock with the CXTNO, CNA and FR events always assumed pending; for
  * POLL_AND_CHECK_STATUS any other pending status bits are also
  * acknowledged and passed along. Returns the packet count reported by
  * fxp_intr_body(), or 0 when there was nothing to do.
  */
 static int
 fxp_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 {
 	struct fxp_softc *sc = ifp->if_softc;
 	uint8_t events, pending;
 	int npkts;
 
 	npkts = 0;
 
 	FXP_LOCK(sc);
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		goto done;
 
 	events = FXP_SCB_STATACK_CXTNO | FXP_SCB_STATACK_CNA |
 	    FXP_SCB_STATACK_FR;
 	if (cmd == POLL_AND_CHECK_STATUS) {
 		pending = CSR_READ_1(sc, FXP_CSR_SCB_STATACK);
 		/* nothing to do */
 		if (pending == 0xff || pending == 0)
 			goto done;
 		/* ack what we can */
 		pending &= ~events;
 		if (pending != 0)
 			CSR_WRITE_1(sc, FXP_CSR_SCB_STATACK, pending);
 		events |= pending;
 	}
 	npkts = fxp_intr_body(sc, ifp, events, count);
 done:
 	FXP_UNLOCK(sc);
 	return (npkts);
 }
 #endif /* DEVICE_POLLING */
 
 /*
  * Process interface interrupts.
  *
  * xsc is the softc pointer registered with the interrupt handler.
  * Nothing is processed while the device is suspended or, with
  * DEVICE_POLLING, while polling is enabled on the interface.
  */
 static void
 fxp_intr(void *xsc)
 {
 	struct fxp_softc *sc = xsc;
 	struct ifnet *ifp = sc->ifp;
 	uint8_t statack;
 
 	FXP_LOCK(sc);
 	/* Set by fxp_suspend(), cleared by fxp_resume(). */
 	if (sc->suspended) {
 		FXP_UNLOCK(sc);
 		return;
 	}
 
 #ifdef DEVICE_POLLING
 	/* Events are handled by fxp_poll() while polling is enabled. */
 	if (ifp->if_capenable & IFCAP_POLLING) {
 		FXP_UNLOCK(sc);
 		return;
 	}
 #endif
 	/* Keep going until the status/ack register reads back as zero. */
 	while ((statack = CSR_READ_1(sc, FXP_CSR_SCB_STATACK)) != 0) {
 		/*
 		 * It should not be possible to have all bits set; the
 		 * FXP_SCB_INTR_SWI bit always returns 0 on a read.  If
 		 * all bits are set, this may indicate that the card has
 		 * been physically ejected, so ignore it.
 		 */
 		if (statack == 0xff) {
 			FXP_UNLOCK(sc);
 			return;
 		}
 
 		/*
 		 * First ACK all the interrupts in this pass.
 		 */
 		CSR_WRITE_1(sc, FXP_CSR_SCB_STATACK, statack);
 		/* A count of -1 means no limit on packets processed. */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 			fxp_intr_body(sc, ifp, statack, -1);
 	}
 	FXP_UNLOCK(sc);
 }
 
 /*
  * Reclaim completed transmit control blocks.
  *
  * Walks the TxCB list from tx_first for as long as TxCBs are
  * outstanding and the current one has its complete (C) status bit set.
  * Each reclaimed TxCB has its mbuf unloaded and freed, tx_queued is
  * decremented and IFF_DRV_OACTIVE is cleared. The watchdog timer is
  * disarmed once nothing is queued any more.
  */
 static void
 fxp_txeof(struct fxp_softc *sc)
 {
 	struct ifnet *ifp;
 	struct fxp_tx *txp;
 
 	ifp = sc->ifp;
 	/* Sync the control block list before reading the status words. */
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	for (txp = sc->fxp_desc.tx_first; sc->tx_queued &&
 	    (le16toh(txp->tx_cb->cb_status) & FXP_CB_STATUS_C) != 0;
 	    txp = txp->tx_next) {
 		if (txp->tx_mbuf != NULL) {
 			bus_dmamap_sync(sc->fxp_txmtag, txp->tx_map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(sc->fxp_txmtag, txp->tx_map);
 			m_freem(txp->tx_mbuf);
 			txp->tx_mbuf = NULL;
 			/* clear this to reset csum offload bits */
 			txp->tx_cb->tbd[0].tb_addr = 0;
 		}
 		sc->tx_queued--;
 		/* A TxCB was freed, so transmission may be started again. */
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	}
 	/* Remember where the next reclaim pass has to start. */
 	sc->fxp_desc.tx_first = txp;
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	if (sc->tx_queued == 0)
 		sc->watchdog_timer = 0;
 }
 
 /*
  * Translate the controller's receive checksum results into mbuf
  * checksum flags.
  *
  * sc:     softc; FXP_FLAG_82559_RXCSUM in sc->flags selects the path.
  * ifp:    interface (not used in this function).
  * m:      received packet, starting at the ethernet header.
  * status: RFA status word, already converted to host byte order by
  *         the caller.
  * pos:    byte offset in the packet data at which the 16-bit checksum
  *         is read on the FXP_FLAG_82559_RXCSUM path.
  *
  * Without FXP_FLAG_82559_RXCSUM the status bits are mapped directly to
  * csum_flags. With it, the packet must be an unfragmented IPv4 TCP or
  * UDP packet whose IP length matches the frame; otherwise the function
  * returns without setting any flag.
  */
 static void
 fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp, struct mbuf *m,
     uint16_t status, int pos)
 {
 	struct ether_header *eh;
 	struct ip *ip;
 	struct udphdr *uh;
 	int32_t hlen, len, pktlen, temp32;
 	uint16_t csum, *opts;
 
 	if ((sc->flags & FXP_FLAG_82559_RXCSUM) == 0) {
 		/* Results are only used when the PARSE status bit is set. */
 		if ((status & FXP_RFA_STATUS_PARSE) != 0) {
 			if (status & FXP_RFDX_CS_IP_CSUM_BIT_VALID)
 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
 			if (status & FXP_RFDX_CS_IP_CSUM_VALID)
 				m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
 			if ((status & FXP_RFDX_CS_TCPUDP_CSUM_BIT_VALID) &&
 			    (status & FXP_RFDX_CS_TCPUDP_CSUM_VALID)) {
 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
 				    CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 		}
 		return;
 	}
 
 	/* Sanity-check the headers before trusting the raw checksum. */
 	pktlen = m->m_pkthdr.len;
 	if (pktlen < sizeof(struct ether_header) + sizeof(struct ip))
 		return;
 	eh = mtod(m, struct ether_header *);
 	if (eh->ether_type != htons(ETHERTYPE_IP))
 		return;
 	ip = (struct ip *)(eh + 1);
 	if (ip->ip_v != IPVERSION)
 		return;
 
 	hlen = ip->ip_hl << 2;
 	pktlen -= sizeof(struct ether_header);
 	if (hlen < sizeof(struct ip))
 		return;
 	if (ntohs(ip->ip_len) < hlen)
 		return;
 	if (ntohs(ip->ip_len) != pktlen)
 		return;
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
 		return;	/* can't handle fragmented packet */
 
 	switch (ip->ip_p) {
 	case IPPROTO_TCP:
 		if (pktlen < (hlen + sizeof(struct tcphdr)))
 			return;
 		break;
 	case IPPROTO_UDP:
 		if (pktlen < (hlen + sizeof(struct udphdr)))
 			return;
 		uh = (struct udphdr *)((caddr_t)ip + hlen);
 		if (uh->uh_sum == 0)
 			return; /* no checksum */
 		break;
 	default:
 		return;
 	}
 	/* Extract computed checksum (big-endian 16 bits at offset pos). */
 	csum = be16dec(mtod(m, char *) + pos);
 	/*
 	 * checksum fixup for IP options: subtract every 16-bit option
 	 * word from the checksum, folding the borrow back into the low
 	 * 16 bits.
 	 */
 	len = hlen - sizeof(struct ip);
 	if (len > 0) {
 		opts = (uint16_t *)(ip + 1);
 		for (; len > 0; len -= sizeof(uint16_t), opts++) {
 			temp32 = csum - *opts;
 			temp32 = (temp32 >> 16) + (temp32 & 65535);
 			csum = temp32 & 65535;
 		}
 	}
 	/* CSUM_PSEUDO_HDR is not set on this path. */
 	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
 	m->m_pkthdr.csum_data = csum;
 }
 
 /*
  * Common interrupt/polling work: reclaim finished transmits, restart
  * transmission, and process the receive ring.
  *
  * sc:      softc; the softc lock must be held. It is dropped and
  *          re-taken around every if_input() call.
  * ifp:     the interface.
  * statack: status/acknowledge bits describing the pending events.
  * count:   with DEVICE_POLLING, the maximum number of receive
  *          descriptors to examine when >= 0; a negative value means
  *          no limit.
  *
  * Returns the number of packets handed to if_input().
  */
 static int
 fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
     int count)
 {
 	struct mbuf *m;
 	struct fxp_rx *rxp;
 	struct fxp_rfa *rfa;
 	int rnr = (statack & FXP_SCB_STATACK_RNR) ? 1 : 0;
 	int rx_npkts;
 	uint16_t status;
 
 	rx_npkts = 0;
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 
 	if (rnr)
 		sc->rnr++;
 #ifdef DEVICE_POLLING
 	/* Pick up a deferred RNR condition if `count' ran out last time. */
 	if (sc->flags & FXP_FLAG_DEFERRED_RNR) {
 		sc->flags &= ~FXP_FLAG_DEFERRED_RNR;
 		rnr = 1;
 	}
 #endif
 
 	/*
 	 * Free any finished transmit mbuf chains.
 	 *
 	 * Handle the CNA event like a CXTNO event. It used to
 	 * be that this event (control unit not ready) was not
 	 * encountered, but it is now with the SMPng modifications.
 	 * The exact sequence of events that occur when the interface
 	 * is brought up are different now, and if this event
 	 * goes unhandled, the configuration/rxfilter setup sequence
 	 * can stall for several seconds. The result is that no
 	 * packets go out onto the wire for about 5 to 10 seconds
 	 * after the interface is ifconfig'ed for the first time.
 	 */
 	if (statack & (FXP_SCB_STATACK_CXTNO | FXP_SCB_STATACK_CNA))
 		fxp_txeof(sc);
 
 	/*
 	 * Try to start more packets transmitting.
 	 */
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		fxp_start_body(ifp);
 
 	/*
 	 * Just return if nothing happened on the receive side.
 	 */
 	if (!rnr && (statack & FXP_SCB_STATACK_FR) == 0)
 		return (rx_npkts);
 
 	/*
 	 * Process receiver interrupts. If a no-resource (RNR)
 	 * condition exists, get whatever packets we can and
 	 * re-start the receiver.
 	 *
 	 * When using polling, we do not process the list to completion,
 	 * so when we get an RNR interrupt we must defer the restart
 	 * until we hit the last buffer with the C bit set.
 	 * If we run out of cycles and rfa_headm has the C bit set,
 	 * record the pending RNR in the FXP_FLAG_DEFERRED_RNR flag so
 	 * that the info will be used in the subsequent polling cycle.
 	 */
 	for (;;) {
 		rxp = sc->fxp_desc.rx_head;
 		m = rxp->rx_mbuf;
 		rfa = (struct fxp_rfa *)(m->m_ext.ext_buf +
 		    RFA_ALIGNMENT_FUDGE);
 		bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 #ifdef DEVICE_POLLING /* loop at most count times if count >=0 */
 		if (count >= 0 && count-- == 0) {
 			if (rnr) {
 				/* Defer RNR processing until the next time. */
 				sc->flags |= FXP_FLAG_DEFERRED_RNR;
 				rnr = 0;
 			}
 			break;
 		}
 #endif /* DEVICE_POLLING */
 
 		/* Stop at the first descriptor that is not yet complete. */
 		status = le16toh(rfa->rfa_status);
 		if ((status & FXP_RFA_STATUS_C) == 0)
 			break;
 
 		if ((status & FXP_RFA_STATUS_RNR) != 0)
 			rnr++;
 		/*
 		 * Advance head forward.
 		 */
 		sc->fxp_desc.rx_head = rxp->rx_next;
 
 		/*
 		 * Add a new buffer to the receive chain.
 		 * If this fails, the old buffer is recycled
 		 * instead.
 		 */
 		if (fxp_new_rfabuf(sc, rxp) == 0) {
 			int total_len;
 
 			/*
 			 * Fetch packet length (the top 2 bits of
 			 * actual_size are flags set by the controller
 			 * upon completion), and drop the packet in case
 			 * of bogus length or CRC errors.
 			 */
 			total_len = le16toh(rfa->actual_size) & 0x3fff;
 			if ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0 &&
 			    (ifp->if_capenable & IFCAP_RXCSUM) != 0) {
 				/* Adjust for appended checksum bytes. */
 				total_len -= 2;
 			}
 			/*
 			 * The minimum-length check has to be a signed
 			 * comparison: total_len can be negative after
 			 * the adjustment above, and comparing it with an
 			 * unsigned sizeof would turn it into a huge
 			 * value that passes the check.
 			 */
 			if (total_len < (int)sizeof(struct ether_header) ||
 			    total_len > (MCLBYTES - RFA_ALIGNMENT_FUDGE -
 			    sc->rfa_size) ||
 			    status & (FXP_RFA_STATUS_CRC |
 			    FXP_RFA_STATUS_ALIGN)) {
 				m_freem(m);
 				fxp_add_rfabuf(sc, rxp);
 				continue;
 			}
 
 			m->m_pkthdr.len = m->m_len = total_len;
 			m->m_pkthdr.rcvif = ifp;
 
 			/* Do IP checksum checking. */
 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 				fxp_rxcsum(sc, ifp, m, status, total_len);
 			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
 			    (status & FXP_RFA_STATUS_VLAN) != 0) {
 				m->m_pkthdr.ether_vtag =
 				    ntohs(rfa->rfax_vlan_id);
 				m->m_flags |= M_VLANTAG;
 			}
 			/*
 			 * Drop locks before calling if_input() since it
 			 * may re-enter fxp_start() in the netisr case.
 			 * This would result in a lock reversal.  Better
 			 * performance might be obtained by chaining all
 			 * packets received, dropping the lock, and then
 			 * calling if_input() on each one.
 			 */
 			FXP_UNLOCK(sc);
 			(*ifp->if_input)(ifp, m);
 			FXP_LOCK(sc);
 			rx_npkts++;
 			/* The interface may have been stopped meanwhile. */
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 				return (rx_npkts);
 		} else {
 			/* Reuse RFA and loaded DMA map. */
 			ifp->if_iqdrops++;
 			fxp_discard_rfabuf(sc, rxp);
 		}
 		fxp_add_rfabuf(sc, rxp);
 	}
 	if (rnr) {
 		/* Restart the receive unit at the current head. */
 		fxp_scb_wait(sc);
 		CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL,
 		    sc->fxp_desc.rx_head->rx_addr);
 		fxp_scb_cmd(sc, FXP_SCB_COMMAND_RU_START);
 	}
 	return (rx_npkts);
 }
 
 /*
  * Fold the most recent statistics dump into the driver's accumulated
  * hardware counters and the ifnet counters.
  *
  * The softc lock must be held. Nothing is accumulated unless the
  * completion word of the dump area (whose location depends on the chip
  * revision) reads FXP_STATS_DR_COMPLETE; the word is zeroed once the
  * counters have been consumed.
  */
 static void
 fxp_update_stats(struct fxp_softc *sc)
 {
 	struct ifnet *ifp = sc->ifp;
 	struct fxp_stats *sp = sc->fxp_stats;
 	struct fxp_hwstats *hsp;
 	uint32_t *status;
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 
 	bus_dmamap_sync(sc->fxp_stag, sc->fxp_smap,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	/* Update statistical counters. */
 	/* The completion word sits at a revision-dependent field. */
 	if (sc->revision >= FXP_REV_82559_A0)
 		status = &sp->completion_status;
 	else if (sc->revision >= FXP_REV_82558_A4)
 		status = (uint32_t *)&sp->tx_tco;
 	else
 		status = &sp->tx_pause;
 	if (*status == htole32(FXP_STATS_DR_COMPLETE)) {
 		hsp = &sc->fxp_hwstats;
 		hsp->tx_good += le32toh(sp->tx_good);
 		hsp->tx_maxcols += le32toh(sp->tx_maxcols);
 		hsp->tx_latecols += le32toh(sp->tx_latecols);
 		hsp->tx_underruns += le32toh(sp->tx_underruns);
 		hsp->tx_lostcrs += le32toh(sp->tx_lostcrs);
 		hsp->tx_deffered += le32toh(sp->tx_deffered);
 		hsp->tx_single_collisions += le32toh(sp->tx_single_collisions);
 		hsp->tx_multiple_collisions +=
 		    le32toh(sp->tx_multiple_collisions);
 		hsp->tx_total_collisions += le32toh(sp->tx_total_collisions);
 		hsp->rx_good += le32toh(sp->rx_good);
 		hsp->rx_crc_errors += le32toh(sp->rx_crc_errors);
 		hsp->rx_alignment_errors += le32toh(sp->rx_alignment_errors);
 		hsp->rx_rnr_errors += le32toh(sp->rx_rnr_errors);
 		hsp->rx_overrun_errors += le32toh(sp->rx_overrun_errors);
 		hsp->rx_cdt_errors += le32toh(sp->rx_cdt_errors);
 		hsp->rx_shortframes += le32toh(sp->rx_shortframes);
 		hsp->tx_pause += le32toh(sp->tx_pause);
 		hsp->rx_pause += le32toh(sp->rx_pause);
 		hsp->rx_controls += le32toh(sp->rx_controls);
 		hsp->tx_tco += le16toh(sp->tx_tco);
 		hsp->rx_tco += le16toh(sp->rx_tco);
 
 		ifp->if_opackets += le32toh(sp->tx_good);
 		ifp->if_collisions += le32toh(sp->tx_total_collisions);
 		if (sp->rx_good) {
 			ifp->if_ipackets += le32toh(sp->rx_good);
 			sc->rx_idle_secs = 0;
 		} else if (sc->flags & FXP_FLAG_RXBUG) {
 			/*
 			 * Receiver's been idle for another second.
 			 */
 			sc->rx_idle_secs++;
 		}
 		ifp->if_ierrors += 
 		    le32toh(sp->rx_crc_errors) +
 		    le32toh(sp->rx_alignment_errors) +
 		    le32toh(sp->rx_rnr_errors) +
 		    le32toh(sp->rx_overrun_errors);
 		/*
 		 * If any transmit underruns occurred, bump up the transmit
 		 * threshold by another 512 bytes (64 * 8).
 		 */
 		if (sp->tx_underruns) {
 			ifp->if_oerrors += le32toh(sp->tx_underruns);
 			if (tx_threshold < 192)
 				tx_threshold += 64;
 		}
 		/* Mark the dump as consumed. */
 		*status = 0;
 		bus_dmamap_sync(sc->fxp_stag, sc->fxp_smap,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	}
 }
 
 /*
  * Update packet in/out/collision statistics. The i82557 doesn't
  * allow you to access these counters without doing a fairly
  * expensive DMA to get _all_ of the statistics it maintains, so
  * we do this operation here only once per second. The statistics
  * counters in the kernel are updated from the previous dump-stats
  * DMA and then a new dump-stats DMA is started. The on-chip
  * counters are zeroed when the DMA completes. If we can't start
  * the DMA immediately, we don't wait - we just prepare to read
  * them again next time.
  *
  * Runs as the sc->stat_ch callout handler with the softc lock held
  * (asserted below); xsc is the softc pointer.
  */
 static void
 fxp_tick(void *xsc)
 {
 	struct fxp_softc *sc = xsc;
 	struct ifnet *ifp = sc->ifp;
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Update statistical counters. */
 	fxp_update_stats(sc);
 
 	/*
 	 * Release any xmit buffers that have completed DMA. This isn't
 	 * strictly necessary to do here, but it's advantageous for mbufs
 	 * with external storage to be released in a timely manner rather
 	 * than being deferred for a potentially long time. This limits
 	 * the delay to a maximum of one second.
 	 */
 	fxp_txeof(sc);
 
 	/*
 	 * If we haven't received any packets in FXP_MAX_RX_IDLE seconds,
 	 * then assume the receiver has locked up and attempt to clear
 	 * the condition by reprogramming the multicast filter. This is
 	 * a work-around for a bug in the 82557 where the receiver locks
 	 * up if it gets certain types of garbage in the synchronization
 	 * bits prior to the packet header. This bug is supposed to only
 	 * occur in 10Mbps mode, but has been seen to occur in 100Mbps
 	 * mode as well (perhaps due to a 10/100 speed transition).
 	 */
 	if (sc->rx_idle_secs > FXP_MAX_RX_IDLE) {
 		sc->rx_idle_secs = 0;
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 			fxp_init_body(sc);
 		/*
 		 * NOTE(review): this path returns without rescheduling
 		 * the callout here; presumably fxp_init_body() re-arms
 		 * it - confirm.
 		 */
 		return;
 	}
 	/*
 	 * If there is no pending command, start another stats
 	 * dump. Otherwise punt for now.
 	 */
 	if (CSR_READ_1(sc, FXP_CSR_SCB_COMMAND) == 0) {
 		/*
 		 * Start another stats dump.
 		 */
 		fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_DUMPRESET);
 	}
 	if (sc->miibus != NULL)
 		mii_tick(device_get_softc(sc->miibus));
 
 	/*
 	 * Check that chip hasn't hung.
 	 */
 	fxp_watchdog(sc);
 
 	/*
 	 * Schedule another timeout one second from now.
 	 */
 	callout_reset(&sc->stat_ch, hz, fxp_tick, sc);
 }
 
 /*
  * Stop the interface. Cancels the statistics updater and resets
  * the interface.
  */
 static void
 fxp_stop(struct fxp_softc *sc)
 {
 	struct ifnet *ifp = sc->ifp;
 	struct fxp_tx *txp;
 	int i;
 
 	/*
 	 * Mark the interface as neither running nor output-active, and
 	 * disarm the watchdog (fxp_watchdog() ignores a zero timer).
 	 */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 	sc->watchdog_timer = 0;
 
 	/*
 	 * Cancel stats updater.
 	 */
 	callout_stop(&sc->stat_ch);
 
 	/*
 	 * Preserve PCI configuration, configure, IA/multicast
 	 * setup and put RU and CU into idle state.
 	 */
 	CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SELECTIVE_RESET);
 	DELAY(50);
 	/* Disable interrupts. */
 	CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE);
 
 	/* Fold in whatever statistics were pending before the reset. */
 	fxp_update_stats(sc);
 
 	/*
 	 * Release any xmit buffers.  tx_list may still be NULL (the check
 	 * below guards against that), in which case there is nothing to free.
 	 */
 	txp = sc->fxp_desc.tx_list;
 	if (txp != NULL) {
 		for (i = 0; i < FXP_NTXCB; i++) {
  			if (txp[i].tx_mbuf != NULL) {
 				/* Finish the DMA transaction before freeing. */
 				bus_dmamap_sync(sc->fxp_txmtag, txp[i].tx_map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(sc->fxp_txmtag,
 				    txp[i].tx_map);
 				m_freem(txp[i].tx_mbuf);
 				txp[i].tx_mbuf = NULL;
 				/* clear this to reset csum offload bits */
 				txp[i].tx_cb->tbd[0].tb_addr = 0;
 			}
 		}
 	}
 	/* Push the modified control blocks back out and reset the queue count. */
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	sc->tx_queued = 0;
 }
 
 /*
  * Watchdog/transmit timeout handler. Called when a
  * transmission is started on the interface, but no interrupt is
  * received before the timeout. This usually indicates that the
  * card has wedged for some reason.
  */
 static void
 fxp_watchdog(struct fxp_softc *sc)
 {
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* A zero timer means the watchdog is not armed. */
 	if (sc->watchdog_timer == 0)
 		return;
 
 	/* Count down one step; act only when the countdown hits zero. */
 	sc->watchdog_timer--;
 	if (sc->watchdog_timer != 0)
 		return;
 
 	/* Expired: report it, count an output error and reinitialize. */
 	device_printf(sc->dev, "device timeout\n");
 	sc->ifp->if_oerrors++;
 
 	fxp_init_body(sc);
 }
 
 /*
  * Acquire locks and then call the real initialization function.  This
  * is necessary because ether_ioctl() calls if_init() and this would
  * result in mutex recursion if the mutex was held.
  */
 static void
 fxp_init(void *xsc)
 {
 	struct fxp_softc *softc;
 
 	/* The softc arrives as an opaque pointer. */
 	softc = (struct fxp_softc *)xsc;
 
 	/* Run the real initialization with the softc lock held. */
 	FXP_LOCK(softc);
 	fxp_init_body(softc);
 	FXP_UNLOCK(softc);
 }
 
 /*
  * Perform device initialization. This routine must be called with the
  * softc lock held.
  */
 static void
 fxp_init_body(struct fxp_softc *sc)
 {
 	struct ifnet *ifp = sc->ifp;
 	struct fxp_cb_config *cbp;
 	struct fxp_cb_ias *cb_ias;
 	struct fxp_cb_tx *tcbp;
 	struct fxp_tx *txp;
 	int i, prm;
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 	/*
 	 * Cancel any pending I/O
 	 */
 	fxp_stop(sc);
 
 	/*
 	 * Issue software reset, which also unloads the microcode.
 	 */
 	sc->flags &= ~FXP_FLAG_UCODE;
 	CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SOFTWARE_RESET);
 	DELAY(50);
 
 	/* prm is 1 when the interface is in promiscuous mode, else 0. */
 	prm = (ifp->if_flags & IFF_PROMISC) ? 1 : 0;
 
 	/*
 	 * Initialize base of CBL and RFA memory. Loading with zero
 	 * sets it up for regular linear addressing.
 	 */
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, 0);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_BASE);
 
 	fxp_scb_wait(sc);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_RU_BASE);
 
 	/*
 	 * Initialize base of dump-stats buffer.
 	 */
 	fxp_scb_wait(sc);
 	bzero(sc->fxp_stats, sizeof(struct fxp_stats));
 	bus_dmamap_sync(sc->fxp_stag, sc->fxp_smap,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->stats_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_DUMP_ADR);
 
 	/*
 	 * Attempt to load microcode if requested.
 	 * For ICH based controllers do not load microcode.
 	 * The request is signalled through the IFF_LINK0 interface flag.
 	 * FXP_FLAG_UCODE was cleared at the top of this function, so the
 	 * "not yet loaded" test below is expected to hold here.
 	 */
 	if (sc->ident->ich == 0) {
 		if (ifp->if_flags & IFF_LINK0 &&
 		    (sc->flags & FXP_FLAG_UCODE) == 0)
 			fxp_load_ucode(sc);
 	}
 
 	/*
 	 * Set IFF_ALLMULTI status. It's needed in configure action
 	 * command.
 	 */
 	fxp_mc_addrs(sc);
 
 	/*
 	 * We temporarily use memory that contains the TxCB list to
 	 * construct the config CB. The TxCB list memory is rebuilt
 	 * later.
 	 */
 	cbp = (struct fxp_cb_config *)sc->fxp_desc.cbl_list;
 
 	/*
 	 * This bcopy is kind of disgusting, but there are a bunch of must be
 	 * zero and must be one bits in this structure and this is the easiest
 	 * way to initialize them all to proper values.
 	 */
 	bcopy(fxp_cb_config_template, cbp, sizeof(fxp_cb_config_template));
 
 	cbp->cb_status =	0;
 	cbp->cb_command =	htole16(FXP_CB_COMMAND_CONFIG |
 	    FXP_CB_COMMAND_EL);
 	cbp->link_addr =	0xffffffff;	/* (no) next command */
 	cbp->byte_count =	sc->flags & FXP_FLAG_EXT_RFA ? 32 : 22;
 	cbp->rx_fifo_limit =	8;	/* rx fifo threshold (32 bytes) */
 	cbp->tx_fifo_limit =	0;	/* tx fifo threshold (0 bytes) */
 	cbp->adaptive_ifs =	0;	/* (no) adaptive interframe spacing */
 	cbp->mwi_enable =	sc->flags & FXP_FLAG_MWI_ENABLE ? 1 : 0;
 	cbp->type_enable =	0;	/* actually reserved */
 	cbp->read_align_en =	sc->flags & FXP_FLAG_READ_ALIGN ? 1 : 0;
 	cbp->end_wr_on_cl =	sc->flags & FXP_FLAG_WRITE_ALIGN ? 1 : 0;
 	cbp->rx_dma_bytecount =	0;	/* (no) rx DMA max */
 	cbp->tx_dma_bytecount =	0;	/* (no) tx DMA max */
 	cbp->dma_mbce =		0;	/* (disable) dma max counters */
 	cbp->late_scb =		0;	/* (don't) defer SCB update */
 	cbp->direct_dma_dis =	1;	/* disable direct rcv dma mode */
 	cbp->tno_int_or_tco_en =0;	/* (disable) tx not okay interrupt */
 	cbp->ci_int =		1;	/* interrupt on CU idle */
 	cbp->ext_txcb_dis = 	sc->flags & FXP_FLAG_EXT_TXCB ? 0 : 1;
 	cbp->ext_stats_dis = 	1;	/* disable extended counters */
 	cbp->keep_overrun_rx = 	0;	/* don't pass overrun frames to host */
 	cbp->save_bf =		sc->flags & FXP_FLAG_SAVE_BAD ? 1 : prm;
 	cbp->disc_short_rx =	!prm;	/* discard short packets */
 	cbp->underrun_retry =	1;	/* retry mode (once) on DMA underrun */
 	cbp->two_frames =	0;	/* do not limit FIFO to 2 frames */
 	cbp->dyn_tbd =		sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0;
 	cbp->ext_rfa =		sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0;
 	cbp->mediatype =	sc->flags & FXP_FLAG_SERIAL_MEDIA ? 0 : 1;
 	cbp->csma_dis =		0;	/* (don't) disable link */
 	/* Rx checksum offload needs both chip support and the capability on. */
 	cbp->tcp_udp_cksum =	((sc->flags & FXP_FLAG_82559_RXCSUM) != 0 &&
 	    (ifp->if_capenable & IFCAP_RXCSUM) != 0) ? 1 : 0;
 	cbp->vlan_tco =		0;	/* (don't) enable vlan wakeup */
 	cbp->link_wake_en =	0;	/* (don't) assert PME# on link change */
 	cbp->arp_wake_en =	0;	/* (don't) assert PME# on arp */
 	cbp->mc_wake_en =	0;	/* (don't) enable PME# on mcmatch */
 	cbp->nsai =		1;	/* (don't) disable source addr insert */
 	cbp->preamble_length =	2;	/* (7 byte) preamble */
 	cbp->loopback =		0;	/* (don't) loopback */
 	cbp->linear_priority =	0;	/* (normal CSMA/CD operation) */
 	cbp->linear_pri_mode =	0;	/* (wait after xmit only) */
 	cbp->interfrm_spacing =	6;	/* (96 bits of) interframe spacing */
 	cbp->promiscuous =	prm;	/* promiscuous mode */
 	cbp->bcast_disable =	0;	/* (don't) disable broadcasts */
 	cbp->wait_after_win =	0;	/* (don't) enable modified backoff alg*/
 	cbp->ignore_ul =	0;	/* consider U/L bit in IA matching */
 	cbp->crc16_en =		0;	/* (don't) enable crc-16 algorithm */
 	cbp->crscdt =		sc->flags & FXP_FLAG_SERIAL_MEDIA ? 1 : 0;
 
 	cbp->stripping =	!prm;	/* truncate rx packet to byte count */
 	cbp->padding =		1;	/* (do) pad short tx packets */
 	cbp->rcv_crc_xfer =	0;	/* (don't) xfer CRC to host */
 	cbp->long_rx_en =	sc->flags & FXP_FLAG_LONG_PKT_EN ? 1 : 0;
 	cbp->ia_wake_en =	0;	/* (don't) wake up on address match */
 	cbp->magic_pkt_dis =	sc->flags & FXP_FLAG_WOL ? 0 : 1;
 	cbp->force_fdx =	0;	/* (don't) force full duplex */
 	cbp->fdx_pin_en =	1;	/* (enable) FDX# pin */
 	cbp->multi_ia =		0;	/* (don't) accept multiple IAs */
 	/* Accept all multicast when IFF_ALLMULTI is set or when promiscuous. */
 	cbp->mc_all =		ifp->if_flags & IFF_ALLMULTI ? 1 : prm;
 	cbp->gamla_rx =		sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0;
 	cbp->vlan_strip_en =	((sc->flags & FXP_FLAG_EXT_RFA) != 0 &&
 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) ? 1 : 0;
 
 	if (sc->tunable_noflow || sc->revision == FXP_REV_82557) {
 		/*
 		 * The 82557 has no hardware flow control, the values
 		 * below are the defaults for the chip.
 		 * The same values are used when the "noflow" tunable is set.
 		 */
 		cbp->fc_delay_lsb =	0;
 		cbp->fc_delay_msb =	0x40;
 		cbp->pri_fc_thresh =	3;
 		cbp->tx_fc_dis =	0;
 		cbp->rx_fc_restop =	0;
 		cbp->rx_fc_restart =	0;
 		cbp->fc_filter =	0;
 		cbp->pri_fc_loc =	1;
 	} else {
 		cbp->fc_delay_lsb =	0x1f;
 		cbp->fc_delay_msb =	0x01;
 		cbp->pri_fc_thresh =	3;
 		cbp->tx_fc_dis =	0;	/* enable transmit FC */
 		cbp->rx_fc_restop =	1;	/* enable FC restop frames */
 		cbp->rx_fc_restart =	1;	/* enable FC restart frames */
 		cbp->fc_filter =	!prm;	/* drop FC frames to host */
 		cbp->pri_fc_loc =	1;	/* FC pri location (byte31) */
 	}
 
 	/*
 	 * Enable 82558 and 82559 extended statistics functionality.
 	 * Note this overrides byte_count, ext_stats_dis, tno_int_or_tco_en
 	 * and gamla_rx as set above.
 	 */
 	if (sc->revision >= FXP_REV_82558_A4) {
 		if (sc->revision >= FXP_REV_82559_A0) {
 			/*
 			 * Extend configuration table size to 32
 			 * to include TCO configuration.
 			 */
 			cbp->byte_count = 32;
 			cbp->ext_stats_dis = 1;
 			/* Enable TCO stats. */
 			cbp->tno_int_or_tco_en = 1;
 			cbp->gamla_rx = 1;
 		} else
 			cbp->ext_stats_dis = 0;
 	}
 
 	/*
 	 * Start the config command/DMA.
 	 */
 	fxp_scb_wait(sc);
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START);
 	/* ...and wait for it to complete. */
 	fxp_dma_wait(sc, &cbp->cb_status, sc->cbl_tag, sc->cbl_map);
 
 	/*
 	 * Now initialize the station address. Temporarily use the TxCB
 	 * memory area like we did above for the config CB.
 	 */
 	cb_ias = (struct fxp_cb_ias *)sc->fxp_desc.cbl_list;
 	cb_ias->cb_status = 0;
 	cb_ias->cb_command = htole16(FXP_CB_COMMAND_IAS | FXP_CB_COMMAND_EL);
 	cb_ias->link_addr = 0xffffffff;
 	bcopy(IF_LLADDR(sc->ifp), cb_ias->macaddr, ETHER_ADDR_LEN);
 
 	/*
 	 * Start the IAS (Individual Address Setup) command/DMA.
 	 */
 	fxp_scb_wait(sc);
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START);
 	/* ...and wait for it to complete. */
 	fxp_dma_wait(sc, &cb_ias->cb_status, sc->cbl_tag, sc->cbl_map);
 
 	/*
 	 * Initialize the multicast address list.
 	 */
 	fxp_mc_setup(sc);
 
 	/*
 	 * Initialize transmit control block (TxCB) list.
 	 * Each TxCB is marked complete and set to a NOP, and both the
 	 * hardware link addresses and the software tx_next pointers are
 	 * chained into a ring (index wraps with FXP_TXCB_MASK).
 	 */
 	txp = sc->fxp_desc.tx_list;
 	tcbp = sc->fxp_desc.cbl_list;
 	bzero(tcbp, FXP_TXCB_SZ);
 	for (i = 0; i < FXP_NTXCB; i++) {
 		txp[i].tx_mbuf = NULL;
 		tcbp[i].cb_status = htole16(FXP_CB_STATUS_C | FXP_CB_STATUS_OK);
 		tcbp[i].cb_command = htole16(FXP_CB_COMMAND_NOP);
 		tcbp[i].link_addr = htole32(sc->fxp_desc.cbl_addr +
 		    (((i + 1) & FXP_TXCB_MASK) * sizeof(struct fxp_cb_tx)));
 		/* With extended TxCBs the TBD array starts at tbd[2]. */
 		if (sc->flags & FXP_FLAG_EXT_TXCB)
 			tcbp[i].tbd_array_addr =
 			    htole32(FXP_TXCB_DMA_ADDR(sc, &tcbp[i].tbd[2]));
 		else
 			tcbp[i].tbd_array_addr =
 			    htole32(FXP_TXCB_DMA_ADDR(sc, &tcbp[i].tbd[0]));
 		txp[i].tx_next = &txp[(i + 1) & FXP_TXCB_MASK];
 	}
 	/*
 	 * Set the suspend flag on the first TxCB and start the control
 	 * unit. It will execute the NOP and then suspend.
 	 */
 	tcbp->cb_command = htole16(FXP_CB_COMMAND_NOP | FXP_CB_COMMAND_S);
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	sc->fxp_desc.tx_first = sc->fxp_desc.tx_last = txp;
 	/* NOTE(review): presumably counts the initial NOP TxCB - confirm. */
 	sc->tx_queued = 1;
 
 	fxp_scb_wait(sc);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START);
 
 	/*
 	 * Initialize receiver buffer area - RFA.
 	 */
 	fxp_scb_wait(sc);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.rx_head->rx_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_RU_START);
 
 	/*
 	 * Set current media.
 	 */
 	if (sc->miibus != NULL)
 		mii_mediachg(device_get_softc(sc->miibus));
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	/*
 	 * Enable interrupts.
 	 */
 #ifdef DEVICE_POLLING
 	/*
 	 * ... but only do that if we are not polling. And because (presumably)
 	 * the default is interrupts on, we need to disable them explicitly!
 	 *
 	 * Note the dangling "else": with DEVICE_POLLING defined it governs
 	 * the CSR_WRITE_1() that follows the #endif; without it that write
 	 * is unconditional.
 	 */
 	if (ifp->if_capenable & IFCAP_POLLING )
 		CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE);
 	else
 #endif /* DEVICE_POLLING */
 	CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, 0);
 
 	/*
 	 * Start stats updater.
 	 */
 	callout_reset(&sc->stat_ch, hz, fxp_tick, sc);
 }
 
 /*
  * Media change handler for the serial-media case: nothing is
  * reprogrammed, the request is simply accepted (always returns 0).
  */
 static int
 fxp_serial_ifmedia_upd(struct ifnet *ifp)
 {
 
 	return (0);
 }
 
 /*
  * Media status handler for the serial-media case: always reports
  * manual Ethernet media.  Only ifm_active is filled in here.
  */
 static void
 fxp_serial_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 
 	ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
 }
 
 /*
  * Change media according to request.
  */
 static int
 fxp_ifmedia_upd(struct ifnet *ifp)
 {
 	struct fxp_softc *sc = ifp->if_softc;
 	struct mii_data *mii;
 
 	mii = device_get_softc(sc->miibus);
 	FXP_LOCK(sc);
 	if (mii->mii_instance) {
 		struct mii_softc	*miisc;
 		LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
 			mii_phy_reset(miisc);
 	}
 	mii_mediachg(mii);
 	FXP_UNLOCK(sc);
 	return (0);
 }
 
 /*
  * Notify the world which media we're using.
  */
 static void
 fxp_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct fxp_softc *sc;
 	struct mii_data *miidata;
 
 	sc = ifp->if_softc;
 	miidata = device_get_softc(sc->miibus);
 
 	FXP_LOCK(sc);
 	/* Refresh the PHY state and hand it back to the caller. */
 	mii_pollstat(miidata);
 	ifmr->ifm_active = miidata->mii_media_active;
 	ifmr->ifm_status = miidata->mii_media_status;
 
 	/*
 	 * Track whether the CU resume workaround applies: it is enabled
 	 * only on chips flagged with the bug while running at 10baseT.
 	 */
 	sc->cu_resume_bug =
 	    (IFM_SUBTYPE(ifmr->ifm_active) == IFM_10_T &&
 	    (sc->flags & FXP_FLAG_CU_RESUME_BUG) != 0) ? 1 : 0;
 	FXP_UNLOCK(sc);
 }
 
 /*
  * Add a buffer to the end of the RFA buffer list.
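  * Return 0 if successful, or an errno value on failure (ENOBUFS when
  * no mbuf cluster is available, otherwise the bus_dmamap_load() error).
  * A failure results in reusing the RFA buffer.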
  * The RFA struct is stuck at the beginning of mbuf cluster and the
  * data pointer is fixed up to point just past it.
  */
 static int
 fxp_new_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp)
 {
 	struct mbuf *m;
 	struct fxp_rfa *rfa;
 	bus_dmamap_t tmp_map;
 	int error;
 
 	/* Non-blocking allocation of a packet-header mbuf with a cluster. */
 	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (ENOBUFS);
 
 	/*
 	 * Move the data pointer up so that the incoming data packet
 	 * will be 32-bit aligned.
 	 */
 	m->m_data += RFA_ALIGNMENT_FUDGE;
 
 	/*
 	 * Get a pointer to the base of the mbuf cluster and move
 	 * data start past it.
 	 */
 	rfa = mtod(m, struct fxp_rfa *);
 	m->m_data += sc->rfa_size;
 	rfa->size = htole16(MCLBYTES - sc->rfa_size - RFA_ALIGNMENT_FUDGE);
 
 	rfa->rfa_status = 0;
 	rfa->rfa_control = htole16(FXP_RFA_CONTROL_EL);
 	rfa->actual_size = 0;
 	/* The usable length is what remains after the fudge and the RFA. */
 	m->m_len = m->m_pkthdr.len = MCLBYTES - RFA_ALIGNMENT_FUDGE -
 	    sc->rfa_size;
 
 	/*
 	 * Initialize the rest of the RFA.  Note that since the RFA
 	 * is misaligned, we cannot store values directly.  We're thus
 	 * using the le32enc() function which handles endianness and
 	 * is also alignment-safe.
 	 */
 	le32enc(&rfa->link_addr, 0xffffffff);
 	le32enc(&rfa->rbd_addr, 0xffffffff);
 
 	/*
 	 * Map the RFA into DMA memory.  The load goes into the spare map so
 	 * that, if it fails, rxp's current map and mbuf are left untouched.
 	 */
 	error = bus_dmamap_load(sc->fxp_rxmtag, sc->spare_map, rfa,
 	    MCLBYTES - RFA_ALIGNMENT_FUDGE, fxp_dma_map_addr,
 	    &rxp->rx_addr, BUS_DMA_NOWAIT);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 
 	/*
 	 * Success: drop the old mapping (if any) and swap maps, so the
 	 * freshly loaded map belongs to rxp and the old one becomes the spare.
 	 */
 	if (rxp->rx_mbuf != NULL)
 		bus_dmamap_unload(sc->fxp_rxmtag, rxp->rx_map);
 	tmp_map = sc->spare_map;
 	sc->spare_map = rxp->rx_map;
 	rxp->rx_map = tmp_map;
 	rxp->rx_mbuf = m;
 
 	bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	return (0);
 }
 
 /*
  * Append rxp to the software receive list and, when the list is not
  * empty, chain the previous tail's RFA to it.
  */
 static void
 fxp_add_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp)
 {
 	struct fxp_rx *tail;
 	struct fxp_rfa *tail_rfa;
 
 	if (sc->fxp_desc.rx_head == NULL) {
 		/* Empty list: the new buffer becomes the head. */
 		rxp->rx_next = NULL;
 		sc->fxp_desc.rx_head = rxp;
 	} else {
 		/*
 		 * Other buffers are already queued: link the current tail
 		 * to the new buffer and clear the tail's control word.
 		 */
 		tail = sc->fxp_desc.rx_tail;
 		tail_rfa = (struct fxp_rfa *)
 		    (tail->rx_mbuf->m_ext.ext_buf + RFA_ALIGNMENT_FUDGE);
 		tail->rx_next = rxp;
 		le32enc(&tail_rfa->link_addr, rxp->rx_addr);
 		tail_rfa->rfa_control = 0;
 		bus_dmamap_sync(sc->fxp_rxmtag, tail->rx_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	}
 
 	/* Either way the new buffer is now the tail. */
 	sc->fxp_desc.rx_tail = rxp;
 }
 
 /*
  * Reinitialize the RFA of an existing receive buffer in place so the
  * buffer can be reused without allocating a new mbuf.
  */
 static void
 fxp_discard_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp)
 {
 	struct mbuf *mb;
 	struct fxp_rfa *hdr;
 
 	mb = rxp->rx_mbuf;
 
 	/*
 	 * Rewind the data pointer to the start of the external buffer,
 	 * offset by the alignment fudge so that the incoming packet data
 	 * will be 32-bit aligned.  The RFA lives at that address.
 	 */
 	mb->m_data = mb->m_ext.ext_buf + RFA_ALIGNMENT_FUDGE;
 	hdr = mtod(mb, struct fxp_rfa *);
 
 	/* Packet data starts right after the RFA. */
 	mb->m_data += sc->rfa_size;
 
 	hdr->size = htole16(MCLBYTES - sc->rfa_size - RFA_ALIGNMENT_FUDGE);
 	hdr->rfa_status = 0;
 	hdr->rfa_control = htole16(FXP_RFA_CONTROL_EL);
 	hdr->actual_size = 0;
 
 	/*
 	 * The RFA is misaligned, so the 32-bit fields are written with
 	 * le32enc(), which is alignment-safe and handles endianness.
 	 */
 	le32enc(&hdr->link_addr, 0xffffffff);
 	le32enc(&hdr->rbd_addr, 0xffffffff);
 
 	bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Read PHY register `reg' of PHY `phy' through the MDI control register.
  *
  * The request is written to FXP_CSR_MDICONTROL and the register is then
  * polled (up to 10000 times, 10us apart) until bit 28 (0x10000000) is
  * set.  Returns the low 16 bits of the last value read; a timeout is
  * reported on the console but the (then unreliable) value is still
  * returned.
  */
 static int
 fxp_miibus_readreg(device_t dev, int phy, int reg)
 {
 	struct fxp_softc *sc = device_get_softc(dev);
 	int count = 10000;
 	int value;
 
 	CSR_WRITE_4(sc, FXP_CSR_MDICONTROL,
 	    (FXP_MDI_READ << 26) | (reg << 16) | (phy << 21));
 
 	while (((value = CSR_READ_4(sc, FXP_CSR_MDICONTROL)) & 0x10000000) == 0
 	    && count--)
 		DELAY(10);
 
 	/*
 	 * Decide on the completion bit itself rather than on the counter:
 	 * the counter is already 0 when the bit shows up on the very last
 	 * poll, which used to be misreported as a timeout.
 	 */
 	if ((value & 0x10000000) == 0)
 		device_printf(dev, "fxp_miibus_readreg: timed out\n");
 
 	return (value & 0xffff);
 }
 
 /*
  * Write `value' (low 16 bits) to PHY register `reg' of PHY `phy' through
  * the MDI control register.
  *
  * The request is written to FXP_CSR_MDICONTROL and the register is then
  * polled (up to 10000 times, 10us apart) until bit 28 (0x10000000) is
  * set.  A timeout is reported on the console; the function always
  * returns 0.
  */
 static int
 fxp_miibus_writereg(device_t dev, int phy, int reg, int value)
 {
 	struct fxp_softc *sc = device_get_softc(dev);
 	int count = 10000;
 	uint32_t status;
 
 	CSR_WRITE_4(sc, FXP_CSR_MDICONTROL,
 	    (FXP_MDI_WRITE << 26) | (reg << 16) | (phy << 21) |
 	    (value & 0xffff));
 
 	while (((status = CSR_READ_4(sc, FXP_CSR_MDICONTROL)) &
 	    0x10000000) == 0 && count--)
 		DELAY(10);
 
 	/*
 	 * Decide on the completion bit itself rather than on the counter:
 	 * the counter is already 0 when the bit shows up on the very last
 	 * poll, which used to be misreported as a timeout.
 	 */
 	if ((status & 0x10000000) == 0)
 		device_printf(dev, "fxp_miibus_writereg: timed out\n");
 	return (0);
 }
 
 /*
  * Interface ioctl handler.
  *
  * Handles interface flag changes, multicast list changes, media
  * requests and capability changes; everything else is passed on to
  * ether_ioctl().  Returns 0 or an errno value.
  */
 static int
 fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct fxp_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct mii_data *mii;
 	int flag, mask, error = 0, reinit;
 
 	switch (command) {
 	case SIOCSIFFLAGS:
 		FXP_LOCK(sc);
 		/*
 		 * If interface is marked up and not running, then start it.
 		 * If it is marked down and running, stop it.
 		 * XXX If it's up then re-initialize it. This is so flags
 		 * such as IFF_PROMISC are handled.
 		 */
 		if (ifp->if_flags & IFF_UP) {
 			/*
 			 * Already running: reinitialize only when one of
 			 * PROMISC/ALLMULTI/LINK0 differs from the flags
 			 * remembered in sc->if_flags.
 			 */
 			if (((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) &&
 			    ((ifp->if_flags ^ sc->if_flags) &
 			    (IFF_PROMISC | IFF_ALLMULTI | IFF_LINK0)) != 0)
 				fxp_init_body(sc);
 			else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 				fxp_init_body(sc);
 		} else {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 				fxp_stop(sc);
 		}
 		/* Remember the flags for the next comparison. */
 		sc->if_flags = ifp->if_flags;
 		FXP_UNLOCK(sc);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* fxp_init() takes the softc lock itself. */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 			fxp_init(sc);
 		break;
 
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		/* Use the MII media list when a miibus is attached. */
 		if (sc->miibus != NULL) {
 			mii = device_get_softc(sc->miibus);
                         error = ifmedia_ioctl(ifp, ifr,
                             &mii->mii_media, command);
 		} else {
                         error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
 		}
 		break;
 
 	case SIOCSIFCAP:
 		reinit = 0;
 		/* Bits that differ between enabled and requested capabilities. */
 		mask = ifp->if_capenable ^ ifr->ifr_reqcap;
 #ifdef DEVICE_POLLING
 		if (mask & IFCAP_POLLING) {
 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 				/* No lock is held yet, so a plain return is safe. */
 				error = ether_poll_register(fxp_poll, ifp);
 				if (error)
 					return(error);
 				FXP_LOCK(sc);
 				CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL,
 				    FXP_SCB_INTR_DISABLE);
 				ifp->if_capenable |= IFCAP_POLLING;
 				FXP_UNLOCK(sc);
 			} else {
 				error = ether_poll_deregister(ifp);
 				/* Enable interrupts in any case */
 				FXP_LOCK(sc);
 				CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, 0);
 				ifp->if_capenable &= ~IFCAP_POLLING;
 				FXP_UNLOCK(sc);
 			}
 		}
 #endif
 		/*
 		 * Each capability below is toggled only when it was requested
 		 * to change and the hardware advertises it in if_capabilities.
 		 */
 		FXP_LOCK(sc);
 		if ((mask & IFCAP_TXCSUM) != 0 &&
 		    (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 			if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
 				ifp->if_hwassist |= FXP_CSUM_FEATURES;
 			else
 				ifp->if_hwassist &= ~FXP_CSUM_FEATURES;
 		}
 		if ((mask & IFCAP_RXCSUM) != 0 &&
 		    (ifp->if_capabilities & IFCAP_RXCSUM) != 0) {
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 			/* Rx checksumming is part of the chip configuration. */
 			if ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0)
 				reinit++;
 		}
 		if ((mask & IFCAP_TSO4) != 0 &&
 		    (ifp->if_capabilities & IFCAP_TSO4) != 0) {
 			ifp->if_capenable ^= IFCAP_TSO4;
 			if ((ifp->if_capenable & IFCAP_TSO4) != 0)
 				ifp->if_hwassist |= CSUM_TSO;
 			else
 				ifp->if_hwassist &= ~CSUM_TSO;
 		}
 		if ((mask & IFCAP_WOL_MAGIC) != 0 &&
 		    (ifp->if_capabilities & IFCAP_WOL_MAGIC) != 0)
 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
 		if ((mask & IFCAP_VLAN_MTU) != 0 &&
 		    (ifp->if_capabilities & IFCAP_VLAN_MTU) != 0) {
 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
 			if (sc->revision != FXP_REV_82557)
 				flag = FXP_FLAG_LONG_PKT_EN;
 			else /* a hack to get long frames on the old chip */
 				flag = FXP_FLAG_SAVE_BAD;
 			sc->flags ^= flag;
 			if (ifp->if_flags & IFF_UP)
 				reinit++;
 		}
 		if ((mask & IFCAP_VLAN_HWCSUM) != 0 &&
 		    (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
 		if ((mask & IFCAP_VLAN_HWTSO) != 0 &&
 		    (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 		if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
 		    (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			/* Without hardware tagging, VLAN TSO/checksum go too. */
 			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
 				ifp->if_capenable &=
 				    ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM);
 			reinit++;
 		}
 		/* Apply configuration-affecting changes only if the interface is up. */
 		if (reinit > 0 && ifp->if_flags & IFF_UP)
 			fxp_init_body(sc);
 		FXP_UNLOCK(sc);
 		VLAN_CAPABILITIES(ifp);
 		break;
 
 	default:
 		error = ether_ioctl(ifp, command, data);
 	}
 	return (error);
 }
 
 /*
  * Fill in the multicast address list and return number of entries.
  */
 static int
 fxp_mc_addrs(struct fxp_softc *sc)
 {
 	struct fxp_cb_mcs *mcsp;
 	struct ifnet *ifp;
 	struct ifmultiaddr *ifma;
 	int naddrs;
 
 	mcsp = sc->mcsp;
 	ifp = sc->ifp;
 	naddrs = 0;
 
 	/* With IFF_ALLMULTI already set the list is left empty. */
 	if ((ifp->if_flags & IFF_ALLMULTI) == 0) {
 		if_maddr_rlock(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			/* Only link-layer addresses are of interest. */
 			if (ifma->ifma_addr->sa_family != AF_LINK)
 				continue;
 			/*
 			 * Too many addresses for the setup block: fall back
 			 * to all-multicast mode and report an empty list.
 			 */
 			if (naddrs >= MAXMCADDR) {
 				ifp->if_flags |= IFF_ALLMULTI;
 				naddrs = 0;
 				break;
 			}
 			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 			    mcsp->mc_addr[naddrs], ETHER_ADDR_LEN);
 			naddrs++;
 		}
 		if_maddr_runlock(ifp);
 	}
 
 	/* The count field holds the list size in bytes, little-endian. */
 	mcsp->mc_cnt = htole16(naddrs * ETHER_ADDR_LEN);
 	return (naddrs);
 }
 
 /*
  * Program the multicast filter.
  *
  * We have an artificial restriction that the multicast setup command
  * must be the first command in the chain, so we take steps to ensure
  * this. By requiring this, it allows us to keep up the performance of
  * the pre-initialized command ring (esp. link pointers) by not actually
  * inserting the mcsetup command in the ring - i.e. its link pointer
  * points to the TxCB ring, but the mcsetup descriptor itself is not part
  * of it. We then can do 'CU_START' on the mcsetup descriptor and have it
  * lead into the regular TxCB ring when it completes.
  */
 static void
 fxp_mc_setup(struct fxp_softc *sc)
 {
 	struct fxp_cb_mcs *mcsp;
 	int count;
 
 	FXP_LOCK_ASSERT(sc, MA_OWNED);
 
 	/*
 	 * Build the multicast setup command block.
 	 * NOTE(review): the block is marked end-of-list (EL) with an
 	 * all-ones link address, i.e. it is not linked into the TxCB ring
 	 * as the comment above this function describes - confirm whether
 	 * that comment is stale.
 	 */
 	mcsp = sc->mcsp;
 	mcsp->cb_status = 0;
 	mcsp->cb_command = htole16(FXP_CB_COMMAND_MCAS | FXP_CB_COMMAND_EL);
 	mcsp->link_addr = 0xffffffff;
 	/* Fill in the address list and its byte count. */
 	fxp_mc_addrs(sc);
 
 	/*
 	 * Wait until command unit is idle. This should never be the
 	 * case when nothing is queued, but make sure anyway.
 	 * Gives up (without programming the filter) after 100 polls
 	 * spaced 10us apart.
 	 */
 	count = 100;
 	while ((CSR_READ_1(sc, FXP_CSR_SCB_RUSCUS) >> 6) !=
 	    FXP_SCB_CUS_IDLE && --count)
 		DELAY(10);
 	if (count == 0) {
 		device_printf(sc->dev, "command queue timeout\n");
 		return;
 	}
 
 	/*
 	 * Start the multicast setup command.
 	 */
 	fxp_scb_wait(sc);
 	bus_dmamap_sync(sc->mcs_tag, sc->mcs_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->mcs_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START);
 	/* ...and wait for it to complete. */
 	fxp_dma_wait(sc, &mcsp->cb_status, sc->mcs_tag, sc->mcs_map);
 }
 
 /* Microcode images, one array per supported chip family/stepping. */
 static uint32_t fxp_ucode_d101a[] = D101_A_RCVBUNDLE_UCODE;
 static uint32_t fxp_ucode_d101b0[] = D101_B0_RCVBUNDLE_UCODE;
 static uint32_t fxp_ucode_d101ma[] = D101M_B_RCVBUNDLE_UCODE;
 static uint32_t fxp_ucode_d101s[] = D101S_RCVBUNDLE_UCODE;
 static uint32_t fxp_ucode_d102[] = D102_B_RCVBUNDLE_UCODE;
 static uint32_t fxp_ucode_d102c[] = D102_C_RCVBUNDLE_UCODE;
 static uint32_t fxp_ucode_d102e[] = D102_E_RCVBUNDLE_UCODE;
 
 /* Expands to two initializers: the image pointer and its length in words. */
 #define UCODE(x)	x, sizeof(x)/sizeof(uint32_t)
 
 /*
  * Maps a chip revision to its microcode image.  The two offsets are
  * word indexes into the image where fxp_load_ucode() patches in the
  * interrupt delay and bundle-max tunables; an offset of 0 means the
  * image has no such field.  The table ends with an entry whose ucode
  * pointer is NULL.
  */
-struct ucode {
+static const struct ucode {
 	uint32_t	revision;
 	uint32_t	*ucode;
 	int		length;
 	u_short		int_delay_offset;
 	u_short		bundle_max_offset;
-} ucode_table[] = {
+} const ucode_table[] = {
 	{ FXP_REV_82558_A4, UCODE(fxp_ucode_d101a), D101_CPUSAVER_DWORD, 0 },
 	{ FXP_REV_82558_B0, UCODE(fxp_ucode_d101b0), D101_CPUSAVER_DWORD, 0 },
 	{ FXP_REV_82559_A0, UCODE(fxp_ucode_d101ma),
 	    D101M_CPUSAVER_DWORD, D101M_CPUSAVER_BUNDLE_MAX_DWORD },
 	{ FXP_REV_82559S_A, UCODE(fxp_ucode_d101s),
 	    D101S_CPUSAVER_DWORD, D101S_CPUSAVER_BUNDLE_MAX_DWORD },
 	{ FXP_REV_82550, UCODE(fxp_ucode_d102),
 	    D102_B_CPUSAVER_DWORD, D102_B_CPUSAVER_BUNDLE_MAX_DWORD },
 	{ FXP_REV_82550_C, UCODE(fxp_ucode_d102c),
 	    D102_C_CPUSAVER_DWORD, D102_C_CPUSAVER_BUNDLE_MAX_DWORD },
 	{ FXP_REV_82551_F, UCODE(fxp_ucode_d102e),
 	    D102_E_CPUSAVER_DWORD, D102_E_CPUSAVER_BUNDLE_MAX_DWORD },
 	/* Terminator. */
 	{ 0, NULL, 0, 0, 0 }
 };
 
 /*
  * Download the microcode image matching sc->revision to the chip.
  *
  * Does nothing when ucode_table has no entry for the revision.  On
  * completion FXP_FLAG_UCODE is set in sc->flags.  The TxCB list memory
  * is used as scratch space for the command block.
  */
 static void
 fxp_load_ucode(struct fxp_softc *sc)
 {
-	struct ucode *uc;
+	const struct ucode *uc;
 	struct fxp_cb_ucode *cbp;
 	int i;
 
 	/* Look up the image for this chip revision; stop at the terminator. */
 	for (uc = ucode_table; uc->ucode != NULL; uc++)
 		if (sc->revision == uc->revision)
 			break;
 	if (uc->ucode == NULL)
 		return;
 	cbp = (struct fxp_cb_ucode *)sc->fxp_desc.cbl_list;
 	cbp->cb_status = 0;
 	cbp->cb_command = htole16(FXP_CB_COMMAND_UCODE | FXP_CB_COMMAND_EL);
 	cbp->link_addr = 0xffffffff;    	/* (no) next command */
 	/* Copy the image into the command block in little-endian form. */
 	for (i = 0; i < uc->length; i++)
 		cbp->ucode[i] = htole32(uc->ucode[i]);
 	/*
 	 * Patch the tunables into the image as 16-bit values at the word
 	 * offsets given by the table (0 means "no such field").  The
 	 * interrupt delay is written as 1.5 times the tunable value.
 	 */
 	if (uc->int_delay_offset)
 		*(uint16_t *)&cbp->ucode[uc->int_delay_offset] =
 		    htole16(sc->tunable_int_delay + sc->tunable_int_delay / 2);
 	if (uc->bundle_max_offset)
 		*(uint16_t *)&cbp->ucode[uc->bundle_max_offset] =
 		    htole16(sc->tunable_bundle_max);
 	/*
 	 * Download the ucode to the chip.
 	 */
 	fxp_scb_wait(sc);
 	bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr);
 	fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START);
 	/* ...and wait for it to complete. */
 	fxp_dma_wait(sc, &cbp->cb_status, sc->cbl_tag, sc->cbl_map);
 	/* bundle_max is reported as 0 when the image has no such field. */
 	device_printf(sc->dev,
 	    "Microcode loaded, int_delay: %d usec  bundle_max: %d\n",
 	    sc->tunable_int_delay,
 	    uc->bundle_max_offset == 0 ? 0 : sc->tunable_bundle_max);
 	sc->flags |= FXP_FLAG_UCODE;
 }
 
 /*
  * Helper: add a read-only unsigned statistics leaf named n, backed by
  * the variable at p and described by d, under parent h in context c.
  */
 #define FXP_SYSCTL_STAT_ADD(c, h, n, p, d)	\
 	SYSCTL_ADD_UINT(c, h, OID_AUTO, n, CTLFLAG_RD, p, 0, d)
 
 /*
  * Create the per-device sysctl nodes (tunables plus a "stats" subtree
  * with "rx" and "tx" children) and load the tunable defaults, which
  * may be overridden by device hints.
  */
 static void
 fxp_sysctl_node(struct fxp_softc *sc)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid_list *child, *parent;
 	struct sysctl_oid *tree;
 	struct fxp_hwstats *hsp;
 
 	ctx = device_get_sysctl_ctx(sc->dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 
 	/* Range-checked read/write tunables. */
 	SYSCTL_ADD_PROC(ctx, child,
 	    OID_AUTO, "int_delay", CTLTYPE_INT | CTLFLAG_RW,
 	    &sc->tunable_int_delay, 0, sysctl_hw_fxp_int_delay, "I",
 	    "FXP driver receive interrupt microcode bundling delay");
 	SYSCTL_ADD_PROC(ctx, child,
 	    OID_AUTO, "bundle_max", CTLTYPE_INT | CTLFLAG_RW,
 	    &sc->tunable_bundle_max, 0, sysctl_hw_fxp_bundle_max, "I",
 	    "FXP driver receive interrupt microcode bundle size limit");
 	SYSCTL_ADD_INT(ctx, child,OID_AUTO, "rnr", CTLFLAG_RD, &sc->rnr, 0,
 	    "FXP RNR events");
 	SYSCTL_ADD_INT(ctx, child,
 	    OID_AUTO, "noflow", CTLFLAG_RW, &sc->tunable_noflow, 0,
 	    "FXP flow control disabled");
 
 	/*
 	 * Pull in device tunables.
 	 * Defaults are set first; resource_int_value() then replaces
 	 * them with per-device hints where present (its return value is
 	 * deliberately ignored).
 	 */
 	sc->tunable_int_delay = TUNABLE_INT_DELAY;
 	sc->tunable_bundle_max = TUNABLE_BUNDLE_MAX;
 	sc->tunable_noflow = 1;
 	(void) resource_int_value(device_get_name(sc->dev),
 	    device_get_unit(sc->dev), "int_delay", &sc->tunable_int_delay);
 	(void) resource_int_value(device_get_name(sc->dev),
 	    device_get_unit(sc->dev), "bundle_max", &sc->tunable_bundle_max);
 	(void) resource_int_value(device_get_name(sc->dev),
 	    device_get_unit(sc->dev), "noflow", &sc->tunable_noflow);
 	sc->rnr = 0;
 
 	/* Everything below exports counters from sc->fxp_hwstats. */
 	hsp = &sc->fxp_hwstats;
 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD,
 	    NULL, "FXP statistics");
 	parent = SYSCTL_CHILDREN(tree);
 
 	/* Rx MAC statistics. */
 	tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx", CTLFLAG_RD,
 	    NULL, "Rx MAC statistics");
 	child = SYSCTL_CHILDREN(tree);
 	FXP_SYSCTL_STAT_ADD(ctx, child, "good_frames",
 	    &hsp->rx_good, "Good frames");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "crc_errors",
 	    &hsp->rx_crc_errors, "CRC errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "alignment_errors",
 	    &hsp->rx_alignment_errors, "Alignment errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "rnr_errors",
 	    &hsp->rx_rnr_errors, "RNR errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "overrun_errors",
 	    &hsp->rx_overrun_errors, "Overrun errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "cdt_errors",
 	    &hsp->rx_cdt_errors, "Collision detect errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "shortframes",
 	    &hsp->rx_shortframes, "Short frame errors");
 	/* Counters exported only for newer chip revisions. */
 	if (sc->revision >= FXP_REV_82558_A4) {
 		FXP_SYSCTL_STAT_ADD(ctx, child, "pause",
 		    &hsp->rx_pause, "Pause frames");
 		FXP_SYSCTL_STAT_ADD(ctx, child, "controls",
 		    &hsp->rx_controls, "Unsupported control frames");
 	}
 	if (sc->revision >= FXP_REV_82559_A0)
 		FXP_SYSCTL_STAT_ADD(ctx, child, "tco",
 		    &hsp->rx_tco, "TCO frames");
 
 	/* Tx MAC statistics. */
 	tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "tx", CTLFLAG_RD,
 	    NULL, "Tx MAC statistics");
 	child = SYSCTL_CHILDREN(tree);
 	FXP_SYSCTL_STAT_ADD(ctx, child, "good_frames",
 	    &hsp->tx_good, "Good frames");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "maxcols",
 	    &hsp->tx_maxcols, "Maximum collisions errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "latecols",
 	    &hsp->tx_latecols, "Late collisions errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "underruns",
 	    &hsp->tx_underruns, "Underrun errors");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "lostcrs",
 	    &hsp->tx_lostcrs, "Lost carrier sense");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "deffered",
 	    &hsp->tx_deffered, "Deferred");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "single_collisions",
 	    &hsp->tx_single_collisions, "Single collisions");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "multiple_collisions",
 	    &hsp->tx_multiple_collisions, "Multiple collisions");
 	FXP_SYSCTL_STAT_ADD(ctx, child, "total_collisions",
 	    &hsp->tx_total_collisions, "Total collisions");
 	/* Counters exported only for newer chip revisions. */
 	if (sc->revision >= FXP_REV_82558_A4)
 		FXP_SYSCTL_STAT_ADD(ctx, child, "pause",
 		    &hsp->tx_pause, "Pause frames");
 	if (sc->revision >= FXP_REV_82559_A0)
 		FXP_SYSCTL_STAT_ADD(ctx, child, "tco",
 		    &hsp->tx_tco, "TCO frames");
 }
 
 #undef FXP_SYSCTL_STAT_ADD
 
 /*
  * Generic sysctl handler for an int stored at arg1 that only accepts
  * new values within [low, high].  Reads return the current value;
  * out-of-range writes fail with EINVAL and leave the value unchanged.
  */
 static int
 sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high)
 {
 	int *target;
 	int newval, rc;
 
 	target = (int *)arg1;
 
 	/* Work on a copy so a rejected write never touches the variable. */
 	newval = *target;
 	rc = sysctl_handle_int(oidp, &newval, 0, req);
 	if (rc != 0)
 		return (rc);
 
 	/* No new value supplied: this was a read, nothing more to do. */
 	if (req->newptr == NULL)
 		return (0);
 
 	if (newval < low || newval > high)
 		return (EINVAL);
 
 	*target = newval;
 	return (0);
 }
 
 /*
  * Interrupt delay is expressed in microseconds, a multiplier is used
  * to convert this to the appropriate clock ticks before using.
  */
 static int
 sysctl_hw_fxp_int_delay(SYSCTL_HANDLER_ARGS)
 {
+
 	/* Accept only values from 300 to 3000 inclusive. */
 	return (sysctl_int_range(oidp, arg1, arg2, req, 300, 3000));
 }
 
 /*
  * Sysctl handler for the "bundle_max" tunable; the value must fit in
  * the 16-bit field it is written to by fxp_load_ucode().
  */
 static int
 sysctl_hw_fxp_bundle_max(SYSCTL_HANDLER_ARGS)
 {
+
 	/* Accept only values from 1 to 0xffff inclusive. */
 	return (sysctl_int_range(oidp, arg1, arg2, req, 1, 0xffff));
 }
Index: projects/binutils-2.17/sys/dev/fxp/if_fxpvar.h
===================================================================
--- projects/binutils-2.17/sys/dev/fxp/if_fxpvar.h	(revision 215829)
+++ projects/binutils-2.17/sys/dev/fxp/if_fxpvar.h	(revision 215830)
@@ -1,247 +1,247 @@
 /*-
  * Copyright (c) 1995, David Greenman
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Misc. definitions for the Intel EtherExpress Pro/100B PCI Fast
  * Ethernet driver
  */
 
 /*
  * Number of transmit control blocks. This determines the number
  * of transmit buffers that can be chained in the CB list.
  * This must be a power of two.
  */
 #define FXP_NTXCB       128
 #define	FXP_NTXCB_HIWAT	((FXP_NTXCB * 7) / 10)
 
 /*
  * Maximum size of a DMA segment.
  */
 #define	FXP_TSO_SEGSIZE	4096
 
 /*
  * Size of the TxCB list.
  */
 #define FXP_TXCB_SZ	(FXP_NTXCB * sizeof(struct fxp_cb_tx))
 
 /*
  * Macro to obtain the DMA address of a virtual address in the
  * TxCB list based on the base DMA address of the TxCB list.
  *
  * sc   - softc pointer; sc->fxp_desc.cbl_list and sc->fxp_desc.cbl_addr
  *        are the virtual base and the DMA base of the list.
  * addr - virtual address inside the list.
  *
  * The result is cbl_addr plus the byte offset of addr from cbl_list.
  * Both arguments are parenthesized so that arbitrary expressions (e.g.
  * "p + 1" or "&array[i]") can be passed safely.  Note that sc is
  * evaluated twice.
  */
 #define FXP_TXCB_DMA_ADDR(sc, addr)					\
 	((sc)->fxp_desc.cbl_addr + (uintptr_t)(addr) -			\
 	(uintptr_t)(sc)->fxp_desc.cbl_list)
 
 /*
  * Number of completed TX commands at which point an interrupt
  * will be generated to garbage collect the attached buffers.
  * Must be at least one less than FXP_NTXCB, and should be
  * enough less so that the transmitter doesn't become idle
  * during the buffer rundown (which would reduce performance).
  */
 #define FXP_CXINT_THRESH 120
 
 /*
  * TxCB list index mask. This is used to do list wrap-around.
  */
 #define FXP_TXCB_MASK   (FXP_NTXCB - 1)
 
 /*
  * Number of receive frame area buffers. These are large, so choose
  * wisely.
  */
 #ifdef DEVICE_POLLING
 #define FXP_NRFABUFS	192
 #else
 #define FXP_NRFABUFS    64
 #endif
 
 /*
  * Maximum number of seconds that the receiver can be idle before we
  * assume it's dead and attempt to reset it by reprogramming the
  * multicast filter. This is part of a work-around for a bug in the
  * NIC. See fxp_stats_update().
  */
 #define FXP_MAX_RX_IDLE 15
 
 /*
  * Default maximum time, in microseconds, that an interrupt may be delayed
  * in an attempt to coalesce interrupts.  This is only effective if the Intel
  * microcode is loaded, and may be changed via either loader tunables or
  * sysctl.  See also the CPUSAVER_DWORD entry in rcvbundl.h.
  */
 #define TUNABLE_INT_DELAY 1000
 
 /*
  * Default number of packets that will be bundled, before an interrupt is
  * generated.  This is only effective if the Intel microcode is loaded, and
  * may be changed via either loader tunables or sysctl.  This may not be
  * present in all microcode revisions, see also the CPUSAVER_BUNDLE_MAX_DWORD
  * entry in rcvbundl.h.
  */
 #define TUNABLE_BUNDLE_MAX 6
 
 #define	FXP_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
 #define	FXP_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
 #define	FXP_LOCK_ASSERT(_sc, _what)	mtx_assert(&(_sc)->sc_mtx, (_what))
 
 /*
  * Structures to handle TX and RX descriptors.
  */
 /*
  * Software state for one receive buffer.  struct fxp_desc_list holds
  * FXP_NRFABUFS of these in rx_list[] and points into them with
  * rx_head/rx_tail.
  */
 struct fxp_rx {
 	struct fxp_rx *rx_next;		/* link to another fxp_rx entry */
 	struct mbuf *rx_mbuf;		/* mbuf associated with this entry */
 	bus_dmamap_t rx_map;		/* bus DMA map for this entry */
 	uint32_t rx_addr;		/* presumably the buffer's DMA address -- confirm */
 };
 
 /*
  * Software state for one transmit slot.  struct fxp_desc_list holds
  * FXP_NTXCB of these in tx_list[] and points into them with
  * tx_first/tx_last.
  */
 struct fxp_tx {
 	struct fxp_tx *tx_next;		/* link to another fxp_tx entry */
 	struct fxp_cb_tx *tx_cb;	/* transmit control block of this slot */
 	struct mbuf *tx_mbuf;		/* mbuf associated with this slot */
 	bus_dmamap_t tx_map;		/* bus DMA map for this slot */
 };
 
 /*
  * Descriptor bookkeeping embedded in the softc (fxp_softc.fxp_desc):
  * the per-buffer RX/TX software state arrays plus the pointers and
  * addresses needed to walk them.
  */
 struct fxp_desc_list {
 	struct fxp_rx rx_list[FXP_NRFABUFS];	/* one entry per RX buffer */
 	struct fxp_tx tx_list[FXP_NTXCB];	/* one entry per TxCB */
 	struct fxp_tx mcs_tx;		/* presumably used for multicast setup -- confirm */
 	struct fxp_rx *rx_head;
 	struct fxp_rx *rx_tail;
 	struct fxp_tx *tx_first;
 	struct fxp_tx *tx_last;
 	struct fxp_rfa *rfa_list;
 	struct fxp_cb_tx *cbl_list;	/* virtual base used by FXP_TXCB_DMA_ADDR() */
 	uint32_t cbl_addr;		/* DMA base used by FXP_TXCB_DMA_ADDR() */
 	bus_dma_tag_t rx_tag;
 };
 
 /*
  * Device identification entry: a device ID and revision ID to match,
  * an "ich" byte and a name string.  The table built from these entries
  * is not part of this header.
  */
 struct fxp_ident {
 	uint16_t	devid;
 	int16_t		revid;		/* -1 matches anything */
 	uint8_t		ich;
-	char 		*name;
+	const char	*name;
 };
 
 /*
  * Hardware statistics counters kept in the softc
  * (fxp_softc.fxp_hwstats).  The tx_* descriptions below are the ones
  * used when the counters are exported via sysctl; the rx_* exports are
  * not visible in this part of the source, so those fields are left
  * undescribed.
  */
 struct fxp_hwstats {
 	uint32_t tx_good;
 	uint32_t tx_maxcols;		/* maximum collisions errors */
 	uint32_t tx_latecols;		/* late collisions errors */
 	uint32_t tx_underruns;		/* underrun errors */
 	uint32_t tx_lostcrs;		/* lost carrier sense */
 	uint32_t tx_deffered;		/* deferred */
 	uint32_t tx_single_collisions;	/* single collisions */
 	uint32_t tx_multiple_collisions; /* multiple collisions */
 	uint32_t tx_total_collisions;	/* total collisions */
 	uint32_t tx_pause;		/* pause frames; exported for revision >= FXP_REV_82558_A4 */
 	uint32_t tx_tco;		/* TCO frames; exported for revision >= FXP_REV_82559_A0 */
 	uint32_t rx_good;
 	uint32_t rx_crc_errors;
 	uint32_t rx_alignment_errors;
 	uint32_t rx_rnr_errors;
 	uint32_t rx_overrun_errors;
 	uint32_t rx_cdt_errors;
 	uint32_t rx_shortframes;
 	uint32_t rx_pause;
 	uint32_t rx_controls;
 	uint32_t rx_tco;
 };
 
 /*
  * NOTE: Elements are ordered for optimal cacheline behavior, and NOT
  *	 for functional grouping.
  */
 /* Per-device driver state (softc) of the fxp driver. */
 struct fxp_softc {
 	struct ifnet *ifp;		/* per-interface network data */
 	struct resource	*fxp_res[2];	/* I/O and IRQ resources */
 	struct resource_spec *fxp_spec;	/* the resource spec we used */
 	void *ih;			/* interrupt handler cookie */
-	struct fxp_ident *ident;
+	const struct fxp_ident *ident;
 	struct mtx sc_mtx;		/* taken by FXP_LOCK()/FXP_UNLOCK() */
 	bus_dma_tag_t fxp_txmtag;	/* bus DMA tag for Tx mbufs */
 	bus_dma_tag_t fxp_rxmtag;	/* bus DMA tag for Rx mbufs */
 	bus_dma_tag_t fxp_stag;		/* bus DMA tag for stats */
 	bus_dmamap_t fxp_smap;		/* bus DMA map for stats */
 	bus_dma_tag_t cbl_tag;		/* DMA tag for the TxCB list */
 	bus_dmamap_t cbl_map;		/* DMA map for the TxCB list */
 	bus_dma_tag_t mcs_tag;		/* DMA tag for the multicast setup */
 	bus_dmamap_t mcs_map;		/* DMA map for the multicast setup */
 	bus_dmamap_t spare_map;		/* spare DMA map */
 	struct fxp_desc_list fxp_desc;	/* descriptors management struct */
 	int maxtxseg;			/* maximum # of TX segments */
 	int maxsegsize;			/* maximum size of a TX segment */
 	int tx_queued;			/* # of active TxCB's */
 	struct fxp_stats *fxp_stats;	/* Pointer to interface stats */
 	uint32_t stats_addr;		/* DMA address of the stats structure */
 	struct fxp_hwstats fxp_hwstats;	/* hardware statistics counters */
 	int rx_idle_secs;		/* # of seconds RX has been idle */
 	struct callout stat_ch;		/* stat callout */
 	int watchdog_timer;		/* seconds until chip reset */
 	struct fxp_cb_mcs *mcsp;	/* Pointer to mcast setup descriptor */
 	uint32_t mcs_addr;		/* DMA address of the multicast cmd */
 	struct ifmedia sc_media;	/* media information */
 	device_t miibus;
 	device_t dev;
 	int tunable_int_delay;		/* interrupt delay value for ucode */
 	int tunable_bundle_max;		/* max # frames per interrupt (ucode) */
 	int tunable_noflow;		/* flow control disabled */
 	int rnr;			/* RNR events */
 	int eeprom_size;		/* size of serial EEPROM */
 	int suspended;			/* 0 = normal  1 = suspended or dead */
 	int cu_resume_bug;
 	int revision;			/* compared against FXP_REV_* values */
 	int flags;			/* presumably FXP_FLAG_* bits -- confirm */
 	int if_flags;
 	uint8_t rfa_size;
 	uint32_t tx_cmd;
 };
 
 #define FXP_FLAG_MWI_ENABLE	0x0001	/* MWI enable */
 #define FXP_FLAG_READ_ALIGN	0x0002	/* align read access with cacheline */
 #define FXP_FLAG_WRITE_ALIGN	0x0004	/* end write on cacheline */
 #define FXP_FLAG_EXT_TXCB	0x0008	/* enable use of extended TXCB */
 #define FXP_FLAG_SERIAL_MEDIA	0x0010	/* 10Mbps serial interface */
 #define FXP_FLAG_LONG_PKT_EN	0x0020	/* enable long packet reception */
 #define FXP_FLAG_CU_RESUME_BUG	0x0080	/* requires workaround for CU_RESUME */
 #define FXP_FLAG_UCODE		0x0100	/* ucode is loaded */
 #define FXP_FLAG_DEFERRED_RNR	0x0200	/* DEVICE_POLLING deferred RNR */
 #define FXP_FLAG_EXT_RFA	0x0400	/* extended RFDs for csum offload */
 #define FXP_FLAG_SAVE_BAD	0x0800	/* save bad pkts: bad size, CRC, etc */
 #define FXP_FLAG_82559_RXCSUM	0x1000	/* 82559 compatible RX checksum */
 #define FXP_FLAG_WOLCAP		0x2000	/* WOL capability */
 #define FXP_FLAG_WOL		0x4000	/* WOL active */
 #define FXP_FLAG_RXBUG		0x8000	/* Rx lock-up bug */
 
 /*
  * Macros to ease CSR access.
  *
  * Each wrapper forwards to the bus_read_N()/bus_write_N() call of the
  * same width (N bytes) on the first entry of the softc's fxp_res[]
  * array.  sc is parenthesized so that any pointer expression (e.g.
  * "p + 1" or "&array[i]") can be passed safely.
  */
 #define	CSR_READ_1(sc, reg)		bus_read_1((sc)->fxp_res[0], reg)
 #define	CSR_READ_2(sc, reg)		bus_read_2((sc)->fxp_res[0], reg)
 #define	CSR_READ_4(sc, reg)		bus_read_4((sc)->fxp_res[0], reg)
 #define	CSR_WRITE_1(sc, reg, val)	bus_write_1((sc)->fxp_res[0], reg, val)
 #define	CSR_WRITE_2(sc, reg, val)	bus_write_2((sc)->fxp_res[0], reg, val)
 #define	CSR_WRITE_4(sc, reg, val)	bus_write_4((sc)->fxp_res[0], reg, val)
Index: projects/binutils-2.17/sys/dev/gem/if_gem.c
===================================================================
--- projects/binutils-2.17/sys/dev/gem/if_gem.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/gem/if_gem.c	(revision 215830)
@@ -1,2255 +1,2259 @@
 /*-
  * Copyright (C) 2001 Eduardo Horvath.
  * Copyright (c) 2001-2003 Thomas Moestl
  * Copyright (c) 2007 Marius Strobl <marius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: NetBSD: gem.c,v 1.21 2002/06/01 23:50:58 lukem Exp
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for Apple GMAC, Sun ERI and Sun GEM Ethernet controllers
  */
 
 #if 0
 #define	GEM_DEBUG
 #endif
 
 #if 0	/* XXX: In case of emergency, re-enable this. */
 #define	GEM_RINT_TIMEOUT
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/endian.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/rman.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <machine/bus.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
 #include <dev/gem/if_gemreg.h>
 #include <dev/gem/if_gemvar.h>
 
 CTASSERT(powerof2(GEM_NRXDESC) && GEM_NRXDESC >= 32 && GEM_NRXDESC <= 8192);
 CTASSERT(powerof2(GEM_NTXDESC) && GEM_NTXDESC >= 32 && GEM_NTXDESC <= 8192);
 
 #define	GEM_TRIES	10000
 
 /*
  * The hardware supports basic TCP/UDP checksum offloading.  However,
  * the hardware doesn't compensate for UDP datagrams whose checksum
  * comes out as 0x0.  As a safeguard, UDP checksum offload is disabled
  * by default.  It can be reactivated by setting the special link option
  * link0 with ifconfig(8).
  */
 #define	GEM_CSUM_FEATURES	(CSUM_TCP)
 
 static int	gem_add_rxbuf(struct gem_softc *sc, int idx);
 static int	gem_bitwait(struct gem_softc *sc, u_int bank, bus_addr_t r,
 		    uint32_t clr, uint32_t set);
 static void	gem_cddma_callback(void *xsc, bus_dma_segment_t *segs,
 		    int nsegs, int error);
 static int	gem_disable_rx(struct gem_softc *sc);
 static int	gem_disable_tx(struct gem_softc *sc);
 static void	gem_eint(struct gem_softc *sc, u_int status);
 static void	gem_init(void *xsc);
 static void	gem_init_locked(struct gem_softc *sc);
 static void	gem_init_regs(struct gem_softc *sc);
 static int	gem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
 static int	gem_load_txmbuf(struct gem_softc *sc, struct mbuf **m_head);
 static int	gem_meminit(struct gem_softc *sc);
 static void	gem_mifinit(struct gem_softc *sc);
 static void	gem_reset(struct gem_softc *sc);
 static int	gem_reset_rx(struct gem_softc *sc);
 static void	gem_reset_rxdma(struct gem_softc *sc);
 static int	gem_reset_tx(struct gem_softc *sc);
 static u_int	gem_ringsize(u_int sz);
 static void	gem_rint(struct gem_softc *sc);
 #ifdef GEM_RINT_TIMEOUT
 static void	gem_rint_timeout(void *arg);
 #endif
 static inline void gem_rxcksum(struct mbuf *m, uint64_t flags);
 static void	gem_rxdrain(struct gem_softc *sc);
 static void	gem_setladrf(struct gem_softc *sc);
 static void	gem_start(struct ifnet *ifp);
 static void	gem_start_locked(struct ifnet *ifp);
 static void	gem_stop(struct ifnet *ifp, int disable);
 static void	gem_tick(void *arg);
 static void	gem_tint(struct gem_softc *sc);
 static inline void gem_txkick(struct gem_softc *sc);
 static int	gem_watchdog(struct gem_softc *sc);
 
 devclass_t gem_devclass;
 DRIVER_MODULE(miibus, gem, miibus_driver, miibus_devclass, 0, 0);
 MODULE_DEPEND(gem, miibus, 1, 1, 1);
 
 #ifdef GEM_DEBUG
 #include <sys/ktr.h>
 #define	KTR_GEM		KTR_SPARE2
 #endif
 
 #define	GEM_BANK1_BITWAIT(sc, r, clr, set)				\
 	gem_bitwait((sc), GEM_RES_BANK1, (r), (clr), (set))
 #define	GEM_BANK2_BITWAIT(sc, r, clr, set)				\
 	gem_bitwait((sc), GEM_RES_BANK2, (r), (clr), (set))
 
 /*
  * Bus-independent attach routine.
  *
  * Allocates and sets up the ifnet, initializes the callouts, resets the
  * chip, creates the DMA tags (parent, RX, TX, control data), allocates
  * and loads the control data, creates the per-buffer TX and RX DMA maps
  * and finally attaches a PHY: external first, then internal, then (for
  * GEM_SUN_GEM) the external PCS SERDES.  When GEM_SERDES is already set
  * in sc_flags the PHY probing is skipped in favour of the SERDES.
  *
  * Returns 0 on success.  On failure the resources acquired so far are
  * released via the fail_* labels at the end of the function and an
  * error code is returned (ENOSPC if if_alloc() fails).
  */
 int
 gem_attach(struct gem_softc *sc)
 {
 	struct gem_txsoft *txs;
 	struct ifnet *ifp;
 	int error, i, phy;
 	uint32_t v;
 
 	if (bootverbose)
 		device_printf(sc->sc_dev, "flags=0x%x\n", sc->sc_flags);
 
 	/* Set up ifnet structure. */
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL)
 		return (ENOSPC);
 	sc->sc_csum_features = GEM_CSUM_FEATURES;
 	ifp->if_softc = sc;
 	if_initname(ifp, device_get_name(sc->sc_dev),
 	    device_get_unit(sc->sc_dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_start = gem_start;
 	ifp->if_ioctl = gem_ioctl;
 	ifp->if_init = gem_init;
 	IFQ_SET_MAXLEN(&ifp->if_snd, GEM_TXQUEUELEN);
 	ifp->if_snd.ifq_drv_maxlen = GEM_TXQUEUELEN;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0);
 #ifdef GEM_RINT_TIMEOUT
 	callout_init_mtx(&sc->sc_rx_ch, &sc->sc_mtx, 0);
 #endif
 
 	/* Make sure the chip is stopped. */
 	gem_reset(sc);
 
 	/* Parent DMA tag; the remaining tags are derived from it. */
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL,
 	    NULL, &sc->sc_pdmatag);
 	if (error != 0)
 		goto fail_ifnet;
 
 	/* RX buffer tag: a single segment of at most MCLBYTES. */
 	error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
 	    1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_rdmatag);
 	if (error != 0)
 		goto fail_ptag;
 
 	/* TX buffer tag: up to GEM_NTXSEGS segments of MCLBYTES each. */
 	error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    MCLBYTES * GEM_NTXSEGS, GEM_NTXSEGS, MCLBYTES,
 	    BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_tdmatag);
 	if (error != 0)
 		goto fail_rtag;
 
 	/* Control data tag: one PAGE_SIZE-aligned segment. */
 	error = bus_dma_tag_create(sc->sc_pdmatag, PAGE_SIZE, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    sizeof(struct gem_control_data), 1,
 	    sizeof(struct gem_control_data), 0,
 	    NULL, NULL, &sc->sc_cdmatag);
 	if (error != 0)
 		goto fail_ttag;
 
 	/*
 	 * Allocate the control data structures, create and load the
 	 * DMA map for it.
 	 */
 	if ((error = bus_dmamem_alloc(sc->sc_cdmatag,
 	    (void **)&sc->sc_control_data,
 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
 	    &sc->sc_cddmamap)) != 0) {
 		device_printf(sc->sc_dev,
 		    "unable to allocate control data, error = %d\n", error);
 		goto fail_ctag;
 	}
 
 	/*
 	 * gem_cddma_callback() stores the segment address in sc_cddma on
 	 * success, so a value of 0 after the load is treated as failure.
 	 */
 	sc->sc_cddma = 0;
 	if ((error = bus_dmamap_load(sc->sc_cdmatag, sc->sc_cddmamap,
 	    sc->sc_control_data, sizeof(struct gem_control_data),
 	    gem_cddma_callback, sc, 0)) != 0 || sc->sc_cddma == 0) {
 		device_printf(sc->sc_dev,
 		    "unable to load control data DMA map, error = %d\n",
 		    error);
 		goto fail_cmem;
 	}
 
 	/*
 	 * Initialize the transmit job descriptors.
 	 */
 	STAILQ_INIT(&sc->sc_txfreeq);
 	STAILQ_INIT(&sc->sc_txdirtyq);
 
 	/*
 	 * Create the transmit buffer DMA maps.
 	 */
 	/* NOTE(review): overwritten by the first bus_dmamap_create() below. */
 	error = ENOMEM;
 	for (i = 0; i < GEM_TXQUEUELEN; i++) {
 		txs = &sc->sc_txsoft[i];
 		txs->txs_mbuf = NULL;
 		txs->txs_ndescs = 0;
 		if ((error = bus_dmamap_create(sc->sc_tdmatag, 0,
 		    &txs->txs_dmamap)) != 0) {
 			device_printf(sc->sc_dev,
 			    "unable to create TX DMA map %d, error = %d\n",
 			    i, error);
 			goto fail_txd;
 		}
 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
 	}
 
 	/*
 	 * Create the receive buffer DMA maps.
 	 */
 	for (i = 0; i < GEM_NRXDESC; i++) {
 		if ((error = bus_dmamap_create(sc->sc_rdmatag, 0,
 		    &sc->sc_rxsoft[i].rxs_dmamap)) != 0) {
 			device_printf(sc->sc_dev,
 			    "unable to create RX DMA map %d, error = %d\n",
 			    i, error);
 			goto fail_rxd;
 		}
 		sc->sc_rxsoft[i].rxs_mbuf = NULL;
 	}
 
 	/* Bypass probing PHYs if we already know for sure to use a SERDES. */
 	if ((sc->sc_flags & GEM_SERDES) != 0)
 		goto serdes;
 
 	/* Bad things will happen when touching this register on ERI. */
 	if (sc->sc_variant != GEM_SUN_ERI) {
 		GEM_BANK1_WRITE_4(sc, GEM_MII_DATAPATH_MODE,
 		    GEM_MII_DATAPATH_MII);
 		GEM_BANK1_BARRIER(sc, GEM_MII_DATAPATH_MODE, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	}
 
 	gem_mifinit(sc);
 
 	/*
 	 * Look for an external PHY.
 	 */
 	/* error stays ENXIO if no attach attempt below is made. */
 	error = ENXIO;
 	v = GEM_BANK1_READ_4(sc, GEM_MIF_CONFIG);
 	if ((v & GEM_MIF_CONFIG_MDI1) != 0) {
 		v |= GEM_MIF_CONFIG_PHY_SEL;
 		GEM_BANK1_WRITE_4(sc, GEM_MIF_CONFIG, v);
 		GEM_BANK1_BARRIER(sc, GEM_MIF_CONFIG, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		switch (sc->sc_variant) {
 		case GEM_SUN_ERI:
 			phy = GEM_PHYAD_EXTERNAL;
 			break;
 		default:
 			phy = MII_PHY_ANY;
 			break;
 		}
 		error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
 		    gem_mediachange, gem_mediastatus, BMSR_DEFCAPMASK, phy,
-		    MII_OFFSET_ANY, 0);
+		    MII_OFFSET_ANY, MIIF_DOPAUSE);
 	}
 
 	/*
 	 * Fall back on an internal PHY if no external PHY was found.
 	 * Note that with Apple (K2) GMACs GEM_MIF_CONFIG_MDI0 can't be
 	 * trusted when the firmware has powered down the chip.
 	 */
 	if (error != 0 &&
 	    ((v & GEM_MIF_CONFIG_MDI0) != 0 || GEM_IS_APPLE(sc))) {
 		v &= ~GEM_MIF_CONFIG_PHY_SEL;
 		GEM_BANK1_WRITE_4(sc, GEM_MIF_CONFIG, v);
 		GEM_BANK1_BARRIER(sc, GEM_MIF_CONFIG, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		switch (sc->sc_variant) {
 		case GEM_SUN_ERI:
 		case GEM_APPLE_K2_GMAC:
 			phy = GEM_PHYAD_INTERNAL;
 			break;
 		case GEM_APPLE_GMAC:
 			phy = GEM_PHYAD_EXTERNAL;
 			break;
 		default:
 			phy = MII_PHY_ANY;
 			break;
 		}
 		error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
 		    gem_mediachange, gem_mediastatus, BMSR_DEFCAPMASK, phy,
-		    MII_OFFSET_ANY, 0);
+		    MII_OFFSET_ANY, MIIF_DOPAUSE);
 	}
 
 	/*
 	 * Try the external PCS SERDES if we didn't find any PHYs.
 	 */
 	if (error != 0 && sc->sc_variant == GEM_SUN_GEM) {
  serdes:
 		GEM_BANK1_WRITE_4(sc, GEM_MII_DATAPATH_MODE,
 		    GEM_MII_DATAPATH_SERDES);
 		GEM_BANK1_BARRIER(sc, GEM_MII_DATAPATH_MODE, 4,
 		    BUS_SPACE_BARRIER_WRITE);
 		GEM_BANK1_WRITE_4(sc, GEM_MII_SLINK_CONTROL,
 		    GEM_MII_SLINK_LOOPBACK | GEM_MII_SLINK_EN_SYNC_D);
 		GEM_BANK1_BARRIER(sc, GEM_MII_SLINK_CONTROL, 4,
 		    BUS_SPACE_BARRIER_WRITE);
 		GEM_BANK1_WRITE_4(sc, GEM_MII_CONFIG, GEM_MII_CONFIG_ENABLE);
 		GEM_BANK1_BARRIER(sc, GEM_MII_CONFIG, 4,
 		    BUS_SPACE_BARRIER_WRITE);
 		sc->sc_flags |= GEM_SERDES;
 		error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
 		    gem_mediachange, gem_mediastatus, BMSR_DEFCAPMASK,
-		    GEM_PHYAD_EXTERNAL, MII_OFFSET_ANY, 0);
+		    GEM_PHYAD_EXTERNAL, MII_OFFSET_ANY, MIIF_DOPAUSE);
 	}
 	if (error != 0) {
 		device_printf(sc->sc_dev, "attaching PHYs failed\n");
 		goto fail_rxd;
 	}
 	sc->sc_mii = device_get_softc(sc->sc_miibus);
 
 	/*
 	 * From this point forward, the attachment cannot fail.  A failure
 	 * before this point releases all resources that may have been
 	 * allocated.
 	 */
 
 	/* Get RX FIFO size. */
 	sc->sc_rxfifosize = 64 *
 	    GEM_BANK1_READ_4(sc, GEM_RX_FIFO_SIZE);
 
 	/* Get TX FIFO size. */
 	v = GEM_BANK1_READ_4(sc, GEM_TX_FIFO_SIZE);
 	device_printf(sc->sc_dev, "%ukB RX FIFO, %ukB TX FIFO\n",
 	    sc->sc_rxfifosize / 1024, v / 16);
 
 	/* Attach the interface. */
 	ether_ifattach(ifp, sc->sc_enaddr);
 
 	/*
 	 * Tell the upper layer(s) we support long frames/checksum offloads.
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
 	ifp->if_hwassist |= sc->sc_csum_features;
 	ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
 
 	return (0);
 
 	/*
 	 * Free any resources we've allocated during the failed attach
 	 * attempt.  Do this in reverse order and fall through.
 	 */
  fail_rxd:
 	for (i = 0; i < GEM_NRXDESC; i++)
 		if (sc->sc_rxsoft[i].rxs_dmamap != NULL)
 			bus_dmamap_destroy(sc->sc_rdmatag,
 			    sc->sc_rxsoft[i].rxs_dmamap);
  fail_txd:
 	for (i = 0; i < GEM_TXQUEUELEN; i++)
 		if (sc->sc_txsoft[i].txs_dmamap != NULL)
 			bus_dmamap_destroy(sc->sc_tdmatag,
 			    sc->sc_txsoft[i].txs_dmamap);
 	bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap);
  fail_cmem:
 	bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data,
 	    sc->sc_cddmamap);
  fail_ctag:
 	bus_dma_tag_destroy(sc->sc_cdmatag);
  fail_ttag:
 	bus_dma_tag_destroy(sc->sc_tdmatag);
  fail_rtag:
 	bus_dma_tag_destroy(sc->sc_rdmatag);
  fail_ptag:
 	bus_dma_tag_destroy(sc->sc_pdmatag);
  fail_ifnet:
 	if_free(ifp);
 	return (error);
 }
 
 /*
  * Undo gem_attach(): detach from the network stack, stop the chip,
  * drain the callouts, free the ifnet and the MII child, then destroy
  * the per-buffer DMA maps, the control data and all DMA tags.
  */
 void
 gem_detach(struct gem_softc *sc)
 {
 	struct ifnet *ifp;
 	int n;
 
 	ifp = sc->sc_ifp;
 
 	/* Detach from the stack first, then quiesce the hardware. */
 	ether_ifdetach(ifp);
 	GEM_LOCK(sc);
 	gem_stop(ifp, 1);
 	GEM_UNLOCK(sc);
 
 	callout_drain(&sc->sc_tick_ch);
 #ifdef GEM_RINT_TIMEOUT
 	callout_drain(&sc->sc_rx_ch);
 #endif
 	if_free(ifp);
 	device_delete_child(sc->sc_dev, sc->sc_miibus);
 
 	/* Destroy whichever RX and TX DMA maps were created. */
 	for (n = 0; n < GEM_NRXDESC; n++) {
 		if (sc->sc_rxsoft[n].rxs_dmamap == NULL)
 			continue;
 		bus_dmamap_destroy(sc->sc_rdmatag,
 		    sc->sc_rxsoft[n].rxs_dmamap);
 	}
 	for (n = 0; n < GEM_TXQUEUELEN; n++) {
 		if (sc->sc_txsoft[n].txs_dmamap == NULL)
 			continue;
 		bus_dmamap_destroy(sc->sc_tdmatag,
 		    sc->sc_txsoft[n].txs_dmamap);
 	}
 
 	/* Release the control data, then the tags. */
 	GEM_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap);
 	bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data,
 	    sc->sc_cddmamap);
 	bus_dma_tag_destroy(sc->sc_cdmatag);
 	bus_dma_tag_destroy(sc->sc_tdmatag);
 	bus_dma_tag_destroy(sc->sc_rdmatag);
 	bus_dma_tag_destroy(sc->sc_pdmatag);
 }
 
 /*
  * Suspend hook: stop the interface under the softc lock.  gem_stop()
  * is called with disable == 0, so the RX buffers are not drained.
  */
 void
 gem_suspend(struct gem_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->sc_ifp;
 	GEM_LOCK(sc);
 	gem_stop(ifp, 0);
 	GEM_UNLOCK(sc);
 }
 
 /*
  * Resume hook: clear GEM_INITED so that the registers get programmed
  * again like after power-on, and re-initialize the interface if it is
  * marked IFF_UP.  Runs under the softc lock.
  */
 void
 gem_resume(struct gem_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->sc_ifp;
 	GEM_LOCK(sc);
 	/* Force a complete register setup on the next init. */
 	sc->sc_flags &= ~GEM_INITED;
 	if ((ifp->if_flags & IFF_UP) != 0) {
 		gem_init_locked(sc);
 	}
 	GEM_UNLOCK(sc);
 }
 
 /*
  * Hand the hardware RX checksum to the stack for unfragmented IPv4
  * TCP/UDP frames.
  *
  * m     - received frame, starting with the Ethernet header.
  * flags - RX descriptor flags; the bits selected by GEM_RD_CHECKSUM
  *         hold the checksum reported by the chip.
  *
  * The frame is left untouched unless it is IPv4, the IP header length
  * and total length are consistent with the packet length, it is not a
  * fragment, and it carries TCP or UDP (with a non-zero UDP checksum).
  * The 16-bit words of any IP options are subtracted from the inverted
  * hardware checksum, and the result is stored in csum_data with
  * CSUM_DATA_VALID set.
  */
 static inline void
 gem_rxcksum(struct mbuf *m, uint64_t flags)
 {
 	struct ether_header *eh;
 	struct ip *ip;
 	struct udphdr *uh;
 	uint16_t *optp;
 	int32_t hlen, optlen, pktlen;
 	uint32_t sum32;
 	uint16_t cksum, iplen;
 
 	pktlen = m->m_pkthdr.len;
 	if (pktlen < sizeof(struct ether_header) + sizeof(struct ip))
 		return;
 	eh = mtod(m, struct ether_header *);
 	if (eh->ether_type != htons(ETHERTYPE_IP))
 		return;
 	ip = (struct ip *)(eh + 1);
 	if (ip->ip_v != IPVERSION)
 		return;
 
 	/* Validate the IP header length against the frame. */
 	hlen = ip->ip_hl << 2;
 	pktlen -= sizeof(struct ether_header);
 	if (hlen < sizeof(struct ip))
 		return;
 	iplen = ntohs(ip->ip_len);
 	if (iplen < hlen || iplen != pktlen)
 		return;
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
 		return;	/* Cannot handle fragmented packet. */
 
 	if (ip->ip_p == IPPROTO_TCP) {
 		if (pktlen < (hlen + sizeof(struct tcphdr)))
 			return;
 	} else if (ip->ip_p == IPPROTO_UDP) {
 		if (pktlen < (hlen + sizeof(struct udphdr)))
 			return;
 		uh = (struct udphdr *)((uint8_t *)ip + hlen);
 		if (uh->uh_sum == 0)
 			return; /* no checksum */
 	} else {
 		return;
 	}
 
 	cksum = ~(flags & GEM_RD_CHECKSUM);
 	/* checksum fixup for IP options */
 	optlen = hlen - sizeof(struct ip);
 	optp = (uint16_t *)(ip + 1);
 	while (optlen > 0) {
 		sum32 = cksum - *optp;
 		sum32 = (sum32 >> 16) + (sum32 & 0xffff);
 		cksum = sum32 & 0xffff;
 		optlen -= sizeof(uint16_t);
 		optp++;
 	}
 	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
 	m->m_pkthdr.csum_data = cksum;
 }
 
 /*
  * bus_dmamap_load() callback for the control data.  On success the
  * address of the single DMA segment is recorded in sc_cddma; on error
  * nothing is stored.  More or fewer than one segment is a panic.
  */
 static void
 gem_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct gem_softc *softc;
 
 	if (error == 0) {
 		if (nsegs != 1)
 			panic("%s: bad control buffer segment count",
 			    __func__);
 		softc = xsc;
 		softc->sc_cddma = segs->ds_addr;
 	}
 }
 
 /*
  * Periodic tick callout; re-arms itself to run again after hz ticks.
  * Must be called with the softc mutex held (asserted below).
  *
  * Adds the MAC collision and error counters to the ifnet statistics,
  * zeroes the hardware counters, ticks the MII layer and runs the
  * watchdog.  If gem_watchdog() returns EJUSTRETURN the callout is not
  * re-armed here.
  */
 static void
 gem_tick(void *arg)
 {
 	struct gem_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t v;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	/*
 	 * Unload collision and error counters.
 	 */
 	ifp->if_collisions +=
 	    GEM_BANK1_READ_4(sc, GEM_MAC_NORM_COLL_CNT) +
 	    GEM_BANK1_READ_4(sc, GEM_MAC_FIRST_COLL_CNT);
 	/* Excess and late collisions count as collisions and output errors. */
 	v = GEM_BANK1_READ_4(sc, GEM_MAC_EXCESS_COLL_CNT) +
 	    GEM_BANK1_READ_4(sc, GEM_MAC_LATE_COLL_CNT);
 	ifp->if_collisions += v;
 	ifp->if_oerrors += v;
 	ifp->if_ierrors +=
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_LEN_ERR_CNT) +
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_ALIGN_ERR) +
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_CRC_ERR_CNT) +
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_CODE_VIOL);
 
 	/*
 	 * Then clear the hardware counters.
 	 */
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_NORM_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_FIRST_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_EXCESS_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_LATE_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_LEN_ERR_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_ALIGN_ERR, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CRC_ERR_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CODE_VIOL, 0);
 
 	mii_tick(sc->sc_mii);
 
 	/* Skip re-arming when the watchdog asks us to just return. */
 	if (gem_watchdog(sc) == EJUSTRETURN)
 		return;
 
 	callout_reset(&sc->sc_tick_ch, hz, gem_tick, sc);
 }
 
 /*
  * Poll register r of the given bank until every bit in clr reads as
  * zero and every bit in set reads as one.
  *
  * The register is sampled up to GEM_TRIES times with DELAY(100) after
  * each unsuccessful sample.  Returns 1 once the condition holds and 0
  * if it never did.
  */
 static int
 gem_bitwait(struct gem_softc *sc, u_int bank, bus_addr_t r, uint32_t clr,
     uint32_t set)
 {
 	uint32_t val;
 	int tries;
 
 	tries = GEM_TRIES;
 	while (tries-- != 0) {
 		val = GEM_BANKN_READ_M(bank, 4, sc, r);
 		if ((val & clr) == 0 && (val & set) == set)
 			return (1);
 		DELAY(100);
 	}
 	return (0);
 }
 
 /*
  * Reset the chip: reset the receiver and the transmitter individually,
  * then set both GEM_RESET_RX and GEM_RESET_TX in GEM_RESET and wait
  * for the two bits to read back as zero.  A reset that does not
  * complete within the gem_bitwait() polling budget is only reported;
  * no error is returned to the caller.
  */
 static void
 gem_reset(struct gem_softc *sc)
 {
 
 #ifdef GEM_DEBUG
 	CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 	/* Return values are ignored; failures are logged by the callees. */
 	gem_reset_rx(sc);
 	gem_reset_tx(sc);
 
 	/* Do a full reset. */
 	GEM_BANK2_WRITE_4(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX);
 	GEM_BANK2_BARRIER(sc, GEM_RESET, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK2_BITWAIT(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX, 0))
 		device_printf(sc->sc_dev, "cannot reset device\n");
 }
 
 /*
  * Release every mbuf still attached to an RX slot: sync and unload the
  * slot's DMA map, free the mbuf and mark the slot empty.  Slots with
  * no mbuf are skipped.
  */
 static void
 gem_rxdrain(struct gem_softc *sc)
 {
 	struct gem_rxsoft *slot;
 	int n;
 
 	for (n = 0; n < GEM_NRXDESC; n++) {
 		slot = &sc->sc_rxsoft[n];
 		if (slot->rxs_mbuf == NULL)
 			continue;
 		bus_dmamap_sync(sc->sc_rdmatag, slot->rxs_dmamap,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->sc_rdmatag, slot->rxs_dmamap);
 		m_freem(slot->rxs_mbuf);
 		slot->rxs_mbuf = NULL;
 	}
 }
 
 static void
 gem_stop(struct ifnet *ifp, int disable)
 {
 	struct gem_softc *sc = ifp->if_softc;
 	struct gem_txsoft *txs;
 
 #ifdef GEM_DEBUG
 	CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 
 	callout_stop(&sc->sc_tick_ch);
 #ifdef GEM_RINT_TIMEOUT
 	callout_stop(&sc->sc_rx_ch);
 #endif
 
 	gem_reset_tx(sc);
 	gem_reset_rx(sc);
 
 	/*
 	 * Release any queued transmit buffers.
 	 */
 	while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) {
 		STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q);
 		if (txs->txs_ndescs != 0) {
 			bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
 			if (txs->txs_mbuf != NULL) {
 				m_freem(txs->txs_mbuf);
 				txs->txs_mbuf = NULL;
 			}
 		}
 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
 	}
 
 	if (disable)
 		gem_rxdrain(sc);
 
 	/*
 	 * Mark the interface down and cancel the watchdog timer.
 	 */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 	sc->sc_flags &= ~GEM_LINK;
 	sc->sc_wdog_timer = 0;
 }
 
 /*
  * Reset the receiver.
  *
  * Disables the RX MAC and RX DMA, waits an extra 5ms, then sets
  * GEM_RESET_RX in GEM_RESET and waits for both reset bits to read back
  * as zero.  A failure to disable RX DMA is only logged.
  *
  * Returns 0 on success and 1 if the reset did not complete within the
  * gem_bitwait() polling budget.
  */
 static int
 gem_reset_rx(struct gem_softc *sc)
 {
 
 	/*
 	 * Resetting while DMA is in progress can cause a bus hang, so we
 	 * disable DMA first.
 	 */
 	gem_disable_rx(sc);
 	GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG, 0);
 	GEM_BANK1_BARRIER(sc, GEM_RX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK1_BITWAIT(sc, GEM_RX_CONFIG, GEM_RX_CONFIG_RXDMA_EN, 0))
 		device_printf(sc->sc_dev, "cannot disable RX DMA\n");
 
+	/* Wait 5ms extra. */
+	DELAY(5000);
+
 	/* Finally, reset the ERX. */
 	GEM_BANK2_WRITE_4(sc, GEM_RESET, GEM_RESET_RX);
 	GEM_BANK2_BARRIER(sc, GEM_RESET, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK2_BITWAIT(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX,
 	    0)) {
 		device_printf(sc->sc_dev, "cannot reset receiver\n");
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Reset the receiver DMA engine.
  *
  * Intended to be used in case of GEM_INTR_RX_TAG_ERR, GEM_MAC_RX_OVERFLOW
  * etc in order to reset the receiver DMA engine only and not do a full
  * reset which amongst others also downs the link and clears the FIFOs.
  */
 static void
 gem_reset_rxdma(struct gem_softc *sc)
 {
 	int i;
 
 	if (gem_reset_rx(sc) != 0)
 		return (gem_init_locked(sc));
 	for (i = 0; i < GEM_NRXDESC; i++)
 		if (sc->sc_rxsoft[i].rxs_mbuf != NULL)
 			GEM_UPDATE_RXDESC(sc, i);
 	sc->sc_rxptr = 0;
 	GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/* NOTE: we use only 32-bit DMA addresses here. */
 	GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_HI, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_LO, GEM_CDRXADDR(sc, 0));
 	GEM_BANK1_WRITE_4(sc, GEM_RX_KICK, GEM_NRXDESC - 4);
 	GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG,
 	    gem_ringsize(GEM_NRXDESC /* XXX */) |
 	    ((ETHER_HDR_LEN + sizeof(struct ip)) <<
 	    GEM_RX_CONFIG_CXM_START_SHFT) |
 	    (GEM_THRSH_1024 << GEM_RX_CONFIG_FIFO_THRS_SHIFT) |
 	    (ETHER_ALIGN << GEM_RX_CONFIG_FBOFF_SHFT));
 	/* Adjust for the SBus clock probably isn't worth the fuzz. */
 	GEM_BANK1_WRITE_4(sc, GEM_RX_BLANKING,
 	    ((6 * (sc->sc_flags & GEM_PCI66) != 0 ? 2 : 1) <<
 	    GEM_RX_BLANKING_TIME_SHIFT) | 6);
 	GEM_BANK1_WRITE_4(sc, GEM_RX_PAUSE_THRESH,
 	    (3 * sc->sc_rxfifosize / 256) |
 	    ((sc->sc_rxfifosize / 256) << 12));
 	GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG,
 	    GEM_BANK1_READ_4(sc, GEM_RX_CONFIG) | GEM_RX_CONFIG_RXDMA_EN);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_MASK,
 	    GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG,
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG) | GEM_MAC_RX_ENABLE);
 }
 
 /*
  * Reset the transmitter.
  *
  * The TX MAC and TX DMA are disabled first, then GEM_RESET_TX is written
  * to the reset register and GEM_BANK2_BITWAIT() is used to poll it.
  *
  * Returns 0 on success and 1 if the reset did not complete.
  */
 static int
 gem_reset_tx(struct gem_softc *sc)
 {
 
 	/*
 	 * Resetting while DMA is in progress can cause a bus hang, so we
 	 * disable DMA first.
 	 */
 	/* The return value of gem_disable_tx() is not checked here. */
 	gem_disable_tx(sc);
 	GEM_BANK1_WRITE_4(sc, GEM_TX_CONFIG, 0);
 	GEM_BANK1_BARRIER(sc, GEM_TX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	/* A failure to stop TX DMA is only reported; the reset proceeds. */
 	if (!GEM_BANK1_BITWAIT(sc, GEM_TX_CONFIG, GEM_TX_CONFIG_TXDMA_EN, 0))
 		device_printf(sc->sc_dev, "cannot disable TX DMA\n");
 
+	/* Wait 5ms extra. */
+	DELAY(5000);
+
 	/* Finally, reset the ETX. */
 	GEM_BANK2_WRITE_4(sc, GEM_RESET, GEM_RESET_TX);
 	GEM_BANK2_BARRIER(sc, GEM_RESET, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	/* Both the RX and TX reset bits are polled, not just GEM_RESET_TX. */
 	if (!GEM_BANK2_BITWAIT(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX,
 	    0)) {
 		device_printf(sc->sc_dev, "cannot reset transmitter\n");
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Clear GEM_MAC_RX_ENABLE in the RX MAC configuration register and
  * return the result of GEM_BANK1_BITWAIT() polling on that bit.
  */
 static int
 gem_disable_rx(struct gem_softc *sc)
 {
 	uint32_t rxcfg;
 
 	rxcfg = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG);
 	rxcfg &= ~GEM_MAC_RX_ENABLE;
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, rxcfg);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 
 	return (GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_ENABLE,
 	    0));
 }
 
 /*
  * Clear GEM_MAC_TX_ENABLE in the TX MAC configuration register and
  * return the result of GEM_BANK1_BITWAIT() polling on that bit.
  */
 static int
 gem_disable_tx(struct gem_softc *sc)
 {
 	uint32_t txcfg;
 
 	txcfg = GEM_BANK1_READ_4(sc, GEM_MAC_TX_CONFIG);
 	txcfg &= ~GEM_MAC_TX_ENABLE;
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, txcfg);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_TX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 
 	return (GEM_BANK1_BITWAIT(sc, GEM_MAC_TX_CONFIG, GEM_MAC_TX_ENABLE,
 	    0));
 }
 
 /*
  * Prepare the descriptor rings in host memory: zero all TX descriptors,
  * reset the TX/RX ring indices and make sure every RX slot has a buffer.
  *
  * Returns 0 on success, or 1 after draining the RX ring when a receive
  * buffer could not be set up.
  */
 static int
 gem_meminit(struct gem_softc *sc)
 {
 	int idx, rc;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Transmit ring: clear each descriptor and reset the bookkeeping. */
 	for (idx = 0; idx < GEM_NTXDESC; idx++) {
 		sc->sc_txdescs[idx].gd_flags = 0;
 		sc->sc_txdescs[idx].gd_addr = 0;
 	}
 	sc->sc_txfree = GEM_MAXTXFREE;
 	sc->sc_txnext = 0;
 	sc->sc_txwin = 0;
 
 	/* Receive ring: reuse attached buffers, allocate the missing ones. */
 	for (idx = 0; idx < GEM_NRXDESC; idx++) {
 		if (sc->sc_rxsoft[idx].rxs_mbuf != NULL) {
 			GEM_INIT_RXDESC(sc, idx);
 			continue;
 		}
 		rc = gem_add_rxbuf(sc, idx);
 		if (rc == 0)
 			continue;
 		device_printf(sc->sc_dev,
 		    "unable to allocate or map RX buffer %d, "
 		    "error = %d\n", idx, rc);
 		/*
 		 * XXX we should attempt to run with fewer
 		 * receive buffers instead of just failing.
 		 */
 		gem_rxdrain(sc);
 		return (1);
 	}
 	sc->sc_rxptr = 0;
 
 	GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	return (0);
 }
 
 /*
  * Translate a descriptor ring size (number of entries) into the
  * corresponding GEM_RING_SZ_* register encoding.
  *
  * Only the powers of two from 32 to 8192 are accepted; any other value
  * is reported on the console and treated as 32.
  */
 static u_int
 gem_ringsize(u_int sz)
 {
 
 	switch (sz) {
 	case 32:
 		return (GEM_RING_SZ_32);
 	case 64:
 		return (GEM_RING_SZ_64);
 	case 128:
 		return (GEM_RING_SZ_128);
 	case 256:
 		return (GEM_RING_SZ_256);
 	case 512:
 		return (GEM_RING_SZ_512);
 	case 1024:
 		return (GEM_RING_SZ_1024);
 	case 2048:
 		return (GEM_RING_SZ_2048);
 	case 4096:
 		return (GEM_RING_SZ_4096);
 	case 8192:
 		return (GEM_RING_SZ_8192);
 	default:
 		/* sz is unsigned, so it has to be printed with %u. */
 		printf("%s: invalid ring size %u\n", __func__, sz);
 		return (GEM_RING_SZ_32);
 	}
 }
 
 /*
  * Locked wrapper around gem_init_locked(); xsc is the driver softc.
  */
 static void
 gem_init(void *xsc)
 {
 	struct gem_softc *const softc = (struct gem_softc *)xsc;
 
 	GEM_LOCK(softc);
 	gem_init_locked(softc);
 	GEM_UNLOCK(softc);
 }
 
 /*
  * Initialization of interface; set up initialization block
  * and transmit/receive descriptor rings.
  *
  * Must be called with the softc lock held.  The function stops and
  * resets the chip, rebuilds the descriptor rings, programs the DMA and
  * MAC registers, marks the interface IFF_DRV_RUNNING and arms the
  * one second tick callout.  If gem_meminit() fails it returns early
  * without setting IFF_DRV_RUNNING.
  */
 static void
 gem_init_locked(struct gem_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t v;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 #ifdef GEM_DEBUG
 	CTR2(KTR_GEM, "%s: %s: calling stop", device_get_name(sc->sc_dev),
 	    __func__);
 #endif
 	/*
 	 * Initialization sequence.  The numbered steps below correspond
 	 * to the sequence outlined in section 6.3.5.1 in the Ethernet
 	 * Channel Engine manual (part of the PCIO manual).
 	 * See also the STP2002-STQ document from Sun Microsystems.
 	 */
 
 	/* step 1 & 2.  Reset the Ethernet Channel. */
 	gem_stop(ifp, 0);
 	gem_reset(sc);
 #ifdef GEM_DEBUG
 	CTR2(KTR_GEM, "%s: %s: restarting", device_get_name(sc->sc_dev),
 	    __func__);
 #endif
 
 	if ((sc->sc_flags & GEM_SERDES) == 0)
 		/* Re-initialize the MIF. */
 		gem_mifinit(sc);
 
 	/* step 3.  Setup data structures in host memory. */
 	/* gem_meminit() prints its own diagnostic on failure. */
 	if (gem_meminit(sc) != 0)
 		return;
 
 	/* step 4.  TX MAC registers & counters */
 	gem_init_regs(sc);
 
 	/* step 5.  RX MAC registers & counters */
 	gem_setladrf(sc);
 
 	/* step 6 & 7.  Program Descriptor Ring Base Addresses. */
 	/* NOTE: we use only 32-bit DMA addresses here. */
 	GEM_BANK1_WRITE_4(sc, GEM_TX_RING_PTR_HI, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_TX_RING_PTR_LO, GEM_CDTXADDR(sc, 0));
 
 	GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_HI, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_LO, GEM_CDRXADDR(sc, 0));
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "loading RX ring %lx, TX ring %lx, cddma %lx",
 	    GEM_CDRXADDR(sc, 0), GEM_CDTXADDR(sc, 0), sc->sc_cddma);
 #endif
 
 	/* step 8.  Global Configuration & Interrupt Mask */
 
 	/*
 	 * Set the internal arbitration to "infinite" bursts of the
 	 * maximum length of 31 * 64 bytes so DMA transfers aren't
 	 * split up in cache line size chunks.  This greatly improves
 	 * RX performance.
 	 * Enable silicon bug workarounds for the Apple variants.
 	 */
 	GEM_BANK1_WRITE_4(sc, GEM_CONFIG,
 	    GEM_CONFIG_TXDMA_LIMIT | GEM_CONFIG_RXDMA_LIMIT |
 	    ((sc->sc_flags & GEM_PCI) != 0 ? GEM_CONFIG_BURST_INF :
 	    GEM_CONFIG_BURST_64) | (GEM_IS_APPLE(sc) ?
 	    GEM_CONFIG_RONPAULBIT | GEM_CONFIG_BUG2FIX : 0));
 
 	/*
 	 * The value written is the complement of the listed interrupt
 	 * bits; PCS and MIF are added to that list for GEM_DEBUG builds.
 	 */
 	GEM_BANK1_WRITE_4(sc, GEM_INTMASK,
 	    ~(GEM_INTR_TX_INTME | GEM_INTR_TX_EMPTY | GEM_INTR_RX_DONE |
 	    GEM_INTR_RX_NOBUF | GEM_INTR_RX_TAG_ERR | GEM_INTR_PERR |
 	    GEM_INTR_BERR
 #ifdef GEM_DEBUG
 	    | GEM_INTR_PCS | GEM_INTR_MIF
 #endif
 	    ));
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_MASK,
 	    GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_MASK,
 	    GEM_MAC_TX_XMIT_DONE | GEM_MAC_TX_DEFER_EXP |
 	    GEM_MAC_TX_PEAK_EXP);
 #ifdef GEM_DEBUG
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_MASK,
 	    ~(GEM_MAC_PAUSED | GEM_MAC_PAUSE | GEM_MAC_RESUME));
 #else
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_MASK,
 	    GEM_MAC_PAUSED | GEM_MAC_PAUSE | GEM_MAC_RESUME);
 #endif
 
 	/* step 9.  ETX Configuration: use mostly default values. */
 
 	/* Enable DMA. */
 	v = gem_ringsize(GEM_NTXDESC);
 	/* Set TX FIFO threshold and enable DMA. */
 	v |= ((sc->sc_variant == GEM_SUN_ERI ? 0x100 : 0x4ff) << 10) &
 	    GEM_TX_CONFIG_TXFIFO_TH;
 	GEM_BANK1_WRITE_4(sc, GEM_TX_CONFIG, v | GEM_TX_CONFIG_TXDMA_EN);
 
 	/* step 10.  ERX Configuration */
 
 	/* Encode Receive Descriptor ring size. */
 	v = gem_ringsize(GEM_NRXDESC /* XXX */);
 	/* RX TCP/UDP checksum offset */
 	v |= ((ETHER_HDR_LEN + sizeof(struct ip)) <<
 	    GEM_RX_CONFIG_CXM_START_SHFT);
 	/* Set RX FIFO threshold, set first byte offset and enable DMA. */
 	GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG,
 	    v | (GEM_THRSH_1024 << GEM_RX_CONFIG_FIFO_THRS_SHIFT) |
 	    (ETHER_ALIGN << GEM_RX_CONFIG_FBOFF_SHFT) |
 	    GEM_RX_CONFIG_RXDMA_EN);
 
 	/* Adjust for the SBus clock probably isn't worth the fuzz. */
 	/*
 	 * NOTE(review): due to operator precedence the expression below
 	 * evaluates as ((6 * (flags & GEM_PCI66)) != 0 ? 2 : 1), i.e. the
 	 * time field is 2 or 1, not 12 or 6.  gem_reset_rxdma() writes the
 	 * same expression; confirm the intended value before changing it.
 	 */
 	GEM_BANK1_WRITE_4(sc, GEM_RX_BLANKING,
 	    ((6 * (sc->sc_flags & GEM_PCI66) != 0 ? 2 : 1) <<
 	    GEM_RX_BLANKING_TIME_SHIFT) | 6);
 
 	/*
 	 * The following value is for an OFF Threshold of about 3/4 full
 	 * and an ON Threshold of 1/4 full.
 	 */
 	GEM_BANK1_WRITE_4(sc, GEM_RX_PAUSE_THRESH,
 	    (3 * sc->sc_rxfifosize / 256) |
 	    ((sc->sc_rxfifosize / 256) << 12));
 
 	/* step 11.  Configure Media. */
 
 	/* step 12.  RX_MAC Configuration Register */
 	/*
 	 * The current configuration is read and kept in v, the register
 	 * is zeroed and polled on the enable bit, and only then is the
 	 * saved configuration written back with the enable bit set.
 	 */
 	v = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG);
 	v |= GEM_MAC_RX_ENABLE | GEM_MAC_RX_STRIP_CRC;
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, 0);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_ENABLE, 0))
 		device_printf(sc->sc_dev, "cannot configure RX MAC\n");
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, v);
 
 	/* step 13.  TX_MAC Configuration Register */
 	/* Same zero / poll / write-back sequence as for the RX MAC. */
 	v = GEM_BANK1_READ_4(sc, GEM_MAC_TX_CONFIG);
 	v |= GEM_MAC_TX_ENABLE;
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, 0);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_TX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_TX_CONFIG, GEM_MAC_TX_ENABLE, 0))
 		device_printf(sc->sc_dev, "cannot configure TX MAC\n");
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, v);
 
 	/* step 14.  Issue Transmit Pending command. */
 
 	/* step 15.  Give the receiver a swift kick. */
 	GEM_BANK1_WRITE_4(sc, GEM_RX_KICK, GEM_NRXDESC - 4);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	mii_mediachg(sc->sc_mii);
 
 	/* Start the one second timer. */
 	sc->sc_wdog_timer = 0;
 	callout_reset(&sc->sc_tick_ch, hz, gem_tick, sc);
 }
 
 /*
  * Map the mbuf chain *m_head for DMA and fill in TX descriptors for it.
  *
  * On success (return 0) the chain is owned by the first free txsoft
  * entry, which is moved to the dirty queue, and sc_txnext/sc_txfree are
  * advanced.  On failure an errno value is returned; depending on the
  * failure *m_head is either set to NULL (the chain is gone) or left
  * pointing at a chain the caller still owns, so callers must test
  * *m_head after an error.  The chain may be replaced (m_dup, m_pullup,
  * m_collapse), which is why a pointer to the head pointer is taken.
  */
 static int
 gem_load_txmbuf(struct gem_softc *sc, struct mbuf **m_head)
 {
 	bus_dma_segment_t txsegs[GEM_NTXSEGS];
 	struct gem_txsoft *txs;
 	struct ip *ip;
 	struct mbuf *m;
 	uint64_t cflags, flags;
 	int error, nexttx, nsegs, offset, seg;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Get a work queue entry. */
 	if ((txs = STAILQ_FIRST(&sc->sc_txfreeq)) == NULL) {
 		/* Ran out of descriptors. */
 		return (ENOBUFS);
 	}
 
 	/*
 	 * If checksum offloading was requested for this packet, make the
 	 * headers writable and contiguous and build the checksum control
 	 * bits (start offset, stuff offset, enable) for the descriptors.
 	 */
 	cflags = 0;
 	if (((*m_head)->m_pkthdr.csum_flags & sc->sc_csum_features) != 0) {
 		if (M_WRITABLE(*m_head) == 0) {
 			m = m_dup(*m_head, M_DONTWAIT);
 			m_freem(*m_head);
 			*m_head = m;
 			if (m == NULL)
 				return (ENOBUFS);
 		}
 		offset = sizeof(struct ether_header);
 		m = m_pullup(*m_head, offset + sizeof(struct ip));
 		if (m == NULL) {
 			/* presumably m_pullup() freed the chain; verify */
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		ip = (struct ip *)(mtod(m, caddr_t) + offset);
 		offset += (ip->ip_hl << 2);
 		cflags = offset << GEM_TD_CXSUM_STARTSHFT |
 		    ((offset + m->m_pkthdr.csum_data) <<
 		    GEM_TD_CXSUM_STUFFSHFT) | GEM_TD_CXSUM_ENABLE;
 		*m_head = m;
 	}
 
 	/*
 	 * Load the DMA map.  If the chain has too many segments (EFBIG)
 	 * it is collapsed to at most GEM_NTXSEGS mbufs and the load is
 	 * retried once.
 	 */
 	error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap,
 	    *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT);
 	if (error == EFBIG) {
 		m = m_collapse(*m_head, M_DONTWAIT, GEM_NTXSEGS);
 		if (m == NULL) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (ENOBUFS);
 		}
 		*m_head = m;
 		error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag,
 		    txs->txs_dmamap, *m_head, txsegs, &nsegs,
 		    BUS_DMA_NOWAIT);
 		if (error != 0) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (error);
 		}
 	} else if (error != 0)
 		/* *m_head is left intact for the caller on this path. */
 		return (error);
 	/* If nsegs is wrong then the stack is corrupt. */
 	KASSERT(nsegs <= GEM_NTXSEGS,
 	    ("%s: too many DMA segments (%d)", __func__, nsegs));
 	if (nsegs == 0) {
 		m_freem(*m_head);
 		*m_head = NULL;
 		return (EIO);
 	}
 
 	/*
 	 * Ensure we have enough descriptors free to describe
 	 * the packet.  Note, we always reserve one descriptor
 	 * at the end of the ring as a termination point, in
 	 * order to prevent wrap-around.
 	 */
 	if (nsegs > sc->sc_txfree - 1) {
 		/* The map is unloaded but *m_head stays with the caller. */
 		txs->txs_ndescs = 0;
 		bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
 		return (ENOBUFS);
 	}
 
 	/* Fill one descriptor per DMA segment, starting at sc_txnext. */
 	txs->txs_ndescs = nsegs;
 	txs->txs_firstdesc = sc->sc_txnext;
 	nexttx = txs->txs_firstdesc;
 	for (seg = 0; seg < nsegs; seg++, nexttx = GEM_NEXTTX(nexttx)) {
 #ifdef GEM_DEBUG
 		CTR6(KTR_GEM,
 		    "%s: mapping seg %d (txd %d), len %lx, addr %#lx (%#lx)",
 		    __func__, seg, nexttx, txsegs[seg].ds_len,
 		    txsegs[seg].ds_addr,
 		    GEM_DMA_WRITE(sc, txsegs[seg].ds_addr));
 #endif
 		sc->sc_txdescs[nexttx].gd_addr =
 		    GEM_DMA_WRITE(sc, txsegs[seg].ds_addr);
 		KASSERT(txsegs[seg].ds_len < GEM_TD_BUFSIZE,
 		    ("%s: segment size too large!", __func__));
 		flags = txsegs[seg].ds_len & GEM_TD_BUFSIZE;
 		sc->sc_txdescs[nexttx].gd_flags =
 		    GEM_DMA_WRITE(sc, flags | cflags);
 		txs->txs_lastdesc = nexttx;
 	}
 
 	/* Set EOP on the last descriptor. */
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "%s: end of packet at segment %d, TX %d",
 	    __func__, seg, nexttx);
 #endif
 	sc->sc_txdescs[txs->txs_lastdesc].gd_flags |=
 	    GEM_DMA_WRITE(sc, GEM_TD_END_OF_PACKET);
 
 	/* Lastly set SOP on the first descriptor. */
 #ifdef GEM_DEBUG
 	/*
 	 * NOTE(review): this trace prints seg/nexttx as left by the loop
 	 * above, not txs_firstdesc.
 	 */
 	CTR3(KTR_GEM, "%s: start of packet at segment %d, TX %d",
 	    __func__, seg, nexttx);
 #endif
 	/*
 	 * A completion interrupt (GEM_TD_INTERRUPT_ME) is only requested
 	 * when the sc_txwin packet counter exceeds GEM_NTXSEGS * 2 / 3;
 	 * the counter is reset each time that happens.
 	 */
 	if (++sc->sc_txwin > GEM_NTXSEGS * 2 / 3) {
 		sc->sc_txwin = 0;
 		sc->sc_txdescs[txs->txs_firstdesc].gd_flags |=
 		    GEM_DMA_WRITE(sc, GEM_TD_INTERRUPT_ME |
 		    GEM_TD_START_OF_PACKET);
 	} else
 		sc->sc_txdescs[txs->txs_firstdesc].gd_flags |=
 		    GEM_DMA_WRITE(sc, GEM_TD_START_OF_PACKET);
 
 	/* Sync the DMA map. */
 	bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 
 #ifdef GEM_DEBUG
 	CTR4(KTR_GEM, "%s: setting firstdesc=%d, lastdesc=%d, ndescs=%d",
 	    __func__, txs->txs_firstdesc, txs->txs_lastdesc,
 	    txs->txs_ndescs);
 #endif
 	/* Move the txsoft entry from the free to the dirty queue. */
 	STAILQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q);
 	STAILQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q);
 	txs->txs_mbuf = *m_head;
 
 	sc->sc_txnext = GEM_NEXTTX(txs->txs_lastdesc);
 	sc->sc_txfree -= txs->txs_ndescs;
 
 	return (0);
 }
 
 /*
  * Program the MAC registers.
  *
  * The first group of registers is written only once per softc (guarded
  * by the GEM_INITED flag); the counters, the PAUSE time, the station
  * address and the XIF configuration are written on every call.
  * Must be called with the softc lock held.
  */
 static void
 gem_init_regs(struct gem_softc *sc)
 {
 	const u_char *laddr = IF_LLADDR(sc->sc_ifp);
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* These registers are not cleared on reset. */
 	if ((sc->sc_flags & GEM_INITED) == 0) {
 		/* magic values */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_IPG0, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_IPG1, 8);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_IPG2, 4);
 
 		/* min frame length */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_MAC_MIN_FRAME, ETHER_MIN_LEN);
 		/* max frame length and max burst size */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_MAC_MAX_FRAME,
 		    (ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN) | (0x2000 << 16));
 
 		/* more magic values */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_PREAMBLE_LEN, 0x7);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_JAM_SIZE, 0x4);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ATTEMPT_LIMIT, 0x10);
-		GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_TYPE, 0x8088);
+		GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_TYPE, 0x8808);
 
 		/* random number seed */
 		/* Built from the last two station address bytes, 10 bits. */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_RANDOM_SEED,
 		    ((laddr[5] << 8) | laddr[4]) & 0x3ff);
 
 		/* secondary MAC address: 0:0:0:0:0:0 */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR3, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR4, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR5, 0);
 
 		/* MAC control address: 01:80:c2:00:00:01 */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR6, 0x0001);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR7, 0xc200);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR8, 0x0180);
 
 		/* MAC filter address: 0:0:0:0:0:0 */
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR_FILTER0, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR_FILTER1, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR_FILTER2, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADR_FLT_MASK1_2, 0);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_ADR_FLT_MASK0, 0);
 
 		/* Remember that the one-time setup has been done. */
 		sc->sc_flags |= GEM_INITED;
 	}
 
 	/* Counters need to be zeroed. */
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_NORM_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_FIRST_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_EXCESS_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_LATE_COLL_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_DEFER_TMR_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_PEAK_ATTEMPTS, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_FRAME_COUNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_LEN_ERR_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_ALIGN_ERR, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CRC_ERR_CNT, 0);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CODE_VIOL, 0);
 
 	/* Set XOFF PAUSE time. */
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_SEND_PAUSE_CMD, 0x1BF0);
 
 	/* Set the station address. */
 	/* Two address bytes per register, last byte pair in ADDR0. */
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR0, (laddr[4] << 8) | laddr[5]);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR1, (laddr[2] << 8) | laddr[3]);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR2, (laddr[0] << 8) | laddr[1]);
 
 	/* Enable MII outputs. */
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_XIF_CONFIG, GEM_MAC_XIF_TX_MII_ENA);
 }
 
 /*
  * Locked wrapper around gem_start_locked() for the given interface.
  */
 static void
 gem_start(struct ifnet *ifp)
 {
 	struct gem_softc *const softc = ifp->if_softc;
 
 	GEM_LOCK(softc);
 	gem_start_locked(ifp);
 	GEM_UNLOCK(softc);
 }
 
 /*
  * Hand all descriptors queued so far to the chip by writing sc_txnext
  * to the TX kick register.
  */
 static inline void
 gem_txkick(struct gem_softc *sc)
 {
 
 	/*
 	 * Update the TX kick register.  This register has to point to the
 	 * descriptor after the last valid one and for optimum performance
 	 * should be incremented in multiples of 4 (the DMA engine fetches/
 	 * updates descriptors in batches of 4).
 	 */
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "%s: %s: kicking TX %d",
 	    device_get_name(sc->sc_dev), __func__, sc->sc_txnext);
 #endif
 	/* Sync the descriptor memory before the chip is told about it. */
 	GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	GEM_BANK1_WRITE_4(sc, GEM_TX_KICK, sc->sc_txnext);
 }
 
 /*
  * Drain the interface send queue into the TX ring while more than one
  * descriptor is free, kicking the chip as packets are queued and arming
  * the watchdog if anything was enqueued.  Caller holds the softc lock.
  */
 static void
 gem_start_locked(struct ifnet *ifp)
 {
 	struct gem_softc *sc = ifp->if_softc;
 	struct mbuf *m;
 	int kicked, ntx;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	/* Only proceed when running, not output-active and the link is up. */
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return;
 	if ((sc->sc_flags & GEM_LINK) == 0)
 		return;
 
 #ifdef GEM_DEBUG
 	CTR4(KTR_GEM, "%s: %s: txfree %d, txnext %d",
 	    device_get_name(sc->sc_dev), __func__, sc->sc_txfree,
 	    sc->sc_txnext);
 #endif
 	kicked = 0;
 	ntx = 0;
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->sc_txfree > 1) {
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
 		if (m == NULL)
 			break;
 		if (gem_load_txmbuf(sc, &m) != 0) {
 			/*
 			 * If the mbuf survived the failure, put it back and
 			 * mark the interface output-active.
 			 */
 			if (m != NULL) {
 				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 				IFQ_DRV_PREPEND(&ifp->if_snd, m);
 			}
 			break;
 		}
 		/* Kick whenever the next index is a multiple of 4. */
 		kicked = ((sc->sc_txnext % 4) == 0);
 		if (kicked)
 			gem_txkick(sc);
 		ntx++;
 		BPF_MTAP(ifp, m);
 	}
 
 	if (ntx == 0)
 		return;
 
 	/* Make sure the last packet queued has been handed to the chip. */
 	if (!kicked)
 		gem_txkick(sc);
 #ifdef GEM_DEBUG
 	CTR2(KTR_GEM, "%s: packets enqueued, OWN on %d",
 	    device_get_name(sc->sc_dev), sc->sc_txnext);
 #endif
 
 	/* Set a watchdog timer in case the chip flakes out. */
 	sc->sc_wdog_timer = 5;
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "%s: %s: watchdog %d",
 	    device_get_name(sc->sc_dev), __func__,
 	    sc->sc_wdog_timer);
 #endif
 }
 
 static void
 gem_tint(struct gem_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct gem_txsoft *txs;
 	int progress;
 	uint32_t txlast;
 #ifdef GEM_DEBUG
 	int i;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 
 	/*
 	 * Go through our TX list and free mbufs for those
 	 * frames that have been transmitted.
 	 */
 	progress = 0;
 	GEM_CDSYNC(sc, BUS_DMASYNC_POSTREAD);
 	while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) {
 #ifdef GEM_DEBUG
 		if ((ifp->if_flags & IFF_DEBUG) != 0) {
 			printf("    txsoft %p transmit chain:\n", txs);
 			for (i = txs->txs_firstdesc;; i = GEM_NEXTTX(i)) {
 				printf("descriptor %d: ", i);
 				printf("gd_flags: 0x%016llx\t",
 				    (long long)GEM_DMA_READ(sc,
 				    sc->sc_txdescs[i].gd_flags));
 				printf("gd_addr: 0x%016llx\n",
 				    (long long)GEM_DMA_READ(sc,
 				    sc->sc_txdescs[i].gd_addr));
 				if (i == txs->txs_lastdesc)
 					break;
 			}
 		}
 #endif
 
 		/*
 		 * In theory, we could harvest some descriptors before
 		 * the ring is empty, but that's a bit complicated.
 		 *
 		 * GEM_TX_COMPLETION points to the last descriptor
 		 * processed + 1.
 		 */
 		txlast = GEM_BANK1_READ_4(sc, GEM_TX_COMPLETION);
 #ifdef GEM_DEBUG
 		CTR4(KTR_GEM, "%s: txs->txs_firstdesc = %d, "
 		    "txs->txs_lastdesc = %d, txlast = %d",
 		    __func__, txs->txs_firstdesc, txs->txs_lastdesc, txlast);
 #endif
 		if (txs->txs_firstdesc <= txs->txs_lastdesc) {
 			if ((txlast >= txs->txs_firstdesc) &&
 			    (txlast <= txs->txs_lastdesc))
 				break;
 		} else {
 			/* Ick -- this command wraps. */
 			if ((txlast >= txs->txs_firstdesc) ||
 			    (txlast <= txs->txs_lastdesc))
 				break;
 		}
 
 #ifdef GEM_DEBUG
 		CTR1(KTR_GEM, "%s: releasing a descriptor", __func__);
 #endif
 		STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q);
 
 		sc->sc_txfree += txs->txs_ndescs;
 
 		bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
 		    BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
 		if (txs->txs_mbuf != NULL) {
 			m_freem(txs->txs_mbuf);
 			txs->txs_mbuf = NULL;
 		}
 
 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
 
 		ifp->if_opackets++;
 		progress = 1;
 	}
 
 #ifdef GEM_DEBUG
 	CTR4(KTR_GEM, "%s: GEM_TX_STATE_MACHINE %x GEM_TX_DATA_PTR %llx "
 	    "GEM_TX_COMPLETION %x",
 	    __func__, GEM_BANK1_READ_4(sc, GEM_TX_STATE_MACHINE),
 	    ((long long)GEM_BANK1_READ_4(sc, GEM_TX_DATA_PTR_HI) << 32) |
 	    GEM_BANK1_READ_4(sc, GEM_TX_DATA_PTR_LO),
 	    GEM_BANK1_READ_4(sc, GEM_TX_COMPLETION));
 #endif
 
 	if (progress) {
 		if (sc->sc_txfree == GEM_NTXDESC - 1)
 			sc->sc_txwin = 0;
 
 		/*
 		 * We freed some descriptors, so reset IFF_DRV_OACTIVE
 		 * and restart.
 		 */
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		if (STAILQ_EMPTY(&sc->sc_txdirtyq))
 		    sc->sc_wdog_timer = 0;
 		gem_start_locked(ifp);
 	}
 
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "%s: %s: watchdog %d",
 	    device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer);
 #endif
 }
 
 #ifdef GEM_RINT_TIMEOUT
 /*
  * Callout handler that re-runs receive processing; arg is the softc.
  * The softc lock is expected to be held on entry.
  */
 static void
 gem_rint_timeout(void *arg)
 {
 	struct gem_softc *const softc = arg;
 
 	GEM_LOCK_ASSERT(softc, MA_OWNED);
 	gem_rint(softc);
 }
 #endif
 
 /*
  * Receive processing: walk the RX ring from sc_rxptr up to the chip's
  * completion index, replace each filled buffer with a fresh one and
  * pass the received mbuf to the stack.
  *
  * Must be called with the softc lock held; the lock is dropped around
  * each call to if_input and re-acquired afterwards.
  */
 static void
 gem_rint(struct gem_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct mbuf *m;
 	uint64_t rxstat;
 	uint32_t rxcomp;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 #ifdef GEM_RINT_TIMEOUT
 	callout_stop(&sc->sc_rx_ch);
 #endif
 #ifdef GEM_DEBUG
 	CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__);
 #endif
 
 	/*
 	 * Read the completion register once.  This limits
 	 * how long the following loop can execute.
 	 */
 	rxcomp = GEM_BANK1_READ_4(sc, GEM_RX_COMPLETION);
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "%s: sc->sc_rxptr %d, complete %d",
 	    __func__, sc->sc_rxptr, rxcomp);
 #endif
 	GEM_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	for (; sc->sc_rxptr != rxcomp;) {
 		/* m is the buffer currently attached to this slot. */
 		m = sc->sc_rxsoft[sc->sc_rxptr].rxs_mbuf;
 		rxstat = GEM_DMA_READ(sc,
 		    sc->sc_rxdescs[sc->sc_rxptr].gd_flags);
 
 		if (rxstat & GEM_RD_OWN) {
 #ifdef GEM_RINT_TIMEOUT
 			/*
 			 * The descriptor is still marked as owned, although
 			 * it is supposed to have completed.  This has been
 			 * observed on some machines.  Just exiting here
 			 * might leave the packet sitting around until another
 			 * one arrives to trigger a new interrupt, which is
 			 * generally undesirable, so set up a timeout.
 			 */
 			callout_reset(&sc->sc_rx_ch, GEM_RXOWN_TICKS,
 			    gem_rint_timeout, sc);
 #endif
 			/* m == NULL plus GEM_RD_OWN ends the loop below. */
 			m = NULL;
 			goto kickit;
 		}
 
 		if (rxstat & GEM_RD_BAD_CRC) {
 			/* Drop the frame and recycle the same buffer. */
 			ifp->if_ierrors++;
 			device_printf(sc->sc_dev, "receive error: CRC error\n");
 			GEM_INIT_RXDESC(sc, sc->sc_rxptr);
 			m = NULL;
 			goto kickit;
 		}
 
 #ifdef GEM_DEBUG
 		if ((ifp->if_flags & IFF_DEBUG) != 0) {
 			printf("    rxsoft %p descriptor %d: ",
 			    &sc->sc_rxsoft[sc->sc_rxptr], sc->sc_rxptr);
 			printf("gd_flags: 0x%016llx\t",
 			    (long long)GEM_DMA_READ(sc,
 			    sc->sc_rxdescs[sc->sc_rxptr].gd_flags));
 			printf("gd_addr: 0x%016llx\n",
 			    (long long)GEM_DMA_READ(sc,
 			    sc->sc_rxdescs[sc->sc_rxptr].gd_addr));
 		}
 #endif
 
 		/*
 		 * Allocate a new mbuf cluster.  If that fails, we are
 		 * out of memory, and must drop the packet and recycle
 		 * the buffer that's already attached to this descriptor.
 		 */
 		if (gem_add_rxbuf(sc, sc->sc_rxptr) != 0) {
 			ifp->if_ierrors++;
 			GEM_INIT_RXDESC(sc, sc->sc_rxptr);
 			m = NULL;
 		}
 
  kickit:
 		/*
 		 * Update the RX kick register.  This register has to point
 		 * to the descriptor after the last valid one (before the
 		 * current batch) and for optimum performance should be
 		 * incremented in multiples of 4 (the DMA engine fetches/
 		 * updates descriptors in batches of 4).
 		 */
 		/*
 		 * Note that sc_rxptr is advanced on every path through
 		 * here, including the GEM_RD_OWN case.
 		 */
 		sc->sc_rxptr = GEM_NEXTRX(sc->sc_rxptr);
 		if ((sc->sc_rxptr % 4) == 0) {
 			GEM_CDSYNC(sc,
 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 			GEM_BANK1_WRITE_4(sc, GEM_RX_KICK,
 			    (sc->sc_rxptr + GEM_NRXDESC - 4) &
 			    GEM_NRXDESC_MASK);
 		}
 
 		/* No packet to deliver: stop on OWN, otherwise go on. */
 		if (m == NULL) {
 			if (rxstat & GEM_RD_OWN)
 				break;
 			continue;
 		}
 
 		ifp->if_ipackets++;
 		m->m_data += ETHER_ALIGN; /* first byte offset */
 		m->m_pkthdr.rcvif = ifp;
 		m->m_pkthdr.len = m->m_len = GEM_RD_BUFLEN(rxstat);
 
 		if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 			gem_rxcksum(m, rxstat);
 
 		/* Pass it on. */
 		/* The softc lock is not held while the stack runs. */
 		GEM_UNLOCK(sc);
 		(*ifp->if_input)(ifp, m);
 		GEM_LOCK(sc);
 	}
 
 #ifdef GEM_DEBUG
 	CTR3(KTR_GEM, "%s: done sc->sc_rxptr %d, complete %d", __func__,
 	    sc->sc_rxptr, GEM_BANK1_READ_4(sc, GEM_RX_COMPLETION));
 #endif
 }
 
 /*
  * Allocate a new mbuf cluster for RX slot idx, load it into the slot's
  * DMA map and initialize the descriptor.
  *
  * Returns 0 on success, ENOBUFS if no cluster could be allocated, or
  * the error from bus_dmamap_load_mbuf_sg().  On success the previous
  * mbuf pointer in the slot is overwritten, not freed; callers keep
  * their own reference to it.
  */
 static int
 gem_add_rxbuf(struct gem_softc *sc, int idx)
 {
 	struct gem_rxsoft *rxs = &sc->sc_rxsoft[idx];
 	struct mbuf *m;
 	bus_dma_segment_t segs[1];
 	int error, nsegs;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (ENOBUFS);
 	/* Expose the whole cluster as the buffer length. */
 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
 
 #ifdef GEM_DEBUG
 	/* Bzero the packet to check DMA. */
 	memset(m->m_ext.ext_buf, 0, m->m_ext.ext_size);
 #endif
 
 	/* Release the mapping of the buffer currently in this slot. */
 	if (rxs->rxs_mbuf != NULL) {
 		bus_dmamap_sync(sc->sc_rdmatag, rxs->rxs_dmamap,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->sc_rdmatag, rxs->rxs_dmamap);
 	}
 
 	error = bus_dmamap_load_mbuf_sg(sc->sc_rdmatag, rxs->rxs_dmamap,
 	    m, segs, &nsegs, BUS_DMA_NOWAIT);
 	if (error != 0) {
 		/*
 		 * NOTE(review): at this point the old buffer's map has
 		 * already been unloaded above while rxs_mbuf and rxs_paddr
 		 * still refer to it -- confirm callers that recycle the
 		 * old buffer after a failure are safe.
 		 */
 		device_printf(sc->sc_dev,
 		    "cannot load RS DMA map %d, error = %d\n", idx, error);
 		m_freem(m);
 		return (error);
 	}
 	/* If nsegs is wrong then the stack is corrupt. */
 	KASSERT(nsegs == 1,
 	    ("%s: too many DMA segments (%d)", __func__, nsegs));
 	rxs->rxs_mbuf = m;
 	rxs->rxs_paddr = segs[0].ds_addr;
 
 	bus_dmamap_sync(sc->sc_rdmatag, rxs->rxs_dmamap,
 	    BUS_DMASYNC_PREREAD);
 
 	GEM_INIT_RXDESC(sc, idx);
 
 	return (0);
 }
 
 /*
  * Error interrupt handling.
  *
  * Counts an input error.  An RX tag error is handled by resetting the
  * receive DMA engine; any other status is logged, with the bus error
  * register appended when GEM_INTR_BERR is set.
  */
 static void
 gem_eint(struct gem_softc *sc, u_int status)
 {
 
 	sc->sc_ifp->if_ierrors++;
 	if ((status & GEM_INTR_RX_TAG_ERR) != 0) {
 		gem_reset_rxdma(sc);
 		return;
 	}
 
 	device_printf(sc->sc_dev, "%s: status 0x%x", __func__, status);
 	if ((status & GEM_INTR_BERR) != 0) {
 		if ((sc->sc_flags & GEM_PCI) != 0)
 			printf(", PCI bus error 0x%x",
 			    GEM_BANK1_READ_4(sc, GEM_PCI_ERROR_STATUS));
 		else
 			printf(", SBus error 0x%x",
 			    GEM_BANK1_READ_4(sc, GEM_SBUS_STATUS));
 	}
 	/*
 	 * Always terminate the line; previously the newline was only
 	 * emitted together with the bus error details.
 	 */
 	printf("\n");
 }
 
 /*
  * Interrupt handler; v is the softc.
  *
  * Reads the interrupt status register once under the softc lock and
  * dispatches to the error, receive and transmit handlers, then deals
  * with TX and RX MAC events.
  */
 void
 gem_intr(void *v)
 {
 	struct gem_softc *sc = v;
 	uint32_t status, status2;
 
 	GEM_LOCK(sc);
 	status = GEM_BANK1_READ_4(sc, GEM_STATUS);
 
 #ifdef GEM_DEBUG
 	CTR4(KTR_GEM, "%s: %s: cplt %x, status %x",
 	    device_get_name(sc->sc_dev), __func__,
 	    (status >> GEM_STATUS_TX_COMPLETION_SHFT), (u_int)status);
 
 	/*
 	 * PCS interrupts must be cleared, otherwise no traffic is passed!
 	 */
 	if ((status & GEM_INTR_PCS) != 0) {
 		/* The status register is deliberately read twice here. */
 		status2 =
 		    GEM_BANK1_READ_4(sc, GEM_MII_INTERRUP_STATUS) |
 		    GEM_BANK1_READ_4(sc, GEM_MII_INTERRUP_STATUS);
 		if ((status2 & GEM_MII_INTERRUP_LINK) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: PCS link status changed\n", __func__);
 	}
 	/*
 	 * NOTE(review): GEM_MAC_CONTROL_STATUS is used both as a bit mask
 	 * here and as a register offset on the next line -- confirm the
 	 * mask is the intended interrupt bit.
 	 */
 	if ((status & GEM_MAC_CONTROL_STATUS) != 0) {
 		status2 = GEM_BANK1_READ_4(sc, GEM_MAC_CONTROL_STATUS);
 		if ((status2 & GEM_MAC_PAUSED) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: PAUSE received (PAUSE time %d slots)\n",
 			    __func__, GEM_MAC_PAUSE_TIME(status2));
 		if ((status2 & GEM_MAC_PAUSE) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: transited to PAUSE state\n", __func__);
 		if ((status2 & GEM_MAC_RESUME) != 0)
 			device_printf(sc->sc_dev,
 			    "%s: transited to non-PAUSE state\n", __func__);
 	}
 	if ((status & GEM_INTR_MIF) != 0)
 		device_printf(sc->sc_dev, "%s: MIF interrupt\n", __func__);
 #endif
 
 	/*
 	 * The whole comparison goes inside __predict_false(), matching
 	 * the TX/RX MAC checks further down.
 	 */
 	if (__predict_false((status &
 	    (GEM_INTR_RX_TAG_ERR | GEM_INTR_PERR | GEM_INTR_BERR)) != 0))
 		gem_eint(sc, status);
 
 	if ((status & (GEM_INTR_RX_DONE | GEM_INTR_RX_NOBUF)) != 0)
 		gem_rint(sc);
 
 	if ((status & (GEM_INTR_TX_EMPTY | GEM_INTR_TX_INTME)) != 0)
 		gem_tint(sc);
 
 	if (__predict_false((status & GEM_INTR_TX_MAC) != 0)) {
 		status2 = GEM_BANK1_READ_4(sc, GEM_MAC_TX_STATUS);
 		/* Anything beyond the expected TX events is reported. */
 		if ((status2 &
 		    ~(GEM_MAC_TX_XMIT_DONE | GEM_MAC_TX_DEFER_EXP |
 		    GEM_MAC_TX_PEAK_EXP)) != 0)
 			device_printf(sc->sc_dev,
 			    "MAC TX fault, status %x\n", status2);
 		/* Underrun or oversized packet: count it and re-init. */
 		if ((status2 &
 		    (GEM_MAC_TX_UNDERRUN | GEM_MAC_TX_PKT_TOO_LONG)) != 0) {
 			sc->sc_ifp->if_oerrors++;
 			gem_init_locked(sc);
 		}
 	}
 	if (__predict_false((status & GEM_INTR_RX_MAC) != 0)) {
 		status2 = GEM_BANK1_READ_4(sc, GEM_MAC_RX_STATUS);
 		/*
 		 * At least with GEM_SUN_GEM and some GEM_SUN_ERI
 		 * revisions GEM_MAC_RX_OVERFLOW happen often due to a
 		 * silicon bug so handle them silently.  Moreover, it's
 		 * likely that the receiver has hung so we reset it.
 		 */
 		if ((status2 & GEM_MAC_RX_OVERFLOW) != 0) {
 			sc->sc_ifp->if_ierrors++;
 			gem_reset_rxdma(sc);
 		} else if ((status2 &
 		    ~(GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT)) != 0)
 			device_printf(sc->sc_dev,
 			    "MAC RX fault, status %x\n", status2);
 	}
 	GEM_UNLOCK(sc);
 }
 
 /*
  * Transmit watchdog, driven by the sc_wdog_timer countdown.
  *
  * Returns 0 while the timer is disarmed or has not yet expired.  On
  * expiry the timeout is logged, an output error is counted, the chip
  * is re-initialized, transmission is restarted and EJUSTRETURN is
  * returned.  Caller holds the softc lock.
  */
 static int
 gem_watchdog(struct gem_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 #ifdef GEM_DEBUG
 	CTR4(KTR_GEM,
 	    "%s: GEM_RX_CONFIG %x GEM_MAC_RX_STATUS %x GEM_MAC_RX_CONFIG %x",
 	    __func__, GEM_BANK1_READ_4(sc, GEM_RX_CONFIG),
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_STATUS),
 	    GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG));
 	CTR4(KTR_GEM,
 	    "%s: GEM_TX_CONFIG %x GEM_MAC_TX_STATUS %x GEM_MAC_TX_CONFIG %x",
 	    __func__, GEM_BANK1_READ_4(sc, GEM_TX_CONFIG),
 	    GEM_BANK1_READ_4(sc, GEM_MAC_TX_STATUS),
 	    GEM_BANK1_READ_4(sc, GEM_MAC_TX_CONFIG));
 #endif
 
 	/* Timer disarmed. */
 	if (sc->sc_wdog_timer == 0)
 		return (0);
 	/* Timer armed; count down and bail unless it just hit zero. */
 	sc->sc_wdog_timer--;
 	if (sc->sc_wdog_timer != 0)
 		return (0);
 
 	if ((sc->sc_flags & GEM_LINK) == 0) {
 		if (bootverbose)
 			device_printf(sc->sc_dev,
 			    "device timeout (no link)\n");
 	} else
 		device_printf(sc->sc_dev, "device timeout\n");
 	ifp->if_oerrors++;
 
 	/* Try to get more packets going. */
 	gem_init_locked(sc);
 	gem_start_locked(ifp);
 	return (EJUSTRETURN);
 }
 
 /*
  * Put the MIF into frame mode by clearing the bit-bang enable bit in
  * the MIF configuration register.
  */
 static void
 gem_mifinit(struct gem_softc *sc)
 {
 	uint32_t mifcfg;
 
 	mifcfg = GEM_BANK1_READ_4(sc, GEM_MIF_CONFIG);
 	mifcfg &= ~GEM_MIF_CONFIG_BB_ENA;
 	GEM_BANK1_WRITE_4(sc, GEM_MIF_CONFIG, mifcfg);
 	GEM_BANK1_BARRIER(sc, GEM_MIF_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 }
 
 /*
  * MII interface
  *
  * The MII interface supports at least three different operating modes:
  *
  * Bitbang mode is implemented using data, clock and output enable registers.
  *
  * Frame mode is implemented by loading a complete frame into the frame
  * register and polling the valid bit for completion.
  *
  * Polling mode uses the frame register but completion is indicated by
  * an interrupt.
  *
  */
 /*
  * Read a PHY register.
  *
  * With GEM_SERDES set, the MII register number is translated to the
  * corresponding GEM_MII_* register and read directly (fixed values are
  * returned for the PHY ID and extended status registers, 0 for
  * unhandled ones).  Otherwise a MIF read frame is issued and polled up
  * to 100 times; 0 is returned if it does not complete.
  */
 int
 gem_mii_readreg(device_t dev, int phy, int reg)
 {
 	struct gem_softc *sc;
 	uint32_t frame;
 	int attempt;
 
 #ifdef GEM_DEBUG_PHY
 	printf("%s: phy %d reg %d\n", __func__, phy, reg);
 #endif
 
 	sc = device_get_softc(dev);
 	if ((sc->sc_flags & GEM_SERDES) == 0) {
 		/* Construct the frame command. */
 		frame = GEM_MIF_FRAME_READ |
 		    (phy << GEM_MIF_PHY_SHIFT) |
 		    (reg << GEM_MIF_REG_SHIFT);
 
 		GEM_BANK1_WRITE_4(sc, GEM_MIF_FRAME, frame);
 		GEM_BANK1_BARRIER(sc, GEM_MIF_FRAME, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		for (attempt = 0; attempt < 100; attempt++) {
 			DELAY(1);
 			frame = GEM_BANK1_READ_4(sc, GEM_MIF_FRAME);
 			if ((frame & GEM_MIF_FRAME_TA0) != 0)
 				return (frame & GEM_MIF_FRAME_DATA);
 		}
 
 		device_printf(sc->sc_dev, "%s: timed out\n", __func__);
 		return (0);
 	}
 
 	/* SERDES: map the MII register onto the chip's own registers. */
 	switch (reg) {
 	case MII_BMCR:
 		reg = GEM_MII_CONTROL;
 		break;
 	case MII_BMSR:
 		reg = GEM_MII_STATUS;
 		break;
 	case MII_ANAR:
 		reg = GEM_MII_ANAR;
 		break;
 	case MII_ANLPAR:
 		reg = GEM_MII_ANLPAR;
 		break;
 	case MII_PHYIDR1:
 	case MII_PHYIDR2:
 		return (0);
 	case MII_EXTSR:
 		return (EXTSR_1000XFDX | EXTSR_1000XHDX);
 	default:
 		device_printf(sc->sc_dev,
 		    "%s: unhandled register %d\n", __func__, reg);
 		return (0);
 	}
 	return (GEM_BANK1_READ_4(sc, reg));
 }
 
 /*
  * Write a PHY register.
  *
  * With GEM_SERDES set, the standard MII register number is translated to
  * the matching GEM_MII_* register.  A BMCR write with the reset bit set
  * resets the PCS and then falls through to the ANAR sequence, which
  * disables the PCS, programs the advertisement and serial link control
  * registers, and re-enables it.  Unhandled registers are logged and
  * ignored.  The SERDES path always returns 0.
  *
  * Otherwise a write frame is placed in GEM_MIF_FRAME and the register is
  * polled up to 100 times for GEM_MIF_FRAME_TA0.  In this path the
  * function returns 1 once the frame completes and 0 after logging a
  * timeout.
  */
 int
 gem_mii_writereg(device_t dev, int phy, int reg, int val)
 {
 	struct gem_softc *sc;
 	int n;
 	uint32_t v;
 
 #ifdef GEM_DEBUG_PHY
 	/* Argument order must match the format: function name comes first. */
 	printf("%s: phy %d reg %d val %x\n", __func__, phy, reg, val);
 #endif
 
 	sc = device_get_softc(dev);
 	if ((sc->sc_flags & GEM_SERDES) != 0) {
 		switch (reg) {
 		case MII_BMSR:
 			reg = GEM_MII_STATUS;
 			break;
 		case MII_BMCR:
 			reg = GEM_MII_CONTROL;
 			/* Plain control write unless a reset was requested. */
 			if ((val & GEM_MII_CONTROL_RESET) == 0)
 				break;
 			GEM_BANK1_WRITE_4(sc, GEM_MII_CONTROL, val);
 			GEM_BANK1_BARRIER(sc, GEM_MII_CONTROL, 4,
 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 			if (!GEM_BANK1_BITWAIT(sc, GEM_MII_CONTROL,
 			    GEM_MII_CONTROL_RESET, 0))
 				device_printf(sc->sc_dev,
 				    "cannot reset PCS\n");
 			/* FALLTHROUGH */
 		case MII_ANAR:
 			/* Disable, reprogram, then re-enable the PCS. */
 			GEM_BANK1_WRITE_4(sc, GEM_MII_CONFIG, 0);
 			GEM_BANK1_BARRIER(sc, GEM_MII_CONFIG, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			GEM_BANK1_WRITE_4(sc, GEM_MII_ANAR, val);
 			GEM_BANK1_BARRIER(sc, GEM_MII_ANAR, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			GEM_BANK1_WRITE_4(sc, GEM_MII_SLINK_CONTROL,
 			    GEM_MII_SLINK_LOOPBACK | GEM_MII_SLINK_EN_SYNC_D);
 			GEM_BANK1_BARRIER(sc, GEM_MII_SLINK_CONTROL, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			GEM_BANK1_WRITE_4(sc, GEM_MII_CONFIG,
 			    GEM_MII_CONFIG_ENABLE);
 			GEM_BANK1_BARRIER(sc, GEM_MII_CONFIG, 4,
 			    BUS_SPACE_BARRIER_WRITE);
 			return (0);
 		case MII_ANLPAR:
 			reg = GEM_MII_ANLPAR;
 			break;
 		default:
 			device_printf(sc->sc_dev,
 			    "%s: unhandled register %d\n", __func__, reg);
 			return (0);
 		}
 		GEM_BANK1_WRITE_4(sc, reg, val);
 		GEM_BANK1_BARRIER(sc, reg, 4,
 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 		return (0);
 	}
 
 	/* Construct the frame command. */
 	v = GEM_MIF_FRAME_WRITE |
 	    (phy << GEM_MIF_PHY_SHIFT) |
 	    (reg << GEM_MIF_REG_SHIFT) |
 	    (val & GEM_MIF_FRAME_DATA);
 
 	GEM_BANK1_WRITE_4(sc, GEM_MIF_FRAME, v);
 	GEM_BANK1_BARRIER(sc, GEM_MIF_FRAME, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	/* Poll for completion, with DELAY(1) before each look. */
 	for (n = 0; n < 100; n++) {
 		DELAY(1);
 		v = GEM_BANK1_READ_4(sc, GEM_MIF_FRAME);
 		if (v & GEM_MIF_FRAME_TA0)
 			return (1);
 	}
 
 	device_printf(sc->sc_dev, "%s: timed out\n", __func__);
 	return (0);
 }
 
 /*
  * Media status change handler.  Updates the GEM_LINK flag from the
  * current MII media status and reprograms the MAC TX/RX configuration,
  * pause settings, slot time and XIF configuration to match the active
  * media.  The TX and RX MACs are re-enabled at the end only if the
  * interface is running and the link is up.  Must be called with the
  * softc lock held.
  */
 void
 gem_mii_statchg(device_t dev)
 {
 	struct gem_softc *sc;
 	int gigabit;
 	uint32_t rxcfg, txcfg, v;
 
 	sc = device_get_softc(dev);
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 #ifdef GEM_DEBUG
 	if ((sc->sc_ifp->if_flags & IFF_DEBUG) != 0)
 		device_printf(sc->sc_dev, "%s: status change\n", __func__);
 #endif
 
 	/* The link counts as up only if active with a real media subtype. */
 	if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) != 0 &&
 	    IFM_SUBTYPE(sc->sc_mii->mii_media_active) != IFM_NONE)
 		sc->sc_flags |= GEM_LINK;
 	else
 		sc->sc_flags &= ~GEM_LINK;
 
 	switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) {
 	case IFM_1000_SX:
 	case IFM_1000_LX:
 	case IFM_1000_CX:
 	case IFM_1000_T:
 		gigabit = 1;
 		break;
 	default:
 		gigabit = 0;
 	}
 
 	/*
 	 * The configuration done here corresponds to the steps F) and
 	 * G) and as far as enabling of RX and TX MAC goes also step H)
 	 * of the initialization sequence outlined in section 3.2.1 of
 	 * the GEM Gigabit Ethernet ASIC Specification.
 	 */
 
 	rxcfg = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG);
 	rxcfg &= ~(GEM_MAC_RX_CARR_EXTEND | GEM_MAC_RX_ENABLE);
 	txcfg = GEM_MAC_TX_ENA_IPG0 | GEM_MAC_TX_NGU | GEM_MAC_TX_NGU_LIMIT;
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0)
 		txcfg |= GEM_MAC_TX_IGN_CARRIER | GEM_MAC_TX_IGN_COLLIS;
 	else if (gigabit != 0) {
 		rxcfg |= GEM_MAC_RX_CARR_EXTEND;
 		txcfg |= GEM_MAC_TX_CARR_EXTEND;
 	}
 	/* Disable each MAC and wait for it to stop before reconfiguring. */
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, 0);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_TX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_TX_CONFIG, GEM_MAC_TX_ENABLE, 0))
 		device_printf(sc->sc_dev, "cannot disable TX MAC\n");
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, txcfg);
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, 0);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_ENABLE, 0))
 		device_printf(sc->sc_dev, "cannot disable RX MAC\n");
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, rxcfg);
 
 	/* Pause: start with both directions off, then follow the media. */
 	v = GEM_BANK1_READ_4(sc, GEM_MAC_CONTROL_CONFIG) &
 	    ~(GEM_MAC_CC_RX_PAUSE | GEM_MAC_CC_TX_PAUSE);
-#ifdef notyet
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
 	    IFM_ETH_RXPAUSE) != 0)
 		v |= GEM_MAC_CC_RX_PAUSE;
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
 	    IFM_ETH_TXPAUSE) != 0)
 		v |= GEM_MAC_CC_TX_PAUSE;
-#endif
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_CONFIG, v);
 
 	/* Half-duplex gigabit uses the carrier-extension slot time. */
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0 &&
 	    gigabit != 0)
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_SLOT_TIME,
 		    GEM_MAC_SLOT_TIME_CARR_EXTEND);
 	else
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_SLOT_TIME,
 		    GEM_MAC_SLOT_TIME_NORMAL);
 
 	/* XIF Configuration */
 	v = GEM_MAC_XIF_LINK_LED;
 	v |= GEM_MAC_XIF_TX_MII_ENA;
 	if ((sc->sc_flags & GEM_SERDES) == 0) {
 		if ((GEM_BANK1_READ_4(sc, GEM_MIF_CONFIG) &
 		    GEM_MIF_CONFIG_PHY_SEL) != 0) {
 			/* External MII needs echo disable if half duplex. */
 			if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
 			    IFM_FDX) == 0)
 				v |= GEM_MAC_XIF_ECHO_DISABL;
 		} else
 			/*
 			 * Internal MII needs buffer enable.
 			 * XXX buffer enable makes only sense for an
 			 * external PHY.
 			 */
 			v |= GEM_MAC_XIF_MII_BUF_ENA;
 	}
 	if (gigabit != 0)
 		v |= GEM_MAC_XIF_GMII_MODE;
 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0)
 		v |= GEM_MAC_XIF_FDPLX_LED;
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_XIF_CONFIG, v);
 
 	/* Only turn the MACs back on when running with an established link. */
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 	    (sc->sc_flags & GEM_LINK) != 0) {
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG,
 		    txcfg | GEM_MAC_TX_ENABLE);
 		GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG,
 		    rxcfg | GEM_MAC_RX_ENABLE);
 	}
 }
 
 /*
  * Media change request: forwards to mii_mediachg() with the softc lock
  * held and returns its result.
  */
 int
 gem_mediachange(struct ifnet *ifp)
 {
 	struct gem_softc *sc;
 	int rv;
 
 	/* XXX add support for serial media. */
 
 	sc = ifp->if_softc;
 	GEM_LOCK(sc);
 	rv = mii_mediachg(sc->sc_mii);
 	GEM_UNLOCK(sc);
 
 	return (rv);
 }
 
 /*
  * Media status request: when the interface is up, polls the PHY and
  * copies the active media and status words into *ifmr.  When the
  * interface is down, *ifmr is left untouched.
  */
 void
 gem_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct gem_softc *sc;
 
 	sc = ifp->if_softc;
 
 	GEM_LOCK(sc);
 	if ((ifp->if_flags & IFF_UP) != 0) {
 		mii_pollstat(sc->sc_mii);
 		ifmr->ifm_active = sc->sc_mii->mii_media_active;
 		ifmr->ifm_status = sc->sc_mii->mii_media_status;
 	}
 	GEM_UNLOCK(sc);
 }
 
 /*
  * Interface ioctl handler.
  *
  * SIOCSIFFLAGS:  bring the interface up or down, or only refresh the RX
  *                filter when it is already running and just the
  *                ALLMULTI/PROMISC flags changed; IFF_LINK0 toggles
  *                CSUM_UDP in the checksum feature set.
  * SIOC{ADD,DEL}MULTI: reload the RX filter.
  * SIOC{G,S}IFMEDIA:   handed to ifmedia_ioctl().
  * SIOCSIFCAP:    update enabled capabilities and if_hwassist.
  * Everything else goes to ether_ioctl().
  *
  * Returns 0 or the error reported by the delegated handler.
  */
 static int
 gem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct gem_softc *sc;
 	struct ifreq *ifr;
 	int changed, error;
 
 	sc = ifp->if_softc;
 	ifr = (struct ifreq *)data;
 	error = 0;
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:
 		GEM_LOCK(sc);
 		if ((ifp->if_flags & IFF_UP) == 0) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 				gem_stop(ifp, 0);
 		} else {
 			/* Flags that differ from the last recorded set. */
 			changed = ifp->if_flags ^ sc->sc_ifflags;
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 			    (changed & (IFF_ALLMULTI | IFF_PROMISC)) != 0)
 				gem_setladrf(sc);
 			else
 				gem_init_locked(sc);
 		}
 		if ((ifp->if_flags & IFF_LINK0) == 0)
 			sc->sc_csum_features &= ~CSUM_UDP;
 		else
 			sc->sc_csum_features |= CSUM_UDP;
 		if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
 			ifp->if_hwassist = sc->sc_csum_features;
 		sc->sc_ifflags = ifp->if_flags;
 		GEM_UNLOCK(sc);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		GEM_LOCK(sc);
 		gem_setladrf(sc);
 		GEM_UNLOCK(sc);
 		break;
 
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
 		break;
 
 	case SIOCSIFCAP:
 		GEM_LOCK(sc);
 		ifp->if_capenable = ifr->ifr_reqcap;
 		ifp->if_hwassist =
 		    ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ?
 		    sc->sc_csum_features : 0;
 		GEM_UNLOCK(sc);
 		break;
 
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Program the MAC receive filter from the interface flags and multicast
  * list.  Promiscuous, all-multicast and hash filtering are first turned
  * off; exactly one of them is then selected and written back to
  * GEM_MAC_RX_CONFIG.  Must be called with the softc lock held.
  */
 static void
 gem_setladrf(struct gem_softc *sc)
 {
 	struct ifmultiaddr *ifma;
 	struct ifnet *ifp;
 	uint32_t hash[16];
 	uint32_t crc, rxcfg;
 	int idx;
 
 	ifp = sc->sc_ifp;
 
 	GEM_LOCK_ASSERT(sc, MA_OWNED);
 
 	/*
 	 * Start from the current RX configuration with promiscuous mode,
 	 * promiscuous group mode (all multicast) and the hash filter all
 	 * cleared, and wait for the hash filter to report disabled.
 	 */
 	rxcfg = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG);
 	rxcfg &= ~(GEM_MAC_RX_PROMISCUOUS | GEM_MAC_RX_HASH_FILTER |
 	    GEM_MAC_RX_PROMISC_GRP);
 
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, rxcfg);
 	GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4,
 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
 	if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_HASH_FILTER,
 	    0))
 		device_printf(sc->sc_dev, "cannot disable RX hash filter\n");
 
 	if ((ifp->if_flags & IFF_PROMISC) != 0) {
 		rxcfg |= GEM_MAC_RX_PROMISCUOUS;
 	} else if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
 		rxcfg |= GEM_MAC_RX_PROMISC_GRP;
 	} else {
 		/*
 		 * Build the multicast hash: each link-level address is run
 		 * through the little-endian Ethernet CRC and the top 8 bits
 		 * of the result index the 256-bit logical address filter.
 		 * The upper 4 of those bits pick the 16-bit word, the lower
 		 * 4 pick the bit within it (bit 0 being the MSB).
 		 */
 		memset(hash, 0, sizeof(hash));
 
 		if_maddr_rlock(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family != AF_LINK)
 				continue;
 			crc = ether_crc32_le(LLADDR((struct sockaddr_dl *)
 			    ifma->ifma_addr), ETHER_ADDR_LEN);
 
 			/* Keep only the 8 most significant bits. */
 			crc >>= 24;
 
 			hash[crc >> 4] |= 1 << (15 - (crc & 15));
 		}
 		if_maddr_runlock(ifp);
 
 		rxcfg |= GEM_MAC_RX_HASH_FILTER;
 
 		/* Load the table into the chip's hash registers. */
 		for (idx = 0; idx < 16; idx++)
 			GEM_BANK1_WRITE_4(sc,
 			    GEM_MAC_HASH0 +
 			    idx * (GEM_MAC_HASH1 - GEM_MAC_HASH0),
 			    hash[idx]);
 	}
 
 	GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, rxcfg);
 }
Index: projects/binutils-2.17/sys/dev/mii/bmtphy.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/bmtphy.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/bmtphy.c	(revision 215830)
@@ -1,251 +1,294 @@
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1997 Manuel Bouyer.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	from: NetBSD: bmtphy.c,v 1.8 2002/07/03 06:25:50 simonb Exp
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for the Broadcom BCM5201/BCM5202 "Mini-Theta" PHYs.  This also
  * drives the PHY on the 3Com 3c905C.  The 3c905C's PHY is described in
  * the 3c905C data sheet.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/bus.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 #include "miidevs.h"
 
 #include <dev/mii/bmtphyreg.h>
 
 #include "miibus_if.h"
 
 static int	bmtphy_probe(device_t);
 static int	bmtphy_attach(device_t);
 
+struct bmtphy_softc {
+	struct mii_softc mii_sc;	/* must stay first: bmtphy_reset() casts a mii_softc pointer back to this type */
+	int mii_model;			/* MII_MODEL() of the PHY ID, recorded by bmtphy_attach() */
+};
+
 static device_method_t bmtphy_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		bmtphy_probe),
 	DEVMETHOD(device_attach,	bmtphy_attach),
 	DEVMETHOD(device_detach,	mii_phy_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 
 	{ 0, 0 }
 };
 
 static devclass_t	bmtphy_devclass;
 
 static driver_t	bmtphy_driver = {
 	"bmtphy",
 	bmtphy_methods,
-	sizeof(struct mii_softc)
+	sizeof(struct bmtphy_softc)
 };
 
 DRIVER_MODULE(bmtphy, miibus, bmtphy_driver, bmtphy_devclass, 0, 0);
 
 static int	bmtphy_service(struct mii_softc *, struct mii_data *, int);
 static void	bmtphy_status(struct mii_softc *);
+static void	bmtphy_reset(struct mii_softc *);
 
 static const struct mii_phydesc bmtphys_dp[] = {
 	MII_PHY_DESC(BROADCOM, BCM4401),
 	MII_PHY_DESC(BROADCOM, BCM5201),
+	MII_PHY_DESC(BROADCOM, BCM5214),
 	MII_PHY_DESC(BROADCOM, BCM5221),
+	MII_PHY_DESC(BROADCOM, BCM5222),
 	MII_PHY_END
 };
 
 static const struct mii_phydesc bmtphys_lp[] = {
 	MII_PHY_DESC(BROADCOM, 3C905B),
 	MII_PHY_DESC(BROADCOM, 3C905C),
 	MII_PHY_END
 };
 
 /*
  * Probe: the 3C905B/3C905C table is tried first at low priority; the
  * default-priority table is consulted only when that lookup returns a
  * positive (no-match) value.
  */
 static int
 bmtphy_probe(device_t dev)
 {
 	int	result;
 
 	/* Let exphy(4) take precedence for these. */
 	result = mii_phy_dev_probe(dev, bmtphys_lp, BUS_PROBE_LOW_PRIORITY);
 	if (result > 0)
 		result = mii_phy_dev_probe(dev, bmtphys_dp,
 		    BUS_PROBE_DEFAULT);
 
 	return (result);
 }
 
 /*
  * Attach: link the PHY softc into the parent's PHY list, fill in the
  * generic mii_softc fields from the attach arguments, reset the PHY,
  * read its capabilities from the BMSR and register the supported media.
  * Always returns 0.
  */
 static int
 bmtphy_attach(device_t dev)
 {
-	struct	mii_softc *sc;
-	struct	mii_attach_args *ma;
-	struct	mii_data *mii;
+	struct bmtphy_softc *bsc;
+	struct mii_softc *sc;
+	struct mii_attach_args *ma;
+	struct mii_data *mii;
 
-	sc = device_get_softc(dev);
+	bsc = device_get_softc(dev);
+	sc = &bsc->mii_sc;
 	ma = device_get_ivars(dev);
 	sc->mii_dev = device_get_parent(dev);
 	mii = ma->mii_data;
 	LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
 
 	sc->mii_flags = miibus_get_flags(dev);
 	sc->mii_inst = mii->mii_instance++;
 	sc->mii_phy = ma->mii_phyno;
 	sc->mii_service = bmtphy_service;
 	sc->mii_pdata = mii;
 
-	mii_phy_reset(sc);
+	sc->mii_flags |= MIIF_NOMANPAUSE;
 
 	/* The model must be known before the reset, which tests it. */
+	bsc->mii_model = MII_MODEL(ma->mii_id2);
+
+	bmtphy_reset(sc);
+
 	/* Capabilities are the BMSR bits allowed by the attach-time mask. */
 	sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask;
 	device_printf(dev, " ");
 	mii_phy_add_media(sc);
 	printf("\n");
 
 	MIIBUS_MEDIAINIT(sc->mii_dev);
 
 	return (0);
 }
 
 /*
  * PHY service routine, installed as sc->mii_service by bmtphy_attach().
  * Handles MII_POLLSTAT, MII_MEDIACHG and MII_TICK, then refreshes the
  * media status and calls mii_phy_update().  Always returns 0.
  */
 static int
 bmtphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
 {
 
 	switch (cmd) {
 	case MII_POLLSTAT:
 		/* Nothing to do beyond the status refresh below. */
 		break;
 
 	case MII_MEDIACHG:
 		/*
 		 * If the interface is not up, don't do anything.
 		 */
 		if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
 			break;
 
 		mii_phy_setmedia(sc);
 		break;
 
 	case MII_TICK:
 		/* EJUSTRETURN from the tick skips the status update. */
 		if (mii_phy_tick(sc) == EJUSTRETURN)
 			return (0);
 		break;
 	}
 
 	/* Update the media status. */
 	bmtphy_status(sc);
 
 	/* Callback if something changed. */
 	mii_phy_update(sc, cmd);
-
 	return (0);
 }
 
 /*
  * Recompute mii_media_status and mii_media_active from the PHY
  * registers.  With autonegotiation enabled and complete, speed and
  * duplex come from the auxiliary control/status register; with it
  * disabled, the currently selected ifmedia entry is reported as-is.
  */
 static void
 bmtphy_status(struct mii_softc *sc)
 {
-	struct	mii_data *mii;
-	struct	ifmedia_entry *ife;
-	int	bmsr, bmcr, aux_csr;
+	struct mii_data *mii;
+	struct ifmedia_entry *ife;
+	int bmsr, bmcr, aux_csr;
 
 	mii = sc->mii_pdata;
 	ife = mii->mii_media.ifm_cur;
 
 	mii->mii_media_status = IFM_AVALID;
 	mii->mii_media_active = IFM_ETHER;
 
 	/*
 	 * BMSR is read twice and the results OR'ed together.
 	 * NOTE(review): presumably because of latched status bits -- confirm.
 	 */
 	bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR);
-	aux_csr = PHY_READ(sc, MII_BMTPHY_AUX_CSR);
 
 	if (bmsr & BMSR_LINK)
 		mii->mii_media_status |= IFM_ACTIVE;
 
 	bmcr = PHY_READ(sc, MII_BMCR);
 	/* An isolated PHY reports no media and no valid status. */
 	if (bmcr & BMCR_ISO) {
 		mii->mii_media_active |= IFM_NONE;
 		mii->mii_media_status = 0;
 		return;
 	}
 
 	if (bmcr & BMCR_LOOP)
 		mii->mii_media_active |= IFM_LOOP;
 
 	if (bmcr & BMCR_AUTOEN) {
 		/*
 		 * The media status bits are only valid if autonegotiation
 		 * has completed (or it's disabled).
 		 */
 		if ((bmsr & BMSR_ACOMP) == 0) {
 			/* Erg, still trying, I guess... */
 			mii->mii_media_active |= IFM_NONE;
 			return;
 		}
 
+		aux_csr = PHY_READ(sc, MII_BMTPHY_AUX_CSR);
 		if (aux_csr & AUX_CSR_SPEED)
 			mii->mii_media_active |= IFM_100_TX;
 		else
 			mii->mii_media_active |= IFM_10_T;
 		if (aux_csr & AUX_CSR_FDX)
-			mii->mii_media_active |= IFM_FDX;
+			mii->mii_media_active |=
+			    IFM_FDX | mii_phy_flowstatus(sc);
 		else
 			mii->mii_media_active |= IFM_HDX;
 	} else
 		mii->mii_media_active = ife->ifm_media;
+}
+
 /*
  * Reset the PHY via mii_phy_reset() and, on the BCM5221 only, set the
  * APD and Auto-MDIX clock bits through the shadow register set.
  */
+static void
+bmtphy_reset(struct mii_softc *sc)
+{
+	struct bmtphy_softc *bsc;
+	u_int16_t data;
+
 	/* Valid because mii_sc is the first member of struct bmtphy_softc. */
+	bsc = (struct bmtphy_softc *)sc;
+
+	mii_phy_reset(sc);
+
+	if (bsc->mii_model == MII_MODEL_BROADCOM_BCM5221) {
+		/* Enable shadow register mode. */
+		data = PHY_READ(sc, 0x1f);
+		PHY_WRITE(sc, 0x1f, data | 0x0080);
+
+		/* Enable APD (Auto PowerDetect). */
+		data = PHY_READ(sc, MII_BMTPHY_AUX2);
+		PHY_WRITE(sc, MII_BMTPHY_AUX2, data | 0x0020);
+
+		/* Enable clocks across APD for Auto-MDIX functionality. */
+		data = PHY_READ(sc, MII_BMTPHY_INTR);
+		PHY_WRITE(sc, MII_BMTPHY_INTR, data | 0x0004);
+
+		/* Disable shadow register mode. */
+		data = PHY_READ(sc, 0x1f);
+		PHY_WRITE(sc, 0x1f, data & ~0x0080);
+	}
 }
Index: projects/binutils-2.17/sys/dev/mii/gentbi.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/gentbi.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/gentbi.c	(revision 215830)
@@ -1,276 +1,278 @@
 /*	$NetBSD: gentbi.c,v 1.15 2006/03/29 07:05:24 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 1997 Manuel Bouyer.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Driver for generic ten-bit (1000BASE-SX) interfaces, built into
  * many Gigabit Ethernet chips.
  *
  * All we have to do here is correctly report speed and duplex.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for generic unknown ten-bit interfaces(1000BASE-{LX,SX}
  * fiber interfaces).
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/bus.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 #include "miidevs.h"
 
 #include "miibus_if.h"
 
 static int	gentbi_probe(device_t);
 static int	gentbi_attach(device_t);
 
 static device_method_t gentbi_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		gentbi_probe),
 	DEVMETHOD(device_attach,	gentbi_attach),
 	DEVMETHOD(device_detach,	mii_phy_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	{0, 0}
 };
 
 static devclass_t gentbi_devclass;
 
 static driver_t gentbi_driver = {
 	"gentbi",
 	gentbi_methods,
 	sizeof(struct mii_softc)
 };
 
 DRIVER_MODULE(gentbi, miibus, gentbi_driver, gentbi_devclass, 0, 0);
 
 static int	gentbi_service(struct mii_softc *, struct mii_data *, int);
 static void	gentbi_status(struct mii_softc *);
 
 /*
  * Probe for a generic ten-bit interface.  A PHY matches when its BMSR
  * advertises extended status but no media, and its EXTSR advertises
  * 1000BASE-X and no 1000BASE-T.  Returns BUS_PROBE_LOW_PRIORITY on a
  * match and ENXIO otherwise.
  */
 static int
 gentbi_probe(device_t dev)
 {
 	struct mii_attach_args *ma;
 	device_t bus;
 	int bmsr, extsr;
 
 	bus = device_get_parent(dev);
 	ma = device_get_ivars(dev);
 
 	/*
 	 * We match as a generic TBI if:
 	 *
 	 *	- There is no media in the BMSR.
 	 *	- EXTSR has only 1000X.
 	 */
 	bmsr = MIIBUS_READREG(bus, ma->mii_phyno, MII_BMSR);
 	if ((bmsr & BMSR_EXTSTAT) == 0 || (bmsr & BMSR_MEDIAMASK) != 0)
 		return (ENXIO);
 
 	extsr = MIIBUS_READREG(bus, ma->mii_phyno, MII_EXTSR);
 	if ((extsr & (EXTSR_1000TFDX|EXTSR_1000THDX)) != 0 ||
 	    (extsr & (EXTSR_1000XFDX|EXTSR_1000XHDX)) == 0)
 		return (ENXIO);
 
 	/*
 	 * We think this is a generic TBI.  Return a match
 	 * priority higher than ukphy, but lower than what
 	 * specific drivers will return.
 	 */
 	device_set_desc(dev, "Generic ten-bit interface");
 	return (BUS_PROBE_LOW_PRIORITY);
 }
 
 /*
  * Attach: link the PHY softc into the parent's PHY list, fill in the
  * generic mii_softc fields from the attach arguments, reset the PHY,
  * record its (extended) capabilities and register the supported media.
  * Always returns 0.
  */
 static int
 gentbi_attach(device_t dev)
 {
 	struct mii_softc *sc;
 	struct mii_attach_args *ma;
 	struct mii_data *mii;
 
 	sc = device_get_softc(dev);
 	ma = device_get_ivars(dev);
 	sc->mii_dev = device_get_parent(dev);
 	mii = ma->mii_data;
 	LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
 
 	if (bootverbose)
 		device_printf(dev, "OUI 0x%06x, model 0x%04x, rev. %d\n",
 		    MII_OUI(ma->mii_id1, ma->mii_id2),
 		    MII_MODEL(ma->mii_id2), MII_REV(ma->mii_id2));
 
 	sc->mii_flags = miibus_get_flags(dev);
 	sc->mii_inst = mii->mii_instance++;
 	sc->mii_phy = ma->mii_phyno;
 	sc->mii_service = gentbi_service;
 	sc->mii_pdata = mii;
 
+	sc->mii_flags |= MIIF_NOMANPAUSE;
+
 	mii_phy_reset(sc);
 
 	/*
 	 * Mask out all media in the BMSR.  We only are really interested
 	 * in "auto".
 	 */
 	sc->mii_capabilities =
 	    PHY_READ(sc, MII_BMSR) & ma->mii_capmask & ~BMSR_MEDIAMASK;
 	/* EXTSR is only read when the BMSR says it exists. */
 	if (sc->mii_capabilities & BMSR_EXTSTAT)
 		sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR);
 
 	device_printf(dev, " ");
 	mii_phy_add_media(sc);
 	printf("\n");
 
 	MIIBUS_MEDIAINIT(sc->mii_dev);
 	return (0);
 }
 
 /*
  * PHY service routine, installed as sc->mii_service by gentbi_attach().
  * A media change is applied only while the interface is up; a tick that
  * returns EJUSTRETURN ends the call early.  In every other case the
  * media status is refreshed and mii_phy_update() is called.  Always
  * returns 0.
  */
 static int
 gentbi_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
 {
 
 	if (cmd == MII_MEDIACHG) {
 		/*
 		 * If the interface is not up, don't do anything.
 		 */
 		if ((mii->mii_ifp->if_flags & IFF_UP) != 0)
 			mii_phy_setmedia(sc);
 	} else if (cmd == MII_TICK) {
 		if (mii_phy_tick(sc) == EJUSTRETURN)
 			return (0);
 	}
 	/* MII_POLLSTAT and anything else: just refresh the status. */
 
 	/* Update the media status. */
 	gentbi_status(sc);
 
 	/* Callback if something changed. */
 	mii_phy_update(sc, cmd);
 	return (0);
 }
 
 /*
  * Recompute mii_media_status and mii_media_active from the PHY
  * registers.  With autonegotiation enabled and complete the media is
  * reported as 1000baseSX, with duplex taken from the link partner
  * ability register; with it disabled, the currently selected ifmedia
  * entry is reported as-is.
  */
 static void
 gentbi_status(struct mii_softc *sc)
 {
 	struct mii_data *mii;
 	struct ifmedia_entry *cur;
 	int bmsr, bmcr, anlpar;
 
 	mii = sc->mii_pdata;
 	cur = mii->mii_media.ifm_cur;
 
 	mii->mii_media_status = IFM_AVALID;
 	mii->mii_media_active = IFM_ETHER;
 
 	/* Two back-to-back BMSR reads, OR'ed together. */
 	bmsr = PHY_READ(sc, MII_BMSR);
 	bmsr |= PHY_READ(sc, MII_BMSR);
 
 	if ((bmsr & BMSR_LINK) != 0)
 		mii->mii_media_status |= IFM_ACTIVE;
 
 	bmcr = PHY_READ(sc, MII_BMCR);
 	if ((bmcr & BMCR_ISO) != 0) {
 		mii->mii_media_active |= IFM_NONE;
 		mii->mii_media_status = 0;
 		return;
 	}
 
 	if ((bmcr & BMCR_LOOP) != 0)
 		mii->mii_media_active |= IFM_LOOP;
 
 	/* Without autonegotiation, report the selected media verbatim. */
 	if ((bmcr & BMCR_AUTOEN) == 0) {
 		mii->mii_media_active = cur->ifm_media;
 		return;
 	}
 
 	/*
 	 * The media status bits are only valid if autonegotiation
 	 * has completed (or it's disabled).
 	 */
 	if ((bmsr & BMSR_ACOMP) == 0) {
 		/* Erg, still trying, I guess... */
 		mii->mii_media_active |= IFM_NONE;
 		return;
 	}
 
 	/*
 	 * The media is always 1000baseSX.  Check the ANLPAR to
 	 * see if we're doing full-duplex.
 	 */
 	mii->mii_media_active |= IFM_1000_SX;
 	anlpar = PHY_READ(sc, MII_ANLPAR);
 	if ((sc->mii_extcapabilities & EXTSR_1000XFDX) == 0 ||
 	    (anlpar & ANLPAR_X_FD) == 0)
 		mii->mii_media_active |= IFM_HDX;
 	else
 		mii->mii_media_active |=
 		    IFM_FDX | mii_phy_flowstatus(sc);
 }
Index: projects/binutils-2.17/sys/dev/mii/inphy.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/inphy.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/inphy.c	(revision 215830)
@@ -1,205 +1,208 @@
 /*-
  * Copyright (c) 2001 Jonathan Lemon
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * driver for Intel 82553 and 82555 PHYs
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/bus.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 #include "miidevs.h"
 
 #include <dev/mii/inphyreg.h>
 
 #include "miibus_if.h"
 
 static int 	inphy_probe(device_t dev);
 static int 	inphy_attach(device_t dev);
 
 static device_method_t inphy_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		inphy_probe),
 	DEVMETHOD(device_attach,	inphy_attach),
 	DEVMETHOD(device_detach,	mii_phy_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	{ 0, 0 }
 };
 
 static devclass_t inphy_devclass;
 
 static driver_t inphy_driver = {
 	"inphy",
 	inphy_methods,
 	sizeof(struct mii_softc)
 };
 
 DRIVER_MODULE(inphy, miibus, inphy_driver, inphy_devclass, 0, 0);
 
 static int	inphy_service(struct mii_softc *, struct mii_data *, int);
 static void	inphy_status(struct mii_softc *);
 
 static const struct mii_phydesc inphys[] = {
 	MII_PHY_DESC(INTEL, I82553C),
 	MII_PHY_DESC(INTEL, I82555),
 	MII_PHY_DESC(INTEL, I82562EM),
 	MII_PHY_DESC(INTEL, I82562ET),
 	MII_PHY_DESC(xxINTEL, I82553AB),
 	MII_PHY_END
 };
 
 /*
  * Probe: match the device against the inphys table at default priority
  * and return whatever mii_phy_dev_probe() reports.
  */
 static int
 inphy_probe(device_t dev)
 {
 
 	return (mii_phy_dev_probe(dev, inphys, BUS_PROBE_DEFAULT));
 }
 
 /*
  * Attach: link the PHY softc into the parent's PHY list, fill in the
  * generic mii_softc fields from the attach arguments, add a 100baseTX
  * loopback media entry, reset the PHY, read its capabilities from the
  * BMSR and register the supported media.  Always returns 0.
  */
 static int
 inphy_attach(device_t dev)
 {
 	struct mii_softc *sc;
 	struct mii_attach_args *ma;
 	struct mii_data *mii;
 
 	sc = device_get_softc(dev);
 	ma = device_get_ivars(dev);
 	sc->mii_dev = device_get_parent(dev);
 	mii = ma->mii_data;
 	LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
 
 	sc->mii_flags = miibus_get_flags(dev);
 	sc->mii_inst = mii->mii_instance++;
 	sc->mii_phy = ma->mii_phyno;
 	sc->mii_service = inphy_service;
 	sc->mii_pdata = mii;
 
+	sc->mii_flags |= MIIF_NOMANPAUSE;
+
 	/* Explicit 100baseTX loopback entry for this PHY instance. */
 	ifmedia_add(&mii->mii_media,
 	    IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_LOOP, sc->mii_inst),
 	    MII_MEDIA_100_TX, NULL);
 
 	mii_phy_reset(sc);
 
 	/* Capabilities are the BMSR bits allowed by the attach-time mask. */
 	sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask;
 	device_printf(dev, " ");
 	mii_phy_add_media(sc);
 	printf("\n");
 
 	MIIBUS_MEDIAINIT(sc->mii_dev);
 
 	return (0);
 }
 
 /*
  * PHY service routine, installed as sc->mii_service by inphy_attach().
  * A media change is applied only while the interface is up; a tick that
  * returns EJUSTRETURN ends the call early.  In every other case the
  * media status is refreshed and mii_phy_update() is called.  Always
  * returns 0.
  */
 static int
 inphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
 {
 
 	if (cmd == MII_MEDIACHG) {
 		/*
 		 * If the interface is not up, don't do anything.
 		 */
 		if ((mii->mii_ifp->if_flags & IFF_UP) != 0)
 			mii_phy_setmedia(sc);
 	} else if (cmd == MII_TICK) {
 		if (mii_phy_tick(sc) == EJUSTRETURN)
 			return (0);
 	}
 	/* MII_POLLSTAT and anything else: just refresh the status. */
 
 	/* Update the media status. */
 	inphy_status(sc);
 
 	/* Callback if something changed. */
 	mii_phy_update(sc, cmd);
 	return (0);
 }
 
 /*
  * Recompute mii_media_status and mii_media_active from the PHY
  * registers.  With autonegotiation enabled and complete, speed and
  * duplex come from the MII_INPHY_SCR register; with it disabled, the
  * currently selected ifmedia entry is reported as-is.
  */
 static void
 inphy_status(struct mii_softc *sc)
 {
 	struct mii_data *mii = sc->mii_pdata;
 	struct ifmedia_entry *ife = mii->mii_media.ifm_cur;
 	int bmsr, bmcr, scr;
 
 	mii->mii_media_status = IFM_AVALID;
 	mii->mii_media_active = IFM_ETHER;
 
 	/*
 	 * BMSR is read twice and the results OR'ed together.
 	 * NOTE(review): presumably because of latched status bits -- confirm.
 	 */
 	bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR);
 	if (bmsr & BMSR_LINK)
 		mii->mii_media_status |= IFM_ACTIVE;
 
 	bmcr = PHY_READ(sc, MII_BMCR);
 	/* An isolated PHY reports no media and no valid status. */
 	if (bmcr & BMCR_ISO) {
 		mii->mii_media_active |= IFM_NONE;
 		mii->mii_media_status = 0;
 		return;
 	}
 
 	if (bmcr & BMCR_LOOP)
 		mii->mii_media_active |= IFM_LOOP;
 
 	if (bmcr & BMCR_AUTOEN) {
 		/* Nothing to report until autonegotiation has completed. */
 		if ((bmsr & BMSR_ACOMP) == 0) {
 			mii->mii_media_active |= IFM_NONE;
 			return;
 		}
 
 		scr = PHY_READ(sc, MII_INPHY_SCR);
 		if (scr & SCR_S100)
 			mii->mii_media_active |= IFM_100_TX;
 		else
 			mii->mii_media_active |= IFM_10_T;
 		if (scr & SCR_FDX)
-			mii->mii_media_active |= IFM_FDX;
+			mii->mii_media_active |=
+			    IFM_FDX | mii_phy_flowstatus(sc);
 		else
 			mii->mii_media_active |= IFM_HDX;
 	} else
 		mii->mii_media_active = ife->ifm_media;
 }
Index: projects/binutils-2.17/sys/dev/mii/mii.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/mii.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/mii.c	(revision 215830)
@@ -1,548 +1,548 @@
 /*	$NetBSD: mii.c,v 1.12 1999/08/03 19:41:49 drochner Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * MII bus layer, glues MII-capable network interface drivers to sharable
  * PHY drivers.  This exports an interface compatible with BSD/OS 3.0's,
  * plus some NetBSD extensions.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/bus.h> 
 
 #include <net/if.h>
 #include <net/if_media.h>
 #include <net/route.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
 MODULE_VERSION(miibus, 1);
 
 #include "miibus_if.h"
 
 static int miibus_print_child(device_t dev, device_t child);
 static int miibus_read_ivar(device_t dev, device_t child, int which,
     uintptr_t *result);
 static int miibus_child_location_str(device_t bus, device_t child, char *buf,
     size_t buflen);
 static int miibus_child_pnpinfo_str(device_t bus, device_t child, char *buf,
     size_t buflen);
 static int miibus_readreg(device_t, int, int);
 static int miibus_writereg(device_t, int, int, int);
 static void miibus_statchg(device_t);
 static void miibus_linkchg(device_t);
 static void miibus_mediainit(device_t);
 
 /*
  * Method table for the miibus driver.  It binds the device, bus and MII
  * interface methods to the implementations in this file (or to the
  * bus_generic_* helpers).  The { 0, 0 } entry terminates the table.
  */
 static device_method_t miibus_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		miibus_probe),
 	DEVMETHOD(device_attach,	miibus_attach),
 	DEVMETHOD(device_detach,	miibus_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 
 	/* bus interface */
 	DEVMETHOD(bus_print_child,	miibus_print_child),
 	DEVMETHOD(bus_read_ivar,	miibus_read_ivar),
 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
 	DEVMETHOD(bus_child_pnpinfo_str, miibus_child_pnpinfo_str),
 	DEVMETHOD(bus_child_location_str, miibus_child_location_str),
 
 	/* MII interface */
 	DEVMETHOD(miibus_readreg,	miibus_readreg),
 	DEVMETHOD(miibus_writereg,	miibus_writereg),
 	DEVMETHOD(miibus_statchg,	miibus_statchg),    
 	DEVMETHOD(miibus_linkchg,	miibus_linkchg),    
 	DEVMETHOD(miibus_mediainit,	miibus_mediainit),    
 
 	/* end-of-table sentinel */
 	{ 0, 0 }
 };
 
 /* Device class handle for miibus (not static, so it has external linkage). */
 devclass_t miibus_devclass;
 
 /*
  * Driver description for miibus: driver name, method table and softc
  * size.  The softc of a miibus device is a struct mii_data.
  */
 driver_t miibus_driver = {
 	"miibus",
 	miibus_methods,
 	sizeof(struct mii_data)
 };
 
 /*
  * Instance variables attached to a miibus device by mii_attach() and
  * consumed by miibus_attach() and miibus_read_ivar().
  */
 struct miibus_ivars {
 	struct ifnet	*ifp;		/* interface given to mii_attach() */
 	ifm_change_cb_t	ifmedia_upd;	/* change callback for ifmedia_init() */
 	ifm_stat_cb_t	ifmedia_sts;	/* status callback for ifmedia_init() */
 	int		mii_flags;	/* returned for MIIBUS_IVAR_FLAGS */
 };
 
 /*
  * Probe method: sets the device description to "MII bus" and always
  * reports BUS_PROBE_SPECIFIC.
  */
 int
 miibus_probe(device_t dev)
 {
 
 	device_set_desc(dev, "MII bus");
 
 	return (BUS_PROBE_SPECIFIC);
 }
 
 /*
  * Attach method for the miibus.  Stores the bus softc in the attach
  * arguments of every child, initializes the ifmedia state with the
  * callbacks kept in the bus ivars, marks the interface as supporting
  * link-state reporting and finally attaches the children.
  *
  * Returns ENXIO when the bus has no children, otherwise the result of
  * bus_generic_attach().
  */
 int
 miibus_attach(device_t dev)
 {
 	struct miibus_ivars	*ivars;
 	struct mii_attach_args	*ma;
 	struct mii_data		*mii;
 	device_t		*children;
 	int			i, nchildren;
 
 	mii = device_get_softc(dev);
 	nchildren = 0;
 	if (device_get_children(dev, &children, &nchildren) == 0) {
 		/* Give each child's attach args a pointer to the bus softc. */
 		for (i = 0; i < nchildren; i++) {
 			ma = device_get_ivars(children[i]);
 			ma->mii_data = mii;
 		}
 		free(children, M_TEMP);
 	}
 	/* Also reached when device_get_children() failed (nchildren stays 0). */
 	if (nchildren == 0) {
-		device_printf(dev, "cannot get children");
+		device_printf(dev, "cannot get children\n");
 		return (ENXIO);
 	}
 	ivars = device_get_ivars(dev);
 	ifmedia_init(&mii->mii_media, IFM_IMASK, ivars->ifmedia_upd,
 	    ivars->ifmedia_sts);
 	mii->mii_ifp = ivars->ifp;
 	/* Advertise and enable link-state reporting on the interface. */
 	mii->mii_ifp->if_capabilities |= IFCAP_LINKSTATE;
 	mii->mii_ifp->if_capenable |= IFCAP_LINKSTATE;
 	LIST_INIT(&mii->mii_phys);
 
 	return (bus_generic_attach(dev));
 }
 
 /*
  * Detach method for the miibus: calls bus_generic_detach() (its return
  * value is ignored), then ifmedia_removeall() on the bus media state,
  * and clears the cached ifnet pointer.  Always returns 0.
  */
 int
 miibus_detach(device_t dev)
 {
 	struct mii_data		*mii;
 
 	bus_generic_detach(dev);
 	mii = device_get_softc(dev);
 	ifmedia_removeall(&mii->mii_media);
 	mii->mii_ifp = NULL;
 
 	return (0);
 }
 
 /*
  * Print the description line of a child: generic header, " PHY <n>"
  * with the child's PHY number, then the generic footer.  Returns the sum
  * of the values returned by the three print calls.
  */
 static int
 miibus_print_child(device_t dev, device_t child)
 {
 	struct mii_attach_args *args = device_get_ivars(child);
 	int nprinted = 0;
 
 	nprinted += bus_print_child_header(dev, child);
 	nprinted += printf(" PHY %d", args->mii_phyno);
 	nprinted += bus_print_child_footer(dev, child);
 	return (nprinted);
 }
 
 /*
  * Read an instance variable on behalf of a child.  Only
  * MIIBUS_IVAR_FLAGS is supported; any other request yields ENOENT.
  * On success the value is stored in *result and 0 is returned.
  */
 static int
 miibus_read_ivar(device_t dev, device_t child __unused, int which,
     uintptr_t *result)
 {
 	struct miibus_ivars *bus_ivars;
 
 	if (which != MIIBUS_IVAR_FLAGS)
 		return (ENOENT);
 
 	/*
 	 * NB: the value comes from the instance variables of the miibus
 	 * itself, not from those of its PHY children.
 	 */
 	bus_ivars = device_get_ivars(dev);
 	*result = bus_ivars->mii_flags;
 	return (0);
 }
 
 /*
  * Format the PnP information of a child into buf (at most buflen bytes)
  * as "oui=0x.. model=0x.. rev=0x..", derived from the two ID values
  * stored in the child's attach arguments.  Always returns 0.
  */
 static int
 miibus_child_pnpinfo_str(device_t bus __unused, device_t child, char *buf,
     size_t buflen)
 {
 	struct mii_attach_args *ma;
 
 	ma = device_get_ivars(child);
 	snprintf(buf, buflen, "oui=0x%x model=0x%x rev=0x%x",
 	    MII_OUI(ma->mii_id1, ma->mii_id2),
 	    MII_MODEL(ma->mii_id2), MII_REV(ma->mii_id2));
 	return (0);
 }
 
 /*
  * Format the location of a child into buf (at most buflen bytes) as
  * "phyno=<n>", using the PHY number from the child's attach arguments.
  * Always returns 0.
  */
 static int
 miibus_child_location_str(device_t bus __unused, device_t child, char *buf,
     size_t buflen)
 {
 	struct mii_attach_args *ma;
 
 	ma = device_get_ivars(child);
 	snprintf(buf, buflen, "phyno=%d", ma->mii_phyno);
 	return (0);
 }
 
 /*
  * Forward a PHY register read to the parent of the miibus and return
  * whatever the parent's MIIBUS_READREG method returns.
  */
 static int
 miibus_readreg(device_t dev, int phy, int reg)
 {
 	device_t		nic = device_get_parent(dev);
 
 	return (MIIBUS_READREG(nic, phy, reg));
 }
 
 /*
  * Forward a PHY register write to the parent of the miibus and return
  * whatever the parent's MIIBUS_WRITEREG method returns.
  */
 static int
 miibus_writereg(device_t dev, int phy, int reg, int data)
 {
 	device_t		nic = device_get_parent(dev);
 
 	return (MIIBUS_WRITEREG(nic, phy, reg, data));
 }
 
 /*
  * Status-change notification: pass it on to the parent of the miibus,
  * then set the interface baudrate from the currently active media word.
  */
 static void
 miibus_statchg(device_t dev)
 {
 	struct mii_data		*sc;
 
 	MIIBUS_STATCHG(device_get_parent(dev));
 
 	sc = device_get_softc(dev);
 	sc->mii_ifp->if_baudrate = ifmedia_baudrate(sc->mii_media_active);
 }
 
 /*
  * Link-change notification: pass it on to the parent of the miibus,
  * then translate the cached media status into a link state and report
  * it for the interface.
  */
 static void
 miibus_linkchg(device_t dev)
 {
 	struct mii_data		*sc;
 	int			state;
 
 	MIIBUS_LINKCHG(device_get_parent(dev));
 
 	sc = device_get_softc(dev);
 
 	/* Without IFM_AVALID the status word carries no link information. */
 	if ((sc->mii_media_status & IFM_AVALID) == 0)
 		state = LINK_STATE_UNKNOWN;
 	else if ((sc->mii_media_status & IFM_ACTIVE) != 0)
 		state = LINK_STATE_UP;
 	else
 		state = LINK_STATE_DOWN;
 	if_link_state_change(sc->mii_ifp, state);
 }
 
 /*
  * Choose the initial media for the bus.  The parent is first given the
  * chance to add media of its own.  The media list is then walked:
  * IFM_ETHER | IFM_AUTO is selected if present, otherwise the last entry
  * visited (or 0 when the list is empty).
  */
 static void
 miibus_mediainit(device_t dev)
 {
 	struct ifmedia_entry	*entry;
 	struct mii_data		*sc;
 	int			chosen;
 
 	/* Poke the parent in case it has any media of its own to add. */
 	MIIBUS_MEDIAINIT(device_get_parent(dev));
 
 	sc = device_get_softc(dev);
 	chosen = 0;
 	LIST_FOREACH(entry, &sc->mii_media.ifm_list, ifm_list) {
 		chosen = entry->ifm_media;
 		if (chosen == (IFM_ETHER | IFM_AUTO))
 			break;
 	}
 
 	ifmedia_set(&sc->mii_media, chosen);
 }
 
 /*
  * Helper function used by network interface drivers, attaches the miibus and
  * the PHYs to the network interface driver parent.
  *
  * dev		device the miibus is added to as a child; PHY registers
  *		are read through its MIIBUS_READREG method
  * miibus	in/out: when *miibus is NULL a new "miibus" child is created
  *		(first pass); otherwise the existing one is reused and ifp,
  *		ifmedia_upd, ifmedia_sts and flags must match the values
  *		stored on it
  * ifp, ifmedia_upd, ifmedia_sts, flags
  *		saved in the ivars of the miibus on the first pass
  * capmask	copied into the attach arguments of every PHY found
  * phyloc	MII_PHY_ANY to scan addresses 0 .. MII_NPHY - 1, or one
  *		specific address in that range
  * offloc	MII_OFFSET_ANY, or the index (0 .. MII_NPHY - 1) of the PHY
  *		to attach; cannot be combined with a specific phyloc
  *
  * Returns 0 on success, EINVAL for inconsistent arguments, ENOMEM if the
  * ivars cannot be allocated, ENXIO if the miibus cannot be created or no
  * PHY was added on the first pass, or the error from bus_generic_attach().
  */
 int
 mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp,
     ifm_change_cb_t ifmedia_upd, ifm_stat_cb_t ifmedia_sts, int capmask,
     int phyloc, int offloc, int flags)
 {
 	struct miibus_ivars *ivars;
 	struct mii_attach_args ma, *args;
 	device_t *children, phy;
 	int bmsr, first, i, nchildren, offset, phymax, phymin, rv;
 
 	/* A specific address and a specific offset are mutually exclusive. */
 	if (phyloc != MII_PHY_ANY && offloc != MII_OFFSET_ANY) {
-		printf("%s: phyloc and offloc specified", __func__);
+		printf("%s: phyloc and offloc specified\n", __func__);
 		return (EINVAL);
 	}
 
 	if (offloc != MII_OFFSET_ANY && (offloc < 0 || offloc >= MII_NPHY)) {
-		printf("%s: ivalid offloc %d", __func__, offloc);
+		printf("%s: ivalid offloc %d\n", __func__, offloc);
 		return (EINVAL);
 	}
 
 	/* Work out the range of PHY addresses to scan. */
 	if (phyloc == MII_PHY_ANY) {
 		phymin = 0;
 		phymax = MII_NPHY - 1;
 	} else {
 		if (phyloc < 0 || phyloc >= MII_NPHY) {
-			printf("%s: ivalid phyloc %d", __func__, phyloc);
+			printf("%s: ivalid phyloc %d\n", __func__, phyloc);
 			return (EINVAL);
 		}
 		phymin = phymax = phyloc;
 	}
 
 	first = 0;
 	if (*miibus == NULL) {
 		/* First pass: create the miibus and record the parameters. */
 		first = 1;
 		ivars = malloc(sizeof(*ivars), M_DEVBUF, M_NOWAIT);
 		if (ivars == NULL)
 			return (ENOMEM);
 		ivars->ifp = ifp;
 		ivars->ifmedia_upd = ifmedia_upd;
 		ivars->ifmedia_sts = ifmedia_sts;
 		ivars->mii_flags = flags;
 		*miibus = device_add_child(dev, "miibus", -1);
 		if (*miibus == NULL) {
 			rv = ENXIO;
 			goto fail;
 		}
 		device_set_ivars(*miibus, ivars);
 	} else {
 		/* Later pass: the parameters must match the stored ones. */
 		ivars = device_get_ivars(*miibus);
 		if (ivars->ifp != ifp || ivars->ifmedia_upd != ifmedia_upd ||
 		    ivars->ifmedia_sts != ifmedia_sts ||
 		    ivars->mii_flags != flags) {
-			printf("%s: non-matching invariant", __func__);
+			printf("%s: non-matching invariant\n", __func__);
 			return (EINVAL);
 		}
 		/*
 		 * Assignment of the attach arguments mii_data for the first
 		 * pass is done in miibus_attach(), i.e. once the miibus softc
 		 * has been allocated.
 		 */
 		ma.mii_data = device_get_softc(*miibus);
 	} 
 
 	ma.mii_capmask = capmask;
 
 	phy = NULL;
 	offset = 0;
 	for (ma.mii_phyno = phymin; ma.mii_phyno <= phymax; ma.mii_phyno++) {
 		/*
 		 * Make sure we haven't already configured a PHY at this
 		 * address.  This allows mii_attach() to be called
 		 * multiple times.
 		 */
 		if (device_get_children(*miibus, &children, &nchildren) == 0) {
 			for (i = 0; i < nchildren; i++) {
 				args = device_get_ivars(children[i]);
 				if (args->mii_phyno == ma.mii_phyno) {
 					/*
 					 * Yes, there is already something
 					 * configured at this address.
 					 */
 					free(children, M_TEMP);
 					goto skip;
 				}
 			}
 			free(children, M_TEMP);
 		}
 
 		/*
 		 * Check to see if there is a PHY at this address.  Note,
 		 * many braindead PHYs report 0/0 in their ID registers,
 		 * so we test for media in the BMSR.
 		 */
 		bmsr = MIIBUS_READREG(dev, ma.mii_phyno, MII_BMSR);
 		if (bmsr == 0 || bmsr == 0xffff ||
 		    (bmsr & (BMSR_EXTSTAT | BMSR_MEDIAMASK)) == 0) {
 			/*
 			 * Assume no PHY at this address.  The `continue'
 			 * bypasses the offset increment at `skip', so empty
 			 * addresses do not count toward `offset'.
 			 */
 			continue;
 		}
 
 		/*
 		 * There is a PHY at this address.  If we were given an
 		 * `offset' locator, skip this PHY if it doesn't match.
 		 */
 		if (offloc != MII_OFFSET_ANY && offloc != offset)
 			goto skip;
 
 		/*
 		 * Extract the IDs. Braindead PHYs will be handled by
 		 * the `ukphy' driver, as we have no ID information to
 		 * match on.
 		 */
 		ma.mii_id1 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR1);
 		ma.mii_id2 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR2);
 
 		/* Each PHY child gets its own heap copy of the attach args. */
 		args = malloc(sizeof(struct mii_attach_args), M_DEVBUF,
 		    M_NOWAIT);
 		if (args == NULL)
 			goto skip;
 		bcopy((char *)&ma, (char *)args, sizeof(ma));
 		phy = device_add_child(*miibus, NULL, -1);
 		if (phy == NULL) {
 			free(args, M_DEVBUF);
 			goto skip;
 		}
 		device_set_ivars(phy, args);
  skip:
 		offset++;
 	}
 
 	if (first != 0) {
 		/* `phy' is still NULL if the scan added no PHY child at all. */
 		if (phy == NULL) {
 			rv = ENXIO;
 			goto fail;
 		}
 		rv = bus_generic_attach(dev);
 		if (rv != 0)
 			goto fail;
 
 		/* Attaching of the PHY drivers is done in miibus_attach(). */
 		return (0);
 	}
 	rv = bus_generic_attach(*miibus);
 	if (rv != 0)
 		goto fail;
 
 	return (0);
 
 	/*
 	 * NOTE(review): this path is also reached on a non-first pass (when
 	 * bus_generic_attach(*miibus) fails).  In that case the existing
 	 * miibus is deleted and its ivars freed, but *miibus is left
 	 * untouched -- confirm that callers expect this.
 	 */
  fail:
 	if (*miibus != NULL)
 		device_delete_child(dev, *miibus);
 	free(ivars, M_DEVBUF);
 	if (first != 0)
 		*miibus = NULL;
 	return (rv);
 }
 
 /*
  * The selected media changed: reset the cached status, then walk every
  * PHY on the bus.  The PHY whose instance matches the current media
  * entry is sent MII_MEDIACHG; any other PHY is isolated via BMCR_ISO,
  * unless it is flagged MIIF_NOISOLATE, in which case a message is
  * printed instead.  Returns the first non-zero value returned by a PHY
  * service routine, or 0.
  */
 int
 mii_mediachg(struct mii_data *mii)
 {
 	struct ifmedia_entry *cur;
 	struct mii_softc *phy;
 	int bmcr, error;
 
 	cur = mii->mii_media.ifm_cur;
 	mii->mii_media_status = 0;
 	mii->mii_media_active = IFM_NONE;
 
 	LIST_FOREACH(phy, &mii->mii_phys, mii_list) {
 		if (IFM_INST(cur->ifm_media) == phy->mii_inst) {
 			/* This is the PHY the media entry refers to. */
 			error = (*phy->mii_service)(phy, mii, MII_MEDIACHG);
 			if (error != 0)
 				return (error);
 		} else if ((phy->mii_flags & MIIF_NOISOLATE) != 0) {
 			/* A different instance that must not be isolated. */
 			device_printf(phy->mii_dev, "%s: "
 			    "can't handle non-zero PHY instance %d\n",
 			    __func__, phy->mii_inst);
 		} else {
 			/* A different instance: isolate it. */
 			bmcr = PHY_READ(phy, MII_BMCR);
 			PHY_WRITE(phy, MII_BMCR, bmcr | BMCR_ISO);
 		}
 	}
 	return (0);
 }
 
 /*
  * Periodic tick, used during autonegotiation: send MII_TICK to the PHY
  * whose instance matches the currently selected media entry.  The
  * return value of the service routine is ignored.
  */
 void
 mii_tick(struct mii_data *mii)
 {
 	struct ifmedia_entry *cur = mii->mii_media.ifm_cur;
 	struct mii_softc *phy;
 
 	LIST_FOREACH(phy, &mii->mii_phys, mii_list) {
 		/* PHY instances that are not selected are left alone. */
 		if (IFM_INST(cur->ifm_media) == phy->mii_inst)
 			(void)(*phy->mii_service)(phy, mii, MII_TICK);
 	}
 }
 
 /*
  * Poll the media status: reset the cached status words, then send
  * MII_POLLSTAT to the PHY whose instance matches the currently selected
  * media entry.  The return value of the service routine is ignored.
  */
 void
 mii_pollstat(struct mii_data *mii)
 {
 	struct ifmedia_entry *cur;
 	struct mii_softc *phy;
 
 	cur = mii->mii_media.ifm_cur;
 	mii->mii_media_status = 0;
 	mii->mii_media_active = IFM_NONE;
 
 	LIST_FOREACH(phy, &mii->mii_phys, mii_list) {
 		/* Only the selected PHY instance is polled. */
 		if (IFM_INST(cur->ifm_media) == phy->mii_inst)
 			(void)(*phy->mii_service)(phy, mii, MII_POLLSTAT);
 	}
 }
 
 /*
  * Inform the PHYs that the interface is down: mii_phy_down() is called
  * for every PHY on the bus's PHY list, regardless of which instance is
  * currently selected.
  */
 void
 mii_down(struct mii_data *mii)
 {
 	struct mii_softc *child;
 
 	LIST_FOREACH(child, &mii->mii_phys, mii_list)
 		mii_phy_down(child);
 }
Index: projects/binutils-2.17/sys/dev/mii/miidevs
===================================================================
--- projects/binutils-2.17/sys/dev/mii/miidevs	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/miidevs	(revision 215830)
@@ -1,259 +1,261 @@
 $FreeBSD$
 /*$NetBSD: miidevs,v 1.6 1999/05/14 11:37:30 drochner Exp $*/
 
 /*-
  * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * List of known MII OUIs.
  * For a complete list see http://standards.ieee.org/regauth/oui/
  *
  * XXX Vendors obviously do not agree on how OUIs (18 bit) are mapped
  * to the 16 bits available in the id registers. The MII_OUI() macro
  * in "mii.h" reflects the most obvious way. If a vendor uses a
  * different mapping, an "xx" prefixed OUI is defined here which is
  * mangled accordingly to compensate.
  */
 
 oui AGERE			0x00a0bc	Agere Systems
 oui ALTIMA			0x0010a9	Altima Communications
 oui AMD				0x00001a	Advanced Micro Devices
 oui ASIX			0x00602e	Asix Semiconductor
 oui ATHEROS			0x001374	Atheros Communications
 oui BROADCOM			0x001018	Broadcom Corporation
 oui BROADCOM2			0x000af7	Broadcom Corporation
 oui CICADA			0x0003F1	Cicada Semiconductor
 oui DAVICOM			0x00606e	Davicom Semiconductor
 oui ICPLUS			0x0090c3	IC Plus Corp.
 oui ICS				0x00a0be	Integrated Circuit Systems
 oui INTEL			0x00aa00	Intel
 oui JATO			0x00e083	Jato Technologies
 oui JMICRON			0x001b8c	JMicron Technologies
 oui LEVEL1			0x00207b	Level 1
 oui NATSEMI			0x080017	National Semiconductor
 oui QUALSEMI			0x006051	Quality Semiconductor
 oui REALTEK			0x000020	RealTek Semiconductor
 oui SEEQ			0x00a07d	Seeq
 oui SIS				0x00e006	Silicon Integrated Systems
 oui SMSC			0x0005be	SMSC
 oui TDK				0x00c039	TDK
 oui TI				0x080028	Texas Instruments
 oui VITESSE			0x0001c1	Vitesse Semiconductor
 oui XAQTI			0x00e0ae	XaQti Corp.
 oui MARVELL			0x005043	Marvell Semiconductor
 oui xxMARVELL			0x000ac2	Marvell Semiconductor
 
 /* in the 79c873, AMD uses another OUI (which matches Davicom!) */
 oui xxAMD			0x00606e	Advanced Micro Devices
 
 /* Intel 82553 A/B steppings */
 oui xxINTEL			0x00f800	Intel
 
 /* some vendors have the bits swapped within bytes
 	(ie, ordered as on the wire) */
 oui xxALTIMA			0x000895	Altima Communications
 oui xxBROADCOM			0x000818	Broadcom Corporation
 oui xxBROADCOM_ALT1		0x0050ef	Broadcom Corporation
 oui xxBROADCOM_ALT2		0x00d897	Broadcom Corporation
 oui xxICS			0x00057d	Integrated Circuit Systems
 oui xxSEEQ			0x0005be	Seeq
 oui xxSIS			0x000760	Silicon Integrated Systems
 oui xxTI			0x100014	Texas Instruments
 oui xxXAQTI			0x350700	XaQti Corp.
 
 /* Level 1 is completely different - from right to left.
 	(Two bits get lost in the third OUI byte.) */
 oui xxLEVEL1			0x1e0400	Level 1
 
 /* Don't know what's going on here. */
 oui xxDAVICOM			0x006040	Davicom Semiconductor
 
 /* This is the OUI of the gigE PHY in the RealTek 8169S/8110S/8211B chips */
 oui xxREALTEK			0x000732
 
 /*
  * List of known models.  Grouped by oui.
  */
 
 /* Agere Systems PHYs */
 model AGERE ET1011		0x0001 ET1011 10/100/1000baseT PHY
 model AGERE ET1011C		0x0004 ET1011C 10/100/1000baseT PHY
 
 /* Altima Communications PHYs */
 model xxALTIMA AC101		0x0021 AC101 10/100 media interface
 model xxALTIMA AC101L		0x0012 AC101L 10/100 media interface
 model xxALTIMA ACXXX		0x0001 ACXXX 10/100 media interface
 
 /* Advanced Micro Devices PHYs */
 model AMD 79c973phy		0x0036 Am79c973 internal PHY
 model AMD 79c978		0x0039 Am79c978 HomePNA PHY
 model xxAMD 79C873		0x0000 Am79C873/DM9101 10/100 media interface
 
 /* Asix semiconductor PHYs. */
 model ASIX AX88X9X		0x0031 Ax88x9x internal PHY
 
 /* Atheros Communications/Attansic PHYs. */
 model ATHEROS F1		0x0001 Atheros F1 10/100/1000 PHY
 model ATHEROS F2		0x0002 Atheros F2 10/100 PHY
 model ATHEROS F1_7		0x0007 Atheros F1 10/100/1000 PHY
 
 /* Broadcom Corp. PHYs. */
 model BROADCOM 3C905B		0x0012 3c905B 10/100 internal PHY
 model BROADCOM 3C905C		0x0017 3c905C 10/100 internal PHY
 model BROADCOM BCM5201		0x0021 BCM5201 10/100baseTX PHY
+model BROADCOM BCM5214		0x0028 BCM5214 Quad 10/100 PHY
 model BROADCOM BCM5221		0x001e BCM5221 10/100baseTX PHY
+model BROADCOM BCM5222		0x0032 BCM5222 Dual 10/100 PHY
 model BROADCOM BCM4401		0x0036 BCM4401 10/100baseTX PHY
 model xxBROADCOM BCM5400	0x0004 Broadcom 1000baseTX PHY
 model xxBROADCOM BCM5401	0x0005 BCM5401 10/100/1000baseTX PHY
 model xxBROADCOM BCM5411	0x0007 BCM5411 10/100/1000baseTX PHY
 model xxBROADCOM BCM5754	0x000e BCM5754 10/100/1000baseTX PHY
 model xxBROADCOM BCM5752	0x0010 BCM5752 10/100/1000baseTX PHY
 model xxBROADCOM BCM5701	0x0011 BCM5701 10/100/1000baseTX PHY
 model xxBROADCOM BCM5706	0x0015 BCM5706 10/100/1000baseTX/SX PHY
 model xxBROADCOM BCM5703	0x0016 BCM5703 10/100/1000baseTX PHY
 model xxBROADCOM BCM5704	0x0019 BCM5704 10/100/1000baseTX PHY
 model xxBROADCOM BCM5705	0x001a BCM5705 10/100/1000baseTX PHY
 model xxBROADCOM BCM5750	0x0018 BCM5750 10/100/1000baseTX PHY
 model xxBROADCOM BCM54K2	0x002e BCM54K2 10/100/1000baseTX PHY
 model xxBROADCOM BCM5714	0x0034 BCM5714 10/100/1000baseTX PHY
 model xxBROADCOM BCM5780	0x0035 BCM5780 10/100/1000baseTX PHY
 model xxBROADCOM BCM5708C	0x0036 BCM5708C 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5482S	0x000b BCM5482S Dual-Port 10/100/1000baseX/FX PHY
 model xxBROADCOM_ALT1 BCM5755	0x000c BCM5755 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5787	0x000e BCM5787 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5708S	0x0015 BCM5708S 1000/2500BaseSX PHY
 model xxBROADCOM_ALT1 BCM5709CAX	0x002c BCM5709C(AX) 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5722	0x002d BCM5722 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5784	0x003a BCM5784 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5709C	0x003c BCM5709C 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5761	0x003d BCM5761 10/100/1000baseTX PHY
 model xxBROADCOM_ALT1 BCM5709S	0x003f BCM5709S 1000/2500baseSX PHY
 model xxBROADCOM_ALT2 BCM5717C	0x0020 BCM5717C 10/100/1000baseTX PHY
 model BROADCOM2 BCM5906		0x0004 BCM5906 10/100baseTX PHY
 
 /* Cicada Semiconductor PHYs (now owned by Vitesse?) */
 model CICADA CS8201		0x0001 Cicada CS8201 10/100/1000TX PHY
 model CICADA CS8204		0x0004 Cicada CS8204 10/100/1000TX PHY
 model CICADA VSC8211		0x000b Cicada VSC8211 10/100/1000TX PHY
 model CICADA CS8201A		0x0020 Cicada CS8201 10/100/1000TX PHY
 model CICADA CS8201B		0x0021 Cicada CS8201 10/100/1000TX PHY
 model CICADA CS8244		0x002c Cicada CS8244 10/100/1000TX PHY
 model VITESSE VSC8601		0x0002 Vitesse VSC8601 10/100/1000TX PHY
 
 /* Davicom Semiconductor PHYs */
 model DAVICOM DM9102		0x0004 DM9102 10/100 media interface
 model xxDAVICOM DM9101		0x0000 DM9101 10/100 media interface
 
 /* Integrated Circuit Systems PHYs */
 model xxICS 1889		0x0001 ICS1889 10/100 media interface
 model xxICS 1890		0x0002 ICS1890 10/100 media interface
 model xxICS 1892		0x0003 ICS1892 10/100 media interface
 model xxICS 1893		0x0004 ICS1893 10/100 media interface
 
 /* IC Plus Corp. PHYs */
 model ICPLUS IP101		0x0005 IC Plus 10/100 PHY
 model ICPLUS IP1000A		0x0008 IC Plus 10/100/1000 media interface
 model ICPLUS IP1001		0x0019 IC Plus IP1001 10/100/1000 media interface
 
 /* Intel PHYs */
 model xxINTEL I82553AB		0x0000 i83553 10/100 media interface
 model INTEL I82555		0x0015 i82555 10/100 media interface
 model INTEL I82562EM		0x0032 i82562EM 10/100 media interface
 model INTEL I82562ET		0x0033 i82562ET 10/100 media interface
 model INTEL I82553C		0x0035 i82553 10/100 media interface
 
 /* Jato Technologies PHYs */
 model JATO BASEX		0x0000 Jato 1000baseX media interface
 
 /* JMicron Technologies PHYs */
 model JMICRON JMP211		0x0021 JMP211 10/100/1000 media interface
 model JMICRON JMP202		0x0022 JMP202 10/100 media interface
 
 /* Level 1 PHYs */
 model xxLEVEL1 LXT970		0x0000 LXT970 10/100 media interface
 
 /* National Semiconductor PHYs */
 model NATSEMI DP83840		0x0000 DP83840 10/100 media interface
 model NATSEMI DP83843		0x0001 DP83843 10/100 media interface
 model NATSEMI DP83815		0x0002 DP83815 10/100 media interface
 model NATSEMI DP83847		0x0003 DP83847 10/100 media interface
 model NATSEMI DP83891		0x0005 DP83891 10/100/1000 media interface
 model NATSEMI DP83861		0x0006 DP83861 10/100/1000 media interface
 model NATSEMI DP83865		0x0007 DP83865 10/100/1000 media interface
 
 /* Quality Semiconductor PHYs */
 model QUALSEMI QS6612		0x0000 QS6612 10/100 media interface
 
 /* RealTek Semiconductor PHYs */
 model REALTEK RTL8201L		0x0020 RTL8201L 10/100 media interface
 model xxREALTEK RTL8305SC	0x0005 RTL8305SC 10/100 802.1q switch
 model xxREALTEK RTL8169S	0x0011 RTL8169S/8110S/8211B media interface
 
 /* Seeq PHYs */
 model xxSEEQ 80220		0x0003 Seeq 80220 10/100 media interface
 model xxSEEQ 84220		0x0004 Seeq 84220 10/100 media interface
 
 /* Silicon Integrated Systems PHYs */
 model xxSIS 900			0x0000 SiS 900 10/100 media interface
 
 /* SMSC PHYs */
 model SMSC LAN83C183		0x0004 SMSC LAN83C183 10/100 media interface
 
 /* TDK */
 model TDK 78Q2120		0x0014 TDK 78Q2120 media interface
 
 /* Texas Instruments PHYs */
 model xxTI TLAN10T		0x0001 ThunderLAN 10baseT media interface
 model xxTI 100VGPMI		0x0002 ThunderLAN 100VG-AnyLan media interface
 
 /* XaQti Corp. PHYs. */
 model XAQTI XMACII		0x0000 XaQti Corp. XMAC II gigabit interface
 
 /* Marvell Semiconductor PHYs */
 model MARVELL E1000		0x0000 Marvell 88E1000 Gigabit PHY
 model MARVELL E1011		0x0002 Marvell 88E1011 Gigabit PHY
 model MARVELL E1000_3		0x0003 Marvell 88E1000 Gigabit PHY
 model MARVELL E1000S		0x0004 Marvell 88E1000S Gigabit PHY
 model MARVELL E1000_5		0x0005 Marvell 88E1000 Gigabit PHY
 model MARVELL E1101		0x0006 Marvell 88E1101 Gigabit PHY
 model MARVELL E3082		0x0008 Marvell 88E3082 10/100 Fast Ethernet PHY
 model MARVELL E1112		0x0009 Marvell 88E1112 Gigabit PHY
 model MARVELL E1149		0x000b Marvell 88E1149 Gigabit PHY
 model MARVELL E1111		0x000c Marvell 88E1111 Gigabit PHY
 model MARVELL E1116		0x0021 Marvell 88E1116 Gigabit PHY
 model MARVELL E1116R		0x0024 Marvell 88E1116R Gigabit PHY
 model MARVELL E1118		0x0022 Marvell 88E1118 Gigabit PHY
 model MARVELL E3016		0x0026 Marvell 88E3016 10/100 Fast Ethernet PHY
 model MARVELL PHYG65G		0x0027 Marvell PHYG65G Gigabit PHY
 model xxMARVELL E1000		0x0005 Marvell 88E1000 Gigabit PHY
 model xxMARVELL E1011		0x0002 Marvell 88E1011 Gigabit PHY
 model xxMARVELL E1000_3		0x0003 Marvell 88E1000 Gigabit PHY
 model xxMARVELL E1000_5		0x0005 Marvell 88E1000 Gigabit PHY
 model xxMARVELL E1111		0x000c Marvell 88E1111 Gigabit PHY
Index: projects/binutils-2.17/sys/dev/mii/nsgphy.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/nsgphy.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/nsgphy.c	(revision 215830)
@@ -1,255 +1,257 @@
 /*-
  * Copyright (c) 2001 Wind River Systems
  * Copyright (c) 2001
  *	Bill Paul <wpaul@bsdi.com>.  All rights reserved.
  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Bill Paul.
  * 4. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for the National Semiconductor DP83861, DP83865 and DP83891
  * 10/100/1000 PHYs.
  * Datasheet available at: http://www.national.com/ds/DP/DP83861.pdf
  * and at: http://www.national.com/ds/DP/DP83865.pdf
  *
  * The DP83891 is the older NS GigE PHY which isn't being sold
  * anymore.  The DP83861 is its replacement, which is an 'enhanced'
  * firmware driven component.  The major difference between the
  * two is that the DP83891 can't generate interrupts, while the
  * 83861 can (probably it wasn't originally designed to do this, but
  * it can now thanks to firmware updates).  The DP83861 also allows
  * access to its internal RAM via indirect register access.  The
  * DP83865 is an ultra low power version of the DP83861 and DP83891.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/bus.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 #include "miidevs.h"
 
 #include <dev/mii/nsgphyreg.h>
 
 #include "miibus_if.h"
 
 static int nsgphy_probe(device_t);
 static int nsgphy_attach(device_t);
 
 /*
  * Method table for the nsgphy driver.  Probe and attach are implemented
  * in this file; detach and shutdown use the shared mii_phy_detach and
  * bus_generic_shutdown routines.  The { 0, 0 } entry terminates the
  * table.
  */
 static device_method_t nsgphy_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		nsgphy_probe),
 	DEVMETHOD(device_attach,	nsgphy_attach),
 	DEVMETHOD(device_detach,	mii_phy_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	{ 0, 0 }
 };
 
 /* Device class handle for nsgphy. */
 static devclass_t nsgphy_devclass;
 
 /*
  * Driver description for nsgphy: driver name, method table and softc
  * size (the softc is a struct mii_softc).
  */
 static driver_t nsgphy_driver = {
 	"nsgphy",
 	nsgphy_methods,
 	sizeof(struct mii_softc)
 };
 
 /* Register nsgphy as a driver on the miibus bus. */
 DRIVER_MODULE(nsgphy, miibus, nsgphy_driver, nsgphy_devclass, 0, 0);
 
 static int	nsgphy_service(struct mii_softc *, struct mii_data *,int);
 static void	nsgphy_status(struct mii_softc *);
 
 /*
  * PHY models handled by this driver; passed to mii_phy_dev_probe() by
  * nsgphy_probe().  MII_PHY_END terminates the list.
  */
 static const struct mii_phydesc nsgphys[] = {
 	MII_PHY_DESC(NATSEMI, DP83861),
 	MII_PHY_DESC(NATSEMI, DP83865),
 	MII_PHY_DESC(NATSEMI, DP83891),
 	MII_PHY_END
 };
 
 /*
  * Probe method: delegates to mii_phy_dev_probe() with the nsgphys table
  * and BUS_PROBE_DEFAULT, and returns its result.
  */
 static int
 nsgphy_probe(device_t dev)
 {
 
 	return (mii_phy_dev_probe(dev, nsgphys, BUS_PROBE_DEFAULT));
 }
 
 /*
  * Attach method: fills in the PHY softc from the attach arguments, links
  * it onto the PHY list of the bus, resets the PHY, reads its
  * capabilities, adds the media and finally calls MIIBUS_MEDIAINIT on the
  * parent.  Always returns 0.
  */
 static int
 nsgphy_attach(device_t dev)
 {
 	struct mii_softc *sc;
 	struct mii_attach_args *ma;
 	struct mii_data *mii;
 
 	sc = device_get_softc(dev);
 	ma = device_get_ivars(dev);
 	if (bootverbose)
 		device_printf(dev, "<rev. %d>\n", MII_REV(ma->mii_id2));
 	/* Starts an output line that the printf("\n") below finishes. */
 	device_printf(dev, " ");
 	sc->mii_dev = device_get_parent(dev);
 	mii = ma->mii_data;
 	LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
 
 	sc->mii_flags = miibus_get_flags(dev);
 	/* Take the next free instance number on this bus. */
 	sc->mii_inst = mii->mii_instance++;
 	sc->mii_phy = ma->mii_phyno;
 	sc->mii_service = nsgphy_service;
 	sc->mii_pdata = mii;
 
 	/*
 	 * NOTE(review): the meaning of MIIF_NOMANPAUSE is not visible here;
 	 * presumably it affects manual pause configuration -- confirm in
 	 * miivar.h.
 	 */
+	sc->mii_flags |= MIIF_NOMANPAUSE;
+
 	mii_phy_reset(sc);
 
 	/*
 	 * NB: the PHY has the 10baseT BMSR bits hard-wired to 0,
 	 * even though it supports 10baseT.
 	 */
 	sc->mii_capabilities = (PHY_READ(sc, MII_BMSR) |
 	    (BMSR_10TFDX | BMSR_10THDX)) & ma->mii_capmask;
 	/* The extended status register is read only when BMSR_EXTSTAT is set. */
 	if (sc->mii_capabilities & BMSR_EXTSTAT)
 		sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR);
 
 	mii_phy_add_media(sc);
 	printf("\n");
 
 	MIIBUS_MEDIAINIT(sc->mii_dev);
 	return (0);
 }
 
 /*
  * PHY service routine: carry out the requested MII command, then refresh
  * the cached media status and report any change.  Always returns 0.
  */
 static int
 nsgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
 {
 
 	if (cmd == MII_MEDIACHG) {
 		/* Media is only reprogrammed while the interface is up. */
 		if ((mii->mii_ifp->if_flags & IFF_UP) != 0)
 			mii_phy_setmedia(sc);
 	} else if (cmd == MII_TICK) {
 		/* The tick helper may ask us to return without an update. */
 		if (mii_phy_tick(sc) == EJUSTRETURN)
 			return (0);
 	}
 	/* MII_POLLSTAT (and anything else) needs no command-specific work. */
 
 	/* Re-read the media status from the PHY. */
 	nsgphy_status(sc);
 
 	/* Notify the upper layer if the status changed. */
 	mii_phy_update(sc, cmd);
 	return (0);
 }
 
 static void
 nsgphy_status(struct mii_softc *sc)
 {
 	struct mii_data *mii = sc->mii_pdata;
 	struct ifmedia_entry *ife = mii->mii_media.ifm_cur;
 	int bmsr, bmcr, physup, gtsr;
 
 	mii->mii_media_status = IFM_AVALID;
 	mii->mii_media_active = IFM_ETHER;
 
 	bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR);
 
 	physup = PHY_READ(sc, NSGPHY_MII_PHYSUP);
 
 	if (physup & PHY_SUP_LINK)
 		mii->mii_media_status |= IFM_ACTIVE;
 
 	bmcr = PHY_READ(sc, MII_BMCR);
 	if (bmcr & BMCR_ISO) {
 		mii->mii_media_active |= IFM_NONE;
 		mii->mii_media_status = 0;
 		return;
 	}
 
 	if (bmcr & BMCR_LOOP)
 		mii->mii_media_active |= IFM_LOOP;
 
 	if (bmcr & BMCR_AUTOEN) {
 		/*
 		 * The media status bits are only valid if autonegotiation
 		 * has completed (or it's disabled).
 		 */
 		if ((bmsr & BMSR_ACOMP) == 0) {
 			/* Erg, still trying, I guess... */
 			mii->mii_media_active |= IFM_NONE;
 			return;
 		}
 
 		switch (physup & (PHY_SUP_SPEED1 | PHY_SUP_SPEED0)) {
 		case PHY_SUP_SPEED1:
 			mii->mii_media_active |= IFM_1000_T;
 			gtsr = PHY_READ(sc, MII_100T2SR);
 			if (gtsr & GTSR_MS_RES)
 				mii->mii_media_active |= IFM_ETH_MASTER;
 			break;
 
 		case PHY_SUP_SPEED0:
 			mii->mii_media_active |= IFM_100_TX;
 			break;
 
 		case 0:
 			mii->mii_media_active |= IFM_10_T;
 			break;
 
 		default:
 			mii->mii_media_active |= IFM_NONE;
 			mii->mii_media_status = 0;
 			return;
 		}
 
 		if (physup & PHY_SUP_DUPLEX)
 			mii->mii_media_active |=
 			    IFM_FDX | mii_phy_flowstatus(sc);
 		else
 			mii->mii_media_active |= IFM_HDX;
 	} else
 		mii->mii_media_active = ife->ifm_media;
 }
Index: projects/binutils-2.17/sys/dev/mii/nsphyter.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/nsphyter.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/nsphyter.c	(revision 215830)
@@ -1,300 +1,298 @@
 /*	$NetBSD: nsphyter.c,v 1.28 2008/01/20 07:58:19 msaitoh Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1997 Manuel Bouyer.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * driver for National Semiconductor's DP83843 `PHYTER' ethernet 10/100 PHY
  * Data Sheet available from www.national.com
  *
  * We also support the DP83815 `MacPHYTER' internal PHY since, for our
  * purposes, they are compatible.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 #include "miidevs.h"
 
 #include <dev/mii/nsphyterreg.h>
 
 #include "miibus_if.h"
 
 /* Device entry points implemented further down in this file. */
 static device_probe_t	nsphyter_probe;
 static device_attach_t	nsphyter_attach;
 
 /*
  * Method table: probe and attach are local; detach and shutdown use the
  * shared mii_phy_detach() and bus_generic_shutdown() routines.
  */
 static device_method_t nsphyter_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		nsphyter_probe),
 	DEVMETHOD(device_attach,	nsphyter_attach),
 	DEVMETHOD(device_detach,	mii_phy_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	{ 0, 0 }
 };
 
 static devclass_t nsphyter_devclass;
 
 /* The softc is a plain struct mii_softc; no driver-private state. */
 static driver_t nsphyter_driver = {
 	"nsphyter",
 	nsphyter_methods,
 	sizeof(struct mii_softc)
 };
 
 /* Register the driver as a child of miibus. */
 DRIVER_MODULE(nsphyter, miibus, nsphyter_driver, nsphyter_devclass, 0, 0);
 
 static int	nsphyter_service(struct mii_softc *, struct mii_data *, int);
 static void	nsphyter_status(struct mii_softc *);
 static void	nsphyter_reset(struct mii_softc *);
 
 /* PHY models matched by nsphyter_probe(). */
 static const struct mii_phydesc nsphys[] = {
 	MII_PHY_DESC(NATSEMI, DP83815),
 	MII_PHY_DESC(NATSEMI, DP83843),
 	MII_PHY_DESC(NATSEMI, DP83847),
 	MII_PHY_END
 };
 
 /*
  * Probe: match the device against the nsphys table at default priority
  * and hand back whatever mii_phy_dev_probe() returns.
  */
 static int
 nsphyter_probe(device_t dev)
 {
 	int result;
 
 	result = mii_phy_dev_probe(dev, nsphys, BUS_PROBE_DEFAULT);
 	return (result);
 }
 
 /*
  * Attach: link the softc into the parent's PHY list, fill in the generic
  * mii_softc fields, reset the PHY and register its media.
  * Always returns 0.
  */
 static int
 nsphyter_attach(device_t dev)
 {
 	struct mii_softc *sc;
 	struct mii_attach_args *ma;
 	struct mii_data *mii;
 
 	sc = device_get_softc(dev);
 	ma = device_get_ivars(dev);
 	sc->mii_dev = device_get_parent(dev);
 	mii = ma->mii_data;
 	LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
 
 	sc->mii_flags = miibus_get_flags(dev);
 	sc->mii_inst = mii->mii_instance++;
 	sc->mii_phy = ma->mii_phyno;
 	sc->mii_service = nsphyter_service;
 	sc->mii_pdata = mii;
 
+	sc->mii_flags |= MIIF_NOMANPAUSE;
+
 #if 1
 
 #define	ADD(m, c)	ifmedia_add(&mii->mii_media, (m), (c), NULL)
 
 	/*
 	 * XXX IFM_LOOP should be handled by mii_phy_add_media() based
 	 * on MIIF_NOLOOP.
 	 */
 	if ((sc->mii_flags & MIIF_NOLOOP) == 0)
 		ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_LOOP,
 		    sc->mii_inst), MII_MEDIA_100_TX);
 
 #endif
 
 	/* sc->mii_pdata must be set before this; the reset routine reads it. */
 	nsphyter_reset(sc);
 
 	/* Capabilities are the BMSR bits allowed by the parent's mask. */
 	sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask;
 	device_printf(dev, " ");
 	mii_phy_add_media(sc);
 	printf("\n");
 
 	MIIBUS_MEDIAINIT(sc->mii_dev);
 	return (0);
 }
 
 /*
  * Service routine installed in sc->mii_service.  Handles the media-change
  * and tick commands, then refreshes the reported status and notifies the
  * MII layer.  Always returns 0.
  */
 static int
 nsphyter_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
 {
 
 	if (cmd == MII_MEDIACHG) {
 		/* Leave the PHY alone while the interface is down. */
 		if ((mii->mii_ifp->if_flags & IFF_UP) != 0)
 			mii_phy_setmedia(sc);
 	} else if (cmd == MII_TICK) {
 		/* The generic tick handler may ask us to stop right here. */
 		if (mii_phy_tick(sc) == EJUSTRETURN)
 			return (0);
 	}
 	/* MII_POLLSTAT and any other command need no work of their own. */
 
 	/* Refresh the status words, then report any change. */
 	nsphyter_status(sc);
 	mii_phy_update(sc, cmd);
 	return (0);
 }
 
 /*
  * Recompute the media status and active-media words in the parent
  * mii_data from the PHY's BMSR, BMCR and MII_NSPHYTER_PHYSTS registers.
  */
 static void
 nsphyter_status(struct mii_softc *sc)
 {
 	struct mii_data *mii = sc->mii_pdata;
 	struct ifmedia_entry *ife = mii->mii_media.ifm_cur;
 	int bmsr, bmcr, physts;
 
 	/* Start from "status valid, Ethernet" and add bits as they are found. */
 	mii->mii_media_status = IFM_AVALID;
 	mii->mii_media_active = IFM_ETHER;
 
 	/* BMSR is read twice and OR'ed; presumably to pick up latched bits. */
 	bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR);
 	physts = PHY_READ(sc, MII_NSPHYTER_PHYSTS);
 
 	/* Link state is taken from PHYSTS, not from BMSR. */
 	if ((physts & PHYSTS_LINK) != 0)
 		mii->mii_media_status |= IFM_ACTIVE;
 
 	bmcr = PHY_READ(sc, MII_BMCR);
 	if ((bmcr & BMCR_ISO) != 0) {
 		/* PHY is isolated: report no media and an invalid status. */
 		mii->mii_media_active |= IFM_NONE;
 		mii->mii_media_status = 0;
 		return;
 	}
 
 	if ((bmcr & BMCR_LOOP) != 0)
 		mii->mii_media_active |= IFM_LOOP;
 
 	if ((bmcr & BMCR_AUTOEN) != 0) {
 		/*
 		 * The media status bits are only valid if autonegotiation
 		 * has completed (or it's disabled).
 		 */
 		if ((bmsr & BMSR_ACOMP) == 0) {
 			/* Erg, still trying, I guess... */
 			mii->mii_media_active |= IFM_NONE;
 			return;
 		}
 
 		/* A single PHYSTS bit distinguishes 10baseT from 100baseTX. */
 		if ((physts & PHYSTS_SPEED10) != 0)
 			mii->mii_media_active |= IFM_10_T;
 		else
 			mii->mii_media_active |= IFM_100_TX;
 		if ((physts & PHYSTS_DUPLEX) != 0)
-#ifdef notyet
 			mii->mii_media_active |=
 			    IFM_FDX | mii_phy_flowstatus(sc);
-#else
-			mii->mii_media_active |= IFM_FDX;
-#endif
 		else
 			mii->mii_media_active |= IFM_HDX;
 	} else
 		/* Autonegotiation disabled: report the selected media word. */
 		mii->mii_media_active = ife->ifm_media;
 }
 
 /*
  * Software-reset the PHY through BMCR, wait for the reset to finish and,
  * unless MIIF_NOISOLATE is set, leave the PHY isolated when it is not the
  * currently selected instance.
  */
 static void
 nsphyter_reset(struct mii_softc *sc)
 {
 	struct ifmedia_entry *ife = sc->mii_pdata->mii_media.ifm_cur;
 	int reg, i;
 
 	/* Request isolation together with the reset unless it is forbidden. */
 	if ((sc->mii_flags & MIIF_NOISOLATE) != 0)
 		reg = BMCR_RESET;
 	else
 		reg = BMCR_RESET | BMCR_ISO;
 	PHY_WRITE(sc, MII_BMCR, reg);
 
 	/*
 	 * It is best to allow a little time for the reset to settle
 	 * in before we start polling the BMCR again.  Notably, the
 	 * DP8384{3,7} manuals state that there should be a 500us delay
 	 * between asserting software reset and attempting MII serial
 	 * operations.  Be conservative.  Also, a DP83815 can get into
 	 * a bad state on cable removal and reinsertion if we do not
 	 * delay here.
 	 */
 	DELAY(1000);
 
 	/*
 	 * Wait another 2s for it to complete.
 	 * This is only a little overkill as under normal circumstances
 	 * the PHY can take up to 1s to complete reset.
 	 * This is also a bit odd because after a reset, the BMCR will
 	 * clear the reset bit and simply report 0 even though the reset
 	 * is not yet complete.
 	 */
 	for (i = 0; i < 1000; i++) {
 		reg = PHY_READ(sc, MII_BMCR);
 		/* Done only when BMCR is non-zero and the reset bit is clear. */
 		if (reg != 0 && (reg & BMCR_RESET) == 0)
 			break;
 		DELAY(2000);
 	}
 
 	if ((sc->mii_flags & MIIF_NOISOLATE) == 0) {
 		/*
 		 * Isolate this PHY when it is not the selected instance; with
 		 * no media selected yet, only instance 0 is left unisolated.
 		 */
 		if ((ife == NULL && sc->mii_inst != 0) ||
 		    (ife != NULL && IFM_INST(ife->ifm_media) != sc->mii_inst))
 			PHY_WRITE(sc, MII_BMCR, reg | BMCR_ISO);
 	}
 }
Index: projects/binutils-2.17/sys/dev/mii/ukphy.c
===================================================================
--- projects/binutils-2.17/sys/dev/mii/ukphy.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/mii/ukphy.c	(revision 215830)
@@ -1,184 +1,186 @@
 /*	$NetBSD: ukphy.c,v 1.2 1999/04/23 04:24:32 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center, and by Frank van der Linden.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1997 Manuel Bouyer.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * driver for generic unknown PHYs
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
 #include "miibus_if.h"
 
 /* Device entry points implemented further down in this file. */
 static int ukphy_probe(device_t);
 static int ukphy_attach(device_t);
 
 /*
  * Method table: probe and attach are local; detach and shutdown use the
  * shared mii_phy_detach() and bus_generic_shutdown() routines.
  */
 static device_method_t ukphy_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		ukphy_probe),
 	DEVMETHOD(device_attach,	ukphy_attach),
 	DEVMETHOD(device_detach,	mii_phy_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	{ 0, 0 }
 };
 
 static devclass_t ukphy_devclass;
 
 /* The softc is a plain struct mii_softc; no driver-private state. */
 static driver_t ukphy_driver = {
 	"ukphy",
 	ukphy_methods,
 	sizeof(struct mii_softc)
 };
 
 /* Register the driver as a child of miibus. */
 DRIVER_MODULE(ukphy, miibus, ukphy_driver, ukphy_devclass, 0, 0);
 
 static int	ukphy_service(struct mii_softc *, struct mii_data *, int);
 
 /*
  * Probe: the generic driver accepts every PHY, but only at the generic
  * (low) priority so a more specific driver can claim the device instead.
  */
 static int
 ukphy_probe(device_t dev)
 {
 	static const char ukphy_desc[] = "Generic IEEE 802.3u media interface";
 
 	/* Something is known to be present, so describe it and match. */
 	device_set_desc(dev, ukphy_desc);
 	return (BUS_PROBE_GENERIC);
 }
 
 /*
  * Attach: link the softc into the parent's PHY list, fill in the generic
  * mii_softc fields, reset the PHY, register its media and program the
  * current selection.  Always returns 0.
  */
 static int
 ukphy_attach(device_t dev)
 {
 	struct mii_softc *sc;
 	struct mii_attach_args *ma;
 	struct mii_data *mii;
 
 	sc = device_get_softc(dev);
 	ma = device_get_ivars(dev);
 	sc->mii_dev = device_get_parent(dev);
 	mii = ma->mii_data;
 	LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
 
 	/* With verbose boot, print the IDs of the otherwise unknown PHY. */
 	if (bootverbose)
 		device_printf(dev, "OUI 0x%06x, model 0x%04x, rev. %d\n",
 		    MII_OUI(ma->mii_id1, ma->mii_id2),
 		    MII_MODEL(ma->mii_id2), MII_REV(ma->mii_id2));
 
 	sc->mii_flags = miibus_get_flags(dev);
 	sc->mii_inst = mii->mii_instance++;
 	sc->mii_phy = ma->mii_phyno;
 	sc->mii_service = ukphy_service;
 	sc->mii_pdata = mii;
 
+	sc->mii_flags |= MIIF_NOMANPAUSE;
+
 	mii_phy_reset(sc);
 
 	/* Capabilities are the BMSR bits allowed by the parent's mask. */
 	sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask;
 	/* The extended status register is read only when BMSR advertises it. */
 	if (sc->mii_capabilities & BMSR_EXTSTAT)
 		sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR);
 	device_printf(dev, " ");
 	mii_phy_add_media(sc);
 	printf("\n");
 
 	MIIBUS_MEDIAINIT(sc->mii_dev);
 	mii_phy_setmedia(sc);
 
 	return (0);
 }
 
 /*
  * Service routine installed in sc->mii_service.  Handles the media-change
  * and tick commands, then refreshes the reported status and notifies the
  * MII layer.  Always returns 0.
  */
 static int
 ukphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
 {
 
 	if (cmd == MII_MEDIACHG) {
 		/* Leave the PHY alone while the interface is down. */
 		if ((mii->mii_ifp->if_flags & IFF_UP) != 0)
 			mii_phy_setmedia(sc);
 	} else if (cmd == MII_TICK) {
 		/* The generic tick handler may ask us to stop right here. */
 		if (mii_phy_tick(sc) == EJUSTRETURN)
 			return (0);
 	}
 	/* MII_POLLSTAT and any other command need no work of their own. */
 
 	/* Refresh the status words, then report any change. */
 	ukphy_status(sc);
 	mii_phy_update(sc, cmd);
 	return (0);
 }
Index: projects/binutils-2.17/sys/dev/pci/pci.c
===================================================================
--- projects/binutils-2.17/sys/dev/pci/pci.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/pci/pci.c	(revision 215830)
@@ -1,4163 +1,4190 @@
 /*-
  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2000, BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bus.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/linker.h>
 #include <sys/fcntl.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/endian.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 
 #include <sys/bus.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <machine/resource.h>
 #include <machine/stdarg.h>
 
 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
 #include <machine/intr_machdep.h>
 #endif
 
 #include <sys/pciio.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pci_private.h>
 
 #include <dev/usb/controller/ehcireg.h>
 #include <dev/usb/controller/ohcireg.h>
 #include <dev/usb/controller/uhcireg.h>
 
 #include "pcib_if.h"
 #include "pci_if.h"
 
 /*
  * Forward declarations for the file-local helpers, so that definitions
  * below may appear in any order.
  */
 
 /* BAR / expansion-ROM register decoding helpers. */
 static pci_addr_t	pci_mapbase(uint64_t mapreg);
 static const char	*pci_maptype(uint64_t mapreg);
 static int		pci_mapsize(uint64_t testval);
 static int		pci_maprange(uint64_t mapreg);
 static pci_addr_t	pci_rombase(uint64_t mapreg);
 static int		pci_romsize(uint64_t testval);
 static void		pci_fixancient(pcicfgregs *cfg);
 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
 
 static int		pci_porten(device_t dev);
 static int		pci_memen(device_t dev);
 static void		pci_assign_interrupt(device_t bus, device_t dev,
 			    int force_route);
 static int		pci_add_map(device_t bus, device_t dev, int reg,
 			    struct resource_list *rl, int force, int prefetch);
 static int		pci_probe(device_t dev);
 static int		pci_attach(device_t dev);
 static void		pci_load_vendor_data(void);
 static int		pci_describe_parse_line(char **ptr, int *vendor,
 			    int *device, char **desc);
 static char		*pci_describe_device(device_t dev);
 static int		pci_modevent(module_t mod, int what, void *arg);
 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
 			    pcicfgregs *cfg);
 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
 			    int reg, uint32_t *data);
 /* The VPD write helper is compiled out. */
 #if 0
 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
 			    int reg, uint32_t data);
 #endif
 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
 /* MSI / MSI-X helpers. */
 static void		pci_disable_msi(device_t dev);
 static void		pci_enable_msi(device_t dev, uint64_t address,
 			    uint16_t data);
 static void		pci_enable_msix(device_t dev, u_int index,
 			    uint64_t address, uint32_t data);
 static void		pci_mask_msix(device_t dev, u_int index);
 static void		pci_unmask_msix(device_t dev, u_int index);
 static int		pci_msi_blacklisted(void);
 static void		pci_resume_msi(device_t dev);
 static void		pci_resume_msix(device_t dev);
 static int		pci_remap_intr_method(device_t bus, device_t dev,
 			    u_int irq);
 
 /*
  * Method table for the pci bus driver, grouped into the device, bus and
  * PCI interfaces.  Entries named bus_generic_* are shared routines rather
  * than ones defined by this driver.
  */
 static device_method_t pci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		pci_probe),
 	DEVMETHOD(device_attach,	pci_attach),
 	DEVMETHOD(device_detach,	bus_generic_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	pci_suspend),
 	DEVMETHOD(device_resume,	pci_resume),
 
 	/* Bus interface */
 	DEVMETHOD(bus_print_child,	pci_print_child),
 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
 	DEVMETHOD(bus_driver_added,	pci_driver_added),
 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
 
 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
 
 	/* PCI interface */
 	DEVMETHOD(pci_read_config,	pci_read_config_method),
 	DEVMETHOD(pci_write_config,	pci_write_config_method),
 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
 
 	{ 0, 0 }
 };
 
 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
 
 /* Register the pci driver under pcib, with pci_modevent as event handler. */
 static devclass_t pci_devclass;
 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
 MODULE_VERSION(pci, 1);
 
 /* Vendor description data; presumably filled by pci_load_vendor_data(). */
 static char	*pci_vendordata;
 static size_t	pci_vendordata_size;
 
 
 /*
  * One entry of the pci_quirks table: a device ID, the kind of workaround
  * it needs (one of the PCI_QUIRK_* values below) and two arguments.
  */
 struct pci_quirk {
 	uint32_t devid;	/* Vendor/device of the card */
 	int	type;
 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
 #define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
 	/* For the PCI_QUIRK_MAP_REG entries in pci_quirks, arg1 is the offset. */
 	int	arg1;
 	int	arg2;
 };
 
 /*
  * Table of known device quirks, terminated by an all-zero entry.
  * Each row is { devid, type, arg1, arg2 }.
  */
 struct pci_quirk pci_quirks[] = {
 	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
 
 	/*
 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
 	 */
 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 
 	/*
 	 * MSI doesn't work on earlier Intel chipsets including
 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
 	 */
 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 
 	/*
 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
 	 * bridge.
 	 */
 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 
 	/*
 	 * Some virtualization environments emulate an older chipset
 	 * but support MSI just fine.  QEMU uses the Intel 82440.
 	 */
 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
 
 	/* End-of-table marker. */
 	{ 0 }
 };
 
 /* map register information */
 #define	PCI_MAPMEM	0x01	/* memory map */
 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
 #define	PCI_MAPPORT	0x04	/* port map */
 
 /* Queue of device entries; pci_read_device() appends each one it creates. */
 struct devlist pci_devq;
 /* Both counters are incremented by pci_read_device() for each new entry. */
 uint32_t pci_generation;
 uint32_t pci_numdevs = 0;
 /* NOTE(review): presumably set when a PCIe / PCI-X device is seen; confirm. */
 static int pcie_chipset, pcix_chipset;
 
 /*
  * sysctl vars
  *
  * Each knob below is a variable with its default, a loader tunable of the
  * same name, and a hw.pci sysctl carrying the user-visible description.
  */
 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
 
 /* hw.pci.enable_io_modes (read-write, default 1). */
 static int pci_enable_io_modes = 1;
 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
     &pci_enable_io_modes, 1,
     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
 enable these bits correctly.  We'd like to do this all the time, but there\n\
 are some peripherals that this causes problems with.");
 
 /* hw.pci.do_power_nodriver (read-write, default 0; levels 0-3). */
 static int pci_do_power_nodriver = 0;
 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
     &pci_do_power_nodriver, 0,
   "Place a function into D3 state when no driver attaches to it.  0 means\n\
 disable.  1 means conservatively place devices into D3 state.  2 means\n\
 aggressively place devices into D3 state.  3 means put absolutely everything\n\
 in D3 state.");
 
 /* hw.pci.do_power_resume (read-write, default 1); not static. */
 int pci_do_power_resume = 1;
 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
     &pci_do_power_resume, 1,
   "Transition from D3 -> D0 on resume.");
 
 /* hw.pci.do_power_suspend (read-write, default 1); not static. */
 int pci_do_power_suspend = 1;
 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
     &pci_do_power_suspend, 1,
   "Transition from D0 -> D3 on suspend.");
 
 /* hw.pci.enable_msi (read-write, default 1). */
 static int pci_do_msi = 1;
 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
     "Enable support for MSI interrupts");
 
 /* hw.pci.enable_msix (read-write, default 1). */
 static int pci_do_msix = 1;
 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
     "Enable support for MSI-X interrupts");
 
 /* hw.pci.honor_msi_blacklist (read-only sysctl, default 1). */
 static int pci_honor_msi_blacklist = 1;
 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
 
 /*
  * hw.pci.usb_early_takeover (read-only sysctl): defaults to 1 on i386 and
  * amd64 and to 0 on every other architecture.
  */
 #if defined(__i386__) || defined(__amd64__)
 static int pci_usb_takeover = 1;
 #else
 static int pci_usb_takeover = 0;
 #endif
 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
 Disable this if you depend on BIOS emulation of USB devices, that is\n\
 you use USB devices (like keyboard or mouse) but do not load USB drivers");
 
 /*
  * Look up a device_t by bus/slot/function.  This is a convenience wrapper
  * around pci_find_dbsf() that always searches domain 0.
  */
 
 device_t
 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
 {
 	const uint32_t domain = 0;
 
 	return (pci_find_dbsf(domain, bus, slot, func));
 }
 
 /*
  * Look up a device_t by domain/bus/slot/function.  Walks pci_devq and
  * returns the first entry whose address matches, or NULL if none does.
  */
 
 device_t
 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
 {
 	struct pci_devinfo *entry;
 
 	STAILQ_FOREACH(entry, &pci_devq, pci_links) {
 		/* Skip anything that differs in any address component. */
 		if (entry->cfg.domain != domain || entry->cfg.bus != bus)
 			continue;
 		if (entry->cfg.slot != slot || entry->cfg.func != func)
 			continue;
 		return (entry->cfg.dev);
 	}
 
 	return (NULL);
 }
 
 /*
  * Look up a device_t by vendor/device ID.  Walks pci_devq and returns the
  * first entry with matching IDs, or NULL if none matches.
  */
 
 device_t
 pci_find_device(uint16_t vendor, uint16_t device)
 {
 	struct pci_devinfo *entry;
 
 	STAILQ_FOREACH(entry, &pci_devq, pci_links) {
 		if (entry->cfg.vendor != vendor)
 			continue;
 		if (entry->cfg.device != device)
 			continue;
 		return (entry->cfg.dev);
 	}
 
 	return (NULL);
 }
 
 /*
  * printf() wrapper that prefixes the message with the device's
  * "pciD:B:S:F: " address.  Returns the sum of the printf() and vprintf()
  * return values.
  */
 static int
 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
 {
 	va_list args;
 	int prefix_len, body_len;
 
 	prefix_len = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus,
 	    cfg->slot, cfg->func);
 
 	va_start(args, fmt);
 	body_len = vprintf(fmt, args);
 	va_end(args);
 
 	return (prefix_len + body_len);
 }
 
 /*
  * Return the base address held in a map register: the value masked with
  * the memory base mask for a memory BAR, otherwise with the I/O base mask.
  */
 
 static pci_addr_t
 pci_mapbase(uint64_t mapreg)
 {
 
 	return (PCI_BAR_MEM(mapreg) ? (mapreg & PCIM_BAR_MEM_BASE) :
 	    (mapreg & PCIM_BAR_IO_BASE));
 }
 
 /*
  * Return a human-readable name for the kind of mapping described by a
  * map register: I/O port, prefetchable memory or plain memory.
  */
 
 static const char *
 pci_maptype(uint64_t mapreg)
 {
 	const char *kind;
 
 	if (PCI_BAR_IO(mapreg))
 		kind = "I/O Port";
 	else if ((mapreg & PCIM_BAR_MEM_PREFETCH) != 0)
 		kind = "Prefetchable Memory";
 	else
 		kind = "Memory";
 	return (kind);
 }
 
 /*
  * Return log2 of the size decoded by a memory or port map, i.e. the
  * position of the lowest set bit in the base-address part of testval.
  * Returns 0 when no base-address bit is set.
  */
 
 static int
 pci_mapsize(uint64_t testval)
 {
 	uint64_t base;
 	int shift;
 
 	base = pci_mapbase(testval);
 	if (base == 0)
 		return (0);
 
 	/* Count the trailing zero bits. */
 	for (shift = 0; (base & 1) == 0; shift++)
 		base >>= 1;
 	return (shift);
 }
 
 /*
  * Return the base address of the device ROM: the register value masked
  * with PCIM_BIOS_ADDR_MASK.
  */
 
 static pci_addr_t
 pci_rombase(uint64_t mapreg)
 {
 	pci_addr_t base;
 
 	base = mapreg & PCIM_BIOS_ADDR_MASK;
 	return (base);
 }
 
 /*
  * Return log2 of the size decoded for the device ROM, i.e. the position
  * of the lowest set bit in the ROM address part of testval.
  * Returns 0 when no address bit is set.
  */
 
 static int
 pci_romsize(uint64_t testval)
 {
 	uint64_t base;
 	int shift;
 
 	base = pci_rombase(testval);
 	if (base == 0)
 		return (0);
 
 	/* Count the trailing zero bits. */
 	for (shift = 0; (base & 1) == 0; shift++)
 		base >>= 1;
 	return (shift);
 }
 	
 /*
  * Return log2 of the address range a map register can decode: 32 for
  * I/O BARs; 32, 20 or 64 for the corresponding memory BAR types; and 0
  * for any other memory type value.
  */
 
 static int
 pci_maprange(uint64_t mapreg)
 {
 
 	if (PCI_BAR_IO(mapreg))
 		return (32);
 
 	switch (mapreg & PCIM_BAR_MEM_TYPE) {
 	case PCIM_BAR_MEM_32:
 		return (32);
 	case PCIM_BAR_MEM_1MB:
 		return (20);
 	case PCIM_BAR_MEM_64:
 		return (64);
 	default:
 		return (0);
 	}
 }
 
 /*
  * Adjust values from PCI 1.0 devices to match the 2.0 standard: a device
  * that reports the normal header type but whose class says PCI-to-PCI
  * bridge has its header type rewritten to the bridge type (header type 1).
  */
 
 static void
 pci_fixancient(pcicfgregs *cfg)
 {
 
 	if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL &&
 	    cfg->baseclass == PCIC_BRIDGE &&
 	    cfg->subclass == PCIS_BRIDGE_PCI)
 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
 }
 
 /*
  * Extract the config data whose location depends on the header type:
  * the subsystem vendor/device IDs (normal and CardBus headers only) and
  * the number of map registers.  Unknown header types are left untouched.
  */
 
 static void
 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
 {
 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
 	int layout;
 
 	layout = cfg->hdrtype & PCIM_HDRTYPE;
 	if (layout == PCIM_HDRTYPE_NORMAL) {
 		cfg->subvendor = REG(PCIR_SUBVEND_0, 2);
 		cfg->subdevice = REG(PCIR_SUBDEV_0, 2);
 		cfg->nummaps = PCI_MAXMAPS_0;
 	} else if (layout == PCIM_HDRTYPE_BRIDGE) {
 		/* No subsystem IDs are read for bridge headers. */
 		cfg->nummaps = PCI_MAXMAPS_1;
 	} else if (layout == PCIM_HDRTYPE_CARDBUS) {
 		cfg->subvendor = REG(PCIR_SUBVEND_2, 2);
 		cfg->subdevice = REG(PCIR_SUBDEV_2, 2);
 		cfg->nummaps = PCI_MAXMAPS_2;
 	}
 #undef REG
 }
 
 /*
  * read configuration header into pcicfgregs structure
  *
  * Probes domain d, bus b, slot s, function f through the bridge pcib.  If
  * the vendor/device register does not read as all ones, allocates a
  * zeroed structure of `size' bytes, fills its cfg and conf members from
  * config space, appends it to pci_devq and bumps pci_numdevs and
  * pci_generation.  Returns the new entry, or NULL when nothing responds.
  */
 struct pci_devinfo *
 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
 {
 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
 	pcicfgregs *cfg = NULL;
 	struct pci_devinfo *devlist_entry;
 	struct devlist *devlist_head;
 
 	devlist_head = &pci_devq;
 
 	devlist_entry = NULL;
 
 	/* An all-ones ID register is treated as "no device here". */
 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
 		/*
 		 * NOTE(review): malloc(9) with M_WAITOK is documented not to
 		 * return NULL, so the check below looks redundant -- confirm.
 		 */
 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
 		if (devlist_entry == NULL)
 			return (NULL);
 
 		cfg = &devlist_entry->cfg;
 
 		/* Record the address, then cache the common header fields. */
 		cfg->domain		= d;
 		cfg->bus		= b;
 		cfg->slot		= s;
 		cfg->func		= f;
 		cfg->vendor		= REG(PCIR_VENDOR, 2);
 		cfg->device		= REG(PCIR_DEVICE, 2);
 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
 		cfg->statreg		= REG(PCIR_STATUS, 2);
 		cfg->baseclass		= REG(PCIR_CLASS, 1);
 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
 		cfg->progif		= REG(PCIR_PROGIF, 1);
 		cfg->revid		= REG(PCIR_REVID, 1);
 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
 		cfg->intpin		= REG(PCIR_INTPIN, 1);
 		cfg->intline		= REG(PCIR_INTLINE, 1);
 
 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
 
 		/* Split the multi-function flag out of the header type byte. */
 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
 		cfg->hdrtype		&= ~PCIM_MFDEV;
 
 		/* Fix up old devices first; the result selects the layout. */
 		pci_fixancient(cfg);
 		pci_hdrtypedata(pcib, b, s, f, cfg);
 
 		/* The status register is re-read here, not taken from statreg. */
 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
 			pci_read_extcap(pcib, cfg);
 
 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
 
 		/* Mirror the identifying fields into the conf member. */
 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
 
 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
 		devlist_entry->conf.pc_vendor = cfg->vendor;
 		devlist_entry->conf.pc_device = cfg->device;
 
 		devlist_entry->conf.pc_class = cfg->baseclass;
 		devlist_entry->conf.pc_subclass = cfg->subclass;
 		devlist_entry->conf.pc_progif = cfg->progif;
 		devlist_entry->conf.pc_revid = cfg->revid;
 
 		pci_numdevs++;
 		pci_generation++;
 	}
 	return (devlist_entry);
 #undef REG
 }
 
 /*
  * Walk the capability list of the function described by cfg and cache
  * what was found in cfg: power management register offsets, the
  * HyperTransport MSI mapping window (on i386/amd64/powerpc only), the
  * MSI and MSI-X locations and message counts, the VPD register offset
  * and, for bridges, the subsystem IDs from the subvendor capability.
  * PCI-X and PCI-express capabilities additionally set the file-level
  * pcix_chipset / pcie_chipset flags.
  *
  * The starting pointer register depends on the header type; header
  * types other than normal/bridge/cardbus return without reading
  * anything.
  *
  * NOTE: REG and WREG are deliberately left defined when this function
  * ends; functions further down the file use them.
  */
 static void
 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
 {
 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
 	uint64_t addr;
 #endif
 	uint32_t val;
 	int	ptr, nextptr, ptrptr;
 
 	/* Pick the register that holds the head of the capability list. */
 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
 	case PCIM_HDRTYPE_NORMAL:
 	case PCIM_HDRTYPE_BRIDGE:
 		ptrptr = PCIR_CAP_PTR;
 		break;
 	case PCIM_HDRTYPE_CARDBUS:
 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
 		break;
 	default:
 		return;		/* no extended capabilities support */
 	}
 	nextptr = REG(ptrptr, 1);	/* sanity check? */
 
 	/*
 	 * Read capability entries.  A next pointer of 0 ends the list.
 	 */
 	while (nextptr != 0) {
 		/* Sanity check */
 		if (nextptr > 255) {
 			printf("illegal PCI extended capability offset %d\n",
 			    nextptr);
 			return;
 		}
 		/* Find the next entry */
 		ptr = nextptr;
 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
 
 		/* Process this entry */
 		switch (REG(ptr + PCICAP_ID, 1)) {
 		case PCIY_PMG:		/* PCI power management */
 			/* Only the first power management entry is recorded. */
 			if (cfg->pp.pp_cap == 0) {
 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
 				/*
 				 * The data register offset is recorded only
 				 * when the next entry starts far enough past
 				 * this one.
 				 */
 				if ((nextptr - ptr) > PCIR_POWER_DATA)
 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
 			}
 			break;
 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
 		case PCIY_HT:		/* HyperTransport */
 			/* Determine HT-specific capability type. */
 			val = REG(ptr + PCIR_HT_COMMAND, 2);
 			switch (val & PCIM_HTCMD_CAP_MASK) {
 			case PCIM_HTCAP_MSI_MAPPING:
 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
 					/* Sanity check the mapping window. */
 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
 					    4);
 					addr <<= 32;
 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
 					    4);
 					if (addr != MSI_INTEL_ADDR_BASE)
 						device_printf(pcib,
 	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
 						    cfg->domain, cfg->bus,
 						    cfg->slot, cfg->func,
 						    (long long)addr);
 				} else
 					addr = MSI_INTEL_ADDR_BASE;
 
 				/* Remember where the mapping lives and its state. */
 				cfg->ht.ht_msimap = ptr;
 				cfg->ht.ht_msictrl = val;
 				cfg->ht.ht_msiaddr = addr;
 				break;
 			}
 			break;
 #endif
 		case PCIY_MSI:		/* PCI MSI */
 			cfg->msi.msi_location = ptr;
 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
 			/* Message count is 2^(MMC field of the control word). */
 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
 						     PCIM_MSICTRL_MMC_MASK)>>1);
 			break;
 		case PCIY_MSIX:		/* PCI MSI-X */
 			cfg->msix.msix_location = ptr;
 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
 			/* The table size field holds the message count minus 1. */
 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
 			/*
 			 * The table and PBA registers each pack a BAR index
 			 * (BIR bits) together with an offset into that BAR.
 			 */
 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
 			cfg->msix.msix_table_bar = PCIR_BAR(val &
 			    PCIM_MSIX_BIR_MASK);
 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
 			val = REG(ptr + PCIR_MSIX_PBA, 4);
 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
 			    PCIM_MSIX_BIR_MASK);
 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
 			break;
 		case PCIY_VPD:		/* PCI Vital Product Data */
 			cfg->vpd.vpd_reg = ptr;
 			break;
 		case PCIY_SUBVENDOR:
 			/* Should always be true. */
 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
 			    PCIM_HDRTYPE_BRIDGE) {
 				/* Low 16 bits: vendor; high 16 bits: device. */
 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
 				cfg->subvendor = val & 0xffff;
 				cfg->subdevice = val >> 16;
 			}
 			break;
 		case PCIY_PCIX:		/* PCI-X */
 			/*
 			 * Assume we have a PCI-X chipset if we have
 			 * at least one PCI-PCI bridge with a PCI-X
 			 * capability.  Note that some systems with
 			 * PCI-express or HT chipsets might match on
 			 * this check as well.
 			 */
 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
 			    PCIM_HDRTYPE_BRIDGE)
 				pcix_chipset = 1;
 			break;
 		case PCIY_EXPRESS:	/* PCI-express */
 			/*
 			 * Assume we have a PCI-express chipset if we have
 			 * at least one PCI-express device.
 			 */
 			pcie_chipset = 1;
 			break;
 		default:
 			break;
 		}
 	}
 /* REG and WREG use carry through to next functions */
 }
 
 /*
  * PCI Vital Product Data
  */
 
 #define	PCI_VPD_TIMEOUT		1000000
 
 /*
  * Read one 32-bit word of VPD.  `reg' (a multiple of 4) is written to
  * the VPD address register, which is then polled until bit 0x8000 is
  * set; the word is then fetched from the VPD data register into *data.
  *
  * Returns 0 on success, or ENXIO if the flag has not appeared after
  * PCI_VPD_TIMEOUT polls spaced by DELAY(1).
  */
 static int
 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
 {
 	int polls_left = PCI_VPD_TIMEOUT;
 
 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
 
 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
 
 	for (;;) {
 		if ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) ==
 		    0x8000)
 			break;
 		if (--polls_left < 0)
 			return (ENXIO);
 		DELAY(1);	/* limit looping */
 	}
 
 	*data = REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4);
 	return (0);
 }
 
 #if 0
 /*
  * Currently compiled out.
  *
  * Write one 32-bit word of VPD: the value goes to the VPD data
  * register, then `reg' with bit 0x8000 set goes to the VPD address
  * register, which is polled until that bit reads back clear.
  *
  * Returns 0 on success, or ENXIO if the bit is still set after
  * PCI_VPD_TIMEOUT polls spaced by DELAY(1).
  */
 static int
 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
 {
 	int count = PCI_VPD_TIMEOUT;
 
 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
 
 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
 		if (--count < 0)
 			return (ENXIO);
 		DELAY(1);	/* limit looping */
 	}
 
 	return (0);
 }
 #endif
 
 #undef PCI_VPD_TIMEOUT
 
 /*
  * Cursor used by vpd_nextbyte() to hand out VPD one byte at a time
  * while reading it from the device one 32-bit word at a time.
  */
 struct vpd_readstate {
 	device_t	pcib;		/* passed through to pci_read_vpd_reg() */
 	pcicfgregs	*cfg;		/* passed through to pci_read_vpd_reg() */
 	uint32_t	val;		/* current word; low byte is the last one returned */
 	int		bytesinval;	/* bytes of val not yet returned */
 	int		off;		/* VPD offset of the next word to fetch */
 	uint8_t		cksum;		/* running 8-bit sum of every byte returned */
 };
 
 /*
  * Return the next VPD byte in *data and add it to the running
  * checksum.  A new 32-bit word is fetched (and vrs->off advanced by 4)
  * only when the previously fetched word has been fully consumed.
  *
  * Returns 0 on success, or ENXIO if the word could not be read, in
  * which case neither vrs nor *data is modified.
  */
 static int
 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
 {
 	uint32_t word;
 	uint8_t cur;
 
 	if (vrs->bytesinval != 0) {
 		/* Shift the next unread byte down into the low position. */
 		vrs->val >>= 8;
 		vrs->bytesinval--;
 	} else {
 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &word))
 			return (ENXIO);
 		vrs->val = le32toh(word);
 		vrs->off += 4;
 		vrs->bytesinval = 3;
 	}
 
 	cur = vrs->val & 0xff;
 	vrs->cksum += cur;
 	*data = cur;
 	return (0);
 }
 
 /*
  * Read and parse the device's Vital Product Data into cfg->vpd.
  *
  * Bytes are pulled through vpd_nextbyte() and fed to a small state
  * machine:
  *   0  expecting an item header (name and length)
  *   1  copying the identifier string into vpd_ident
  *   2  expecting a VPD-R keyword header (2-byte keyword, 1-byte length)
  *   3  copying a VPD-R keyword value
  *   4  skipping `remain' bytes (no transition into this state is
  *      visible in this function)
  *   5  expecting a VPD-W keyword header
  *   6  copying a VPD-W keyword value
  *  -1  stop: end tag, invalid data or bad checksum
  *  -2  stop: reading from the device failed
  *
  * cksumvalid starts at -1 (unknown) and becomes 1 or 0 when the first
  * byte of the "RV" keyword value has been read.
  *
  * On a bad checksum or an I/O error the read-only array is freed; on
  * an I/O error the identifier and the writable array are freed too.
  * vpd_cached is set on every path so the parse is attempted only once.
  *
  * This function also ends the scope of the REG/WREG macros that were
  * left defined by an earlier function in this file.
  */
 static void
 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
 {
 	struct vpd_readstate vrs;
 	int state;
 	int name;
 	int remain;
 	int i;
 	int alloc, off;		/* alloc/off for RO/W arrays */
 	int cksumvalid;
 	int dflen;
 	uint8_t byte;
 	uint8_t byte2;
 
 	/* init vpd reader */
 	vrs.bytesinval = 0;
 	vrs.off = 0;
 	vrs.pcib = pcib;
 	vrs.cfg = cfg;
 	vrs.cksum = 0;
 
 	state = 0;
 	name = remain = i = 0;	/* shut up stupid gcc */
 	alloc = off = 0;	/* shut up stupid gcc */
 	dflen = 0;		/* shut up stupid gcc */
 	cksumvalid = -1;
 	while (state >= 0) {
 		if (vpd_nextbyte(&vrs, &byte)) {
 			state = -2;
 			break;
 		}
 #if 0
 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
 #endif
 		switch (state) {
 		case 0:		/* item name */
 			if (byte & 0x80) {
 				/*
 				 * Large item: 7-bit name followed by a
 				 * 16-bit little-endian length.
 				 */
 				if (vpd_nextbyte(&vrs, &byte2)) {
 					state = -2;
 					break;
 				}
 				remain = byte2;
 				if (vpd_nextbyte(&vrs, &byte2)) {
 					state = -2;
 					break;
 				}
 				remain |= byte2 << 8;
 				/*
 				 * NOTE(review): state is set to -1 here, but
 				 * the switch on `name' below assigns state
 				 * again for names 0x2, 0x10 and 0x11, so the
 				 * abort looks like it can be overridden --
 				 * confirm whether that is intended.
 				 */
 				if (remain > (0x7f*4 - vrs.off)) {
 					state = -1;
 					printf(
 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
 					    cfg->domain, cfg->bus, cfg->slot,
 					    cfg->func, remain);
 				}
 				name = byte & 0x7f;
 			} else {
 				/* Small item: length in bits 0-2, name in bits 3-6. */
 				remain = byte & 0x7;
 				name = (byte >> 3) & 0xf;
 			}
 			switch (name) {
 			case 0x2:	/* String */
 				cfg->vpd.vpd_ident = malloc(remain + 1,
 				    M_DEVBUF, M_WAITOK);
 				i = 0;
 				state = 1;
 				break;
 			case 0xf:	/* End */
 				state = -1;
 				break;
 			case 0x10:	/* VPD-R */
 				alloc = 8;
 				off = 0;
 				cfg->vpd.vpd_ros = malloc(alloc *
 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
 				    M_WAITOK | M_ZERO);
 				state = 2;
 				break;
 			case 0x11:	/* VPD-W */
 				alloc = 8;
 				off = 0;
 				cfg->vpd.vpd_w = malloc(alloc *
 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
 				    M_WAITOK | M_ZERO);
 				state = 5;
 				break;
 			default:	/* Invalid data, abort */
 				state = -1;
 				break;
 			}
 			break;
 
 		case 1:	/* Identifier String */
 			cfg->vpd.vpd_ident[i++] = byte;
 			remain--;
 			if (remain == 0)  {
 				cfg->vpd.vpd_ident[i] = '\0';
 				state = 0;
 			}
 			break;
 
 		case 2:	/* VPD-R Keyword Header */
 			/* Double the array when every slot is in use. */
 			if (off == alloc) {
 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
 				    M_DEVBUF, M_WAITOK | M_ZERO);
 			}
 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
 			if (vpd_nextbyte(&vrs, &byte2)) {
 				state = -2;
 				break;
 			}
 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
 			if (vpd_nextbyte(&vrs, &byte2)) {
 				state = -2;
 				break;
 			}
 			dflen = byte2;
 			if (dflen == 0 &&
 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
 			    2) == 0) {
 				/*
 				 * if this happens, we can't trust the rest
 				 * of the VPD.
 				 */
 				printf(
 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
 				    cfg->domain, cfg->bus, cfg->slot,
 				    cfg->func, dflen);
 				cksumvalid = 0;
 				state = -1;
 				break;
 			} else if (dflen == 0) {
 				/* Empty value: store just the terminator. */
 				cfg->vpd.vpd_ros[off].value = malloc(1 *
 				    sizeof(*cfg->vpd.vpd_ros[off].value),
 				    M_DEVBUF, M_WAITOK);
 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
 			} else
 				cfg->vpd.vpd_ros[off].value = malloc(
 				    (dflen + 1) *
 				    sizeof(*cfg->vpd.vpd_ros[off].value),
 				    M_DEVBUF, M_WAITOK);
 			/* Keyword (2 bytes) and length (1 byte) consumed. */
 			remain -= 3;
 			i = 0;
 			/* keep in sync w/ state 3's transitions */
 			if (dflen == 0 && remain == 0)
 				state = 0;
 			else if (dflen == 0)
 				state = 2;
 			else
 				state = 3;
 			break;
 
 		case 3:	/* VPD-R Keyword Value */
 			cfg->vpd.vpd_ros[off].value[i++] = byte;
 			/*
 			 * The running sum is checked once, after the first
 			 * value byte of the "RV" keyword has been added; it
 			 * must be zero at that point.
 			 */
 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
 			    "RV", 2) == 0 && cksumvalid == -1) {
 				if (vrs.cksum == 0)
 					cksumvalid = 1;
 				else {
 					if (bootverbose)
 						printf(
 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
 						    cfg->domain, cfg->bus,
 						    cfg->slot, cfg->func,
 						    vrs.cksum);
 					cksumvalid = 0;
 					state = -1;
 					break;
 				}
 			}
 			dflen--;
 			remain--;
 			/* keep in sync w/ state 2's transitions */
 			if (dflen == 0)
 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
 			if (dflen == 0 && remain == 0) {
 				/* Section complete: record count, trim array. */
 				cfg->vpd.vpd_rocnt = off;
 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
 				    off * sizeof(*cfg->vpd.vpd_ros),
 				    M_DEVBUF, M_WAITOK | M_ZERO);
 				state = 0;
 			} else if (dflen == 0)
 				state = 2;
 			break;
 
 		case 4:
 			remain--;
 			if (remain == 0)
 				state = 0;
 			break;
 
 		case 5:	/* VPD-W Keyword Header */
 			/* Double the array when every slot is in use. */
 			if (off == alloc) {
 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
 				    M_DEVBUF, M_WAITOK | M_ZERO);
 			}
 			cfg->vpd.vpd_w[off].keyword[0] = byte;
 			if (vpd_nextbyte(&vrs, &byte2)) {
 				state = -2;
 				break;
 			}
 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
 			if (vpd_nextbyte(&vrs, &byte2)) {
 				state = -2;
 				break;
 			}
 			cfg->vpd.vpd_w[off].len = dflen = byte2;
 			/* VPD offset of the first byte of this field's value. */
 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
 			    sizeof(*cfg->vpd.vpd_w[off].value),
 			    M_DEVBUF, M_WAITOK);
 			/* Keyword (2 bytes) and length (1 byte) consumed. */
 			remain -= 3;
 			i = 0;
 			/* keep in sync w/ state 6's transitions */
 			if (dflen == 0 && remain == 0)
 				state = 0;
 			else if (dflen == 0)
 				state = 5;
 			else
 				state = 6;
 			break;
 
 		case 6:	/* VPD-W Keyword Value */
 			cfg->vpd.vpd_w[off].value[i++] = byte;
 			dflen--;
 			remain--;
 			/* keep in sync w/ state 5's transitions */
 			if (dflen == 0)
 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
 			if (dflen == 0 && remain == 0) {
 				/* Section complete: record count, trim array. */
 				cfg->vpd.vpd_wcnt = off;
 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
 				    off * sizeof(*cfg->vpd.vpd_w),
 				    M_DEVBUF, M_WAITOK | M_ZERO);
 				state = 0;
 			} else if (dflen == 0)
 				state = 5;
 			break;
 
 		default:
 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
 			    state);
 			state = -1;
 			break;
 		}
 	}
 
 	/*
 	 * NOTE(review): both cleanup loops below stop at the first entry
 	 * whose value pointer is NULL.  After an array has been trimmed to
 	 * exactly `off' entries, or when it is completely full, no such
 	 * entry appears to be guaranteed -- confirm the loops cannot run
 	 * past the allocation.  vpd_rocnt / vpd_wcnt are also left as they
 	 * were when the arrays are freed.
 	 */
 	if (cksumvalid == 0 || state < -1) {
 		/* read-only data bad, clean up */
 		if (cfg->vpd.vpd_ros != NULL) {
 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
 			free(cfg->vpd.vpd_ros, M_DEVBUF);
 			cfg->vpd.vpd_ros = NULL;
 		}
 	}
 	if (state < -1) {
 		/* I/O error, clean up */
 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
 		if (cfg->vpd.vpd_ident != NULL) {
 			free(cfg->vpd.vpd_ident, M_DEVBUF);
 			cfg->vpd.vpd_ident = NULL;
 		}
 		if (cfg->vpd.vpd_w != NULL) {
 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
 			free(cfg->vpd.vpd_w, M_DEVBUF);
 			cfg->vpd.vpd_w = NULL;
 		}
 	}
 	/* Mark the parse as done, whether or not it succeeded. */
 	cfg->vpd.vpd_cached = 1;
 #undef REG
 #undef WREG
 }
 
 /*
  * Hand back the VPD identifier string of `child' through *identptr,
  * parsing the VPD first if the device has a VPD capability and it has
  * not been parsed yet.
  *
  * Returns 0 when an identifier is available, ENXIO (with *identptr set
  * to NULL) when there is none.
  */
 int
 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
 {
 	struct pci_devinfo *dinfo;
 	pcicfgregs *cfg;
 
 	dinfo = device_get_ivars(child);
 	cfg = &dinfo->cfg;
 
 	if (cfg->vpd.vpd_reg != 0 && !cfg->vpd.vpd_cached)
 		pci_read_vpd(device_get_parent(dev), cfg);
 
 	*identptr = cfg->vpd.vpd_ident;
 	return (*identptr != NULL ? 0 : ENXIO);
 }
 
 /*
  * Look up the read-only VPD keyword `kw' of `child' and return a
  * pointer to its value through *vptr.  The VPD is parsed first if the
  * device has a VPD capability and it has not been parsed yet.
  *
  * Only as many bytes of `kw' as the stored keyword field holds are
  * compared.  The first matching entry wins.
  *
  * Returns 0 on a match, or ENXIO with *vptr set to NULL when the
  * keyword is not present.
  */
 int
 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
 	const char **vptr)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	pcicfgregs *cfg = &dinfo->cfg;
 	int i;
 
 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
 		pci_read_vpd(device_get_parent(dev), cfg);
 
 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
 			/*
 			 * Return right away.  Letting the loop run to
 			 * completion made a hit indistinguishable from a
 			 * miss, so every lookup failed with ENXIO.
 			 */
 			*vptr = cfg->vpd.vpd_ros[i].value;
 			return (0);
 		}
 	}
 
 	*vptr = NULL;
 	return (ENXIO);
 }
 
 /*
  * Search the capability list of `child' for the capability ID
  * `capability'.
  *
  * On success returns 0 and, if capreg is not NULL, stores the
  * configuration-space offset of the capability in *capreg.  Returns
  * ENXIO when the status register advertises no capability list or the
  * header type has no known capability pointer, and ENOENT when the
  * list was walked without finding the capability.
  */
 int
 pci_find_extcap_method(device_t dev, device_t child, int capability,
     int *capreg)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	pcicfgregs *cfg = &dinfo->cfg;
 	u_int32_t sts;
 	u_int8_t cap;
 	int layout;
 
 	/* The status register says whether there is a list at all. */
 	sts = pci_read_config(child, PCIR_STATUS, 2);
 	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
 		return (ENXIO);
 
 	/* The register holding the list head depends on the header type. */
 	layout = cfg->hdrtype & PCIM_HDRTYPE;
 	if (layout == PCIM_HDRTYPE_NORMAL || layout == PCIM_HDRTYPE_BRIDGE)
 		cap = PCIR_CAP_PTR;
 	else if (layout == PCIM_HDRTYPE_CARDBUS)
 		cap = PCIR_CAP_PTR_2;
 	else {
 		/* XXX: panic? */
 		return (ENXIO);		/* no extended capabilities support */
 	}
 
 	/* Follow the next pointers until the ID matches or the list ends. */
 	for (cap = pci_read_config(child, cap, 1); cap != 0;
 	    cap = pci_read_config(child, cap + PCICAP_NEXTPTR, 1)) {
 		if (pci_read_config(child, cap + PCICAP_ID, 1) != capability)
 			continue;
 		if (capreg != NULL)
 			*capreg = cap;
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Support for MSI-X message interrupts.
  */
 /*
  * Program MSI-X table entry `index' of `dev' with the given message
  * address and data, then let pci_ht_map_msi() see the address.
  * Entries are 16 bytes: address low, address high, data, and a fourth
  * word that is not written here.
  */
 void
 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
 {
 	struct pci_devinfo *dinfo;
 	struct pcicfg_msix *msix;
 	uint32_t entry;
 
 	dinfo = device_get_ivars(dev);
 	msix = &dinfo->cfg.msix;
 
 	KASSERT(msix->msix_table_len > index, ("bogus index"));
 
 	entry = msix->msix_table_offset + index * 16;
 	bus_write_4(msix->msix_table_res, entry + 0, address & 0xffffffff);
 	bus_write_4(msix->msix_table_res, entry + 4, address >> 32);
 	bus_write_4(msix->msix_table_res, entry + 8, data);
 
 	/* Enable MSI -> HT mapping. */
 	pci_ht_map_msi(dev, address);
 }
 
 /*
  * Set the mask bit in the vector control word (byte 12 of the 16-byte
  * entry) of MSI-X table entry `index'.  The write is skipped when the
  * bit is already set.  `index' is checked against the device's full
  * message count rather than the virtual table length.
  */
 void
 pci_mask_msix(device_t dev, u_int index)
 {
 	struct pci_devinfo *dinfo;
 	struct pcicfg_msix *msix;
 	uint32_t ctl_off, ctl;
 
 	dinfo = device_get_ivars(dev);
 	msix = &dinfo->cfg.msix;
 
 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
 
 	ctl_off = msix->msix_table_offset + index * 16 + 12;
 	ctl = bus_read_4(msix->msix_table_res, ctl_off);
 	if ((ctl & PCIM_MSIX_VCTRL_MASK) != 0)
 		return;
 	ctl |= PCIM_MSIX_VCTRL_MASK;
 	bus_write_4(msix->msix_table_res, ctl_off, ctl);
 }
 
 /*
  * Clear the mask bit in the vector control word (byte 12 of the
  * 16-byte entry) of MSI-X table entry `index'.  The write is skipped
  * when the bit is already clear.
  */
 void
 pci_unmask_msix(device_t dev, u_int index)
 {
 	struct pci_devinfo *dinfo;
 	struct pcicfg_msix *msix;
 	uint32_t ctl_off, ctl;
 
 	dinfo = device_get_ivars(dev);
 	msix = &dinfo->cfg.msix;
 
 	KASSERT(msix->msix_table_len > index, ("bogus index"));
 
 	ctl_off = msix->msix_table_offset + index * 16 + 12;
 	ctl = bus_read_4(msix->msix_table_res, ctl_off);
 	if ((ctl & PCIM_MSIX_VCTRL_MASK) == 0)
 		return;
 	ctl &= ~PCIM_MSIX_VCTRL_MASK;
 	bus_write_4(msix->msix_table_res, ctl_off, ctl);
 }
 
 /*
  * Report whether MSI-X message `index' is flagged in the pending bit
  * array.  The array is read as 32-bit words, one bit per message.
  *
  * Returns non-zero (the masked word, not necessarily 1) when the
  * message is pending and 0 otherwise.
  */
 int
 pci_pending_msix(device_t dev, u_int index)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	uint32_t offset, bit;
 
 	KASSERT(msix->msix_table_len > index, ("bogus index"));
 	offset = msix->msix_pba_offset + (index / 32) * 4;
 	/*
 	 * Shift an unsigned one: with a plain int, bit 31 would be a shift
 	 * into the sign bit, which is undefined behaviour.
 	 */
 	bit = (uint32_t)1 << (index % 32);
 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
 }
 
 /*
  * Bring the MSI-X hardware state of `dev' back in line with the
  * software state after a resume.  When messages are allocated, every
  * hardware vector is masked first and then each virtual table entry
  * that has both a vector and at least one handler is reprogrammed and
  * unmasked.  The saved control register is written back in all cases.
  */
 static void
 pci_resume_msix(device_t dev)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	struct msix_table_entry *ent;
 	struct msix_vector *vec;
 	int n;
 
 	if (msix->msix_alloc > 0) {
 		/* Start from a fully masked table. */
 		n = 0;
 		while (n < msix->msix_msgnum) {
 			pci_mask_msix(dev, n);
 			n++;
 		}
 
 		/* Re-arm only the entries somebody is listening on. */
 		for (n = 0; n < msix->msix_table_len; n++) {
 			ent = &msix->msix_table[n];
 			if (ent->mte_vector != 0 && ent->mte_handlers != 0) {
 				/* mte_vector is 1-based. */
 				vec = &msix->msix_vectors[ent->mte_vector - 1];
 				pci_enable_msix(dev, n, vec->mv_address,
 				    vec->mv_data);
 				pci_unmask_msix(dev, n);
 			}
 		}
 	}
 
 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
 	    msix->msix_ctrl, 2);
 }
 
 /*
  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
  * returned in *count.  After this function returns, each message will be
  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
  *
  * Returns 0 when at least one message was allocated.  Fails with
  * EINVAL for a zero *count; ENXIO when rid 0 is already allocated,
  * messages are already allocated, MSI is blacklisted, or the BARs
  * holding the table / PBA are not mapped and active; ENODEV when the
  * device has no MSI-X capability or MSI-X is disabled; or the parent
  * bridge's error when not even the first message could be allocated.
  */
 int
 pci_alloc_msix_method(device_t dev, device_t child, int *count)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	pcicfgregs *cfg = &dinfo->cfg;
 	struct resource_list_entry *rle;
 	int actual, error, i, irq, max;
 
 	/* Don't let count == 0 get us into trouble. */
 	if (*count == 0)
 		return (EINVAL);
 
 	/* If rid 0 is allocated, then fail. */
 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
 	if (rle != NULL && rle->res != NULL)
 		return (ENXIO);
 
 	/* Already have allocated messages? */
 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
 		return (ENXIO);
 
 	/* If MSI is blacklisted for this system, fail. */
 	if (pci_msi_blacklisted())
 		return (ENXIO);
 
 	/* MSI-X capability present? */
 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
 		return (ENODEV);
 
 	/* Make sure the appropriate BARs are mapped. */
 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
 	    cfg->msix.msix_table_bar);
 	if (rle == NULL || rle->res == NULL ||
 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
 		return (ENXIO);
 	cfg->msix.msix_table_res = rle->res;
 	/* The PBA may share the table's BAR; look it up only if it differs. */
 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
 		    cfg->msix.msix_pba_bar);
 		if (rle == NULL || rle->res == NULL ||
 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
 			return (ENXIO);
 	}
 	cfg->msix.msix_pba_res = rle->res;
 
 	if (bootverbose)
 		device_printf(child,
 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
 		    *count, cfg->msix.msix_msgnum);
 	max = min(*count, cfg->msix.msix_msgnum);
 	for (i = 0; i < max; i++) {
 		/* Allocate a message. */
 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
 		if (error) {
 			/*
 			 * A partial allocation is fine, but with nothing
 			 * allocated there is no rid 1 to report, nothing
 			 * to put in the tables and no reason to enable
 			 * MSI-X: report the failure instead.
 			 */
 			if (i == 0)
 				return (error);
 			break;
 		}
 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
 		    irq, 1);
 	}
 	actual = i;
 
 	if (bootverbose) {
 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
 		if (actual == 1)
 			device_printf(child, "using IRQ %lu for MSI-X\n",
 			    rle->start);
 		else {
 			int run;
 
 			/*
 			 * Be fancy and try to print contiguous runs of
 			 * IRQ values as ranges.  'irq' is the previous IRQ.
 			 * 'run' is true if we are in a range.
 			 */
 			device_printf(child, "using IRQs %lu", rle->start);
 			irq = rle->start;
 			run = 0;
 			for (i = 1; i < actual; i++) {
 				rle = resource_list_find(&dinfo->resources,
 				    SYS_RES_IRQ, i + 1);
 
 				/* Still in a run? */
 				if (rle->start == irq + 1) {
 					run = 1;
 					irq++;
 					continue;
 				}
 
 				/* Finish previous range. */
 				if (run) {
 					printf("-%d", irq);
 					run = 0;
 				}
 
 				/* Start new range. */
 				printf(",%lu", rle->start);
 				irq = rle->start;
 			}
 
 			/* Unfinished range? */
 			if (run)
 				printf("-%d", irq);
 			printf(" for MSI-X\n");
 		}
 	}
 
 	/* Mask all vectors. */
 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
 		pci_mask_msix(child, i);
 
 	/* Allocate and initialize vector data and virtual table. */
 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 	for (i = 0; i < actual; i++) {
 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
 		/* Default layout: table slot i uses (1-based) vector i + 1. */
 		cfg->msix.msix_table[i].mte_vector = i + 1;
 	}
 
 	/* Update control register to enable MSI-X. */
 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
 	    cfg->msix.msix_ctrl, 2);
 
 	/* Update counts of alloc'd messages. */
 	cfg->msix.msix_alloc = actual;
 	cfg->msix.msix_table_len = actual;
 	*count = actual;
 	return (0);
 }
 
 /*
  * By default, pci_alloc_msix() will assign the allocated IRQ
  * resources consecutively to the first N messages in the MSI-X table.
  * However, device drivers may want to use different layouts if they
  * either receive fewer messages than they asked for, or they wish to
  * populate the MSI-X table sparsely.  This method allows the driver
  * to specify what layout it wants.  It must be called after a
  * successful pci_alloc_msix() but before any of the associated
  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
  *
  * The 'vectors' array contains 'count' message vectors.  The array
  * maps directly to the MSI-X table in that index 0 in the array
  * specifies the vector for the first message in the MSI-X table, etc.
  * The vector value in each array index can either be 0 to indicate
  * that no vector should be assigned to a message slot, or it can be a
  * number from 1 to N (where N is the count returned from a
  * successful call to pci_alloc_msix()) to indicate which message
  * vector (IRQ) to be used for the corresponding message.
  *
  * On successful return, each message with a non-zero vector will have
  * an associated SYS_RES_IRQ whose rid is equal to the array index +
  * 1.  Additionally, if any of the IRQs allocated via the previous
  * call to pci_alloc_msix() are not used in the mapping, those IRQs
  * will be freed back to the system automatically.
  *
  * For example, suppose a driver has a MSI-X table with 6 messages and
  * asks for 6 messages, but pci_alloc_msix() only returns a count of
  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
  * C.  After the call to pci_alloc_msix(), the device will be setup to
  * have an MSI-X table of ABC--- (where - means no vector assigned).
  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
  * be freed back to the system.  This device will also have valid
  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
  *
  * In any case, the SYS_RES_IRQ rid X will always map to the message
  * at MSI-X table index X - 1 and will only be valid if a vector is
  * assigned to that table entry.
  */
 /*
  * Returns 0 on success; EINVAL when `count' is zero or exceeds the
  * device's message count, when a vector number exceeds the number of
  * allocated vectors, when the used vectors have a hole, or when
  * vector 1 is unused; EBUSY when any currently mapped message has a
  * handler or an allocated resource.
  */
 int
 pci_remap_msix_method(device_t dev, device_t child, int count,
     const u_int *vectors)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	struct resource_list_entry *rle;
 	int i, irq, j, *used;
 
 	/*
 	 * Have to have at least one message in the table but the
 	 * table can't be bigger than the actual MSI-X table in the
 	 * device.
 	 */
 	if (count == 0 || count > msix->msix_msgnum)
 		return (EINVAL);
 
 	/* Sanity check the vectors. */
 	for (i = 0; i < count; i++)
 		if (vectors[i] > msix->msix_alloc)
 			return (EINVAL);
 
 	/*
 	 * Make sure there aren't any holes in the vectors to be used.
 	 * It's a big pain to support it, and it doesn't really make
 	 * sense anyway.  Also, at least one vector must be used.
 	 */
 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
 	    M_ZERO);
 	for (i = 0; i < count; i++)
 		if (vectors[i] != 0)
 			used[vectors[i] - 1] = 1;
 	for (i = 0; i < msix->msix_alloc - 1; i++)
 		if (used[i] == 0 && used[i + 1] == 1) {
 			free(used, M_DEVBUF);
 			return (EINVAL);
 		}
 	if (used[0] != 1) {
 		free(used, M_DEVBUF);
 		return (EINVAL);
 	}
 
 	/*
 	 * Make sure none of the resources are allocated.  `used' has to
 	 * be released on these error paths as well, or it is leaked.
 	 */
 	for (i = 0; i < msix->msix_table_len; i++) {
 		if (msix->msix_table[i].mte_vector == 0)
 			continue;
 		if (msix->msix_table[i].mte_handlers > 0) {
 			free(used, M_DEVBUF);
 			return (EBUSY);
 		}
 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
 		KASSERT(rle != NULL, ("missing resource"));
 		if (rle->res != NULL) {
 			free(used, M_DEVBUF);
 			return (EBUSY);
 		}
 	}
 
 	/* Free the existing resource list entries. */
 	for (i = 0; i < msix->msix_table_len; i++) {
 		if (msix->msix_table[i].mte_vector == 0)
 			continue;
 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
 	}
 
 	/*
 	 * Build the new virtual table keeping track of which vectors are
 	 * used.
 	 */
 	free(msix->msix_table, M_DEVBUF);
 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
 	    M_DEVBUF, M_WAITOK | M_ZERO);
 	for (i = 0; i < count; i++)
 		msix->msix_table[i].mte_vector = vectors[i];
 	msix->msix_table_len = count;
 
 	/*
 	 * Free any unused IRQs and resize the vectors array if necessary.
 	 * used[0] is known to be 1, so the scan below stops at j >= 0.
 	 */
 	j = msix->msix_alloc - 1;
 	if (used[j] == 0) {
 		struct msix_vector *vec;
 
 		while (used[j] == 0) {
 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
 			    msix->msix_vectors[j].mv_irq);
 			j--;
 		}
 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
 		    M_WAITOK);
 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
 		    (j + 1));
 		free(msix->msix_vectors, M_DEVBUF);
 		msix->msix_vectors = vec;
 		msix->msix_alloc = j + 1;
 	}
 	free(used, M_DEVBUF);
 
 	/*
 	 * Map the IRQs onto the rids.  Vector numbers are 1-based while
 	 * msix_vectors[] is 0-based, hence the "- 1"; indexing with the
 	 * raw vector number picks the wrong IRQ and reads past the end of
 	 * the array for the highest vector.
 	 */
 	for (i = 0; i < count; i++) {
 		if (vectors[i] == 0)
 			continue;
 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
 		    irq, 1);
 	}
 
 	if (bootverbose) {
 		device_printf(child, "Remapped MSI-X IRQs as: ");
 		for (i = 0; i < count; i++) {
 			if (i != 0)
 				printf(", ");
 			if (vectors[i] == 0)
 				printf("---");
 			else
 				printf("%d",
 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
 		}
 		printf("\n");
 	}
 
 	return (0);
 }
 
 /*
  * Give back every MSI-X message allocated to `child'.
  *
  * Returns ENODEV when nothing is allocated and EBUSY when any mapped
  * message still has a handler or an allocated resource.  Otherwise
  * MSI-X is disabled in the control register, the resource list
  * entries and the virtual table are discarded, every IRQ is released
  * to the parent bridge, and 0 is returned.
  */
 static int
 pci_release_msix(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	struct resource_list_entry *rle;
 	int n;
 
 	if (msix->msix_alloc == 0)
 		return (ENODEV);
 
 	/* Refuse while any mapped message is still in use. */
 	for (n = 0; n < msix->msix_table_len; n++) {
 		if (msix->msix_table[n].mte_vector != 0) {
 			if (msix->msix_table[n].mte_handlers > 0)
 				return (EBUSY);
 			rle = resource_list_find(&dinfo->resources,
 			    SYS_RES_IRQ, n + 1);
 			KASSERT(rle != NULL, ("missing resource"));
 			if (rle->res != NULL)
 				return (EBUSY);
 		}
 	}
 
 	/* Turn MSI-X off in the device. */
 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
 	    msix->msix_ctrl, 2);
 
 	/* Drop the rids that had a vector, then the virtual table. */
 	for (n = 0; n < msix->msix_table_len; n++) {
 		if (msix->msix_table[n].mte_vector != 0)
 			resource_list_delete(&dinfo->resources, SYS_RES_IRQ,
 			    n + 1);
 	}
 	free(msix->msix_table, M_DEVBUF);
 	msix->msix_table_len = 0;
 
 	/* Hand the IRQs back to the parent bridge. */
 	for (n = 0; n < msix->msix_alloc; n++) {
 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
 		    msix->msix_vectors[n].mv_irq);
 	}
 	free(msix->msix_vectors, M_DEVBUF);
 	msix->msix_alloc = 0;
 
 	return (0);
 }
 
 /*
  * Report how many MSI-X messages `child' supports: the device's
  * message count when it has an MSI-X capability and MSI-X is enabled
  * by the pci_do_msix setting, 0 otherwise.  Assuming the MD code can
  * allocate messages, this is the largest value pci_alloc_msix() can
  * return.
  */
 int
 pci_msix_count_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo;
 	struct pcicfg_msix *msix;
 
 	dinfo = device_get_ivars(child);
 	msix = &dinfo->cfg.msix;
 
 	if (!pci_do_msix || msix->msix_location == 0)
 		return (0);
 	return (msix->msix_msgnum);
 }
 
 /*
  * HyperTransport MSI mapping control
  */
 /*
  * Switch the HyperTransport MSI mapping of `dev' on or off.  Does
  * nothing for devices without an HT MSI mapping capability.
  *
  * A non-zero addr enables the mapping, but only if it is currently
  * disabled and addr shares its upper bits (addr >> 20) with the
  * recorded mapping window.  A zero addr disables the mapping if it is
  * currently enabled.  The command register is written only when the
  * enable bit actually changes.
  */
 void
 pci_ht_map_msi(device_t dev, uint64_t addr)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
 	int enabled;
 
 	if (!ht->ht_msimap)
 		return;
 
 	enabled = (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) != 0;
 
 	if (addr) {
 		if (enabled || ht->ht_msiaddr >> 20 != addr >> 20)
 			return;
 		/* Enable MSI -> HT mapping. */
 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
 	} else {
 		if (!enabled)
 			return;
 		/* Disable MSI -> HT mapping. */
 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
 	}
 
 	pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
 	    ht->ht_msictrl, 2);
 }
 
 /*
  * Return the maximum read request size of `dev' in bytes, decoded
  * from the PCI-express device control register as 2^(field + 7), or 0
  * when the device has no PCI-express capability.
  */
 int
 pci_get_max_read_req(device_t dev)
 {
 	uint16_t ctl;
 	int cap;
 
 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
 		return (0);
 
 	ctl = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
 	ctl = (ctl & PCIM_EXP_CTL_MAX_READ_REQUEST) >> 12;
 	return (1 << (ctl + 7));
 }
 
 /*
  * Set the maximum read request size of `dev'.  `size' is clamped to
  * 128..4096 and rounded down to a power of two before being encoded
  * into the PCI-express device control register.
  *
  * Returns the size actually programmed, or 0 when the device has no
  * PCI-express capability.
  */
 int
 pci_set_max_read_req(device_t dev, int size)
 {
 	uint16_t ctl;
 	int cap, log2;
 
 	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
 		return (0);
 
 	if (size < 128)
 		size = 128;
 	else if (size > 4096)
 		size = 4096;
 
 	/* Round down to a power of two; 2^7 (128) encodes as 0. */
 	log2 = fls(size) - 1;
 	size = 1 << log2;
 
 	ctl = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
 	ctl &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
 	ctl |= (log2 - 7) << 12;
 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, ctl, 2);
 
 	return (size);
 }
 
 /*
  * Support for MSI message signalled interrupts.
  */
 /*
  * Program the MSI address and data registers of 'dev', set the enable
  * bit in the (cached and hardware) MSI control register, and then ask
  * for the HyperTransport MSI mapping to be enabled for 'address'.
  */
 void
 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
 	int data_reg;
 
 	/* Low address dword first, then the high dword if 64-bit capable. */
 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
 	    address & 0xffffffff, 4);
 	if ((msi->msi_ctrl & PCIM_MSICTRL_64BIT) != 0) {
 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
 		    address >> 32, 4);
 		data_reg = PCIR_MSI_DATA_64BIT;
 	} else
 		data_reg = PCIR_MSI_DATA;
 
 	/* The data register offset depends on the address width. */
 	pci_write_config(dev, msi->msi_location + data_reg, data, 2);
 
 	/* Enable MSI in the control register. */
 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
 	    2);
 
 	/* Enable MSI -> HT mapping. */
 	pci_ht_map_msi(dev, address);
 }
 
 /*
  * Turn MSI off for 'dev': first drop the HyperTransport MSI mapping,
  * then clear the enable bit in the cached control word and write it to
  * the device.
  */
 void
 pci_disable_msi(device_t dev)
 {
 	struct pci_devinfo *dinfo;
 	struct pcicfg_msi *msi;
 
 	dinfo = device_get_ivars(dev);
 	msi = &dinfo->cfg.msi;
 
 	/* A zero address requests that the MSI -> HT mapping be disabled. */
 	pci_ht_map_msi(dev, 0);
 
 	/* Clear the enable bit and push the control word out. */
 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
 	    2);
 }
 
 /*
  * Restore MSI registers during resume.  The control register is always
  * rewritten from the cached msi_ctrl; when that cached value has MSI
  * enabled, the cached address and data are written back first.
  */
 static void
 pci_resume_msi(device_t dev)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
 	uint64_t saved_addr;
 	uint16_t saved_data;
 	int data_reg;
 
 	if ((msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) != 0) {
 		saved_addr = msi->msi_addr;
 		saved_data = msi->msi_data;
 
 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
 		    saved_addr & 0xffffffff, 4);
 		if ((msi->msi_ctrl & PCIM_MSICTRL_64BIT) != 0) {
 			pci_write_config(dev,
 			    msi->msi_location + PCIR_MSI_ADDR_HIGH,
 			    saved_addr >> 32, 4);
 			data_reg = PCIR_MSI_DATA_64BIT;
 		} else
 			data_reg = PCIR_MSI_DATA;
 		pci_write_config(dev, msi->msi_location + data_reg,
 		    saved_data, 2);
 	}
 
 	/* The control register is restored regardless of the enable bit. */
 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
 	    2);
 }
 
 /*
  * Re-program the MSI or MSI-X message(s) of 'dev' that are backed by
  * 'irq' with a fresh address/data pair obtained from the parent bridge
  * via PCIB_MAP_MSI().
  *
  * Returns 0 if the IRQ belongs to the device (or if MSI is allocated
  * but has no active handlers), the error from PCIB_MAP_MSI() if the
  * mapping request fails, and ENOENT if the IRQ is not one of the
  * device's allocated messages.
  */
 static int
 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	pcicfgregs *cfg = &dinfo->cfg;
 	struct resource_list_entry *rle;
 	struct msix_table_entry *mte;
 	struct msix_vector *mv;
 	uint64_t addr;
 	uint32_t data;
 	int error, found, i, j;
 
 	/*
 	 * Handle MSI first.  We try to find this IRQ among our list
 	 * of MSI IRQs.  If we find it, we request updated address and
 	 * data registers and apply the results.
 	 */
 	if (cfg->msi.msi_alloc > 0) {
 
 		/* If we don't have any active handlers, nothing to do. */
 		if (cfg->msi.msi_handlers == 0)
 			return (0);
 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
 			    i + 1);
 			/* Skip a missing entry instead of dereferencing NULL. */
 			if (rle == NULL || rle->start != irq)
 				continue;
 			error = PCIB_MAP_MSI(device_get_parent(bus),
 			    dev, irq, &addr, &data);
 			if (error)
 				return (error);
 			pci_disable_msi(dev);
 			dinfo->cfg.msi.msi_addr = addr;
 			dinfo->cfg.msi.msi_data = data;
 			pci_enable_msi(dev, addr, data);
 			return (0);
 		}
 		return (ENOENT);
 	}
 
 	/*
 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
 	 * we request the updated mapping info.  If that works, we go
 	 * through all the slots that use this IRQ and update them.
 	 */
 	if (cfg->msix.msix_alloc > 0) {
 		found = 0;
 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
 			mv = &cfg->msix.msix_vectors[i];
 			if (mv->mv_irq != irq)
 				continue;
 			found = 1;
 			error = PCIB_MAP_MSI(device_get_parent(bus),
 			    dev, irq, &addr, &data);
 			if (error)
 				return (error);
 			mv->mv_address = addr;
 			mv->mv_data = data;
 			for (j = 0; j < cfg->msix.msix_table_len; j++) {
 				mte = &cfg->msix.msix_table[j];
 				/* Table entries refer to vectors 1-based. */
 				if (mte->mte_vector != i + 1)
 					continue;
 				if (mte->mte_handlers == 0)
 					continue;
 				/* Mask the slot while it is reprogrammed. */
 				pci_mask_msix(dev, j);
 				pci_enable_msix(dev, j, addr, data);
 				pci_unmask_msix(dev, j);
 			}
 		}
 
 		/*
 		 * Report success when at least one vector matched; the
 		 * MSI branch above does the same for its match.
 		 */
 		return (found ? 0 : ENOENT);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Returns true (1) if the quirk table lists the specified device with
  * a PCI_QUIRK_DISABLE_MSI entry.  Always returns 0 when the
  * pci_honor_msi_blacklist tunable is off.
  */
 int
 pci_msi_device_blacklisted(device_t dev)
 {
 	struct pci_quirk *quirk;
 
 	if (pci_honor_msi_blacklist == 0)
 		return (0);
 
 	/* The quirk table is terminated by an entry with devid 0. */
 	quirk = &pci_quirks[0];
 	while (quirk->devid != 0) {
 		if (quirk->devid == pci_get_devid(dev) &&
 		    quirk->type == PCI_QUIRK_DISABLE_MSI)
 			return (1);
 		quirk++;
 	}
 	return (0);
 }
 
 /*
  * Returns true (1) if the quirk table marks the specified chipset with
  * PCI_QUIRK_ENABLE_MSI_VM, i.e. as supporting MSI when it is emulated
  * hardware in a virtual machine.
  */
 static int
 pci_msi_vm_chipset(device_t dev)
 {
 	struct pci_quirk *quirk;
 
 	/* The quirk table is terminated by an entry with devid 0. */
 	quirk = &pci_quirks[0];
 	while (quirk->devid != 0) {
 		if (quirk->devid == pci_get_devid(dev) &&
 		    quirk->type == PCI_QUIRK_ENABLE_MSI_VM)
 			return (1);
 		quirk++;
 	}
 	return (0);
 }
 
 /*
  * Determine if MSI is blacklisted globally on this system.  Currently,
  * we just check for blacklisted chipsets as represented by the
  * host-PCI bridge at device 0:0:0.  In the future, it may become
  * necessary to check other system attributes, such as the kenv values
  * that give the motherboard manufacturer and model number.
  */
 static int
 pci_msi_blacklisted(void)
 {
 	device_t host;
 
 	if (pci_honor_msi_blacklist == 0)
 		return (0);
 
 	if (pcie_chipset || pcix_chipset) {
 		/* PCI-express / PCI-X: consult the quirk table for 0:0:0. */
 		host = pci_find_bsf(0, 0, 0);
 		if (host == NULL)
 			return (0);
 		return (pci_msi_device_blacklisted(host));
 	}
 
 	/*
 	 * All other chipsets are blacklisted, unless we are a VM guest
 	 * and the host bridge is listed as MSI-capable under a VM.
 	 */
 	if (vm_guest == VM_GUEST_NO)
 		return (1);
 	host = pci_find_bsf(0, 0, 0);
 	if (host == NULL)
 		return (1);
 	return (pci_msi_vm_chipset(host) == 0);
 }
 
 /*
  * Attempt to allocate *count MSI messages for 'child'.  The request is
  * capped at what the device advertises and at 32, must be a power of
  * two, and is halved on each failed attempt until one message remains.
  * On success the number actually allocated is stored in *count and each
  * message is available to the driver as a SYS_RES_IRQ resource starting
  * at rid 1.
  */
 int
 pci_alloc_msi_method(device_t dev, device_t child, int *count)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	pcicfgregs *cfg = &dinfo->cfg;
 	struct resource_list_entry *legacy;
 	int granted, error, n, irqs[32];
 	uint16_t ctrl;
 
 	/* A request for zero messages is meaningless. */
 	if (*count == 0)
 		return (EINVAL);
 
 	/* The legacy interrupt (rid 0) must not be in use. */
 	legacy = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
 	if (legacy != NULL && legacy->res != NULL)
 		return (ENXIO);
 
 	/* Refuse if MSI or MSI-X messages are already allocated. */
 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
 		return (ENXIO);
 
 	/* Refuse if MSI is blacklisted for this system. */
 	if (pci_msi_blacklisted())
 		return (ENXIO);
 
 	/* The device needs an MSI capability and MSI must be tuned on. */
 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
 		return (ENODEV);
 
 	if (bootverbose)
 		device_printf(child,
 		    "attempting to allocate %d MSI vectors (%d supported)\n",
 		    *count, cfg->msi.msi_msgnum);
 
 	/* Cap the request at what the device supports ... */
 	granted = min(*count, cfg->msi.msi_msgnum);
 
 	/* ... and at 32 messages, the size of irqs[]. */
 	granted = min(granted, 32);
 
 	/* MSI requires power of 2 number of messages. */
 	if (!powerof2(granted))
 		return (EINVAL);
 
 	/* Ask the parent for N messages, halving N after each failure. */
 	while ((error = PCIB_ALLOC_MSI(device_get_parent(dev), child, granted,
 	    cfg->msi.msi_msgnum, irqs)) != 0) {
 		if (granted == 1)
 			return (error);
 		granted >>= 1;
 	}
 
 	/*
 	 * irqs[] now holds the IRQs backing the messages; publish each
 	 * one as a SYS_RES_IRQ resource, rids starting at 1.
 	 */
 	for (n = 0; n < granted; n++)
 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, n + 1,
 		    irqs[n], irqs[n], 1);
 
 	if (bootverbose) {
 		if (granted == 1)
 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
 		else {
 			int in_range;
 
 			/*
 			 * Print contiguous runs of IRQ values as ranges.
 			 * 'in_range' is true while the current IRQ
 			 * extends the run started earlier.
 			 */
 			device_printf(child, "using IRQs %d", irqs[0]);
 			in_range = 0;
 			for (n = 1; n < granted; n++) {
 
 				/* Consecutive value: the run continues. */
 				if (irqs[n] == irqs[n - 1] + 1) {
 					in_range = 1;
 					continue;
 				}
 
 				/* Close the run that just ended. */
 				if (in_range) {
 					printf("-%d", irqs[n - 1]);
 					in_range = 0;
 				}
 
 				/* Begin a new run with this value. */
 				printf(",%d", irqs[n]);
 			}
 
 			/* Close a run that reaches the last IRQ. */
 			if (in_range)
 				printf("-%d", irqs[granted - 1]);
 			printf(" for MSI\n");
 		}
 	}
 
 	/* Store log2(granted) in the control register's MME field. */
 	ctrl = cfg->msi.msi_ctrl;
 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
 	ctrl |= (ffs(granted) - 1) << 4;
 	cfg->msi.msi_ctrl = ctrl;
 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
 
 	/* Record the allocation; no handlers are attached yet. */
 	cfg->msi.msi_alloc = granted;
 	cfg->msi.msi_handlers = 0;
 	*count = granted;
 	return (0);
 }
 
 /*
  * Release the MSI (or MSI-X) messages associated with this device.
  * MSI-X release is attempted first; only when that reports ENODEV is
  * the MSI state examined.  Returns ENODEV when nothing is allocated and
  * EBUSY while any message still has a handler or an allocated resource.
  */
 int
 pci_release_msi_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
 	struct resource_list_entry *entry;
 	int error, n, irqs[32];
 
 	/* Any result other than ENODEV from MSI-X is final. */
 	error = pci_release_msix(dev, child);
 	if (error != ENODEV)
 		return (error);
 
 	/* No MSI messages allocated either. */
 	if (msi->msi_alloc == 0)
 		return (ENODEV);
 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
 
 	/* Every message must be idle before it can be released. */
 	if (msi->msi_handlers > 0)
 		return (EBUSY);
 	n = 0;
 	while (n < msi->msi_alloc) {
 		entry = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
 		    n + 1);
 		KASSERT(entry != NULL, ("missing MSI resource"));
 		if (entry->res != NULL)
 			return (EBUSY);
 		/* Collect the IRQ numbers to hand back to the parent. */
 		irqs[n] = entry->start;
 		n++;
 	}
 
 	/* Clear the message count field in the control register. */
 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
 	    ("%s: MSI still enabled", __func__));
 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
 	    msi->msi_ctrl, 2);
 
 	/* Return the messages and drop their resource list entries. */
 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
 	for (n = 0; n < msi->msi_alloc; n++)
 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, n + 1);
 
 	/* Reset the cached allocation state. */
 	msi->msi_alloc = 0;
 	msi->msi_addr = 0;
 	msi->msi_data = 0;
 	return (0);
 }
 
 /*
  * Return the number of MSI messages this device advertises, or 0 when
  * MSI is disabled by the pci_do_msi tunable or the device has no MSI
  * capability.  This is the largest count pci_alloc_msi() could return,
  * assuming the MD code can allocate the messages.
  */
 int
 pci_msi_count_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *child_info;
 	struct pcicfg_msi *cap;
 
 	child_info = device_get_ivars(child);
 	cap = &child_info->cfg.msi;
 
 	if (!pci_do_msi || cap->msi_location == 0)
 		return (0);
 	return (cap->msi_msgnum);
 }
 
 /*
  * Free a pci_devinfo structure and the VPD data hanging off it, unlink
  * it from the global device list and update the global device counters.
  * Always returns 0.
  */
 
 int
 pci_freecfg(struct pci_devinfo *dinfo)
 {
 	int n;
 
 	/* VPD storage is only released when a VPD register was recorded. */
 	if (dinfo->cfg.vpd.vpd_reg != 0) {
 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
 
 		/* Read-only entries: each value, then the array itself. */
 		n = 0;
 		while (n < dinfo->cfg.vpd.vpd_rocnt) {
 			free(dinfo->cfg.vpd.vpd_ros[n].value, M_DEVBUF);
 			n++;
 		}
 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
 
 		/* Writable entries: same treatment. */
 		n = 0;
 		while (n < dinfo->cfg.vpd.vpd_wcnt) {
 			free(dinfo->cfg.vpd.vpd_w[n].value, M_DEVBUF);
 			n++;
 		}
 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
 	}
 
 	/* Unlink from the global list before releasing the memory. */
 	STAILQ_REMOVE(&pci_devq, dinfo, pci_devinfo, pci_links);
 	free(dinfo, M_DEVBUF);
 
 	/* The device list changed: bump the generation, drop the count. */
 	pci_generation++;
 	pci_numdevs--;
 	return (0);
 }
 
 /*
  * PCI power management
  */
 /*
  * Move 'child' to the PCI power state 'state'.
  *
  * Returns 0 on success (including when the device is already in the
  * requested state), EOPNOTSUPP if the device has no power management
  * capability or does not support the requested D1/D2 state, and EINVAL
  * for an unrecognized state.
  */
 int
 pci_set_powerstate_method(device_t dev, device_t child, int state)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	pcicfgregs *cfg = &dinfo->cfg;
 	uint16_t status;
 	int oldstate, highest, delay;
 
 	if (cfg->pp.pp_cap == 0)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Optimize a no state change request away.  While it would be OK to
 	 * write to the hardware in theory, some devices have shown odd
 	 * behavior when going from D3 -> D3.
 	 */
 	oldstate = pci_get_powerstate(child);
 	if (oldstate == state)
 		return (0);
 
 	/*
 	 * The PCI power management specification states that after a state
 	 * transition between PCI power states, system software must
 	 * guarantee a minimal delay before the function accesses the device.
 	 * Compute the worst case delay that we need to guarantee before we
 	 * access the device.  Many devices will be responsive much more
 	 * quickly than this delay, but there are some that don't respond
 	 * instantly to state changes.  Transitions to/from D3 state require
 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
 	 * is done below with DELAY rather than a sleeper function because
 	 * this function can be called from contexts where we cannot sleep.
 	 */
 	highest = (oldstate > state) ? oldstate : state;
 	if (highest == PCI_POWERSTATE_D3)
 		delay = 10000;
 	else if (highest == PCI_POWERSTATE_D2)
 		delay = 200;
 	else
 		delay = 0;
 
 	/* Start from the current status word with the state field cleared. */
 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
 	    & ~PCIM_PSTAT_DMASK;
 	switch (state) {
 	case PCI_POWERSTATE_D0:
 		status |= PCIM_PSTAT_D0;
 		break;
 	case PCI_POWERSTATE_D1:
 		/* D1 is only valid if the capability advertises it. */
 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
 			return (EOPNOTSUPP);
 		status |= PCIM_PSTAT_D1;
 		break;
 	case PCI_POWERSTATE_D2:
 		/* D2 is only valid if the capability advertises it. */
 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
 			return (EOPNOTSUPP);
 		status |= PCIM_PSTAT_D2;
 		break;
 	case PCI_POWERSTATE_D3:
 		status |= PCIM_PSTAT_D3;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	if (bootverbose)
 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
 		    state);
 
 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
 	if (delay)
 		DELAY(delay);
 	return (0);
 }
 
 /*
  * Return the current PCI power state of 'child' as a PCI_POWERSTATE_*
  * value, decoded from the power management status register.  A device
  * without a power management capability is reported as D0.
  */
 int
 pci_get_powerstate_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	pcicfgregs *cfg = &dinfo->cfg;
 	uint16_t status;
 
 	/* No support, device is always at D0 */
 	if (cfg->pp.pp_cap == 0)
 		return (PCI_POWERSTATE_D0);
 
 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
 	switch (status & PCIM_PSTAT_DMASK) {
 	case PCIM_PSTAT_D0:
 		return (PCI_POWERSTATE_D0);
 	case PCIM_PSTAT_D1:
 		return (PCI_POWERSTATE_D1);
 	case PCIM_PSTAT_D2:
 		return (PCI_POWERSTATE_D2);
 	case PCIM_PSTAT_D3:
 		return (PCI_POWERSTATE_D3);
 	default:
 		return (PCI_POWERSTATE_UNKNOWN);
 	}
 }
 
 /*
  * Some convenience functions for PCI device drivers.
  */
 
 /* Read-modify-write the command register of 'child', setting 'bit'. */
 static __inline void
 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
 {
 	uint16_t	cmdreg;
 
 	cmdreg = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, cmdreg | bit, 2);
 }
 
 /* Read-modify-write the command register of 'child', clearing 'bit'. */
 static __inline void
 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
 {
 	uint16_t	cmdreg;
 
 	cmdreg = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, cmdreg & ~bit, 2);
 }
 
 /* Set the bus master enable bit in the child's command register. */
 int
 pci_enable_busmaster_method(device_t dev, device_t child)
 {
 	const uint16_t busmaster = PCIM_CMD_BUSMASTEREN;
 
 	pci_set_command_bit(dev, child, busmaster);
 	return (0);
 }
 
 /* Clear the bus master enable bit in the child's command register. */
 int
 pci_disable_busmaster_method(device_t dev, device_t child)
 {
 	const uint16_t busmaster = PCIM_CMD_BUSMASTEREN;
 
 	pci_clear_command_bit(dev, child, busmaster);
 	return (0);
 }
 
 /*
  * Enable decoding of the given resource space (SYS_RES_IOPORT or
  * SYS_RES_MEMORY) in the child's command register.  Returns EINVAL for
  * any other space, 0 otherwise.
  */
 int
 pci_enable_io_method(device_t dev, device_t child, int space)
 {
 	uint16_t enable_bit;
 
 	if (space == SYS_RES_IOPORT)
 		enable_bit = PCIM_CMD_PORTEN;
 	else if (space == SYS_RES_MEMORY)
 		enable_bit = PCIM_CMD_MEMEN;
 	else
 		return (EINVAL);
 
 	pci_set_command_bit(dev, child, enable_bit);
 	return (0);
 }
 
 /*
  * Disable decoding of the given resource space (SYS_RES_IOPORT or
  * SYS_RES_MEMORY) in the child's command register.  Returns EINVAL for
  * any other space, 0 otherwise.
  */
 int
 pci_disable_io_method(device_t dev, device_t child, int space)
 {
 	uint16_t enable_bit;
 
 	if (space == SYS_RES_IOPORT)
 		enable_bit = PCIM_CMD_PORTEN;
 	else if (space == SYS_RES_MEMORY)
 		enable_bit = PCIM_CMD_MEMEN;
 	else
 		return (EINVAL);
 
 	pci_clear_command_bit(dev, child, enable_bit);
 	return (0);
 }
 
 /*
  * New style pci driver.  Parent device is either a pci-host-bridge or a
  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
  */
 
 void
 pci_print_verbose(struct pci_devinfo *dinfo)
 {
 
 	if (bootverbose) {
 		pcicfgregs *cfg = &dinfo->cfg;
 
 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
 		    cfg->vendor, cfg->device, cfg->revid);
 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
 		    cfg->mfdev);
 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
 		if (cfg->intpin > 0)
 			printf("\tintpin=%c, irq=%d\n",
 			    cfg->intpin +'a' -1, cfg->intline);
 		if (cfg->pp.pp_cap) {
 			uint16_t status;
 
 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
 			    status & PCIM_PSTAT_DMASK);
 		}
 		if (cfg->msi.msi_location) {
 			int ctrl;
 
 			ctrl = cfg->msi.msi_ctrl;
 			printf("\tMSI supports %d message%s%s%s\n",
 			    cfg->msi.msi_msgnum,
 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
 		}
 		if (cfg->msix.msix_location) {
 			printf("\tMSI-X supports %d message%s ",
 			    cfg->msix.msix_msgnum,
 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
 				printf("in map 0x%x\n",
 				    cfg->msix.msix_table_bar);
 			else
 				printf("in maps 0x%x and 0x%x\n",
 				    cfg->msix.msix_table_bar,
 				    cfg->msix.msix_pba_bar);
 		}
 	}
 }
 
 /* Return 1 if I/O port decoding is enabled in the command register. */
 static int
 pci_porten(device_t dev)
 {
 	uint32_t cmdreg;
 
 	cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
 	return ((cmdreg & PCIM_CMD_PORTEN) != 0);
 }
 
 /* Return 1 if memory decoding is enabled in the command register. */
 static int
 pci_memen(device_t dev)
 {
 	uint32_t cmdreg;
 
 	cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
 	return ((cmdreg & PCIM_CMD_MEMEN) != 0);
 }
 
 /*
  * Read the BAR at config offset 'reg' and probe it for sizing.
  *
  * On return *mapp holds the BAR's original contents and *testvalp the
  * value read back after writing all 1's (0xfffffffe for the ROM BAR) to
  * it.  For BARs that pci_maprange() reports as 64-bit, both values
  * include the upper dword at reg + 4.  The original BAR contents are
  * written back before returning, as is the command register for
  * non-ROM BARs.
  */
 static void
 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
 {
 	pci_addr_t map, testval;
 	int ln2range;
 	uint16_t cmd;
 
 	/*
 	 * The device ROM BAR is special.  It is always a 32-bit
 	 * memory BAR.  Bit 0 is special and should not be set when
 	 * sizing the BAR.
 	 */
 	if (reg == PCIR_BIOS) {
 		map = pci_read_config(dev, reg, 4);
 		pci_write_config(dev, reg, 0xfffffffe, 4);
 		testval = pci_read_config(dev, reg, 4);
 		pci_write_config(dev, reg, map, 4);
 		*mapp = map;
 		*testvalp = testval;
 		return;
 	}
 
 	/* Fetch the current value; a 64-bit BAR spans two dwords. */
 	map = pci_read_config(dev, reg, 4);
 	ln2range = pci_maprange(map);
 	if (ln2range == 64)
 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
 
 	/*
 	 * Disable decoding via the command register before
 	 * determining the BAR's length since we will be placing it in
 	 * a weird state.
 	 */
 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
 	pci_write_config(dev, PCIR_COMMAND,
 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
 
 	/*
 	 * Determine the BAR's length by writing all 1's.  The bottom
 	 * log_2(size) bits of the BAR will stick as 0 when we read
 	 * the value back.
 	 */
 	pci_write_config(dev, reg, 0xffffffff, 4);
 	testval = pci_read_config(dev, reg, 4);
 	if (ln2range == 64) {
 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
 	}
 
 	/*
 	 * Restore the original value of the BAR.  We may have reprogrammed
 	 * the BAR of the low-level console device and when booting verbose,
 	 * we need the console device addressable.
 	 */
 	pci_write_config(dev, reg, map, 4);
 	if (ln2range == 64)
 		pci_write_config(dev, reg + 4, map >> 32, 4);
 	/* Put back the saved command register, re-enabling decoding. */
 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
 
 	*mapp = map;
 	*testvalp = testval;
 }
 
 /*
  * Program the BAR at config offset 'reg' with 'base'.  The upper dword
  * at reg + 4 is written as well when the BAR's current contents mark it
  * as 64-bit.  For the device ROM BAR (PCIR_BIOS) the register is read
  * but nothing is written.
  */
 static void
 pci_write_bar(device_t dev, int reg, pci_addr_t base)
 {
 	pci_addr_t current;
 
 	current = pci_read_config(dev, reg, 4);
 
 	/* The device ROM BAR is always 32-bits. */
 	if (reg == PCIR_BIOS)
 		return;
 
 	pci_write_config(dev, reg, base, 4);
 	if (pci_maprange(current) == 64)
 		pci_write_config(dev, reg + 4, base >> 32, 4);
 }
 
 /*
  * Add a resource based on a pci map register.
  *
  * bus/dev:  the PCI bus and the device whose BAR is examined.
  * reg:      config space offset of the BAR; also used as the rid.
  * rl:       resource list the new entry is added to.
  * force:    add the resource even if the BAR looks unprogrammed.
  * prefetch: non-zero to reserve the range as prefetchable; forced on
  *           when the BAR itself has the prefetchable bit set.
  *
  * Return 1 if the map register is a 32bit map register or 2 if it is a
  * 64bit register, whether or not a resource was added.
  */
 static int
 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
     int force, int prefetch)
 {
 	pci_addr_t base, map, testval;
 	pci_addr_t start, end, count;
 	int barlen, basezero, maprange, mapsize, type;
 	uint16_t cmd;
 	struct resource *res;
 
 	pci_read_bar(dev, reg, &map, &testval);
 	if (PCI_BAR_MEM(map)) {
 		type = SYS_RES_MEMORY;
 		if (map & PCIM_BAR_MEM_PREFETCH)
 			prefetch = 1;
 	} else
 		type = SYS_RES_IOPORT;
 	mapsize = pci_mapsize(testval);
 	base = pci_mapbase(map);
 #ifdef __PCI_BAR_ZERO_VALID
 	basezero = 0;
 #else
 	basezero = base == 0;
 #endif
 	maprange = pci_maprange(map);
 	barlen = maprange == 64 ? 2 : 1;
 
 	/*
 	 * For I/O registers, if bottom bit is set, and the next bit up
 	 * isn't clear, we know we have a BAR that doesn't conform to the
 	 * spec, so ignore it.  Also, sanity check the size of the data
 	 * areas to the type of memory involved.  Memory must be at least
 	 * 16 bytes in size, while I/O ranges must be at least 4.
 	 */
 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
 		return (barlen);
 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
 	    (type == SYS_RES_IOPORT && mapsize < 2))
 		return (barlen);
 
 	if (bootverbose) {
 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
 			printf(", port disabled\n");
 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
 			printf(", memory disabled\n");
 		else
 			printf(", enabled\n");
 	}
 
 	/*
 	 * If base is 0, then we have problems if this architecture does
 	 * not allow that.  It is best to ignore such entries for the
 	 * moment.  These will be allocated later if the driver specifically
 	 * requests them.  However, some removable busses look better when
 	 * all resources are allocated, so allow '0' to be overridden.
 	 *
 	 * Similarly treat maps whose values is the same as the test value
 	 * read back.  These maps have had all f's written to them by the
 	 * BIOS in an attempt to disable the resources.
 	 */
 	if (!force && (basezero || map == testval))
 		return (barlen);
 	if ((u_long)base != base) {
 		device_printf(bus,
 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
 		    pci_get_function(dev), reg);
 		return (barlen);
 	}
 
 	/*
 	 * This code theoretically does the right thing, but has
 	 * undesirable side effects in some cases where peripherals
 	 * respond oddly to having these bits enabled.  Let the user
 	 * be able to turn them off (since pci_enable_io_modes is 1 by
 	 * default).
 	 */
 	if (pci_enable_io_modes) {
 		/* Turn on resources that have been left off by a lazy BIOS */
 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
 			cmd |= PCIM_CMD_PORTEN;
 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
 		}
 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
 			cmd |= PCIM_CMD_MEMEN;
 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
 		}
 	} else {
 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
 			return (barlen);
 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
 			return (barlen);
 	}
 
 	/*
 	 * Compute the size in pci_addr_t: mapsize can reach or exceed
 	 * the width of int for large (e.g. 64-bit) BARs, and shifting a
 	 * plain int that far is undefined.
 	 */
 	count = (pci_addr_t)1 << mapsize;
 	if (basezero || base == pci_mapbase(testval)) {
 		start = 0;	/* Let the parent decide. */
 		end = ~0ULL;
 	} else {
 		start = base;
 		end = base + count - 1;
 	}
 	resource_list_add(rl, type, reg, start, end, count);
 
 	/*
 	 * Try to allocate the resource for this BAR from our parent
 	 * so that this resource range is already reserved.  The
 	 * driver for this device will later inherit this resource in
 	 * pci_alloc_resource().
 	 */
 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
 	    prefetch ? RF_PREFETCHABLE : 0);
 	if (res == NULL) {
 		/*
 		 * If the allocation fails, clear the BAR and delete
 		 * the resource list entry to force
 		 * pci_alloc_resource() to allocate resources from the
 		 * parent.
 		 */
 		resource_list_delete(rl, type, reg);
 		start = 0;
 	} else
 		start = rman_get_start(res);
 	/* Program the BAR with the reserved start (0 on failure). */
 	pci_write_bar(dev, reg, start);
 	return (barlen);
 }
 
 /*
  * For ATA devices we need to decide early what addressing mode to use.
  * Legacy demands that the primary and secondary ATA ports sits on the
  * same addresses that old ISA hardware did. This dictates that we use
  * those addresses and ignore the BAR's if we cannot set PCI native
  * addressing mode.
  *
  * For each channel, the programming interface byte selects between
  * adding the channel's BARs (0/1 for primary, 2/3 for secondary) and
  * reserving the fixed legacy I/O ranges under the same rids.  BARs 4
  * and 5 are always added.  Reservation results are deliberately not
  * checked.
  */
 static void
 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
     uint32_t prefetchmask)
 {
 	int rid, type, progif;
 #if 0
 	/* if this device supports PCI native addressing use it */
 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
 	if ((progif & 0x8a) == 0x8a) {
 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
 			printf("Trying ATA native PCI addressing mode\n");
 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
 		}
 	}
 #endif
 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
 	type = SYS_RES_IOPORT;
 
 	/* Primary channel. */
 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
 		    prefetchmask & (1 << 0));
 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
 		    prefetchmask & (1 << 1));
 	} else {
 		rid = PCIR_BAR(0);
 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
 		    0x1f7, 8, 0);
 		rid = PCIR_BAR(1);
 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
 		    0x3f6, 1, 0);
 	}
 
 	/* Secondary channel. */
 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
 		    prefetchmask & (1 << 2));
 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
 		    prefetchmask & (1 << 3));
 	} else {
 		rid = PCIR_BAR(2);
 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
 		    0x177, 8, 0);
 		rid = PCIR_BAR(3);
 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
 		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
 		    0x376, 1, 0);
 	}
 
 	/* The remaining two BARs are handled like any other BAR. */
 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
 	    prefetchmask & (1 << 4));
 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
 	    prefetchmask & (1 << 5));
 }
 
 /*
  * Pick the legacy (INTx) IRQ for 'dev' and add it as the rid 0
  * SYS_RES_IRQ resource.  The IRQ comes, in order of preference, from a
  * per-device tunable, from the bus (when the intline register is
  * invalid or force_route is set), or from the intline register.  The
  * intline register is updated if the chosen IRQ differs from it.
  */
 static void
 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	pcicfgregs *cfg = &dinfo->cfg;
 	char knob[64];
 	int irq;
 
 	/* A device without an interrupt pin has no interrupt. */
 	if (cfg->intpin == 0)
 		return;
 
 	/* A tunable may override the IRQ; out-of-range values are ignored. */
 	irq = PCI_INVALID_IRQ;
 	snprintf(knob, sizeof(knob),
 	    "hw.pci%d.%d.%d.INT%c.irq",
 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
 	if (TUNABLE_INT_FETCH(knob, &irq)) {
 		if (irq <= 0 || irq >= 255)
 			irq = PCI_INVALID_IRQ;
 	}
 
 	/*
 	 * Without a usable tunable, ask the bus to route an interrupt
 	 * when the intline register is invalid or routing is forced, and
 	 * fall back to the intline register if that yields nothing.
 	 */
 	if (!PCI_INTERRUPT_VALID(irq)) {
 		if (force_route || !PCI_INTERRUPT_VALID(cfg->intline))
 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
 		if (!PCI_INTERRUPT_VALID(irq))
 			irq = cfg->intline;
 	}
 
 	/* Still nothing usable: give up. */
 	if (!PCI_INTERRUPT_VALID(irq))
 		return;
 
 	/* Keep the cached and hardware intline in sync with the choice. */
 	if (cfg->intline != irq) {
 		cfg->intline = irq;
 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
 	}
 
 	/* Publish the IRQ as the rid 0 interrupt resource. */
 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
 }
 
 /*
  * Perform early OHCI takeover from SMM.  If the controller's control
  * register has OHCI_IR set, request an ownership change and poll up to
  * 100 times for the bit to clear, resetting the controller if it does
  * not; then disable all of its interrupts.
  */
 static void
 ohci_early_takeover(device_t self)
 {
 	struct resource *regs;
 	uint32_t control;
 	int rid, tries;
 
 	rid = PCIR_BAR(0);
 	regs = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 	if (regs == NULL)
 		return;
 
 	control = bus_read_4(regs, OHCI_CONTROL);
 	if ((control & OHCI_IR) != 0) {
 		if (bootverbose)
 			printf("ohci early: "
 			    "SMM active, request owner change\n");
 		bus_write_4(regs, OHCI_COMMAND_STATUS, OHCI_OCR);
 
 		/* Wait for OHCI_IR to clear, with a DELAY(1000) per poll. */
 		tries = 0;
 		while (tries < 100 && (control & OHCI_IR) != 0) {
 			DELAY(1000);
 			control = bus_read_4(regs, OHCI_CONTROL);
 			tries++;
 		}
 		if ((control & OHCI_IR) != 0) {
 			if (bootverbose)
 				printf("ohci early: "
 				    "SMM does not respond, resetting\n");
 			bus_write_4(regs, OHCI_CONTROL, OHCI_HCFS_RESET);
 		}
 
 		/* Disable interrupts */
 		bus_write_4(regs, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
 	}
 
 	bus_release_resource(self, SYS_RES_MEMORY, rid, regs);
 }
 
 /*
  * Perform early UHCI takeover from SMM: rewrite the legacy support
  * register, then clear the controller's interrupt enable register if
  * its I/O resource can be allocated.
  */
 static void
 uhci_early_takeover(device_t self)
 {
 	struct resource *ioports;
 	int rid;
 
 	/*
 	 * Set the PIRQD enable bit and switch off all the others. We don't
 	 * want legacy support to interfere with us XXX Does this also mean
 	 * that the BIOS won't touch the keyboard anymore if it is connected
 	 * to the ports of the root hub?
 	 */
 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
 
 	/* Disable interrupts */
 	rid = PCI_UHCI_BASE_REG;
 	ioports = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid,
 	    RF_ACTIVE);
 	if (ioports == NULL)
 		return;
 	bus_write_2(ioports, UHCI_INTR, 0);
 	bus_release_resource(self, SYS_RES_IOPORT, rid, ioports);
 }
 
 /*
  * Perform early EHCI takeover from SMM.
  *
  * Walks the controller's extended capability list in PCI config space
  * looking for legacy support capabilities whose BIOS semaphore is set.
  * For each one, the OS semaphore is set, the BIOS semaphore is polled
  * until it clears (at most 100 polls), and the controller's interrupt
  * enable register is cleared.
  */
 static void
 ehci_early_takeover(device_t self)
 {
 	struct resource *res;
 	uint32_t cparams;
 	uint32_t eec;
 	uint8_t eecp;
 	uint8_t bios_sem;
 	uint8_t offs;
 	int rid;
 	int i;
 
 	rid = PCIR_BAR(0);
 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 	if (res == NULL)
 		return;
 
 	/* The capability parameters locate the first extended capability. */
 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
 
 	/* Synchronise with the BIOS if it owns the controller. */
 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
 	    eecp = EHCI_EECP_NEXT(eec)) {
 		eec = pci_read_config(self, eecp, 4);
 		/* Only the legacy support capability is of interest. */
 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
 			continue;
 		}
 		bios_sem = pci_read_config(self, eecp +
 		    EHCI_LEGSUP_BIOS_SEM, 1);
 		/* A clear BIOS semaphore means there is nothing to take over. */
 		if (bios_sem == 0) {
 			continue;
 		}
 		if (bootverbose)
 			printf("ehci early: "
 			    "SMM active, request owner change\n");
 
 		/* Request ownership by setting the OS semaphore. */
 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
 
 		/* Poll until the BIOS semaphore clears or 100 tries pass. */
 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
 			DELAY(1000);
 			bios_sem = pci_read_config(self, eecp +
 			    EHCI_LEGSUP_BIOS_SEM, 1);
 		}
 
 		/* A timeout is only reported; the takeover continues. */
 		if (bios_sem != 0) {
 			if (bootverbose)
 				printf("ehci early: "
 				    "SMM does not respond\n");
 		}
 		/* Disable interrupts */
 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
 		bus_write_4(res, offs + EHCI_USBINTR, 0);
 	}
 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
 }
 
 /*
  * Populate a child's resource list: BAR maps (or the special ATA maps),
  * any quirk-table map registers for this device ID, the interrupt line,
  * and finally an optional early USB controller takeover.
  */
 void
 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 	struct resource_list *rl = &dinfo->resources;
 	pcicfgregs *cfg = &dinfo->cfg;
 	struct pci_quirk *quirk;
 	int bar;
 	int ata_special;
 
 	/*
 	 * ATA devices needs special map treatment: IDE storage devices
 	 * with the master-device progif bit set, or with BAR 0 and BAR 2
 	 * both reading back as zero.
 	 */
 	ata_special = 0;
 	if (pci_get_class(dev) == PCIC_STORAGE &&
 	    pci_get_subclass(dev) == PCIS_STORAGE_IDE) {
 		if (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV)
 			ata_special = 1;
 		else if (pci_read_config(dev, PCIR_BAR(0), 4) == 0 &&
 		    pci_read_config(dev, PCIR_BAR(2), 4) == 0)
 			ata_special = 1;
 	}
 
 	if (ata_special) {
 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
 	} else {
 		/* pci_add_map() returns how many BAR slots it consumed. */
 		bar = 0;
 		while (bar < cfg->nummaps)
 			bar += pci_add_map(bus, dev, PCIR_BAR(bar), rl, force,
 			    prefetchmask & (1 << bar));
 	}
 
 	/* Add additional, quirked resources. */
 	for (quirk = &pci_quirks[0]; quirk->devid; quirk++) {
 		if (quirk->type != PCI_QUIRK_MAP_REG)
 			continue;
 		if (quirk->devid == ((cfg->device << 16) | cfg->vendor))
 			pci_add_map(bus, dev, quirk->arg1, rl, force, 0);
 	}
 
 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
 #ifdef __PCI_REROUTE_INTERRUPT
 		/*
 		 * Try to re-route interrupts. Sometimes the BIOS or
 		 * firmware may leave bogus values in these registers.
 		 * If the re-route fails, then just stick with what we
 		 * have.
 		 */
 		pci_assign_interrupt(bus, dev, 1);
 #else
 		pci_assign_interrupt(bus, dev, 0);
 #endif
 	}
 
 	/* Optionally take USB host controllers over from the firmware. */
 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
 		switch (pci_get_progif(dev)) {
 		case PCIP_SERIALBUS_USB_EHCI:
 			ehci_early_takeover(dev);
 			break;
 		case PCIP_SERIALBUS_USB_OHCI:
 			ohci_early_takeover(dev);
 			break;
 		case PCIP_SERIALBUS_USB_UHCI:
 			uhci_early_takeover(dev);
 			break;
 		default:
 			break;
 		}
 	}
 }
 
 /*
  * Scan every slot of bus `busno` and add a child device for each
  * function that pci_read_device() reports.  Function 0's header type
  * decides whether functions beyond 0 are probed.
  */
 void
 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
 {
 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, slot, func, n, w)
 	struct pci_devinfo *dinfo;
 	device_t pcib;
 	uint8_t hdrtype;
 	int slot, func;
 	int lastslot, lastfunc;
 
 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
 	    ("dinfo_size too small"));
 	pcib = device_get_parent(dev);
 	lastslot = PCIB_MAXSLOTS(pcib);
 	for (slot = 0; slot <= lastslot; slot++) {
 		/* REG() reads through `func`; inspect function 0 first. */
 		func = 0;
 		DELAY(1);
 		hdrtype = REG(PCIR_HDRTYPE, 1);
 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
 			continue;
 
 		/* Multi-function devices get every function probed. */
 		lastfunc = (hdrtype & PCIM_MFDEV) ? PCI_FUNCMAX : 0;
 		for (func = 0; func <= lastfunc; func++) {
 			dinfo = pci_read_device(pcib, domain, busno, slot,
 			    func, dinfo_size);
 			if (dinfo == NULL)
 				continue;
 			pci_add_child(dev, dinfo);
 		}
 	}
 #undef REG
 }
 
 /*
  * Create the newbus child for an already-read device, attach `dinfo` as
  * its ivars, save and restore its config state, and add its resources.
  */
 void
 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
 {
 	device_t child;
 
 	child = device_add_child(bus, NULL, -1);
 	dinfo->cfg.dev = child;
 	device_set_ivars(child, dinfo);
 	resource_list_init(&dinfo->resources);
 	pci_cfg_save(child, dinfo, 0);
 	pci_cfg_restore(child, dinfo);
 	pci_print_verbose(dinfo);
 	pci_add_resources(bus, child, 0, 0);
 }
 
 /*
  * Probe method: label the device and return the generic priority so
  * that other subclasses can override this driver.
  */
 static int
 pci_probe(device_t dev)
 {
 	device_set_desc(dev, "PCI bus");
 	return (BUS_PROBE_GENERIC);
 }
 
 /*
  * Attach method: enumerate the children of this bus and then attach them.
  */
 static int
 pci_attach(device_t dev)
 {
 	int bus_number;
 	int pci_domain;
 
 	/*
 	 * The unit number cannot identify the bus being probed: systems
 	 * with multiple PCI domains may have several independently
 	 * numbered PCI busses.  Ask the parent pcib for our domain and
 	 * bus numbers instead.
 	 */
 	pci_domain = pcib_get_domain(dev);
 	bus_number = pcib_get_bus(dev);
 	if (bootverbose) {
 		device_printf(dev, "domain=%d, physical bus=%d\n",
 		    pci_domain, bus_number);
 	}
 	pci_add_children(dev, pci_domain, bus_number,
 	    sizeof(struct pci_devinfo));
 	return (bus_generic_attach(dev));
 }
 
 /*
  * Move every attached child in `devlist` to power state `state`.
  *
  * dev      - the PCI bus whose children are being changed
  * devlist  - array of `numdevs` child devices
  * state    - requested PCI power state
  *
  * The parent bridge is asked, per child, whether the firmware prefers a
  * different state; if so that state is used instead.  If the query fails
  * the child's power state is left alone.  Children that are not attached
  * are skipped since they are handled separately.
  */
 static void
 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
     int state)
 {
 	device_t child, pcib;
 	int dstate, i;
 
 	pcib = device_get_parent(dev);
 	for (i = 0; i < numdevs; i++) {
 		child = devlist[i];
 		dstate = state;
 		/*
 		 * Query the firmware about the child itself, not about
 		 * the bus: the suggested state is specific to each device.
 		 */
 		if (device_is_attached(child) &&
 		    PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
 			pci_set_powerstate(child, dstate);
 	}
 }
 
 /*
  * Suspend method: snapshot each child's config space, suspend the
  * children, and optionally drop them to D3.  Returns 0 or the first
  * error encountered.
  */
 int
 pci_suspend(device_t dev)
 {
 	device_t *kids;
 	int err, idx, nkids;
 
 	err = device_get_children(dev, &kids, &nkids);
 	if (err != 0)
 		return (err);
 
 	/* Save the PCI configuration space for each child. */
 	for (idx = 0; idx < nkids; idx++)
 		pci_cfg_save(kids[idx], device_get_ivars(kids[idx]), 0);
 
 	/* Suspend devices before potentially powering them down. */
 	err = bus_generic_suspend(dev);
 	if (err == 0 && pci_do_power_suspend)
 		pci_set_power_children(dev, kids, nkids, PCI_POWERSTATE_D3);
 
 	free(kids, M_TEMP);
 	return (err);
 }
 
 /*
  * Resume method for the PCI bus.
  *
  * Fetches the child list, optionally moves the children to D0 (when
  * pci_do_power_resume is set), restores each child's saved config space
  * and re-saves it with the final argument 1 for children that are not
  * attached.
  *
  * In the revision this hunk introduces ('+' lines), children are then
  * resumed in two passes: display, memory, bridge and base-peripheral
  * class devices first, everything else second.  The return values of
  * DEVICE_RESUME() are not examined and the function returns 0, replacing
  * the previous return of bus_generic_resume()'s result ('-' line).
  */
 int
 pci_resume(device_t dev)
 {
 	device_t child, *devlist;
 	struct pci_devinfo *dinfo;
 	int error, i, numdevs;
 
 	/*
 	 * Set each child to D0 and restore its PCI configuration space.
 	 */
 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
 		return (error);
 	if (pci_do_power_resume)
 		pci_set_power_children(dev, devlist, numdevs,
 		    PCI_POWERSTATE_D0);
 
 	/* Now the device is powered up, restore its config space. */
 	for (i = 0; i < numdevs; i++) {
 		child = devlist[i];
 		dinfo = device_get_ivars(child);
 
 		pci_cfg_restore(child, dinfo);
 		if (!device_is_attached(child))
 			pci_cfg_save(child, dinfo, 1);
 	}
+
+	/*
+	 * Resume critical devices first, then everything else later.
+	 */
+	for (i = 0; i < numdevs; i++) {
+		child = devlist[i];
+		switch (pci_get_class(child)) {
+		case PCIC_DISPLAY:
+		case PCIC_MEMORY:
+		case PCIC_BRIDGE:
+		case PCIC_BASEPERIPH:
+			DEVICE_RESUME(child);
+			break;
+		}
+	}
+	for (i = 0; i < numdevs; i++) {
+		child = devlist[i];
+		switch (pci_get_class(child)) {
+		case PCIC_DISPLAY:
+		case PCIC_MEMORY:
+		case PCIC_BRIDGE:
+		case PCIC_BASEPERIPH:
+			break;
+		default:
+			DEVICE_RESUME(child);
+		}
+	}
 	free(devlist, M_TEMP);
-	return (bus_generic_resume(dev));
+	return (0);
 }
 
 static void
 pci_load_vendor_data(void)
 {
 	caddr_t vendordata, info;
 
 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
 		info = preload_search_info(vendordata, MODINFO_ADDR);
 		pci_vendordata = *(char **)info;
 		info = preload_search_info(vendordata, MODINFO_SIZE);
 		pci_vendordata_size = *(size_t *)info;
 		/* terminate the database */
 		pci_vendordata[pci_vendordata_size] = '\n';
 	}
 }
 
 /*
  * Called when a new driver is added to the bus: let the driver identify
  * devices, then re-probe every child in the DS_NOTPRESENT state.  A child
  * that still fails to probe/attach has its config saved again with the
  * final argument 1.
  */
 void
 pci_driver_added(device_t dev, driver_t *driver)
 {
 	struct pci_devinfo *dinfo;
 	device_t *kids;
 	int idx, nkids;
 
 	if (bootverbose)
 		device_printf(dev, "driver added\n");
 	DEVICE_IDENTIFY(driver, dev);
 	if (device_get_children(dev, &kids, &nkids) != 0)
 		return;
 
 	for (idx = 0; idx < nkids; idx++) {
 		/* Only children in the DS_NOTPRESENT state are retried. */
 		if (device_get_state(kids[idx]) == DS_NOTPRESENT) {
 			dinfo = device_get_ivars(kids[idx]);
 			pci_print_verbose(dinfo);
 			if (bootverbose) {
 				pci_printf(&dinfo->cfg,
 				    "reprobing on driver added\n");
 			}
 			pci_cfg_restore(kids[idx], dinfo);
 			if (device_probe_and_attach(kids[idx]) != 0)
 				pci_cfg_save(kids[idx], dinfo, 1);
 		}
 	}
 	free(kids, M_TEMP);
 }
 
 /*
  * Install an interrupt handler for `child` on `irq`.
  *
  * The generic bus code installs the handler first.  For direct children
  * the PCI-specific state is then updated according to the resource's rid:
  *   rid == 0  legacy INTx: the INTxDIS command bit is cleared.
  *   rid  > 0  MSI or MSI-X: the message address/data are obtained from
  *             the parent bridge if not yet mapped, the message is
  *             enabled when the first handler is added, the handler
  *             count is bumped and INTx is disabled.
  *
  * Returns 0 with *cookiep set on success.  If mapping the message fails,
  * the generic handler is torn down again and the error is returned.
  */
 int
 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
 {
 	struct pci_devinfo *dinfo;
 	struct msix_table_entry *mte;
 	struct msix_vector *mv;
 	uint64_t addr;
 	uint32_t data;
 	void *cookie;
 	int error, rid;
 
 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
 	    arg, &cookie);
 	if (error)
 		return (error);
 
 	/* If this is not a direct child, just bail out. */
 	if (device_get_parent(child) != dev) {
 		*cookiep = cookie;
 		return(0);
 	}
 
 	rid = rman_get_rid(irq);
 	if (rid == 0) {
 		/* Make sure that INTx is enabled */
 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
 	} else {
 		/*
 		 * Check to see if the interrupt is MSI or MSI-X.
 		 * Ask our parent to map the MSI and give
 		 * us the address and data register values.
 		 * If we fail for some reason, teardown the
 		 * interrupt handler.
 		 */
 		dinfo = device_get_ivars(child);
 		if (dinfo->cfg.msi.msi_alloc > 0) {
 			/* MSI: one address/data pair shared by all messages. */
 			if (dinfo->cfg.msi.msi_addr == 0) {
 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
 			    ("MSI has handlers, but vectors not mapped"));
 				error = PCIB_MAP_MSI(device_get_parent(dev),
 				    child, rman_get_start(irq), &addr, &data);
 				if (error)
 					goto bad;
 				dinfo->cfg.msi.msi_addr = addr;
 				dinfo->cfg.msi.msi_data = data;
 			}
 			/* Enable MSI only when the first handler is added. */
 			if (dinfo->cfg.msi.msi_handlers == 0)
 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
 				    dinfo->cfg.msi.msi_data);
 			dinfo->cfg.msi.msi_handlers++;
 		} else {
 			/* MSI-X: rid N uses table entry N - 1. */
 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
 			    ("No MSI or MSI-X interrupts allocated"));
 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
 			    ("MSI-X index too high"));
 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
 			KASSERT(mte->mte_vector != 0, ("no message vector"));
 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
 			KASSERT(mv->mv_irq == rman_get_start(irq),
 			    ("IRQ mismatch"));
 			if (mv->mv_address == 0) {
 				KASSERT(mte->mte_handlers == 0,
 		    ("MSI-X table entry has handlers, but vector not mapped"));
 				error = PCIB_MAP_MSI(device_get_parent(dev),
 				    child, rman_get_start(irq), &addr, &data);
 				if (error)
 					goto bad;
 				mv->mv_address = addr;
 				mv->mv_data = data;
 			}
 			/* Program and unmask the entry for its first handler. */
 			if (mte->mte_handlers == 0) {
 				pci_enable_msix(child, rid - 1, mv->mv_address,
 				    mv->mv_data);
 				pci_unmask_msix(child, rid - 1);
 			}
 			mte->mte_handlers++;
 		}
 
 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
 	bad:
 		/*
 		 * Reached both on success (error == 0, falls through) and
 		 * via the gotos above when mapping the message failed.
 		 */
 		if (error) {
 			(void)bus_generic_teardown_intr(dev, child, irq,
 			    cookie);
 			return (error);
 		}
 	}
 	*cookiep = cookie;
 	return (0);
 }
 
 /*
  * Remove an interrupt handler previously installed for `child` on `irq`.
  *
  * For direct children the PCI-specific state is updated first:
  *   rid == 0  legacy INTx is masked via the INTxDIS command bit.
  *   rid  > 0  the MSI or MSI-X handler count is decremented; MSI is
  *             disabled, or the MSI-X table entry masked, when the count
  *             reaches zero.
  * The generic bus teardown is then invoked.
  *
  * Returns EINVAL if `irq` is NULL or inactive, if no resource list entry
  * matches `irq` for a message interrupt, or if no handler is registered
  * for that message; otherwise the result of the generic teardown.
  */
 int
 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
     void *cookie)
 {
 	struct msix_table_entry *mte;
 	struct resource_list_entry *rle;
 	struct pci_devinfo *dinfo;
 	int error, rid;
 
 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
 		return (EINVAL);
 
 	/* If this isn't a direct child, just bail out */
 	if (device_get_parent(child) != dev)
 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
 
 	rid = rman_get_rid(irq);
 	if (rid == 0) {
 		/* Mask INTx */
 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
 	} else {
 		/*
 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
 		 * decrement the appropriate handlers count and mask the
 		 * MSI-X message, or disable MSI messages if the count
 		 * drops to 0.
 		 */
 		dinfo = device_get_ivars(child);
 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
 		/*
 		 * Reject a rid with no list entry as well as an entry that
 		 * tracks a different resource; the lookup may return NULL.
 		 */
 		if (rle == NULL || rle->res != irq)
 			return (EINVAL);
 		if (dinfo->cfg.msi.msi_alloc > 0) {
 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
 			    ("MSI index too high"));
 			if (dinfo->cfg.msi.msi_handlers == 0)
 				return (EINVAL);
 			dinfo->cfg.msi.msi_handlers--;
 			if (dinfo->cfg.msi.msi_handlers == 0)
 				pci_disable_msi(child);
 		} else {
 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
 			    ("No MSI or MSI-X interrupts allocated"));
 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
 			    ("MSI-X index too high"));
 			/* rid N uses MSI-X table entry N - 1. */
 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
 			if (mte->mte_handlers == 0)
 				return (EINVAL);
 			mte->mte_handlers--;
 			if (mte->mte_handlers == 0)
 				pci_mask_msix(child, rid - 1);
 		}
 	}
 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
 	if (rid > 0)
 		KASSERT(error == 0,
 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
 	return (error);
 }
 
 /*
  * Print the attach line for a child: header, I/O port, memory and IRQ
  * resources, optional flags, slot/function location and footer.
  * Returns the sum of the values returned by the print calls.
  */
 int
 pci_print_child(device_t dev, device_t child)
 {
 	struct resource_list *rl;
 	int printed;
 
 	rl = &((struct pci_devinfo *)device_get_ivars(child))->resources;
 
 	printed = bus_print_child_header(dev, child);
 
 	printed += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
 	printed += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
 	printed += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
 
 	/*
 	 * NOTE(review): the flags come from `dev` (the bus), not from
 	 * `child` -- confirm this is intended.
 	 */
 	if (device_get_flags(dev))
 		printed += printf(" flags %#x", device_get_flags(dev));
 
 	printed += printf(" at device %d.%d", pci_get_slot(child),
 	    pci_get_function(child));
 
 	printed += bus_print_child_footer(dev, child);
 
 	return (printed);
 }
 
 /*
  * Human-readable names for PCI class/subclass codes, used by
  * pci_probe_nomatch() to describe devices that have no driver.
  *
  * An entry with subclass == -1 names the base class itself; other
  * entries name one specific subclass within that class.  The table is
  * terminated by an entry whose desc is NULL.
  */
 static struct
 {
 	int	class;		/* PCIC_* base class code */
 	int	subclass;	/* PCIS_* subclass code, or -1 for the class */
 	char	*desc;		/* description; NULL ends the table */
 } pci_nomatch_tab[] = {
 	{PCIC_OLD,		-1,			"old"},
 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
 	{PCIC_STORAGE,		-1,			"mass storage"},
 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
 	{PCIC_NETWORK,		-1,			"network"},
 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
 	{PCIC_DISPLAY,		-1,			"display"},
 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
 	{PCIC_MEMORY,		-1,			"memory"},
 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
 	{PCIC_BRIDGE,		-1,			"bridge"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
 	{PCIC_INPUTDEV,		-1,			"input device"},
 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
 	{PCIC_DOCKING,		-1,			"docking station"},
 	{PCIC_PROCESSOR,	-1,			"processor"},
 	{PCIC_SERIALBUS,	-1,			"serial bus"},
 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
 	{PCIC_WIRELESS,		-1,			"wireless controller"},
 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
 	{PCIC_SATCOM,		-1,			"satellite communication"},
 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
 	{PCIC_DASP,		-1,			"dasp"},
 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
 	{0, 0,		NULL}
 };
 
 /*
  * Report a child for which no driver attached.  The description comes
  * from the loaded device database when available, otherwise from the
  * class/subclass table.  The child's config is then saved with the final
  * argument 1.
  */
 void
 pci_probe_nomatch(device_t dev, device_t child)
 {
 	char	*dbdesc, *classname, *subname;
 	int	class, subclass, idx;
 
 	dbdesc = pci_describe_device(child);
 	if (dbdesc != NULL) {
 		/* Found in the loaded device database. */
 		device_printf(dev, "<%s>", dbdesc);
 		free(dbdesc, M_DEVBUF);
 	} else {
 		/*
 		 * Fall back to a general description built from the
 		 * class/subclass table.  The last matching entry wins.
 		 */
 		class = pci_get_class(child);
 		subclass = pci_get_subclass(child);
 		classname = "unknown";
 		subname = NULL;
 		for (idx = 0; pci_nomatch_tab[idx].desc != NULL; idx++) {
 			if (pci_nomatch_tab[idx].class != class)
 				continue;
 			if (pci_nomatch_tab[idx].subclass == -1)
 				classname = pci_nomatch_tab[idx].desc;
 			else if (pci_nomatch_tab[idx].subclass == subclass)
 				subname = pci_nomatch_tab[idx].desc;
 		}
 		/* classname is never NULL here; only subname is optional. */
 		device_printf(dev, "<%s%s%s>", classname,
 		    (subname != NULL) ? ", " : "",
 		    (subname != NULL) ? subname : "");
 	}
 	printf(" at device %d.%d (no driver attached)\n",
 	    pci_get_slot(child), pci_get_function(child));
 	pci_cfg_save(child, device_get_ivars(child), 1);
 }
 
 /*
  * Parse the PCI device database, if loaded, and return a pointer to a
  * description of the device.
  *
  * The database is flat text formatted as follows:
  *
  * Any line not in a valid format is ignored.
  * Lines are terminated with newline '\n' characters.
  *
  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
  * the vendor name.
  *
  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
  * - devices cannot be listed without a corresponding VENDOR line.
  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
  * another TAB, then the device name.
  */
 
 /*
  * Assuming (ptr) points to the beginning of a line in the database,
  * return the vendor or device and description of the next entry.
  * The value of (vendor) or (device) inappropriate for the entry type
  * is set to -1.  Returns nonzero at the end of the database.
  *
  * On return *ptr has been advanced: past the parsed line on success, or
  * to the position where the data ran out at the end of the database.
  *
  * The description is written into the buffer that *desc points to.  The
  * conversion "%80[^\n]" can store up to 80 characters followed by a
  * terminating NUL, so that buffer must provide at least 81 bytes.
  *
  * Note that this is slightly unrobust in the face of corrupt data;
  * we attempt to safeguard against this by spamming the end of the
  * database with a newline when we initialise.
  */
 static int
 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
 {
 	char	*cp = *ptr;
 	int	left;		/* bytes remaining in the database from cp */
 
 	*device = -1;
 	*vendor = -1;
 	**desc = '\0';
 	for (;;) {
 		/* Recompute the remaining length for the current position. */
 		left = pci_vendordata_size - (cp - pci_vendordata);
 		if (left <= 0) {
 			*ptr = cp;
 			return(1);
 		}
 
 		/* vendor entry? */
 		if (*cp != '\t' &&
 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
 			break;
 		/* device entry? */
 		if (*cp == '\t' &&
 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
 			break;
 
 		/* skip to next line */
 		while (*cp != '\n' && left > 0) {
 			cp++;
 			left--;
 		}
 		if (*cp == '\n') {
 			cp++;
 			left--;
 		}
 	}
 	/* skip to next line */
 	while (*cp != '\n' && left > 0) {
 		cp++;
 		left--;
 	}
 	/* Step over the newline only if data remains. */
 	if (*cp == '\n' && left > 0)
 		cp++;
 	*ptr = cp;
 	return(0);
 }
 
 /*
  * Build a "<vendor name>, <device name>" description for `dev` from the
  * loaded vendor database.
  *
  * Returns a string allocated from M_DEVBUF that the caller must free, or
  * NULL if no database is loaded, the vendor is not listed, or an
  * allocation fails.  If the vendor is listed but the device is not, the
  * device part is the device ID formatted as "0x%x".
  */
 static char *
 pci_describe_device(device_t dev)
 {
 	/*
 	 * pci_describe_parse_line() scans descriptions with "%80[^\n]",
 	 * which stores up to 80 characters plus a terminating NUL, so
 	 * each scratch buffer needs 81 bytes.
 	 */
 	enum { PCI_DESC_BUFSZ = 80 + 1 };
 	int	vendor, device;
 	char	*desc, *vp, *dp, *line;
 	size_t	desclen;
 
 	desc = vp = dp = NULL;
 
 	/*
 	 * If we have no vendor data, we can't do anything.
 	 */
 	if (pci_vendordata == NULL)
 		goto out;
 
 	/*
 	 * Scan the vendor data looking for this device
 	 */
 	line = pci_vendordata;
 	if ((vp = malloc(PCI_DESC_BUFSZ, M_DEVBUF, M_NOWAIT)) == NULL)
 		goto out;
 	for (;;) {
 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
 			goto out;
 		if (vendor == pci_get_vendor(dev))
 			break;
 	}
 	if ((dp = malloc(PCI_DESC_BUFSZ, M_DEVBUF, M_NOWAIT)) == NULL)
 		goto out;
 	/* Device lines follow their vendor line; stop at the next vendor. */
 	for (;;) {
 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
 			*dp = 0;
 			break;
 		}
 		if (vendor != -1) {
 			*dp = 0;
 			break;
 		}
 		if (device == pci_get_device(dev))
 			break;
 	}
 	/* Device not listed: fall back to its numeric ID. */
 	if (dp[0] == '\0')
 		snprintf(dp, PCI_DESC_BUFSZ, "0x%x", pci_get_device(dev));
 	/* Two strings, the ", " separator and the terminating NUL. */
 	desclen = strlen(vp) + strlen(dp) + 3;
 	if ((desc = malloc(desclen, M_DEVBUF, M_NOWAIT)) != NULL)
 		snprintf(desc, desclen, "%s, %s", vp, dp);
  out:
 	if (vp != NULL)
 		free(vp, M_DEVBUF);
 	if (dp != NULL)
 		free(dp, M_DEVBUF);
 	return(desc);
 }
 
 /*
  * Read one instance variable of `child` into *result.
  *
  * Returns 0 on success, ENOENT for an unknown `which`, and EINVAL for
  * PCI_IVAR_ETHADDR (with *result set to NULL first).
  */
 int
 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
 {
 	pcicfgregs *cfg;
 
 	cfg = &((struct pci_devinfo *)device_get_ivars(child))->cfg;
 
 	switch (which) {
 	case PCI_IVAR_ETHADDR:
 		/*
 		 * The generic accessor doesn't deal with failure, so
 		 * we set the return value, then return an error.
 		 */
 		*((uint8_t **) result) = NULL;
 		return (EINVAL);
 	case PCI_IVAR_SUBVENDOR:
 		*result = cfg->subvendor;
 		return (0);
 	case PCI_IVAR_SUBDEVICE:
 		*result = cfg->subdevice;
 		return (0);
 	case PCI_IVAR_VENDOR:
 		*result = cfg->vendor;
 		return (0);
 	case PCI_IVAR_DEVICE:
 		*result = cfg->device;
 		return (0);
 	case PCI_IVAR_DEVID:
 		/* Device ID in the upper 16 bits, vendor ID in the lower. */
 		*result = (cfg->device << 16) | cfg->vendor;
 		return (0);
 	case PCI_IVAR_CLASS:
 		*result = cfg->baseclass;
 		return (0);
 	case PCI_IVAR_SUBCLASS:
 		*result = cfg->subclass;
 		return (0);
 	case PCI_IVAR_PROGIF:
 		*result = cfg->progif;
 		return (0);
 	case PCI_IVAR_REVID:
 		*result = cfg->revid;
 		return (0);
 	case PCI_IVAR_INTPIN:
 		*result = cfg->intpin;
 		return (0);
 	case PCI_IVAR_IRQ:
 		*result = cfg->intline;
 		return (0);
 	case PCI_IVAR_DOMAIN:
 		*result = cfg->domain;
 		return (0);
 	case PCI_IVAR_BUS:
 		*result = cfg->bus;
 		return (0);
 	case PCI_IVAR_SLOT:
 		*result = cfg->slot;
 		return (0);
 	case PCI_IVAR_FUNCTION:
 		*result = cfg->func;
 		return (0);
 	case PCI_IVAR_CMDREG:
 		*result = cfg->cmdreg;
 		return (0);
 	case PCI_IVAR_CACHELNSZ:
 		*result = cfg->cachelnsz;
 		return (0);
 	case PCI_IVAR_MINGNT:
 		*result = cfg->mingnt;
 		return (0);
 	case PCI_IVAR_MAXLAT:
 		*result = cfg->maxlat;
 		return (0);
 	case PCI_IVAR_LATTIMER:
 		*result = cfg->lattimer;
 		return (0);
 	default:
 		return (ENOENT);
 	}
 }
 
 /*
  * Write one instance variable of `child`.  Only PCI_IVAR_INTPIN is
  * writable; the other listed ivars return EINVAL and anything else
  * returns ENOENT.
  */
 int
 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 
 	if (which == PCI_IVAR_INTPIN) {
 		dinfo->cfg.intpin = value;
 		return (0);
 	}
 
 	switch (which) {
 	case PCI_IVAR_ETHADDR:
 	case PCI_IVAR_SUBVENDOR:
 	case PCI_IVAR_SUBDEVICE:
 	case PCI_IVAR_VENDOR:
 	case PCI_IVAR_DEVICE:
 	case PCI_IVAR_DEVID:
 	case PCI_IVAR_CLASS:
 	case PCI_IVAR_SUBCLASS:
 	case PCI_IVAR_PROGIF:
 	case PCI_IVAR_REVID:
 	case PCI_IVAR_IRQ:
 	case PCI_IVAR_DOMAIN:
 	case PCI_IVAR_BUS:
 	case PCI_IVAR_SLOT:
 	case PCI_IVAR_FUNCTION:
 		/* Known, but read-only: disallow for now. */
 		return (EINVAL);
 	default:
 		return (ENOENT);
 	}
 }
 
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <sys/cons.h>
 
 /*
  * List resources based on pci map registers, used for within ddb
  */
 
 /*
  * ddb "show pciregs": print one summary line per device on the global
  * PCI device queue.  Stops after pci_numdevs entries or when the ddb
  * pager asks to quit.
  */
 DB_SHOW_COMMAND(pciregs, db_pci_dump)
 {
 	struct pci_devinfo *dinfo;
 	struct pci_conf *conf;
 	const char *name;
 	int seen, unit, unnamed;
 
 	unnamed = 0;
 	seen = 0;
 	dinfo = STAILQ_FIRST(&pci_devq);
 	while (dinfo != NULL && seen < pci_numdevs && !db_pager_quit) {
 		/*
 		 * Use the attached driver's name and unit when there is
 		 * one; otherwise print "none" with a running counter.
 		 */
 		name = NULL;
 		if (dinfo->cfg.dev)
 			name = device_get_name(dinfo->cfg.dev);
 		if (name != NULL && *name != '\0') {
 			unit = (int)device_get_unit(dinfo->cfg.dev);
 		} else {
 			name = "none";
 			unit = unnamed++;
 		}
 
 		conf = &dinfo->conf;
 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
 			name, unit,
 			conf->pc_sel.pc_domain, conf->pc_sel.pc_bus,
 			conf->pc_sel.pc_dev, conf->pc_sel.pc_func,
 			(conf->pc_class << 16) | (conf->pc_subclass << 8) |
 			conf->pc_progif,
 			(conf->pc_subdevice << 16) | conf->pc_subvendor,
 			(conf->pc_device << 16) | conf->pc_vendor,
 			conf->pc_revid, conf->pc_hdr);
 
 		dinfo = STAILQ_NEXT(dinfo, pci_links);
 		seen++;
 	}
 }
 #endif /* DDB */
 
 /*
  * Lazily reserve the resource backing the BAR identified by *rid.
  *
  * The BAR is read and sized; BARs of size 0 are ignored, as are requests
  * whose `type` disagrees with what the BAR decodes (memory vs. I/O
  * port).  The request's count and alignment are overridden with the
  * BAR's real size, the range is allocated from the parent without
  * RF_ACTIVE, recorded in the child's resource list as RLE_RESERVED, and
  * the allocated start address is written back to the BAR.
  *
  * Returns the reserved resource, or NULL if the BAR is unusable, the
  * type does not match, or the allocation fails.
  */
 static struct resource *
 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
     u_long start, u_long end, u_long count, u_int flags)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct resource_list *rl = &dinfo->resources;
 	struct resource_list_entry *rle;
 	struct resource *res;
 	pci_addr_t map, testval;
 	int mapsize;
 
 	/*
 	 * Weed out the bogons, and figure out how large the BAR/map
 	 * is.  Bars that read back 0 here are bogus and unimplemented.
 	 * Note: atapci in legacy mode are special and handled elsewhere
 	 * in the code.  If you have a atapci device in legacy mode and
 	 * it fails here, that other code is broken.
 	 */
 	res = NULL;
 	pci_read_bar(child, *rid, &map, &testval);
 
 	/*
 	 * Determine the size of the BAR and ignore BARs with a size
 	 * of 0.  Device ROM BARs use a different mask value.
 	 */
 	if (*rid == PCIR_BIOS)
 		mapsize = pci_romsize(testval);
 	else
 		mapsize = pci_mapsize(testval);
 	if (mapsize == 0)
 		goto out;
 
 	/* The requested type must match what the BAR decodes. */
 	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
 		if (type != SYS_RES_MEMORY) {
 			if (bootverbose)
 				device_printf(dev,
 				    "child %s requested type %d for rid %#x,"
 				    " but the BAR says it is an memio\n",
 				    device_get_nameunit(child), type, *rid);
 			goto out;
 		}
 	} else {
 		if (type != SYS_RES_IOPORT) {
 			if (bootverbose)
 				device_printf(dev,
 				    "child %s requested type %d for rid %#x,"
 				    " but the BAR says it is an ioport\n",
 				    device_get_nameunit(child), type, *rid);
 			goto out;
 		}
 	}
 
 	/*
 	 * For real BARs, we need to override the size that
 	 * the driver requests, because that's what the BAR
 	 * actually uses and we would otherwise have a
 	 * situation where we might allocate the excess to
 	 * another driver, which won't work.
 	 */
 	count = 1UL << mapsize;	/* mapsize is used as a log2 size */
 	if (RF_ALIGNMENT(flags) < mapsize)
 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
 		flags |= RF_PREFETCHABLE;
 
 	/*
 	 * Allocate enough resource, and then write back the
 	 * appropriate bar for that resource.
 	 */
 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
 	    start, end, count, flags & ~RF_ACTIVE);
 	if (res == NULL) {
 		device_printf(child,
 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
 		    count, *rid, type, start, end);
 		goto out;
 	}
 	/* Record the reservation in the child's resource list. */
 	resource_list_add(rl, type, *rid, start, end, count);
 	rle = resource_list_find(rl, type, *rid);
 	if (rle == NULL)
 		panic("pci_reserve_map: unexpectedly can't find resource.");
 	rle->res = res;
 	rle->start = rman_get_start(res);
 	rle->end = rman_get_end(res);
 	rle->count = count;
 	rle->flags = RLE_RESERVED;
 	if (bootverbose)
 		device_printf(child,
 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
 		    count, *rid, type, rman_get_start(res));
 	/* Program the BAR with the address that was actually allocated. */
 	map = rman_get_start(res);
 	pci_write_bar(child, *rid, map);
 out:;
 	return (res);
 }
 
 
 /*
  * Allocate a resource for `child`.  Requests from devices that are not
  * direct children are passed straight up to our parent.  For direct
  * children, interrupts and BARs are set up lazily before the resource
  * list allocation is performed.
  */
 struct resource *
 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
 		   u_long start, u_long end, u_long count, u_int flags)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct resource_list *rl = &dinfo->resources;
 	pcicfgregs *cfg = &dinfo->cfg;
 
 	if (device_get_parent(child) != dev)
 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
 		    type, rid, start, end, count, flags));
 
 	/*
 	 * Perform lazy resource allocation
 	 */
 	if (type == SYS_RES_IRQ) {
 		if (*rid == 0) {
 			/*
 			 * Can't alloc legacy interrupt once MSI messages
 			 * have been allocated.
 			 */
 			if (cfg->msi.msi_alloc > 0 || cfg->msix.msix_alloc > 0)
 				return (NULL);
 
 			/*
 			 * If the child device doesn't have an interrupt
 			 * routed and is deserving of an interrupt, try to
 			 * assign it one.
 			 */
 			if (!PCI_INTERRUPT_VALID(cfg->intline) &&
 			    cfg->intpin != 0)
 				pci_assign_interrupt(dev, child, 0);
 		}
 	} else if (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY) {
 		/* Reserve resources for this BAR if needed. */
 		if (resource_list_find(rl, type, *rid) == NULL &&
 		    pci_reserve_map(dev, child, type, rid, start, end,
 		    count, flags) == NULL)
 			return (NULL);
 	}
 
 	return (resource_list_alloc(rl, dev, child, type, rid,
 	    start, end, count, flags));
 }
 
 /*
  * Activate a resource.  After the generic activation, direct children
  * get ROM decoding enabled (for the PCIR_BIOS rid) and I/O or memory
  * decoding enabled in the command register.
  */
 int
 pci_activate_resource(device_t dev, device_t child, int type, int rid,
     struct resource *r)
 {
 	int error;
 
 	error = bus_generic_activate_resource(dev, child, type, rid, r);
 	if (error != 0 || device_get_parent(child) != dev)
 		return (error);
 
 	/* Device ROMs need their decoding explicitly enabled. */
 	if (rid == PCIR_BIOS)
 		pci_write_config(child, rid, rman_get_start(r) |
 		    PCIM_BIOS_ENABLE, 4);
 
 	/* Enable decoding in the command register when activating BARs. */
 	if (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)
 		error = PCI_ENABLE_IO(dev, child, type);
 
 	return (error);
 }
 
 /*
  * Deactivate a resource.  After the generic deactivation, a direct
  * child's device ROM has its decoding turned off by rewriting the ROM
  * BAR with the bare start address.
  *
  * Returns 0 on success or the error from the generic deactivation.
  */
 int
 pci_deactivate_resource(device_t dev, device_t child, int type,
     int rid, struct resource *r)
 {
 	int error;
 
 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
 	if (error)
 		return (error);
 
 	/*
 	 * Disable decoding for device ROMs.  As in
 	 * pci_activate_resource(), only touch config space for our own
 	 * children: a more distant descendant's rid does not name a
 	 * register on one of our devices.
 	 */
 	if (device_get_parent(child) == dev && rid == PCIR_BIOS)
 		pci_write_config(child, rid, rman_get_start(r), 4);
 	return (0);
 }
 
 /*
  * Remove a child from the bus: detach it if attached, turn off its
  * memory and I/O decoding, give back every resource it holds, delete the
  * newbus device and free its config data.
  */
 void
 pci_delete_child(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo;
 	struct resource_list *rl;
 	struct resource_list_entry *rle;
 	uint32_t cmd;
 
 	dinfo = device_get_ivars(child);
 	rl = &dinfo->resources;
 
 	if (device_is_attached(child))
 		device_detach(child);
 
 	/* Turn off access to resources we're about to free */
 	cmd = pci_read_config(child, PCIR_COMMAND, 2);
 	cmd &= ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN);
 	pci_write_config(child, PCIR_COMMAND, cmd, 2);
 
 	/* Free all allocated resources */
 	STAILQ_FOREACH(rle, rl, link) {
 		if (rle->res == NULL)
 			continue;
 
 		/* Complain about, then release, anything still in use. */
 		if ((rman_get_flags(rle->res) & RF_ACTIVE) != 0 ||
 		    resource_list_busy(rl, rle->type, rle->rid)) {
 			pci_printf(&dinfo->cfg,
 			    "Resource still owned, oops. "
 			    "(type=%d, rid=%d, addr=%lx)\n",
 			    rle->type, rle->rid,
 			    rman_get_start(rle->res));
 			bus_release_resource(child, rle->type, rle->rid,
 			    rle->res);
 		}
 		resource_list_unreserve(rl, dev, child, rle->type, rle->rid);
 	}
 	resource_list_free(rl);
 
 	device_delete_child(dev, child);
 	pci_freecfg(dinfo);
 }
 
 /*
  * Delete one resource list entry of a direct child.  If the entry holds
  * a resource that is still active or busy, a message is printed and
  * nothing is deleted.
  */
 void
 pci_delete_resource(device_t dev, device_t child, int type, int rid)
 {
 	struct resource_list *rl;
 	struct resource_list_entry *rle;
 
 	if (device_get_parent(child) != dev)
 		return;
 
 	rl = &((struct pci_devinfo *)device_get_ivars(child))->resources;
 	rle = resource_list_find(rl, type, rid);
 	if (rle == NULL)
 		return;
 
 	if (rle->res != NULL) {
 		if ((rman_get_flags(rle->res) & RF_ACTIVE) != 0 ||
 		    resource_list_busy(rl, type, rid)) {
 			device_printf(dev, "delete_resource: "
 			    "Resource still owned by child, oops. "
 			    "(type=%d, rid=%d, addr=%lx)\n",
 			    type, rid, rman_get_start(rle->res));
 			return;
 		}
 
 #ifndef __PCI_BAR_ZERO_VALID
 		/*
 		 * If this is a BAR, clear the BAR so it stops
 		 * decoding before releasing the resource.
 		 */
 		if (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)
 			pci_write_bar(child, rid, 0);
 #endif
 		resource_list_unreserve(rl, dev, child, type, rid);
 	}
 	resource_list_delete(rl, type, rid);
 }
 
 /*
  * Return the resource list embedded in the child's devinfo (its ivars).
  */
 struct resource_list *
 pci_get_resource_list(device_t dev, device_t child)
 {
 	struct pci_devinfo *info;
 
 	info = device_get_ivars(child);
 	return (&info->resources);
 }
 
 /*
  * Read a config register of the child by forwarding the request to the
  * parent of this bus, addressed by the bus/slot/function stored in the
  * child's devinfo.
  */
 uint32_t
 pci_read_config_method(device_t dev, device_t child, int reg, int width)
 {
 	struct pci_devinfo *info;
 	pcicfgregs *regs;
 
 	info = device_get_ivars(child);
 	regs = &info->cfg;
 	return (PCIB_READ_CONFIG(device_get_parent(dev),
 	    regs->bus, regs->slot, regs->func, reg, width));
 }
 
 /*
  * Write a config register of the child by forwarding the request to the
  * parent of this bus, addressed by the bus/slot/function stored in the
  * child's devinfo.
  */
 void
 pci_write_config_method(device_t dev, device_t child, int reg,
     uint32_t val, int width)
 {
 	struct pci_devinfo *info;
 	pcicfgregs *regs;
 
 	info = device_get_ivars(child);
 	regs = &info->cfg;
 	PCIB_WRITE_CONFIG(device_get_parent(dev),
 	    regs->bus, regs->slot, regs->func, reg, val, width);
 }
 
 /*
  * Format the child's location ("slot=N function=M") into buf.
  * Always returns 0.
  */
 int
 pci_child_location_str_method(device_t dev, device_t child, char *buf,
     size_t buflen)
 {
 	int slot, function;
 
 	slot = pci_get_slot(child);
 	function = pci_get_function(child);
 	snprintf(buf, buflen, "slot=%d function=%d", slot, function);
 	return (0);
 }
 
 /*
  * Format the child's plug-and-play identification (vendor, device,
  * subvendor, subdevice and the three class bytes) into buf, using the
  * cached copy in the child's devinfo.  Always returns 0.
  */
 int
 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
     size_t buflen)
 {
 	struct pci_devinfo *info = device_get_ivars(child);
 	pcicfgregs *regs = &info->cfg;
 
 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
 	    "subdevice=0x%04x class=0x%02x%02x%02x",
 	    regs->vendor, regs->device,
 	    regs->subvendor, regs->subdevice,
 	    regs->baseclass, regs->subclass, regs->progif);
 	return (0);
 }
 
 /*
  * Ask the parent of this bus to route the interrupt for the child's
  * cached interrupt pin and return whatever it reports.
  */
 int
 pci_assign_interrupt_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *info;
 
 	info = device_get_ivars(child);
 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
 	    info->cfg.intpin));
 }
 
 /*
  * Module event handler.  On load it initialises the device queue and the
  * generation counter, creates the "pci" character device and loads the
  * vendor data; on unload it destroys that character device.  Any other
  * event is ignored.  Always returns 0.
  */
 static int
 pci_modevent(module_t mod, int what, void *arg)
 {
 	static struct cdev *pci_cdev;
 
 	if (what == MOD_LOAD) {
 		STAILQ_INIT(&pci_devq);
 		pci_generation = 0;
 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
 		    "pci");
 		pci_load_vendor_data();
 	} else if (what == MOD_UNLOAD) {
 		destroy_dev(pci_cdev);
 	}
 
 	return (0);
 }
 
 /*
  * Write the cached type 0 header registers in dinfo back to the device,
  * bringing it to D0 first, and then resume MSI/MSI-X if the device has
  * those capabilities.
  */
 void
 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
 {
 	pcicfgregs *cfg;
 	int bar;
 
 	cfg = &dinfo->cfg;
 
 	/*
 	 * Only header type 0 devices are handled.  Type 1 (PCI bridges)
 	 * and type 2 (cardbus bridges) need special treatment, and
 	 * anything else is unknown, so all of those are left untouched.
 	 */
 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
 		return;
 
 	/*
 	 * Go to full power before touching the registers: leaving D3 for
 	 * D0 resets the BARs and some other registers to unknown
 	 * power-on values.  A device already in D0 is left alone, which
 	 * also cuts down the noise on boot.
 	 */
 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
 
 	for (bar = 0; bar < cfg->nummaps; bar++)
 		pci_write_config(dev, PCIR_BAR(bar), cfg->bar[bar], 4);
 	pci_write_config(dev, PCIR_BIOS, cfg->bios, 4);
 	pci_write_config(dev, PCIR_COMMAND, cfg->cmdreg, 2);
 	pci_write_config(dev, PCIR_INTLINE, cfg->intline, 1);
 	pci_write_config(dev, PCIR_INTPIN, cfg->intpin, 1);
 	pci_write_config(dev, PCIR_MINGNT, cfg->mingnt, 1);
 	pci_write_config(dev, PCIR_MAXLAT, cfg->maxlat, 1);
 	pci_write_config(dev, PCIR_CACHELNSZ, cfg->cachelnsz, 1);
 	pci_write_config(dev, PCIR_LATTIMER, cfg->lattimer, 1);
 	pci_write_config(dev, PCIR_PROGIF, cfg->progif, 1);
 	pci_write_config(dev, PCIR_REVID, cfg->revid, 1);
 
 	/* Restore MSI and MSI-X configurations if they are present. */
 	if (cfg->msi.msi_location != 0)
 		pci_resume_msi(dev);
 	if (cfg->msix.msix_location != 0)
 		pci_resume_msix(dev);
 }
 
 /*
  * Refresh the cached type 0 header registers in dinfo from the device and,
  * when setstate is non-zero, power the device down to D3 subject to the
  * pci_do_power_nodriver policy.
  */
 void
 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
 {
 	pcicfgregs *cfg;
 	uint32_t cls;
 	int bar, policy, ps;
 
 	cfg = &dinfo->cfg;
 
 	/*
 	 * Only header type 0 devices are handled.  Type 1 (PCI bridges)
 	 * and type 2 (cardbus bridges) need special treatment, and other
 	 * types are unknown, so they are ignored.  Powering down bridges
 	 * should not be undertaken lightly.
 	 */
 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
 		return;
 
 	for (bar = 0; bar < cfg->nummaps; bar++)
 		cfg->bar[bar] = pci_read_config(dev, PCIR_BAR(bar), 4);
 	cfg->bios = pci_read_config(dev, PCIR_BIOS, 4);
 
 	/*
 	 * Some drivers apparently write these registers without updating
 	 * the cached copy, and the bus itself modifies COMMAND without
 	 * updating the cache.  Re-reading them is harmless and lets them
 	 * be restored later.  This should cover the normally writable
 	 * portion of the defined part of a type 0 header.  In theory the
 	 * known capability structures would need saving too, but apart
 	 * from power none of them are known to be writable.
 	 */
 	cfg->subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
 	cfg->subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
 	cfg->vendor = pci_read_config(dev, PCIR_VENDOR, 2);
 	cfg->device = pci_read_config(dev, PCIR_DEVICE, 2);
 	cfg->cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
 	cfg->intline = pci_read_config(dev, PCIR_INTLINE, 1);
 	cfg->intpin = pci_read_config(dev, PCIR_INTPIN, 1);
 	cfg->mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
 	cfg->maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
 	cfg->cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
 	cfg->lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
 	cfg->baseclass = pci_read_config(dev, PCIR_CLASS, 1);
 	cfg->subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
 	cfg->progif = pci_read_config(dev, PCIR_PROGIF, 1);
 	cfg->revid = pci_read_config(dev, PCIR_REVID, 1);
 
 	/*
 	 * Display devices, base peripherals and memory devices are not
 	 * powered down, since bad things happen when they are.  We should
 	 * (a) have drivers that can easily detach and (b) use generic
 	 * drivers for these devices so that some device actually
 	 * attaches.  When (a) is implemented, care is needed not to
 	 * power the device down on a reattach.
 	 */
 	cls = pci_get_class(dev);
 	if (!setstate)
 		return;
 
 	/*
 	 * Policy levels:
 	 *   0   no powerdown at all
 	 *   1   conservative: additionally skips storage devices
 	 *   2   aggressive: skips display, memory and base peripherals
 	 *   3   power down everything
 	 * Level 1 also applies the level 2 exclusions.  Any other value
 	 * behaves like 3.
 	 */
 	policy = pci_do_power_nodriver;
 	if (policy == 0)
 		return;
 	if (policy == 1 && cls == PCIC_STORAGE)
 		return;
 	if ((policy == 1 || policy == 2) &&
 	    (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
 	    cls == PCIC_BASEPERIPH))
 		return;
 
 	/*
 	 * PCI spec says we can only go into D3 state from D0 state.
 	 * Transition from D[12] into D0 before going to D3 state.
 	 */
 	ps = pci_get_powerstate(dev);
 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
 }
Index: projects/binutils-2.17/sys/dev/pci/pci_pci.c
===================================================================
--- projects/binutils-2.17/sys/dev/pci/pci_pci.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/pci/pci_pci.c	(revision 215830)
@@ -1,878 +1,880 @@
 /*-
  * Copyright (c) 1994,1995 Stefan Esser, Wolfgang StanglMeier
  * Copyright (c) 2000 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * PCI:PCI bridge support.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <sys/sysctl.h>
 
 #include <machine/resource.h>
 
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pci_private.h>
 #include <dev/pci/pcib_private.h>
 
 #include "pcib_if.h"
 
 /* Forward declarations for the methods defined later in this file. */
 static int		pcib_probe(device_t dev);
 static int		pcib_suspend(device_t dev);
 static int		pcib_resume(device_t dev);
 static int		pcib_power_for_sleep(device_t pcib, device_t dev,
 			    int *pstate);
 
 /*
  * Method table for the PCI-PCI bridge driver.  Entries named
  * bus_generic_* use the generic bus implementations; the pcib_* entries
  * are the bridge-specific handlers.
  */
 static device_method_t pcib_methods[] = {
     /* Device interface */
     DEVMETHOD(device_probe,		pcib_probe),
     DEVMETHOD(device_attach,		pcib_attach),
     DEVMETHOD(device_detach,		bus_generic_detach),
     DEVMETHOD(device_shutdown,		bus_generic_shutdown),
     DEVMETHOD(device_suspend,		pcib_suspend),
     DEVMETHOD(device_resume,		pcib_resume),
 
     /* Bus interface */
     DEVMETHOD(bus_print_child,		bus_generic_print_child),
     DEVMETHOD(bus_read_ivar,		pcib_read_ivar),
     DEVMETHOD(bus_write_ivar,		pcib_write_ivar),
     DEVMETHOD(bus_alloc_resource,	pcib_alloc_resource),
     DEVMETHOD(bus_release_resource,	bus_generic_release_resource),
     DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
     DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
     DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
     DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
 
     /* pcib interface */
     DEVMETHOD(pcib_maxslots,		pcib_maxslots),
     DEVMETHOD(pcib_read_config,		pcib_read_config),
     DEVMETHOD(pcib_write_config,	pcib_write_config),
     DEVMETHOD(pcib_route_interrupt,	pcib_route_interrupt),
     DEVMETHOD(pcib_alloc_msi,		pcib_alloc_msi),
     DEVMETHOD(pcib_release_msi,		pcib_release_msi),
     DEVMETHOD(pcib_alloc_msix,		pcib_alloc_msix),
     DEVMETHOD(pcib_release_msix,	pcib_release_msix),
     DEVMETHOD(pcib_map_msi,		pcib_map_msi),
     DEVMETHOD(pcib_power_for_sleep,	pcib_power_for_sleep),
 
     /* Table terminator. */
     { 0, 0 }
 };
 
 static devclass_t pcib_devclass;
 
 /* Register the "pcib" driver, with a pcib_softc per instance, on "pci". */
 DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc));
 DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, 0, 0);
 
 /*
  * Is the prefetch window open (i.e., can we allocate memory in it?)
  */
 static int
 pcib_is_prefetch_open(struct pcib_softc *sc)
 {
 	int open;
 
 	/* Open means a non-zero base that lies strictly below the limit. */
 	open = 0;
 	if (sc->pmembase > 0 && sc->pmemlimit > sc->pmembase)
 		open = 1;
 	return (open);
 }
 
 /*
  * Is the nonprefetch window open (i.e., can we allocate memory in it?)
  */
 static int
 pcib_is_nonprefetch_open(struct pcib_softc *sc)
 {
 	int open;
 
 	/* Open means a non-zero base that lies strictly below the limit. */
 	open = 0;
 	if (sc->membase > 0 && sc->memlimit > sc->membase)
 		open = 1;
 	return (open);
 }
 
 /*
  * Is the io window open (i.e., can we allocate ports in it?)
  */
 static int
 pcib_is_io_open(struct pcib_softc *sc)
 {
 	int open;
 
 	/* Open means a non-zero base that lies strictly below the limit. */
 	open = 0;
 	if (sc->iobase > 0 && sc->iolimit > sc->iobase)
 		open = 1;
 	return (open);
 }
 
 /*
  * Get current I/O decode.
  */
 static void
 pcib_get_io_decode(struct pcib_softc *sc)
 {
 	device_t	bridge;
 	uint32_t	low, high;
 
 	bridge = sc->dev;
 
 	/*
 	 * Window base: the upper 16 bits are read only when the low
 	 * register reports 32-bit I/O decoding; otherwise they are zero.
 	 */
 	low = pci_read_config(bridge, PCIR_IOBASEL_1, 1);
 	high = 0;
 	if ((low & PCIM_BRIO_MASK) == PCIM_BRIO_32)
 		high = pci_read_config(bridge, PCIR_IOBASEH_1, 2);
 	sc->iobase = PCI_PPBIOBASE(high, low);
 
 	/* Window limit, assembled the same way. */
 	low = pci_read_config(bridge, PCIR_IOLIMITL_1, 1);
 	high = 0;
 	if ((low & PCIM_BRIO_MASK) == PCIM_BRIO_32)
 		high = pci_read_config(bridge, PCIR_IOLIMITH_1, 2);
 	sc->iolimit = PCI_PPBIOLIMIT(high, low);
 }
 
 /*
  * Get current memory decode.
  */
 static void
 pcib_get_mem_decode(struct pcib_softc *sc)
 {
 	device_t	bridge;
 	pci_addr_t	low;
 	uint32_t	high;
 
 	bridge = sc->dev;
 
 	/* Non-prefetchable window: no upper half is read for it. */
 	sc->membase = PCI_PPBMEMBASE(0,
 	    pci_read_config(bridge, PCIR_MEMBASE_1, 2));
 	sc->memlimit = PCI_PPBMEMLIMIT(0,
 	    pci_read_config(bridge, PCIR_MEMLIMIT_1, 2));
 
 	/*
 	 * Prefetchable base: the upper 32 bits are read only when the low
 	 * register reports 64-bit decoding; otherwise they are zero.
 	 */
 	low = pci_read_config(bridge, PCIR_PMBASEL_1, 2);
 	high = 0;
 	if ((low & PCIM_BRPM_MASK) == PCIM_BRPM_64)
 		high = pci_read_config(bridge, PCIR_PMBASEH_1, 4);
 	sc->pmembase = PCI_PPBMEMBASE(high, low);
 
 	/* Prefetchable limit, assembled the same way. */
 	low = pci_read_config(bridge, PCIR_PMLIMITL_1, 2);
 	high = 0;
 	if ((low & PCIM_BRPM_MASK) == PCIM_BRPM_64)
 		high = pci_read_config(bridge, PCIR_PMLIMITH_1, 4);
 	sc->pmemlimit = PCI_PPBMEMLIMIT(high, low);
 }
 
 /*
  * Restore previous I/O decode.
  */
 static void
 pcib_set_io_decode(struct pcib_softc *sc)
 {
 	device_t	bridge;
 	uint32_t	upper;
 
 	bridge = sc->dev;
 
 	/* Base: the upper register is written only when non-zero. */
 	upper = sc->iobase >> 16;
 	if (upper > 0)
 		pci_write_config(bridge, PCIR_IOBASEH_1, upper, 2);
 	pci_write_config(bridge, PCIR_IOBASEL_1, sc->iobase >> 8, 1);
 
 	/* Limit: same scheme. */
 	upper = sc->iolimit >> 16;
 	if (upper > 0)
 		pci_write_config(bridge, PCIR_IOLIMITH_1, upper, 2);
 	pci_write_config(bridge, PCIR_IOLIMITL_1, sc->iolimit >> 8, 1);
 }
 
 /*
  * Restore previous memory decode.
  */
 static void
 pcib_set_mem_decode(struct pcib_softc *sc)
 {
 	device_t	bridge;
 	pci_addr_t	upper;
 
 	bridge = sc->dev;
 
 	/* Non-prefetchable window. */
 	pci_write_config(bridge, PCIR_MEMBASE_1, sc->membase >> 16, 2);
 	pci_write_config(bridge, PCIR_MEMLIMIT_1, sc->memlimit >> 16, 2);
 
 	/* Prefetchable base: upper 32 bits are written only if non-zero. */
 	upper = sc->pmembase >> 32;
 	if (upper > 0)
 		pci_write_config(bridge, PCIR_PMBASEH_1, upper, 4);
 	pci_write_config(bridge, PCIR_PMBASEL_1, sc->pmembase >> 16, 2);
 
 	/* Prefetchable limit: same scheme. */
 	upper = sc->pmemlimit >> 32;
 	if (upper > 0)
 		pci_write_config(bridge, PCIR_PMLIMITH_1, upper, 4);
 	pci_write_config(bridge, PCIR_PMLIMITL_1, sc->pmemlimit >> 16, 2);
 }
 
 /*
  * Get current bridge configuration.
  */
 static void
 pcib_cfg_save(struct pcib_softc *sc)
 {
 	device_t	bridge;
 
 	bridge = sc->dev;
 
 	/* Command register, bus numbers, bridge control, latency timer. */
 	sc->command = pci_read_config(bridge, PCIR_COMMAND, 2);
 	sc->pribus = pci_read_config(bridge, PCIR_PRIBUS_1, 1);
 	sc->secbus = pci_read_config(bridge, PCIR_SECBUS_1, 1);
 	sc->subbus = pci_read_config(bridge, PCIR_SUBBUS_1, 1);
 	sc->bridgectl = pci_read_config(bridge, PCIR_BRIDGECTL_1, 2);
 	sc->seclat = pci_read_config(bridge, PCIR_SECLAT_1, 1);
 
 	/* Windows are read only for the spaces the command register enables. */
 	if ((sc->command & PCIM_CMD_PORTEN) != 0)
 		pcib_get_io_decode(sc);
 	if ((sc->command & PCIM_CMD_MEMEN) != 0)
 		pcib_get_mem_decode(sc);
 }
 
 /*
  * Restore previous bridge configuration.
  */
 static void
 pcib_cfg_restore(struct pcib_softc *sc)
 {
 	device_t	bridge;
 
 	bridge = sc->dev;
 
 	/* Command register, bus numbers, bridge control, latency timer. */
 	pci_write_config(bridge, PCIR_COMMAND, sc->command, 2);
 	pci_write_config(bridge, PCIR_PRIBUS_1, sc->pribus, 1);
 	pci_write_config(bridge, PCIR_SECBUS_1, sc->secbus, 1);
 	pci_write_config(bridge, PCIR_SUBBUS_1, sc->subbus, 1);
 	pci_write_config(bridge, PCIR_BRIDGECTL_1, sc->bridgectl, 2);
 	pci_write_config(bridge, PCIR_SECLAT_1, sc->seclat, 1);
 
 	/* Windows are rewritten only for the spaces the saved command enables. */
 	if ((sc->command & PCIM_CMD_PORTEN) != 0)
 		pcib_set_io_decode(sc);
 	if ((sc->command & PCIM_CMD_MEMEN) != 0)
 		pcib_set_mem_decode(sc);
 }
 
 /*
  * Generic device interface
  */
 static int
 pcib_probe(device_t dev)
 {
     /* Anything that is not class bridge / subclass PCI is not ours. */
     if (pci_get_class(dev) != PCIC_BRIDGE ||
 	pci_get_subclass(dev) != PCIS_BRIDGE_PCI)
 	return(ENXIO);
 
     device_set_desc(dev, "PCI-PCI bridge");
     return(-10000);
 }
 
 /*
  * Common attach work for a PCI-PCI bridge: record the bridge's current
  * configuration in its softc, publish the bus numbers through sysctl,
  * apply per-device quirks, set the MSI-disable and subtractive-decode
  * flags, and print the decode windows when booting verbosely.
  *
  * No child bus is added here; pcib_attach() does that after calling this.
  */
 void
 pcib_attach_common(device_t dev)
 {
     struct pcib_softc	*sc;
     struct sysctl_ctx_list *sctx;
     struct sysctl_oid	*soid;
 
     sc = device_get_softc(dev);
     sc->dev = dev;
 
     /*
      * Get current bridge configuration.
      */
     sc->domain = pci_get_domain(dev);
     sc->secstat = pci_read_config(dev, PCIR_SECSTAT_1, 2);
     pcib_cfg_save(sc);
 
     /*
      * Setup sysctl reporting nodes
      */
     sctx = device_get_sysctl_ctx(dev);
     soid = device_get_sysctl_tree(dev);
     SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "domain",
       CTLFLAG_RD, &sc->domain, 0, "Domain number");
     SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "pribus",
       CTLFLAG_RD, &sc->pribus, 0, "Primary bus number");
     SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "secbus",
       CTLFLAG_RD, &sc->secbus, 0, "Secondary bus number");
     SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus",
       CTLFLAG_RD, &sc->subbus, 0, "Subordinate bus number");
 
     /*
      * Quirk handling.
      */
     switch (pci_get_devid(dev)) {
     case 0x12258086:		/* Intel 82454KX/GX (Orion) */
 	{
 	    uint8_t	supbus;
 
 	    /*
 	     * Register 0x41 is device-specific; unless it reads 0xff, both
 	     * the secondary and subordinate bus become that value plus one.
 	     */
 	    supbus = pci_read_config(dev, 0x41, 1);
 	    if (supbus != 0xff) {
 		sc->secbus = supbus + 1;
 		sc->subbus = supbus + 1;
 	    }
 	    break;
 	}
 
     /*
      * The i82380FB mobile docking controller is a PCI-PCI bridge,
      * and it is a subtractive bridge.  However, the ProgIf is wrong
      * so the normal setting of PCIB_SUBTRACTIVE bit doesn't
      * happen.  There's also a Toshiba bridge that behaves this
      * way.
      */
     case 0x124b8086:		/* Intel 82380FB Mobile */
     case 0x060513d7:		/* Toshiba ???? */
 	sc->flags |= PCIB_SUBTRACTIVE;
 	break;
 
     /* Compaq R3000 BIOS sets wrong subordinate bus number. */
     case 0x00dd10de:
 	{
 	    char *cp;
 
 	    /*
 	     * Apply the fix only when the smbios environment identifies the
 	     * board as maker "Compal", product "08A0".  Each getenv() result
 	     * is released with freeenv() on every path.
 	     */
 	    if ((cp = getenv("smbios.planar.maker")) == NULL)
 		break;
 	    if (strncmp(cp, "Compal", 6) != 0) {
 		freeenv(cp);
 		break;
 	    }
 	    freeenv(cp);
 	    if ((cp = getenv("smbios.planar.product")) == NULL)
 		break;
 	    if (strncmp(cp, "08A0", 4) != 0) {
 		freeenv(cp);
 		break;
 	    }
 	    freeenv(cp);
 	    /* Raise the subordinate bus to 0xa and cache what reads back. */
 	    if (sc->subbus < 0xa) {
 		pci_write_config(dev, PCIR_SUBBUS_1, 0xa, 1);
 		sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1);
 	    }
 	    break;
 	}
     }
 
     /* MSI requests through a blacklisted bridge are refused later on. */
     if (pci_msi_device_blacklisted(dev))
 	sc->flags |= PCIB_DISABLE_MSI;
 
     /*
      * Intel 815, 845 and other chipsets say they are PCI-PCI bridges,
      * but have a ProgIF of 0x80.  The 82801 family (AA, AB, BAM/CAM,
      * BA/CA/DB and E) PCI bridges are HUB-PCI bridges, in Intelese.
      * This means they act as if they were subtractively decoding
      * bridges and pass all transactions.  Mark them and real ProgIf 1
      * parts as subtractive.
      */
     if ((pci_get_devid(dev) & 0xff00ffff) == 0x24008086 ||
       pci_read_config(dev, PCIR_PROGIF, 1) == PCIP_BRIDGE_PCI_SUBTRACTIVE)
 	sc->flags |= PCIB_SUBTRACTIVE;
 	
     /* Verbose boot: report the bus numbers and decode windows found. */
     if (bootverbose) {
 	device_printf(dev, "  domain            %d\n", sc->domain);
 	device_printf(dev, "  secondary bus     %d\n", sc->secbus);
 	device_printf(dev, "  subordinate bus   %d\n", sc->subbus);
 	device_printf(dev, "  I/O decode        0x%x-0x%x\n", sc->iobase, sc->iolimit);
 	if (pcib_is_nonprefetch_open(sc))
 	    device_printf(dev, "  memory decode     0x%jx-0x%jx\n",
 	      (uintmax_t)sc->membase, (uintmax_t)sc->memlimit);
 	if (pcib_is_prefetch_open(sc))
 	    device_printf(dev, "  prefetched decode 0x%jx-0x%jx\n",
 	      (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit);
 	else
 	    device_printf(dev, "  no prefetched decode\n");
 	if (sc->flags & PCIB_SUBTRACTIVE)
 	    device_printf(dev, "  Subtractively decoded bridge.\n");
     }
 
     /*
      * XXX If the secondary bus number is zero, we should assign a bus number
      *     since the BIOS hasn't, then initialise the bridge.  A simple
      *     bus_alloc_resource with the a couple of busses seems like the right
      *     approach, but we don't know what busses the BIOS might have already
      *     assigned to other bridges on this bus that probe later than we do.
      *
      *     If the subordinate bus number is less than the secondary bus number,
      *     we should pick a better value.  One sensible alternative would be to
      *     pick 255; the only tradeoff here is that configuration transactions
      *     would be more widely routed than absolutely necessary.  We could
      *     then do a walk of the tree later and fix it.
      */
 }
 
 /*
  * Attach the bridge: run the common setup and, when a secondary bus number
  * is configured, add a "pci" child for it and attach the children.
  * Returns 0 when there is no secondary bus or the child cannot be added.
  */
 int
 pcib_attach(device_t dev)
 {
     struct pcib_softc	*sc;
     device_t		pcibus;
 
     pcib_attach_common(dev);
     sc = device_get_softc(dev);
 
     /* no secondary bus; we should have fixed this */
     if (sc->secbus == 0)
 	return(0);
 
     pcibus = device_add_child(dev, "pci", sc->secbus);
     if (pcibus == NULL)
 	return(0);
     return(bus_generic_attach(dev));
 }
 
 /*
  * Suspend the bridge: save its configuration, suspend the children and,
  * if that succeeded and pci_do_power_suspend is set, ask the grandparent
  * device which power state to use (D3 is the starting suggestion) and
  * enter it.  Returns the result of the generic suspend.
  */
 int
 pcib_suspend(device_t dev)
 {
 	device_t	grandparent;
 	int		rc, target;
 
 	pcib_cfg_save(device_get_softc(dev));
 	rc = bus_generic_suspend(dev);
 	if (rc != 0 || !pci_do_power_suspend)
 		return (rc);
 
 	target = PCI_POWERSTATE_D3;
 	grandparent = device_get_parent(device_get_parent(dev));
 	if (PCIB_POWER_FOR_SLEEP(grandparent, dev, &target) == 0)
 		pci_set_powerstate(dev, target);
 	return (rc);
 }
 
 /*
  * Resume the bridge: optionally return it to D0, write the saved
  * configuration back, then resume the children.
  */
 int
 pcib_resume(device_t dev)
 {
 	device_t	grandparent;
 
 	if (pci_do_power_resume) {
 		grandparent = device_get_parent(device_get_parent(dev));
 		/* D0 is entered only when the query itself succeeds. */
 		if (PCIB_POWER_FOR_SLEEP(grandparent, dev, NULL) == 0)
 			pci_set_powerstate(dev, PCI_POWERSTATE_D0);
 	}
 
 	pcib_cfg_restore(device_get_softc(dev));
 	return (bus_generic_resume(dev));
 }
 
 /*
  * Read an instance variable of the bridge: the domain or the secondary
  * bus number.  Returns ENOENT for any other variable.
  */
 int
 pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
 {
     struct pcib_softc	*sc;
 
     sc = device_get_softc(dev);
     if (which == PCIB_IVAR_DOMAIN)
 	*result = sc->domain;
     else if (which == PCIB_IVAR_BUS)
 	*result = sc->secbus;
     else
 	return(ENOENT);
     return(0);
 }
 
 /*
  * Write an instance variable of the bridge.  Only the secondary bus number
  * can be set; the domain is rejected with EINVAL and anything else with
  * ENOENT.
  */
 int
 pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
 {
     struct pcib_softc	*sc;
 
     sc = device_get_softc(dev);
     if (which == PCIB_IVAR_DOMAIN)
 	return(EINVAL);
     if (which != PCIB_IVAR_BUS)
 	return(ENOENT);
 
     sc->secbus = value;
     return(0);
 }
 
 /*
  * We have to trap resource allocation requests and ensure that the bridge
  * is set up to, or capable of handling them.
  */
 /*
  * Allocate a resource on behalf of a child behind this bridge.
  *
  * For I/O port and memory requests, [start, end] is checked against the
  * bridge's decode windows.  On a non-subtractive bridge a request that does
  * not fit is clamped to the matching window; a subtractive bridge accepts
  * it as is.  A request that is still unacceptable returns NULL.  Every
  * other resource type, and every accepted request, is passed to
  * bus_generic_alloc_resource() with the possibly adjusted range.
  */
 struct resource *
 pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, 
     u_long start, u_long end, u_long count, u_int flags)
 {
 	struct pcib_softc	*sc = device_get_softc(dev);
 	const char *name, *suffix;
 	int ok;
 
 	/*
 	 * Fail the allocation for this range if it's not supported.
 	 */
 	/* name/suffix are used only to prefix the diagnostics below. */
 	name = device_get_nameunit(child);
 	if (name == NULL) {
 		name = "";
 		suffix = "";
 	} else
 		suffix = " ";
 	switch (type) {
 	case SYS_RES_IOPORT:
 		ok = 0;
 		/*
 		 * NOTE(review): with the I/O window closed this break leaves
 		 * the switch, so the request is passed up unchecked rather
 		 * than rejected -- confirm that this is intended.
 		 */
 		if (!pcib_is_io_open(sc))
 			break;
 		ok = (start >= sc->iobase && end <= sc->iolimit);
 
 		/*
 		 * Make sure we allow access to VGA I/O addresses when the
 		 * bridge has the "VGA Enable" bit set.
 		 */
 		if (!ok && pci_is_vga_ioport_range(start, end))
 			ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0;
 
 		if ((sc->flags & PCIB_SUBTRACTIVE) == 0) {
 			/* Clamp the range to the window; ok if non-empty. */
 			if (!ok) {
 				if (start < sc->iobase)
 					start = sc->iobase;
 				if (end > sc->iolimit)
 					end = sc->iolimit;
 				if (start < end)
 					ok = 1;
 			}
 		} else {
 			ok = 1;
 #if 0
 			/*
 			 * If we overlap with the subtractive range, then
 			 * pick the upper range to use.
 			 */
 			if (start < sc->iolimit && end > sc->iobase)
 				start = sc->iolimit + 1;
 #endif
 		}
 		/* An inverted range is always rejected. */
 		if (end < start) {
 			device_printf(dev, "ioport: end (%lx) < start (%lx)\n",
 			    end, start);
 			start = 0;
 			end = 0;
 			ok = 0;
 		}
 		if (!ok) {
 			device_printf(dev, "%s%srequested unsupported I/O "
 			    "range 0x%lx-0x%lx (decoding 0x%x-0x%x)\n",
 			    name, suffix, start, end, sc->iobase, sc->iolimit);
 			return (NULL);
 		}
 		if (bootverbose)
 			device_printf(dev,
 			    "%s%srequested I/O range 0x%lx-0x%lx: in range\n",
 			    name, suffix, start, end);
 		break;
 
 	case SYS_RES_MEMORY:
 		/* Acceptable if it fits entirely inside either open window. */
 		ok = 0;
 		if (pcib_is_nonprefetch_open(sc))
 			ok = ok || (start >= sc->membase && end <= sc->memlimit);
 		if (pcib_is_prefetch_open(sc))
 			ok = ok || (start >= sc->pmembase && end <= sc->pmemlimit);
 
 		/*
 		 * Make sure we allow access to VGA memory addresses when the
 		 * bridge has the "VGA Enable" bit set.
 		 */
 		if (!ok && pci_is_vga_memory_range(start, end))
 			ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0;
 
 		if ((sc->flags & PCIB_SUBTRACTIVE) == 0) {
 			/*
 			 * Clamp to the window selected by RF_PREFETCHABLE;
 			 * fail if that window is closed.
 			 */
 			if (!ok) {
 				ok = 1;
 				if (flags & RF_PREFETCHABLE) {
 					if (pcib_is_prefetch_open(sc)) {
 						if (start < sc->pmembase)
 							start = sc->pmembase;
 						if (end > sc->pmemlimit)
 							end = sc->pmemlimit;
 					} else {
 						ok = 0;
 					}
 				} else {	/* non-prefetchable */
 					if (pcib_is_nonprefetch_open(sc)) {
 						if (start < sc->membase)
 							start = sc->membase;
 						if (end > sc->memlimit)
 							end = sc->memlimit;
 					} else {
 						ok = 0;
 					}
 				}
 			}
 		} else if (!ok) {
 			ok = 1;	/* subtractive bridge: always ok */
 #if 0
 			if (pcib_is_nonprefetch_open(sc)) {
 				if (start < sc->memlimit && end > sc->membase)
 					start = sc->memlimit + 1;
 			}
 			if (pcib_is_prefetch_open(sc)) {
 				if (start < sc->pmemlimit && end > sc->pmembase)
 					start = sc->pmemlimit + 1;
 			}
 #endif
 		}
 		/* An inverted range is always rejected. */
 		if (end < start) {
 			device_printf(dev, "memory: end (%lx) < start (%lx)\n",
 			    end, start);
 			start = 0;
 			end = 0;
 			ok = 0;
 		}
 		/* Unlike the I/O case, this failure is reported only if verbose. */
 		if (!ok && bootverbose)
 			device_printf(dev,
 			    "%s%srequested unsupported memory range %#lx-%#lx "
 			    "(decoding %#jx-%#jx, %#jx-%#jx)\n",
 			    name, suffix, start, end,
 			    (uintmax_t)sc->membase, (uintmax_t)sc->memlimit,
 			    (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit);
 		if (!ok)
 			return (NULL);
 		if (bootverbose)
 			device_printf(dev,"%s%srequested memory range "
 			    "0x%lx-0x%lx: good\n",
 			    name, suffix, start, end);
 		break;
 
 	default:
 		/* Other resource types are passed up without any checks. */
 		break;
 	}
 	/*
 	 * Bridge is OK decoding this resource, so pass it up.
 	 */
 	return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
 	    count, flags));
 }
 
 /*
  * PCIB interface.
  */
 int
 pcib_maxslots(device_t dev)
 {
     int	highest;
 
     /* Report the compile-time PCI_SLOTMAX constant. */
     highest = PCI_SLOTMAX;
     return(highest);
 }
 
 /*
  * Since we are a child of a PCI bus, its parent must support the pcib interface.
  */
 uint32_t
 pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width)
 {
     device_t	pcibus, upstream;
 
     /* Our parent is the PCI bus; its parent performs the access. */
     pcibus = device_get_parent(dev);
     upstream = device_get_parent(pcibus);
     return(PCIB_READ_CONFIG(upstream, b, s, f, reg, width));
 }
 
 /*
  * Forward a config-space write to the parent of the PCI bus this bridge
  * sits on.
  */
 void
 pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width)
 {
     device_t	pcibus, upstream;
 
     pcibus = device_get_parent(dev);
     upstream = device_get_parent(pcibus);
     PCIB_WRITE_CONFIG(upstream, b, s, f, reg, val, width);
 }
 
 /*
  * Route an interrupt across a PCI bridge.
  */
 int
 pcib_route_interrupt(device_t pcib, device_t dev, int pin)
 {
     device_t	pcibus;
     int		swizzled;
     int		irq;
 
     /*
      * The PCI standard defines how a device/intpin pair on the child side
      * maps to an intpin on the parent side:
      *
      *   device        = device on child bus
      *   child_intpin  = intpin on child bus slot (0-3)
      *   parent_intpin = intpin on parent bus slot (0-3)
      *
      *   parent_intpin = (device + child_intpin) % 4
      *
      * The pin argument is 1-based, hence the "- 1" here and the "+ 1"
      * when the result is handed upwards.
      */
     swizzled = (pci_get_slot(dev) + (pin - 1)) % 4;
 
     /*
      * Our parent is a PCI bus.  Its parent must export the pcib interface
      * which includes the ability to route interrupts.
      */
     pcibus = device_get_parent(pcib);
     irq = PCIB_ROUTE_INTERRUPT(device_get_parent(pcibus), pcib, swizzled + 1);
     if (PCI_INTERRUPT_VALID(irq) && bootverbose) {
 	device_printf(pcib, "slot %d INT%c is routed to irq %d\n",
 	    pci_get_slot(dev), 'A' + pin - 1, irq);
     }
     return(irq);
 }
 
 /* Pass request to alloc MSI messages up to the parent bridge. */
 int
 pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
 {
 	struct pcib_softc *sc;
 	device_t upstream;
 
 	/* Refuse outright when MSI is disabled for this bridge. */
 	sc = device_get_softc(pcib);
 	if ((sc->flags & PCIB_DISABLE_MSI) != 0)
 		return (ENXIO);
 
 	upstream = device_get_parent(device_get_parent(pcib));
 	return (PCIB_ALLOC_MSI(upstream, dev, count, maxcount, irqs));
 }
 
 /* Pass request to release MSI messages up to the parent bridge. */
 int
 pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
 {
 	device_t upstream;
 
 	/* The handler is the parent of the PCI bus this bridge sits on. */
 	upstream = device_get_parent(device_get_parent(pcib));
 	return (PCIB_RELEASE_MSI(upstream, dev, count, irqs));
 }
 
 /* Pass request to alloc an MSI-X message up to the parent bridge. */
 int
 pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
 {
 	struct pcib_softc *sc;
 	device_t upstream;
 
 	/* The same PCIB_DISABLE_MSI flag gates MSI-X as well. */
 	sc = device_get_softc(pcib);
 	if ((sc->flags & PCIB_DISABLE_MSI) != 0)
 		return (ENXIO);
 
 	upstream = device_get_parent(device_get_parent(pcib));
 	return (PCIB_ALLOC_MSIX(upstream, dev, irq));
 }
 
 /* Pass request to release an MSI-X message up to the parent bridge. */
 int
 pcib_release_msix(device_t pcib, device_t dev, int irq)
 {
 	device_t upstream;
 
 	/* The handler is the parent of the PCI bus this bridge sits on. */
 	upstream = device_get_parent(device_get_parent(pcib));
 	return (PCIB_RELEASE_MSIX(upstream, dev, irq));
 }
 
 /* Pass request to map MSI/MSI-X message up to parent bridge. */
 int
 pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
     uint32_t *data)
 {
 	device_t upstream;
 	int rc;
 
 	upstream = device_get_parent(device_get_parent(pcib));
 	rc = PCIB_MAP_MSI(upstream, dev, irq, addr, data);
 	if (rc == 0) {
 		/* On success, hand the address to pci_ht_map_msi(). */
 		pci_ht_map_msi(pcib, *addr);
 	}
 	return (rc);
 }
 
 /* Pass request for device power state up to parent bridge. */
 int
 pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate)
 {
 	device_t pcibus;
 
 	/*
 	 * NOTE(review): the request is addressed to this bridge's parent
 	 * (the bus) itself, whereas the other forwarding methods in this
 	 * file address the bus's parent -- confirm that this is intended.
 	 */
 	pcibus = device_get_parent(pcib);
 	return (PCIB_POWER_FOR_SLEEP(pcibus, dev, pstate));
 }
 
 /*
  * Try to read the bus number of a host-PCI bridge using appropriate config
  * registers.
  */
 /*
  * Determine the bus number behind a host-PCI bridge from chipset-specific
  * config registers, using read_config to access config space.
  *
  * Returns 1 after handling a recognised chipset, storing the bus number in
  * *busnum.  Returns 0 when no device responds (the ID reads 0xffffffff),
  * for the 450NX MIOC, and for unrecognised chipsets.
  */
 int
 host_pcib_get_busno(pci_read_config_fn read_config, int bus, int slot, int func,
     uint8_t *busnum)
 {
 	uint32_t id;
 
 	/* Combined device/vendor ID; the cases below match on this value. */
 	id = read_config(bus, slot, func, PCIR_DEVVENDOR, 4);
 	if (id == 0xffffffff)
 		return (0);
 
 	switch (id) {
 	case 0x12258086:
 		/* Intel 824?? */
 		/* XXX This is a guess */
 		/* *busnum = read_config(bus, slot, func, 0x41, 1); */
 		*busnum = bus;
 		break;
 	case 0x84c48086:
 		/* Intel 82454KX/GX (Orion) */
 		*busnum = read_config(bus, slot, func, 0x4a, 1);
 		break;
 	case 0x84ca8086:
 		/*
 		 * For the 450nx chipset, there is a whole bundle of
 		 * things pretending to be host bridges. The MIOC will 
 		 * be seen first and isn't really a pci bridge (the
 		 * actual busses are attached to the PXB's). We need to 
 		 * read the registers of the MIOC to figure out the
 		 * bus numbers for the PXB channels.
 		 *
 		 * Since the MIOC doesn't have a pci bus attached, we
 		 * pretend it wasn't there.
 		 */
 		return (0);
 	case 0x84cb8086:
 		/*
 		 * The registers are read from slot 0x10, not from the PXB's
 		 * own slot.  NOTE(review): for a slot other than 0x12-0x15
 		 * nothing is stored in *busnum, yet the function still
 		 * returns 1 -- confirm callers tolerate that.
 		 */
 		switch (slot) {
 		case 0x12:
 			/* Intel 82454NX PXB#0, Bus#A */
 			*busnum = read_config(bus, 0x10, func, 0xd0, 1);
 			break;
 		case 0x13:
 			/* Intel 82454NX PXB#0, Bus#B */
 			*busnum = read_config(bus, 0x10, func, 0xd1, 1) + 1;
 			break;
 		case 0x14:
 			/* Intel 82454NX PXB#1, Bus#A */
 			*busnum = read_config(bus, 0x10, func, 0xd3, 1);
 			break;
 		case 0x15:
 			/* Intel 82454NX PXB#1, Bus#B */
 			*busnum = read_config(bus, 0x10, func, 0xd4, 1) + 1;
 			break;
 		}
 		break;
 
 		/* ServerWorks -- vendor 0x1166 */
 		/* (the list also includes two IDs with vendor 0x1014) */
 	case 0x00051166:
 	case 0x00061166:
 	case 0x00081166:
 	case 0x00091166:
 	case 0x00101166:
 	case 0x00111166:
 	case 0x00171166:
 	case 0x01011166:
 	case 0x010f1014:
+	case 0x01101166:
 	case 0x02011166:
+	case 0x02251166:
 	case 0x03021014:
 		*busnum = read_config(bus, slot, func, 0x44, 1);
 		break;
 
 		/* Compaq/HP -- vendor 0x0e11 */
 	case 0x60100e11:
 		*busnum = read_config(bus, slot, func, 0xc8, 1);
 		break;
 	default:
 		/* Don't know how to read bus number. */
 		return 0;
 	}
 
 	return 1;
 }
Index: projects/binutils-2.17/sys/dev/usb/controller/usb_controller.c
===================================================================
--- projects/binutils-2.17/sys/dev/usb/controller/usb_controller.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/usb/controller/usb_controller.c	(revision 215830)
@@ -1,630 +1,630 @@
 /* $FreeBSD$ */
 /*-
  * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_ddb.h"
 
 #include <sys/stdint.h>
 #include <sys/stddef.h>
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/linker_set.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
 #include <sys/callout.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
 
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 
 #define	USB_DEBUG_VAR usb_ctrl_debug
 
 #include <dev/usb/usb_core.h>
 #include <dev/usb/usb_debug.h>
 #include <dev/usb/usb_process.h>
 #include <dev/usb/usb_busdma.h>
 #include <dev/usb/usb_dynamic.h>
 #include <dev/usb/usb_device.h>
 #include <dev/usb/usb_hub.h>
 
 #include <dev/usb/usb_controller.h>
 #include <dev/usb/usb_bus.h>
 #include <dev/usb/usb_pf.h>
 
 /* function prototypes  */
 
 static device_probe_t usb_probe;
 static device_attach_t usb_attach;
 static device_detach_t usb_detach;
 
 static void	usb_attach_sub(device_t, struct usb_bus *);
 
 /* static variables */
 
 #ifdef USB_DEBUG
 static int usb_ctrl_debug = 0;
 
 SYSCTL_NODE(_hw_usb, OID_AUTO, ctrl, CTLFLAG_RW, 0, "USB controller");
 SYSCTL_INT(_hw_usb_ctrl, OID_AUTO, debug, CTLFLAG_RW, &usb_ctrl_debug, 0,
     "Debug level");
 #endif
 
 static int usb_no_boot_wait = 0;
 TUNABLE_INT("hw.usb.no_boot_wait", &usb_no_boot_wait);
 SYSCTL_INT(_hw_usb, OID_AUTO, no_boot_wait, CTLFLAG_RDTUN, &usb_no_boot_wait, 0,
     "No device enumerate waiting at boot.");
 
 static devclass_t usb_devclass;
 
 static device_method_t usb_methods[] = {
 	DEVMETHOD(device_probe, usb_probe),
 	DEVMETHOD(device_attach, usb_attach),
 	DEVMETHOD(device_detach, usb_detach),
 	DEVMETHOD(device_suspend, bus_generic_suspend),
 	DEVMETHOD(device_resume, bus_generic_resume),
 	DEVMETHOD(device_shutdown, bus_generic_shutdown),
 	{0, 0}
 };
 
 static driver_t usb_driver = {
 	.name = "usbus",
 	.methods = usb_methods,
 	.size = 0,
 };
 
 /* Host Only Drivers */
 DRIVER_MODULE(usbus, ohci, usb_driver, usb_devclass, 0, 0);
 DRIVER_MODULE(usbus, uhci, usb_driver, usb_devclass, 0, 0);
 DRIVER_MODULE(usbus, ehci, usb_driver, usb_devclass, 0, 0);
 DRIVER_MODULE(usbus, xhci, usb_driver, usb_devclass, 0, 0);
 
 /* Device Only Drivers */
 DRIVER_MODULE(usbus, at91_udp, usb_driver, usb_devclass, 0, 0);
 DRIVER_MODULE(usbus, musbotg, usb_driver, usb_devclass, 0, 0);
 DRIVER_MODULE(usbus, uss820, usb_driver, usb_devclass, 0, 0);
 
 /*------------------------------------------------------------------------*
  *	usb_probe
  *
  * This function is called from "{ehci,ohci,uhci}_pci_attach()".
  *------------------------------------------------------------------------*/
 static int
 usb_probe(device_t dev)
 {
 	DPRINTF("\n");
 	return (0);
 }
 
 static void
 usb_root_mount_rel(struct usb_bus *bus)
 {
 	if (bus->bus_roothold != NULL) {
 		DPRINTF("Releasing root mount hold %p\n", bus->bus_roothold);
 		root_mount_rel(bus->bus_roothold);
 		bus->bus_roothold = NULL;
 	}
 }
 
 /*------------------------------------------------------------------------*
  *	usb_attach
  *------------------------------------------------------------------------*/
 static int
 usb_attach(device_t dev)
 {
 	struct usb_bus *bus = device_get_ivars(dev);
 
 	DPRINTF("\n");
 
 	if (bus == NULL) {
 		device_printf(dev, "USB device has no ivars\n");
 		return (ENXIO);
 	}
 
 	if (usb_no_boot_wait == 0) {
 		/* delay vfs_mountroot until the bus is explored */
 		bus->bus_roothold = root_mount_hold(device_get_nameunit(dev));
 	}
 
 	usb_attach_sub(dev, bus);
 
 	return (0);			/* return success */
 }
 
 /*------------------------------------------------------------------------*
  *	usb_detach
  *------------------------------------------------------------------------*/
 static int
 usb_detach(device_t dev)
 {
 	struct usb_bus *bus = device_get_softc(dev);
 
 	DPRINTF("\n");
 
 	if (bus == NULL) {
 		/* was never setup properly */
 		return (0);
 	}
 	/* Stop power watchdog */
 	usb_callout_drain(&bus->power_wdog);
 
 	/* Let the USB explore process detach all devices. */
 	usb_root_mount_rel(bus);
 
 	USB_BUS_LOCK(bus);
 	if (usb_proc_msignal(&bus->explore_proc,
 	    &bus->detach_msg[0], &bus->detach_msg[1])) {
 		/* ignore */
 	}
 	/* Wait for detach to complete */
 
 	usb_proc_mwait(&bus->explore_proc,
 	    &bus->detach_msg[0], &bus->detach_msg[1]);
 
 	USB_BUS_UNLOCK(bus);
 
 	/* Get rid of USB callback processes */
 
 	usb_proc_free(&bus->giant_callback_proc);
 	usb_proc_free(&bus->non_giant_callback_proc);
 
 	/* Get rid of USB explore process */
 
 	usb_proc_free(&bus->explore_proc);
 
 	/* Get rid of control transfer process */
 
 	usb_proc_free(&bus->control_xfer_proc);
 
+	usbpf_detach(bus);
+
 	return (0);
 }
 
 /*------------------------------------------------------------------------*
  *	usb_bus_explore
  *
  * This function is used to explore the device tree from the root.
  *------------------------------------------------------------------------*/
 static void
 usb_bus_explore(struct usb_proc_msg *pm)
 {
 	struct usb_bus *bus;
 	struct usb_device *udev;
 
 	bus = ((struct usb_bus_msg *)pm)->bus;
 	udev = bus->devices[USB_ROOT_HUB_ADDR];
 
 	if (udev && udev->hub) {
 
 		if (bus->do_probe) {
 			bus->do_probe = 0;
 			bus->driver_added_refcount++;
 		}
 		if (bus->driver_added_refcount == 0) {
 			/* avoid zero, hence that is memory default */
 			bus->driver_added_refcount = 1;
 		}
 
 #ifdef DDB
 		/*
 		 * The following three lines of code are only here to
 		 * recover from DDB:
 		 */
 		usb_proc_rewakeup(&bus->control_xfer_proc);
 		usb_proc_rewakeup(&bus->giant_callback_proc);
 		usb_proc_rewakeup(&bus->non_giant_callback_proc);
 #endif
 
 		USB_BUS_UNLOCK(bus);
 
 #if USB_HAVE_POWERD
 		/*
 		 * First update the USB power state!
 		 */
 		usb_bus_powerd(bus);
 #endif
 		 /* Explore the Root USB HUB. */
 		(udev->hub->explore) (udev);
 		USB_BUS_LOCK(bus);
 	}
 	usb_root_mount_rel(bus);
 }
 
 /*------------------------------------------------------------------------*
  *	usb_bus_detach
  *
  * This function is used to detach the device tree from the root.
  *------------------------------------------------------------------------*/
 static void
 usb_bus_detach(struct usb_proc_msg *pm)
 {
 	struct usb_bus *bus;
 	struct usb_device *udev;
 	device_t dev;
 
 	bus = ((struct usb_bus_msg *)pm)->bus;
 	udev = bus->devices[USB_ROOT_HUB_ADDR];
 	dev = bus->bdev;
 	/* clear the softc */
 	device_set_softc(dev, NULL);
 	USB_BUS_UNLOCK(bus);
 
 	/* detach children first */
 	mtx_lock(&Giant);
 	bus_generic_detach(dev);
 	mtx_unlock(&Giant);
 
 	/*
 	 * Free USB device and all subdevices, if any.
 	 */
 	usb_free_device(udev, 0);
 
 	USB_BUS_LOCK(bus);
 	/* clear bdev variable last */
 	bus->bdev = NULL;
 }
 
 static void
 usb_power_wdog(void *arg)
 {
 	struct usb_bus *bus = arg;
 
 	USB_BUS_LOCK_ASSERT(bus, MA_OWNED);
 
 	usb_callout_reset(&bus->power_wdog,
 	    4 * hz, usb_power_wdog, arg);
 
 #ifdef DDB
 	/*
 	 * The following line of code is only here to recover from
 	 * DDB:
 	 */
 	usb_proc_rewakeup(&bus->explore_proc);	/* recover from DDB */
 #endif
 
 #if USB_HAVE_POWERD
 	USB_BUS_UNLOCK(bus);
 
 	usb_bus_power_update(bus);
 
 	USB_BUS_LOCK(bus);
 #endif
 }
 
 /*------------------------------------------------------------------------*
  *	usb_bus_attach
  *
  * This function attaches USB in context of the explore thread.
  *------------------------------------------------------------------------*/
 static void
 usb_bus_attach(struct usb_proc_msg *pm)
 {
 	struct usb_bus *bus;
 	struct usb_device *child;
 	device_t dev;
 	usb_error_t err;
 	enum usb_dev_speed speed;
 
 	bus = ((struct usb_bus_msg *)pm)->bus;
 	dev = bus->bdev;
 
 	DPRINTF("\n");
 
 	switch (bus->usbrev) {
 	case USB_REV_1_0:
 		speed = USB_SPEED_FULL;
 		device_printf(bus->bdev, "12Mbps Full Speed USB v1.0\n");
 		break;
 
 	case USB_REV_1_1:
 		speed = USB_SPEED_FULL;
 		device_printf(bus->bdev, "12Mbps Full Speed USB v1.1\n");
 		break;
 
 	case USB_REV_2_0:
 		speed = USB_SPEED_HIGH;
 		device_printf(bus->bdev, "480Mbps High Speed USB v2.0\n");
 		break;
 
 	case USB_REV_2_5:
 		speed = USB_SPEED_VARIABLE;
 		device_printf(bus->bdev, "480Mbps Wireless USB v2.5\n");
 		break;
 
 	case USB_REV_3_0:
 		speed = USB_SPEED_SUPER;
 		device_printf(bus->bdev, "4.8Gbps Super Speed USB v3.0\n");
 		break;
 
 	default:
 		device_printf(bus->bdev, "Unsupported USB revision\n");
 		usb_root_mount_rel(bus);
 		return;
 	}
 
 	USB_BUS_UNLOCK(bus);
 
 	/* default power_mask value */
 	bus->hw_power_state =
 	  USB_HW_POWER_CONTROL |
 	  USB_HW_POWER_BULK |
 	  USB_HW_POWER_INTERRUPT |
 	  USB_HW_POWER_ISOC |
 	  USB_HW_POWER_NON_ROOT_HUB;
 
 	/* make sure power is set at least once */
 
 	if (bus->methods->set_hw_power != NULL) {
 		(bus->methods->set_hw_power) (bus);
 	}
 
 	/* Allocate the Root USB device */
 
 	child = usb_alloc_device(bus->bdev, bus, NULL, 0, 0, 1,
 	    speed, USB_MODE_HOST);
 	if (child) {
 		err = usb_probe_and_attach(child,
 		    USB_IFACE_INDEX_ANY);
 		if (!err) {
 			if ((bus->devices[USB_ROOT_HUB_ADDR] == NULL) ||
 			    (bus->devices[USB_ROOT_HUB_ADDR]->hub == NULL)) {
 				err = USB_ERR_NO_ROOT_HUB;
 			}
 		}
 	} else {
 		err = USB_ERR_NOMEM;
 	}
 
 	USB_BUS_LOCK(bus);
 
 	if (err) {
 		device_printf(bus->bdev, "Root HUB problem, error=%s\n",
 		    usbd_errstr(err));
 		usb_root_mount_rel(bus);
 	}
 
 	/* set softc - we are ready */
 	device_set_softc(dev, bus);
 
 	/* start watchdog */
 	usb_power_wdog(bus);
 }
 
 /*------------------------------------------------------------------------*
  *	usb_attach_sub
  *
  * This function creates a thread which runs the USB attach code.
  *------------------------------------------------------------------------*/
 static void
 usb_attach_sub(device_t dev, struct usb_bus *bus)
 {
 	const char *pname = device_get_nameunit(dev);
 
 	mtx_lock(&Giant);
 	if (usb_devclass_ptr == NULL)
 		usb_devclass_ptr = devclass_find("usbus");
 	mtx_unlock(&Giant);
 
+	usbpf_attach(bus);
+
 	/* Initialise USB process messages */
 	bus->explore_msg[0].hdr.pm_callback = &usb_bus_explore;
 	bus->explore_msg[0].bus = bus;
 	bus->explore_msg[1].hdr.pm_callback = &usb_bus_explore;
 	bus->explore_msg[1].bus = bus;
 
 	bus->detach_msg[0].hdr.pm_callback = &usb_bus_detach;
 	bus->detach_msg[0].bus = bus;
 	bus->detach_msg[1].hdr.pm_callback = &usb_bus_detach;
 	bus->detach_msg[1].bus = bus;
 
 	bus->attach_msg[0].hdr.pm_callback = &usb_bus_attach;
 	bus->attach_msg[0].bus = bus;
 	bus->attach_msg[1].hdr.pm_callback = &usb_bus_attach;
 	bus->attach_msg[1].bus = bus;
 
 	/* Create USB explore and callback processes */
 
 	if (usb_proc_create(&bus->giant_callback_proc,
 	    &bus->bus_mtx, pname, USB_PRI_MED)) {
 		printf("WARNING: Creation of USB Giant "
 		    "callback process failed.\n");
 	} else if (usb_proc_create(&bus->non_giant_callback_proc,
 	    &bus->bus_mtx, pname, USB_PRI_HIGH)) {
 		printf("WARNING: Creation of USB non-Giant "
 		    "callback process failed.\n");
 	} else if (usb_proc_create(&bus->explore_proc,
 	    &bus->bus_mtx, pname, USB_PRI_MED)) {
 		printf("WARNING: Creation of USB explore "
 		    "process failed.\n");
 	} else if (usb_proc_create(&bus->control_xfer_proc,
 	    &bus->bus_mtx, pname, USB_PRI_MED)) {
 		printf("WARNING: Creation of USB control transfer "
 		    "process failed.\n");
 	} else {
 		/* Get final attach going */
 		USB_BUS_LOCK(bus);
 		if (usb_proc_msignal(&bus->explore_proc,
 		    &bus->attach_msg[0], &bus->attach_msg[1])) {
 			/* ignore */
 		}
 		USB_BUS_UNLOCK(bus);
 
 		/* Do initial explore */
 		usb_needs_explore(bus, 1);
 	}
 }
 
 SYSUNINIT(usb_bus_unload, SI_SUB_KLD, SI_ORDER_ANY, usb_bus_unload, NULL);
 
 /*------------------------------------------------------------------------*
  *	usb_bus_mem_flush_all_cb
  *------------------------------------------------------------------------*/
 #if USB_HAVE_BUSDMA
 static void
 usb_bus_mem_flush_all_cb(struct usb_bus *bus, struct usb_page_cache *pc,
     struct usb_page *pg, usb_size_t size, usb_size_t align)
 {
 	usb_pc_cpu_flush(pc);
 }
 #endif
 
 /*------------------------------------------------------------------------*
  *	usb_bus_mem_flush_all - factored out code
  *------------------------------------------------------------------------*/
 #if USB_HAVE_BUSDMA
 void
 usb_bus_mem_flush_all(struct usb_bus *bus, usb_bus_mem_cb_t *cb)
 {
 	if (cb) {
 		cb(bus, &usb_bus_mem_flush_all_cb);
 	}
 }
 #endif
 
 /*------------------------------------------------------------------------*
  *	usb_bus_mem_alloc_all_cb
  *------------------------------------------------------------------------*/
 #if USB_HAVE_BUSDMA
 static void
 usb_bus_mem_alloc_all_cb(struct usb_bus *bus, struct usb_page_cache *pc,
     struct usb_page *pg, usb_size_t size, usb_size_t align)
 {
 	/* need to initialize the page cache */
 	pc->tag_parent = bus->dma_parent_tag;
 
 	if (usb_pc_alloc_mem(pc, pg, size, align)) {
 		bus->alloc_failed = 1;
 	}
 }
 #endif
 
 /*------------------------------------------------------------------------*
  *	usb_bus_mem_alloc_all - factored out code
  *
  * Returns:
  *    0: Success
  * Else: Failure
  *------------------------------------------------------------------------*/
 uint8_t
 usb_bus_mem_alloc_all(struct usb_bus *bus, bus_dma_tag_t dmat,
     usb_bus_mem_cb_t *cb)
 {
 	bus->alloc_failed = 0;
 
 	mtx_init(&bus->bus_mtx, device_get_nameunit(bus->parent),
 	    NULL, MTX_DEF | MTX_RECURSE);
 
 	usb_callout_init_mtx(&bus->power_wdog,
 	    &bus->bus_mtx, 0);
 
 	TAILQ_INIT(&bus->intr_q.head);
 
-	usbpf_attach(bus, &bus->uif);
-
 #if USB_HAVE_BUSDMA
 	usb_dma_tag_setup(bus->dma_parent_tag, bus->dma_tags,
 	    dmat, &bus->bus_mtx, NULL, 32, USB_BUS_DMA_TAG_MAX);
 #endif
 	if ((bus->devices_max > USB_MAX_DEVICES) ||
 	    (bus->devices_max < USB_MIN_DEVICES) ||
 	    (bus->devices == NULL)) {
 		DPRINTFN(0, "Devices field has not been "
 		    "initialised properly\n");
 		bus->alloc_failed = 1;		/* failure */
 	}
 #if USB_HAVE_BUSDMA
 	if (cb) {
 		cb(bus, &usb_bus_mem_alloc_all_cb);
 	}
 #endif
 	if (bus->alloc_failed) {
 		usb_bus_mem_free_all(bus, cb);
 	}
 	return (bus->alloc_failed);
 }
 
 /*------------------------------------------------------------------------*
  *	usb_bus_mem_free_all_cb
  *------------------------------------------------------------------------*/
 #if USB_HAVE_BUSDMA
 static void
 usb_bus_mem_free_all_cb(struct usb_bus *bus, struct usb_page_cache *pc,
     struct usb_page *pg, usb_size_t size, usb_size_t align)
 {
 	usb_pc_free_mem(pc);
 }
 #endif
 
 /*------------------------------------------------------------------------*
  *	usb_bus_mem_free_all - factored out code
  *------------------------------------------------------------------------*/
 void
 usb_bus_mem_free_all(struct usb_bus *bus, usb_bus_mem_cb_t *cb)
 {
 #if USB_HAVE_BUSDMA
 	if (cb) {
 		cb(bus, &usb_bus_mem_free_all_cb);
 	}
 	usb_dma_tag_unsetup(bus->dma_parent_tag);
 #endif
-
-	usbpf_detach(bus);
 
 	mtx_destroy(&bus->bus_mtx);
 }
 
 struct usb_bus *
 usb_bus_find(const char *name)
 {
 	struct usb_bus *ubus;
 	devclass_t dc;
 	device_t *devlist;
 	int devcount, error, i;
 	const char *nameunit;
 
 	dc = devclass_find("usbus");
 	if (dc == NULL)
 		return (NULL);
 	error = devclass_get_devices(dc, &devlist, &devcount);
 	if (error != 0)
 		return (NULL);
 	for (i = 0; i < devcount; i++) {
 		nameunit = device_get_nameunit(devlist[i]);
 		if (!strncmp(name, nameunit, strlen(nameunit))) {
 			ubus = device_get_ivars(devlist[i]);
 			free(devlist, M_TEMP);
 			return (ubus);
 		}
 	}
 	free(devlist, M_TEMP);
 	return (NULL);
 }
Index: projects/binutils-2.17/sys/dev/usb/serial/u3g.c
===================================================================
--- projects/binutils-2.17/sys/dev/usb/serial/u3g.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/usb/serial/u3g.c	(revision 215830)
@@ -1,982 +1,982 @@
 /*
  * Copyright (c) 2008 AnyWi Technologies
  * Author: Andrea Guzzo <aguzzo@anywi.com>
  * * based on uark.c 1.1 2006/08/14 08:30:22 jsg *
  * * parts from ubsa.c 183348 2008-09-25 12:00:56Z phk *
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
  * $FreeBSD$
  */
 
 /*
  * NOTE:
  *
  * - The detour through the tty layer is ridiculously expensive wrt
  *   buffering due to the high speeds.
  *
  *   We should consider adding a simple r/w device which allows
  *   attaching of PPP in a more efficient way.
  *
  */
 
 
 #include <sys/stdint.h>
 #include <sys/stddef.h>
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/linker_set.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
 #include <sys/callout.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
 
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 #include <dev/usb/usbdi_util.h>
 #include "usbdevs.h"
 
 #define	USB_DEBUG_VAR u3g_debug
 #include <dev/usb/usb_debug.h>
 #include <dev/usb/usb_process.h>
 #include <dev/usb/usb_msctest.h>
 
 #include <dev/usb/serial/usb_serial.h>
 #include <dev/usb/quirk/usb_quirk.h>
 
 #ifdef USB_DEBUG
 static int u3g_debug = 0;
 
 SYSCTL_NODE(_hw_usb, OID_AUTO, u3g, CTLFLAG_RW, 0, "USB 3g");
 SYSCTL_INT(_hw_usb_u3g, OID_AUTO, debug, CTLFLAG_RW,
     &u3g_debug, 0, "Debug level");
 #endif
 
 #define	U3G_MAXPORTS		12
 #define	U3G_CONFIG_INDEX	0
 #define	U3G_BSIZE		2048
 
 #define	U3GSP_GPRS		0
 #define	U3GSP_EDGE		1
 #define	U3GSP_CDMA		2
 #define	U3GSP_UMTS		3
 #define	U3GSP_HSDPA		4
 #define	U3GSP_HSUPA		5
 #define	U3GSP_HSPA		6
 #define	U3GSP_MAX		7
 
 /* Eject methods; See also usb_quirks.h:UQ_MSC_EJECT_* */
 #define	U3GINIT_HUAWEI		1	/* Requires Huawei init command */
 #define	U3GINIT_SIERRA		2	/* Requires Sierra init command */
 #define	U3GINIT_SCSIEJECT	3	/* Requires SCSI eject command */
 #define	U3GINIT_REZERO		4	/* Requires SCSI rezero command */
 #define	U3GINIT_ZTESTOR		5	/* Requires ZTE SCSI command */
 #define	U3GINIT_CMOTECH		6	/* Requires CMOTECH SCSI command */
 #define	U3GINIT_WAIT		7	/* Device reappears after a delay */
 #define	U3GINIT_SAEL_M460	8	/* Requires vendor init */
 #define	U3GINIT_HUAWEISCSI	9	/* Requires Huawei SCSI init command */
 #define	U3GINIT_TCT		10	/* Requires TCT Mobile init command */
 
 enum {
 	U3G_BULK_WR,
 	U3G_BULK_RD,
 	U3G_N_TRANSFER,
 };
 
 struct u3g_softc {
 	struct ucom_super_softc sc_super_ucom;
 	struct ucom_softc sc_ucom[U3G_MAXPORTS];
 
 	struct usb_xfer *sc_xfer[U3G_MAXPORTS][U3G_N_TRANSFER];
 	struct usb_device *sc_udev;
 	struct mtx sc_mtx;
 
 	uint8_t	sc_lsr;			/* local status register */
 	uint8_t	sc_msr;			/* U3G status register */
 	uint8_t	sc_numports;
 };
 
 static device_probe_t u3g_probe;
 static device_attach_t u3g_attach;
 static device_detach_t u3g_detach;
 
 static usb_callback_t u3g_write_callback;
 static usb_callback_t u3g_read_callback;
 
 static void u3g_start_read(struct ucom_softc *ucom);
 static void u3g_stop_read(struct ucom_softc *ucom);
 static void u3g_start_write(struct ucom_softc *ucom);
 static void u3g_stop_write(struct ucom_softc *ucom);
 
 
 static void u3g_test_autoinst(void *, struct usb_device *,
 		struct usb_attach_arg *);
 static int u3g_driver_loaded(struct module *mod, int what, void *arg);
 
 static eventhandler_tag u3g_etag;
 
 static const struct usb_config u3g_config[U3G_N_TRANSFER] = {
 
 	[U3G_BULK_WR] = {
 		.type = UE_BULK,
 		.endpoint = UE_ADDR_ANY,
 		.direction = UE_DIR_OUT,
 		.bufsize = U3G_BSIZE,/* bytes */
 		.flags = {.pipe_bof = 1,.force_short_xfer = 1,},
 		.callback = &u3g_write_callback,
 	},
 
 	[U3G_BULK_RD] = {
 		.type = UE_BULK,
 		.endpoint = UE_ADDR_ANY,
 		.direction = UE_DIR_IN,
 		.bufsize = U3G_BSIZE,/* bytes */
 		.flags = {.pipe_bof = 1,.short_xfer_ok = 1,},
 		.callback = &u3g_read_callback,
 	},
 };
 
 static const struct ucom_callback u3g_callback = {
 	.ucom_start_read = &u3g_start_read,
 	.ucom_stop_read = &u3g_stop_read,
 	.ucom_start_write = &u3g_start_write,
 	.ucom_stop_write = &u3g_stop_write,
 };
 
 static device_method_t u3g_methods[] = {
 	DEVMETHOD(device_probe, u3g_probe),
 	DEVMETHOD(device_attach, u3g_attach),
 	DEVMETHOD(device_detach, u3g_detach),
 	{0, 0}
 };
 
 static devclass_t u3g_devclass;
 
 static driver_t u3g_driver = {
 	.name = "u3g",
 	.methods = u3g_methods,
 	.size = sizeof(struct u3g_softc),
 };
 
 DRIVER_MODULE(u3g, uhub, u3g_driver, u3g_devclass, u3g_driver_loaded, 0);
 MODULE_DEPEND(u3g, ucom, 1, 1, 1);
 MODULE_DEPEND(u3g, usb, 1, 1, 1);
 MODULE_VERSION(u3g, 1);
 
 static const struct usb_device_id u3g_devs[] = {
 #define	U3G_DEV(v,p,i) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, i) }
 	U3G_DEV(ACERP, H10, 0),
 	U3G_DEV(AIRPLUS, MCD650, 0),
 	U3G_DEV(AIRPRIME, PC5220, 0),
 	U3G_DEV(ALINK, 3G, 0),
 	U3G_DEV(ALINK, 3GU, 0),
 	U3G_DEV(ALINK, DWM652U5, 0),
 	U3G_DEV(AMOI, H01, 0),
 	U3G_DEV(AMOI, H01A, 0),
 	U3G_DEV(AMOI, H02, 0),
 	U3G_DEV(ANYDATA, ADU_500A, 0),
 	U3G_DEV(ANYDATA, ADU_620UW, 0),
 	U3G_DEV(ANYDATA, ADU_E100X, 0),
 	U3G_DEV(AXESSTEL, DATAMODEM, 0),
 	U3G_DEV(CMOTECH, CDMA_MODEM1, 0),
 	U3G_DEV(CMOTECH, CGU628, U3GINIT_CMOTECH),
 	U3G_DEV(DELL, U5500, 0),
 	U3G_DEV(DELL, U5505, 0),
 	U3G_DEV(DELL, U5510, 0),
 	U3G_DEV(DELL, U5520, 0),
 	U3G_DEV(DELL, U5520_2, 0),
 	U3G_DEV(DELL, U5520_3, 0),
 	U3G_DEV(DELL, U5700, 0),
 	U3G_DEV(DELL, U5700_2, 0),
 	U3G_DEV(DELL, U5700_3, 0),
 	U3G_DEV(DELL, U5700_4, 0),
 	U3G_DEV(DELL, U5720, 0),
 	U3G_DEV(DELL, U5720_2, 0),
 	U3G_DEV(DELL, U5730, 0),
 	U3G_DEV(DELL, U5730_2, 0),
 	U3G_DEV(DELL, U5730_3, 0),
 	U3G_DEV(DELL, U740, 0),
 	U3G_DEV(DLINK3, DWM652, 0),
 	U3G_DEV(HP, EV2200, 0),
 	U3G_DEV(HP, HS2300, 0),
 	U3G_DEV(HUAWEI, E1401, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1402, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1403, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1404, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1405, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1406, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1407, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1408, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1409, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E140A, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E140B, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E140D, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E140E, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E140F, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1410, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1411, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1412, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1413, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1414, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1415, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1416, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1417, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1418, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1419, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E141A, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E141B, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E141C, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E141D, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E141E, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E141F, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1420, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1421, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1422, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1423, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1424, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1425, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1426, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1427, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1428, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1429, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E142A, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E142B, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E142C, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E142D, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E142E, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E142F, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1430, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1431, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1432, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1433, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1434, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1435, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1436, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1437, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1438, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1439, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E143A, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E143B, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E143C, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E143D, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E143E, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E143F, U3GINIT_HUAWEI),
-	U3G_DEV(HUAWEI, E14AC, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E180V, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E220, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E220BIS, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, MOBILE, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, E1752, U3GINIT_HUAWEISCSI),
+	U3G_DEV(HUAWEI, E1820, U3GINIT_HUAWEISCSI),
 	U3G_DEV(HUAWEI, K3765, U3GINIT_HUAWEI),
 	U3G_DEV(HUAWEI, K3765_INIT, U3GINIT_HUAWEISCSI),
 	U3G_DEV(KYOCERA2, CDMA_MSM_K, 0),
 	U3G_DEV(KYOCERA2, KPC680, 0),
 	U3G_DEV(LONGCHEER, WM66, U3GINIT_HUAWEI),
 	U3G_DEV(LONGCHEER, DISK, U3GINIT_TCT),
 	U3G_DEV(LONGCHEER, W14, 0),
 	U3G_DEV(MERLIN, V620, 0),
 	U3G_DEV(NEOTEL, PRIME, 0),
 	U3G_DEV(NOVATEL, E725, 0),
 	U3G_DEV(NOVATEL, ES620, 0),
 	U3G_DEV(NOVATEL, ES620_2, 0),
 	U3G_DEV(NOVATEL, EU730, 0),
 	U3G_DEV(NOVATEL, EU740, 0),
 	U3G_DEV(NOVATEL, EU870D, 0),
 	U3G_DEV(NOVATEL, MC760, 0),
 	U3G_DEV(NOVATEL, MC950D, 0),
 	U3G_DEV(NOVATEL, U720, 0),
 	U3G_DEV(NOVATEL, U727, 0),
 	U3G_DEV(NOVATEL, U727_2, 0),
 	U3G_DEV(NOVATEL, U740, 0),
 	U3G_DEV(NOVATEL, U740_2, 0),
 	U3G_DEV(NOVATEL, U760, U3GINIT_SCSIEJECT),
 	U3G_DEV(NOVATEL, U870, 0),
 	U3G_DEV(NOVATEL, V620, 0),
 	U3G_DEV(NOVATEL, V640, 0),
 	U3G_DEV(NOVATEL, V720, 0),
 	U3G_DEV(NOVATEL, V740, 0),
 	U3G_DEV(NOVATEL, X950D, 0),
 	U3G_DEV(NOVATEL, XU870, 0),
 	U3G_DEV(OPTION, E6500, 0),
 	U3G_DEV(OPTION, E6501, 0),
 	U3G_DEV(OPTION, E6601, 0),
 	U3G_DEV(OPTION, E6721, 0),
 	U3G_DEV(OPTION, E6741, 0),
 	U3G_DEV(OPTION, E6761, 0),
 	U3G_DEV(OPTION, E6800, 0),
 	U3G_DEV(OPTION, E7021, 0),
 	U3G_DEV(OPTION, E7041, 0),
 	U3G_DEV(OPTION, E7061, 0),
 	U3G_DEV(OPTION, E7100, 0),
 	U3G_DEV(OPTION, GE40X, 0),
 	U3G_DEV(OPTION, GT3G, 0),
 	U3G_DEV(OPTION, GT3GPLUS, 0),
 	U3G_DEV(OPTION, GT3GQUAD, 0),
 	U3G_DEV(OPTION, GT3G_1, 0),
 	U3G_DEV(OPTION, GT3G_2, 0),
 	U3G_DEV(OPTION, GT3G_3, 0),
 	U3G_DEV(OPTION, GT3G_4, 0),
 	U3G_DEV(OPTION, GT3G_5, 0),
 	U3G_DEV(OPTION, GT3G_6, 0),
 	U3G_DEV(OPTION, GTHSDPA, 0),
 	U3G_DEV(OPTION, GTM380, 0),
 	U3G_DEV(OPTION, GTMAX36, 0),
 	U3G_DEV(OPTION, GTMAX380HSUPAE, 0),
 	U3G_DEV(OPTION, GTMAXHSUPA, 0),
 	U3G_DEV(OPTION, GTMAXHSUPAE, 0),
 	U3G_DEV(OPTION, VODAFONEMC3G, 0),
 	U3G_DEV(QISDA, H20_1, 0),
 	U3G_DEV(QISDA, H20_2, 0),
 	U3G_DEV(QISDA, H21_1, 0),
 	U3G_DEV(QISDA, H21_2, 0),
 	U3G_DEV(QUALCOMM2, AC8700, 0),
 	U3G_DEV(QUALCOMM2, MF330, 0),
 	U3G_DEV(QUALCOMMINC, AC2726, 0),
 	U3G_DEV(QUALCOMMINC, AC8700, 0),
 	U3G_DEV(QUALCOMMINC, AC8710, 0),
 	U3G_DEV(QUALCOMMINC, CDMA_MSM, U3GINIT_SCSIEJECT),
 	U3G_DEV(QUALCOMMINC, E0002, 0),
 	U3G_DEV(QUALCOMMINC, E0003, 0),
 	U3G_DEV(QUALCOMMINC, E0004, 0),
 	U3G_DEV(QUALCOMMINC, E0005, 0),
 	U3G_DEV(QUALCOMMINC, E0006, 0),
 	U3G_DEV(QUALCOMMINC, E0007, 0),
 	U3G_DEV(QUALCOMMINC, E0008, 0),
 	U3G_DEV(QUALCOMMINC, E0009, 0),
 	U3G_DEV(QUALCOMMINC, E000A, 0),
 	U3G_DEV(QUALCOMMINC, E000B, 0),
 	U3G_DEV(QUALCOMMINC, E000C, 0),
 	U3G_DEV(QUALCOMMINC, E000D, 0),
 	U3G_DEV(QUALCOMMINC, E000E, 0),
 	U3G_DEV(QUALCOMMINC, E000F, 0),
 	U3G_DEV(QUALCOMMINC, E0010, 0),
 	U3G_DEV(QUALCOMMINC, E0011, 0),
 	U3G_DEV(QUALCOMMINC, E0012, 0),
 	U3G_DEV(QUALCOMMINC, E0013, 0),
 	U3G_DEV(QUALCOMMINC, E0014, 0),
 	U3G_DEV(QUALCOMMINC, E0017, 0),
 	U3G_DEV(QUALCOMMINC, E0018, 0),
 	U3G_DEV(QUALCOMMINC, E0019, 0),
 	U3G_DEV(QUALCOMMINC, E0020, 0),
 	U3G_DEV(QUALCOMMINC, E0021, 0),
 	U3G_DEV(QUALCOMMINC, E0022, 0),
 	U3G_DEV(QUALCOMMINC, E0023, 0),
 	U3G_DEV(QUALCOMMINC, E0024, 0),
 	U3G_DEV(QUALCOMMINC, E0025, 0),
 	U3G_DEV(QUALCOMMINC, E0026, 0),
 	U3G_DEV(QUALCOMMINC, E0027, 0),
 	U3G_DEV(QUALCOMMINC, E0028, 0),
 	U3G_DEV(QUALCOMMINC, E0029, 0),
 	U3G_DEV(QUALCOMMINC, E0030, 0),
 	U3G_DEV(QUALCOMMINC, E0032, 0),
 	U3G_DEV(QUALCOMMINC, E0033, 0),
 	U3G_DEV(QUALCOMMINC, E0037, 0),
 	U3G_DEV(QUALCOMMINC, E0039, 0),
 	U3G_DEV(QUALCOMMINC, E0042, 0),
 	U3G_DEV(QUALCOMMINC, E0043, 0),
 	U3G_DEV(QUALCOMMINC, E0048, 0),
 	U3G_DEV(QUALCOMMINC, E0049, 0),
 	U3G_DEV(QUALCOMMINC, E0051, 0),
 	U3G_DEV(QUALCOMMINC, E0052, 0),
 	U3G_DEV(QUALCOMMINC, E0054, 0),
 	U3G_DEV(QUALCOMMINC, E0055, 0),
 	U3G_DEV(QUALCOMMINC, E0057, 0),
 	U3G_DEV(QUALCOMMINC, E0058, 0),
 	U3G_DEV(QUALCOMMINC, E0059, 0),
 	U3G_DEV(QUALCOMMINC, E0060, 0),
 	U3G_DEV(QUALCOMMINC, E0061, 0),
 	U3G_DEV(QUALCOMMINC, E0062, 0),
 	U3G_DEV(QUALCOMMINC, E0063, 0),
 	U3G_DEV(QUALCOMMINC, E0064, 0),
 	U3G_DEV(QUALCOMMINC, E0066, 0),
 	U3G_DEV(QUALCOMMINC, E0069, 0),
 	U3G_DEV(QUALCOMMINC, E0070, 0),
 	U3G_DEV(QUALCOMMINC, E0073, 0),
 	U3G_DEV(QUALCOMMINC, E0076, 0),
 	U3G_DEV(QUALCOMMINC, E0078, 0),
 	U3G_DEV(QUALCOMMINC, E0082, 0),
 	U3G_DEV(QUALCOMMINC, E0086, 0),
 	U3G_DEV(QUALCOMMINC, E2002, 0),
 	U3G_DEV(QUALCOMMINC, E2003, 0),
 	U3G_DEV(QUALCOMMINC, MF626, 0),
 	U3G_DEV(QUALCOMMINC, MF628, 0),
 	U3G_DEV(QUALCOMMINC, MF633R, 0),
 	U3G_DEV(QUANTA, GKE, 0),
 	U3G_DEV(QUANTA, GLE, 0),
 	U3G_DEV(QUANTA, GLX, 0),
 	U3G_DEV(QUANTA, Q101, 0),
 	U3G_DEV(QUANTA, Q111, 0),
 	U3G_DEV(SIERRA, AC402, 0),
 	U3G_DEV(SIERRA, AC595U, 0),
 	U3G_DEV(SIERRA, AC597E, 0),
 	U3G_DEV(SIERRA, AC875E, 0),
 	U3G_DEV(SIERRA, AC875U, 0),
 	U3G_DEV(SIERRA, AC875U_2, 0),
 	U3G_DEV(SIERRA, AC880, 0),
 	U3G_DEV(SIERRA, AC880E, 0),
 	U3G_DEV(SIERRA, AC880U, 0),
 	U3G_DEV(SIERRA, AC881, 0),
 	U3G_DEV(SIERRA, AC881E, 0),
 	U3G_DEV(SIERRA, AC881U, 0),
 	U3G_DEV(SIERRA, AC885E, 0),
 	U3G_DEV(SIERRA, AC885E_2, 0),
 	U3G_DEV(SIERRA, AC885U, 0),
 	U3G_DEV(SIERRA, AIRCARD580, 0),
 	U3G_DEV(SIERRA, AIRCARD595, 0),
 	U3G_DEV(SIERRA, AIRCARD875, 0),
 	U3G_DEV(SIERRA, C22, 0),
 	U3G_DEV(SIERRA, C597, 0),
 	U3G_DEV(SIERRA, C888, 0),
 	U3G_DEV(SIERRA, E0029, 0),
 	U3G_DEV(SIERRA, E6892, 0),
 	U3G_DEV(SIERRA, E6893, 0),
 	U3G_DEV(SIERRA, EM5625, 0),
 	U3G_DEV(SIERRA, EM5725, 0),
 	U3G_DEV(SIERRA, MC5720, 0),
 	U3G_DEV(SIERRA, MC5720_2, 0),
 	U3G_DEV(SIERRA, MC5725, 0),
 	U3G_DEV(SIERRA, MC5727, 0),
 	U3G_DEV(SIERRA, MC5727_2, 0),
 	U3G_DEV(SIERRA, MC5728, 0),
 	U3G_DEV(SIERRA, MC8700, 0),
 	U3G_DEV(SIERRA, MC8755, 0),
 	U3G_DEV(SIERRA, MC8755_2, 0),
 	U3G_DEV(SIERRA, MC8755_3, 0),
 	U3G_DEV(SIERRA, MC8755_4, 0),
 	U3G_DEV(SIERRA, MC8765, 0),
 	U3G_DEV(SIERRA, MC8765_2, 0),
 	U3G_DEV(SIERRA, MC8765_3, 0),
 	U3G_DEV(SIERRA, MC8775, 0),
 	U3G_DEV(SIERRA, MC8775_2, 0),
 	U3G_DEV(SIERRA, MC8780, 0),
 	U3G_DEV(SIERRA, MC8780_2, 0),
 	U3G_DEV(SIERRA, MC8780_3, 0),
 	U3G_DEV(SIERRA, MC8781, 0),
 	U3G_DEV(SIERRA, MC8781_2, 0),
 	U3G_DEV(SIERRA, MC8781_3, 0),
 	U3G_DEV(SIERRA, MC8785, 0),
 	U3G_DEV(SIERRA, MC8785_2, 0),
 	U3G_DEV(SIERRA, MC8790, 0),
 	U3G_DEV(SIERRA, MC8791, 0),
 	U3G_DEV(SIERRA, MC8792, 0),
 	U3G_DEV(SIERRA, MINI5725, 0),
 	U3G_DEV(SIERRA, T11, 0),
 	U3G_DEV(SIERRA, T598, 0),
 	U3G_DEV(SILABS, SAEL, U3GINIT_SAEL_M460),
 	U3G_DEV(STELERA, C105, 0),
 	U3G_DEV(STELERA, E1003, 0),
 	U3G_DEV(STELERA, E1004, 0),
 	U3G_DEV(STELERA, E1005, 0),
 	U3G_DEV(STELERA, E1006, 0),
 	U3G_DEV(STELERA, E1007, 0),
 	U3G_DEV(STELERA, E1008, 0),
 	U3G_DEV(STELERA, E1009, 0),
 	U3G_DEV(STELERA, E100A, 0),
 	U3G_DEV(STELERA, E100B, 0),
 	U3G_DEV(STELERA, E100C, 0),
 	U3G_DEV(STELERA, E100D, 0),
 	U3G_DEV(STELERA, E100E, 0),
 	U3G_DEV(STELERA, E100F, 0),
 	U3G_DEV(STELERA, E1010, 0),
 	U3G_DEV(STELERA, E1011, 0),
 	U3G_DEV(STELERA, E1012, 0),
 	U3G_DEV(TCTMOBILE, X060S, 0),
 	U3G_DEV(TCTMOBILE, X080S, U3GINIT_TCT),
 	U3G_DEV(TELIT, UC864E, 0),
 	U3G_DEV(TELIT, UC864G, 0),
 	U3G_DEV(TLAYTECH, TEU800, 0),
 	U3G_DEV(TOSHIBA, G450, 0),
 	U3G_DEV(TOSHIBA, HSDPA, 0),
 	U3G_DEV(YISO, C893, 0),
 	/* Autoinstallers */
 	U3G_DEV(NOVATEL, ZEROCD, U3GINIT_SCSIEJECT),
 	U3G_DEV(OPTION, GTICON322, U3GINIT_REZERO),
 	U3G_DEV(QUALCOMMINC, ZTE_STOR, U3GINIT_ZTESTOR),
 	U3G_DEV(QUALCOMMINC, ZTE_STOR2, U3GINIT_SCSIEJECT),
 	U3G_DEV(QUANTA, Q101_STOR, U3GINIT_SCSIEJECT),
 	U3G_DEV(SIERRA, TRUINSTALL, U3GINIT_SIERRA),
 #undef	U3G_DEV
 };
 
 static int
 u3g_sierra_init(struct usb_device *udev)
 {
 	struct usb_device_request req;
 
 	req.bmRequestType = UT_VENDOR;
 	req.bRequest = UR_SET_INTERFACE;
 	USETW(req.wValue, UF_DEVICE_REMOTE_WAKEUP);
 	USETW(req.wIndex, UHF_PORT_CONNECTION);
 	USETW(req.wLength, 0);
 
 	if (usbd_do_request_flags(udev, NULL, &req,
 	    NULL, 0, NULL, USB_MS_HZ)) {
 		/* ignore any errors */
 	}
 	return (0);
 }
 
 static int
 u3g_huawei_init(struct usb_device *udev)
 {
 	struct usb_device_request req;
 
 	req.bmRequestType = UT_WRITE_DEVICE;
 	req.bRequest = UR_SET_FEATURE;
 	USETW(req.wValue, UF_DEVICE_REMOTE_WAKEUP);
 	USETW(req.wIndex, UHF_PORT_SUSPEND);
 	USETW(req.wLength, 0);
 
 	if (usbd_do_request_flags(udev, NULL, &req,
 	    NULL, 0, NULL, USB_MS_HZ)) {
 		/* ignore any errors */
 	}
 	return (0);
 }
 
 static void
 u3g_sael_m460_init(struct usb_device *udev)
 {
 	static const uint8_t setup[][24] = {
 	     { 0x41, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x13, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 
 	       0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
 	       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
 	     { 0xc1, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x40, 0x02 },
 	     { 0xc1, 0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00 },
 	     { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 },
 	     { 0xc1, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
 	     { 0x41, 0x01, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x19, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
 	       0x00, 0x00, 0x00, 0x00, 0x11, 0x13 },
 	     { 0x41, 0x13, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 
 	       0x09, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
 	       0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x12, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x01, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x19, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
 	       0x00, 0x00, 0x00, 0x00, 0x11, 0x13 },
 	     { 0x41, 0x13, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
 	       0x09, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 
 	       0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00 },
 	     { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 },
 	};
 
 	struct usb_device_request req;
 	usb_error_t err;
 	uint16_t len;
 	uint8_t buf[0x300];
 	uint8_t n;
 
 	DPRINTFN(1, "\n");
 
 	if (usbd_req_set_alt_interface_no(udev, NULL, 0, 0)) {
 		DPRINTFN(0, "Alt setting 0 failed\n");
 		return;
 	}
 
 	for (n = 0; n != (sizeof(setup)/sizeof(setup[0])); n++) {
 
 		memcpy(&req, setup[n], sizeof(req));
 
 		len = UGETW(req.wLength);
 		if (req.bmRequestType & UE_DIR_IN) {
 			if (len > sizeof(buf)) {
 				DPRINTFN(0, "too small buffer\n");
 				continue;
 			}
 			err = usbd_do_request(udev, NULL, &req, buf);
 		} else {
 			if (len > (sizeof(setup[0]) - 8)) {
 				DPRINTFN(0, "too small buffer\n");
 				continue;
 			}
 			err = usbd_do_request(udev, NULL, &req, 
 			    __DECONST(uint8_t *, &setup[n][8]));
 		}
 		if (err) {
 			DPRINTFN(1, "request %u failed\n",
 			    (unsigned int)n);
 			/*
 			 * Some of the requests will fail. Stop doing
 			 * requests when we are getting timeouts so
 			 * that we don't block the explore/attach
 			 * thread forever.
 			 */
 			if (err == USB_ERR_TIMEOUT)
 				break;
 		}
 	}
 }
 
 /*
  * Event handler registered for "usb_dev_configured".
  *
  * Handles 3G modem devices (E220, Mobile, etc.) that first present an
  * auto-install flash disk for Windows/MacOSX on their first interface.
  * After some command or some delay they change appearance to a modem.
  *
  * The handler only acts when the device state is UAA_DEV_READY and
  * interface 0 is of the mass storage class.  The eject method is
  * taken from the first matching UQ_MSC_EJECT_* quirk or, failing
  * that, from the driver info of the matching u3g_devs entry.  When
  * the selected method reports success, the device state is changed
  * to UAA_DEV_EJECTING.
  *
  * arg:  event handler argument, not used
  * udev: the USB device being examined
  * uaa:  attach arguments; dev_state is updated after a successful eject
  */
 static void
 u3g_test_autoinst(void *arg, struct usb_device *udev,
     struct usb_attach_arg *uaa)
 {
 	struct usb_interface *iface;
 	struct usb_interface_descriptor *id;
 	int error;
 	unsigned long method;
 
 	if (uaa->dev_state != UAA_DEV_READY)
 		return;
 
 	/* only devices whose first interface is mass storage qualify */
 	iface = usbd_get_iface(udev, 0);
 	if (iface == NULL)
 		return;
 	id = iface->idesc;
 	if (id == NULL || id->bInterfaceClass != UICLASS_MASS)
 		return;
 
 	/* quirks take precedence over the built-in device table */
 	if (usb_test_quirk(uaa, UQ_MSC_EJECT_HUAWEI))
 		method = U3GINIT_HUAWEI;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_SIERRA))
 		method = U3GINIT_SIERRA;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_SCSIEJECT))
 		method = U3GINIT_SCSIEJECT;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_REZERO))
 		method = U3GINIT_REZERO;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_ZTESTOR))
 		method = U3GINIT_ZTESTOR;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_CMOTECH))
 		method = U3GINIT_CMOTECH;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_WAIT))
 		method = U3GINIT_WAIT;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_HUAWEISCSI))
 		method = U3GINIT_HUAWEISCSI;
 	else if (usb_test_quirk(uaa, UQ_MSC_EJECT_TCT))
 		method = U3GINIT_TCT;
 	else if (usbd_lookup_id_by_uaa(u3g_devs, sizeof(u3g_devs), uaa) == 0)
 		method = USB_GET_DRIVER_INFO(uaa);
 	else
 		return;		/* no device match */
 
 	if (bootverbose) {
 		/* "method" is unsigned long, so it is printed with %lu */
 		printf("Ejecting %s %s using method %lu\n",
 		       usb_get_manufacturer(udev),
 		       usb_get_product(udev), method);
 	}
 
 	switch (method) {
 	case U3GINIT_HUAWEI:
 		error = u3g_huawei_init(udev);
 		break;
 	case U3GINIT_HUAWEISCSI:
 		error = usb_msc_eject(udev, 0, MSC_EJECT_HUAWEI);
 		break;
 	case U3GINIT_SCSIEJECT:
 		error = usb_msc_eject(udev, 0, MSC_EJECT_STOPUNIT);
 		break;
 	case U3GINIT_REZERO:
 		error = usb_msc_eject(udev, 0, MSC_EJECT_REZERO);
 		break;
 	case U3GINIT_ZTESTOR:
 		/*
 		 * Both commands are always sent.  The combined value
 		 * is only compared against zero below.
 		 */
 		error = usb_msc_eject(udev, 0, MSC_EJECT_STOPUNIT);
 		error |= usb_msc_eject(udev, 0, MSC_EJECT_ZTESTOR);
 		break;
 	case U3GINIT_CMOTECH:
 		error = usb_msc_eject(udev, 0, MSC_EJECT_CMOTECH);
 		break;
 	case U3GINIT_TCT:
 		error = usb_msc_eject(udev, 0, MSC_EJECT_TCT);
 		break;
 	case U3GINIT_SIERRA:
 		error = u3g_sierra_init(udev);
 		break;
 	case U3GINIT_WAIT:
 		/* Just pretend we ejected, the card will timeout */
 		error = 0;
 		break;
 	default:
 		/* no 3G eject quirks */
 		error = EOPNOTSUPP;
 		break;
 	}
 	if (error == 0) {
 		/* success, mark the udev as disappearing */
 		uaa->dev_state = UAA_DEV_EJECTING;
 	}
 }
 
 /*
  * Module event handler.  Registers the auto-install handler for the
  * "usb_dev_configured" event on load and removes it again on unload.
  * Every other module event is rejected with EOPNOTSUPP.
  */
 static int
 u3g_driver_loaded(struct module *mod, int what, void *arg)
 {
 	if (what == MOD_LOAD) {
 		/* register our autoinstall handler */
 		u3g_etag = EVENTHANDLER_REGISTER(usb_dev_configured,
 		    u3g_test_autoinst, NULL, EVENTHANDLER_PRI_ANY);
 		return (0);
 	}
 	if (what == MOD_UNLOAD) {
 		EVENTHANDLER_DEREGISTER(usb_dev_configured, u3g_etag);
 		return (0);
 	}
 	return (EOPNOTSUPP);
 }
 
 /*
  * Probe method.  Only a vendor-class interface at configuration index
  * U3G_CONFIG_INDEX of a device running in USB host mode is looked up
  * in the u3g_devs table; everything else is rejected with ENXIO.
  */
 static int
 u3g_probe(device_t self)
 {
 	struct usb_attach_arg *uaa = device_get_ivars(self);
 
 	if (uaa->usb_mode != USB_MODE_HOST ||
 	    uaa->info.bConfigIndex != U3G_CONFIG_INDEX ||
 	    uaa->info.bInterfaceClass != UICLASS_VENDOR)
 		return (ENXIO);
 
 	return (usbd_lookup_id_by_uaa(u3g_devs, sizeof(u3g_devs), uaa));
 }
 
 /*
  * Attach method.
  *
  * Runs the SAEL M460 init sequence when the device asks for it, claims
  * every vendor-class interface from the probed interface index
  * onwards, and then sets up one pair of bulk transfers per port (at
  * most U3G_MAXPORTS ports) before attaching the ports to ucom.
  *
  * Returns 0 on success.  Returns ENXIO, after calling u3g_detach(),
  * when no port could be set up or when ucom_attach() fails.
  */
 static int
 u3g_attach(device_t dev)
 {
 	struct usb_config u3g_config_tmp[U3G_N_TRANSFER];
 	struct usb_attach_arg *uaa = device_get_ivars(dev);
 	struct u3g_softc *sc = device_get_softc(dev);
 	struct usb_interface *iface;
 	struct usb_interface_descriptor *id;
 	uint32_t iface_valid;
 	int error, type, nports;
 	int ep, n;
 	uint8_t i;
 
 	DPRINTF("sc=%p\n", sc);
 
 	type = USB_GET_DRIVER_INFO(uaa);
 	if (type == U3GINIT_SAEL_M460
 	    || usb_test_quirk(uaa, UQ_MSC_EJECT_SAEL_M460)) {
 		u3g_sael_m460_init(uaa->device);
 	}
 
 	/* copy in USB config */
 	for (n = 0; n != U3G_N_TRANSFER; n++)
 		u3g_config_tmp[n] = u3g_config[n];
 
 	device_set_usb_desc(dev);
 	mtx_init(&sc->sc_mtx, "u3g", NULL, MTX_DEF);
 
 	sc->sc_udev = uaa->device;
 
 	/* Claim all interfaces on the device */
 	iface_valid = 0;
 	for (i = uaa->info.bIfaceIndex; i < USB_IFACE_MAX; i++) {
 		iface = usbd_get_iface(uaa->device, i);
 		if (iface == NULL)
 			break;
 		id = usbd_get_interface_descriptor(iface);
 		if (id == NULL || id->bInterfaceClass != UICLASS_VENDOR)
 			continue;
 		usbd_set_parent_iface(uaa->device, i, uaa->info.bIfaceIndex);
 		/*
 		 * Use an unsigned constant: shifting a signed 1 into
 		 * the top bit of an int is undefined behaviour.
 		 */
 		iface_valid |= (1U << i);
 	}
 
 	i = 0;		/* interface index */
 	ep = 0;		/* endpoint index */
 	nports = 0;	/* number of ports */
 	while (i < USB_IFACE_MAX) {
 		if ((iface_valid & (1U << i)) == 0) {
 			i++;
 			continue;
 		}
 
 		/* update BULK endpoint index */
 		for (n = 0; n < U3G_N_TRANSFER; n++)
 			u3g_config_tmp[n].ep_index = ep;
 
 		/* try to allocate a set of BULK endpoints */
 		error = usbd_transfer_setup(uaa->device, &i,
 		    sc->sc_xfer[nports], u3g_config_tmp, U3G_N_TRANSFER,
 		    &sc->sc_ucom[nports], &sc->sc_mtx);
 		if (error) {
 			/* next interface */
 			i++;
 			ep = 0;
 			continue;
 		}
 
 		/* set stall by default */
 		mtx_lock(&sc->sc_mtx);
 		usbd_xfer_set_stall(sc->sc_xfer[nports][U3G_BULK_WR]);
 		usbd_xfer_set_stall(sc->sc_xfer[nports][U3G_BULK_RD]);
 		mtx_unlock(&sc->sc_mtx);
 
 		/*
 		 * Stay on the same interface and try its next set of
 		 * bulk endpoints on the following iteration.
 		 */
 		nports++;	/* found one port */
 		ep++;
 		if (nports == U3G_MAXPORTS)
 			break;
 	}
 	if (nports == 0) {
 		device_printf(dev, "no ports found\n");
 		goto detach;
 	}
 	sc->sc_numports = nports;
 
 	error = ucom_attach(&sc->sc_super_ucom, sc->sc_ucom,
 	    sc->sc_numports, sc, &u3g_callback, &sc->sc_mtx);
 	if (error) {
 		DPRINTF("ucom_attach failed\n");
 		goto detach;
 	}
 	ucom_set_pnpinfo_usb(&sc->sc_super_ucom, dev);
 	device_printf(dev, "Found %u port%s.\n", sc->sc_numports,
 	    sc->sc_numports > 1 ? "s":"");
 
 	return (0);
 
 detach:
 	u3g_detach(dev);
 	return (ENXIO);
 }
 
 /*
  * Detach method.  Detaches the ucom ports, releases the transfers of
  * all U3G_MAXPORTS port slots and destroys the softc mutex.  Always
  * returns 0.
  */
 static int
 u3g_detach(device_t dev)
 {
 	struct u3g_softc *sc;
 	uint8_t port;
 
 	sc = device_get_softc(dev);
 
 	DPRINTF("sc=%p\n", sc);
 
 	/* NOTE: It is not dangerous to detach more ports than attached! */
 	ucom_detach(&sc->sc_super_ucom, sc->sc_ucom);
 
 	for (port = 0; port < U3G_MAXPORTS; port++) {
 		usbd_transfer_unsetup(sc->sc_xfer[port], U3G_N_TRANSFER);
 	}
 
 	mtx_destroy(&sc->sc_mtx);
 
 	return (0);
 }
 
 /* Start the bulk read transfer of the port identified by "ucom". */
 static void
 u3g_start_read(struct ucom_softc *ucom)
 {
 	struct u3g_softc *sc = ucom->sc_parent;
 	struct usb_xfer *rd_xfer;
 
 	rd_xfer = sc->sc_xfer[ucom->sc_subunit][U3G_BULK_RD];
 	usbd_transfer_start(rd_xfer);
 }
 
 /* Stop the bulk read transfer of the port identified by "ucom". */
 static void
 u3g_stop_read(struct ucom_softc *ucom)
 {
 	struct u3g_softc *sc = ucom->sc_parent;
 	struct usb_xfer *rd_xfer;
 
 	rd_xfer = sc->sc_xfer[ucom->sc_subunit][U3G_BULK_RD];
 	usbd_transfer_stop(rd_xfer);
 }
 
 /* Start the bulk write transfer of the port identified by "ucom". */
 static void
 u3g_start_write(struct ucom_softc *ucom)
 {
 	struct u3g_softc *sc = ucom->sc_parent;
 	struct usb_xfer *wr_xfer;
 
 	wr_xfer = sc->sc_xfer[ucom->sc_subunit][U3G_BULK_WR];
 	usbd_transfer_start(wr_xfer);
 }
 
 /* Stop the bulk write transfer of the port identified by "ucom". */
 static void
 u3g_stop_write(struct ucom_softc *ucom)
 {
 	struct u3g_softc *sc = ucom->sc_parent;
 	struct usb_xfer *wr_xfer;
 
 	wr_xfer = sc->sc_xfer[ucom->sc_subunit][U3G_BULK_WR];
 	usbd_transfer_stop(wr_xfer);
 }
 
 /*
  * Bulk write transfer callback.
  *
  * On setup and after a completed transfer, up to U3G_BSIZE bytes are
  * fetched from ucom into frame 0 and submitted; nothing is submitted
  * when ucom has no data.  On an error other than cancellation the
  * transfer is marked stalled and the same submit path is taken.
  */
 static void
 u3g_write_callback(struct usb_xfer *xfer, usb_error_t error)
 {
 	struct ucom_softc *ucom = usbd_xfer_softc(xfer);
 	struct usb_page_cache *frame;
 	uint32_t nbytes;
 
 	switch (USB_GET_STATE(xfer)) {
 	case USB_ST_TRANSFERRED:
 	case USB_ST_SETUP:
 		break;
 
 	default:			/* Error */
 		if (error == USB_ERR_CANCELLED)
 			return;
 		/* do a builtin clear-stall */
 		usbd_xfer_set_stall(xfer);
 		break;
 	}
 
 	/* queue the next chunk of outgoing data, if there is any */
 	frame = usbd_xfer_get_frame(xfer, 0);
 	if (ucom_get_data(ucom, frame, 0, U3G_BSIZE, &nbytes)) {
 		usbd_xfer_set_frame_len(xfer, 0, nbytes);
 		usbd_transfer_submit(xfer);
 	}
 }
 
 /*
  * Bulk read transfer callback.
  *
  * After a completed transfer the received bytes of frame 0 are handed
  * to ucom.  Then, as on setup, a new read of the maximum transfer
  * length is submitted.  On an error other than cancellation the
  * transfer is marked stalled before the read is resubmitted.
  */
 static void
 u3g_read_callback(struct usb_xfer *xfer, usb_error_t error)
 {
 	struct ucom_softc *ucom = usbd_xfer_softc(xfer);
 	struct usb_page_cache *frame;
 	int nbytes;
 
 	usbd_xfer_status(xfer, &nbytes, NULL, NULL, NULL);
 
 	switch (USB_GET_STATE(xfer)) {
 	case USB_ST_TRANSFERRED:
 		frame = usbd_xfer_get_frame(xfer, 0);
 		ucom_put_data(ucom, frame, 0, nbytes);
 		break;
 
 	case USB_ST_SETUP:
 		break;
 
 	default:			/* Error */
 		if (error == USB_ERR_CANCELLED)
 			return;
 		/* do a builtin clear-stall */
 		usbd_xfer_set_stall(xfer);
 		break;
 	}
 
 	/* (re)arm the read for the full transfer length */
 	usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer));
 	usbd_transfer_submit(xfer);
 }
Index: projects/binutils-2.17/sys/dev/usb/usb_bus.h
===================================================================
--- projects/binutils-2.17/sys/dev/usb/usb_bus.h	(revision 215829)
+++ projects/binutils-2.17/sys/dev/usb/usb_bus.h	(revision 215830)
@@ -1,114 +1,114 @@
 /* $FreeBSD$ */
 /*-
  * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _USB_BUS_H_
 #define	_USB_BUS_H_
 
 /*
  * The following structure defines the USB explore message sent to the USB
  * explore process.  It pairs a process message header with the bus the
  * message refers to.  "struct usb_bus" uses the same layout for its
  * explore, detach and attach messages.
  */
 
 struct usb_bus_msg {
 	struct usb_proc_msg hdr;
 	struct usb_bus *bus;
 };
 
 /*
  * The following structure defines the USB statistics structure.
  * "struct usb_bus" keeps one instance for failed requests (stats_err)
  * and one for successful requests (stats_ok).
  */
 struct usb_bus_stat {
 	/* four request counters; presumably one per transfer type - TODO confirm */
 	uint32_t uds_requests[4];
 };
 
 /*
  * The following structure defines a USB bus. There is one USB bus
  * for every Host or Device controller.
  */
 struct usb_bus {
 	struct usb_bus_stat stats_err;
 	struct usb_bus_stat stats_ok;
 	struct root_hold_token *bus_roothold;
 	/*
 	 * There are two callback processes. One for Giant locked
 	 * callbacks. One for non-Giant locked callbacks. This should
 	 * avoid congestion and reduce response time in most cases.
 	 */
 	struct usb_process giant_callback_proc;
 	struct usb_process non_giant_callback_proc;
 
 	/* Explore process */
 	struct usb_process explore_proc;
 
 	/* Control request process */
 	struct usb_process control_xfer_proc;
 
 	struct usb_bus_msg explore_msg[2];
 	struct usb_bus_msg detach_msg[2];
 	struct usb_bus_msg attach_msg[2];
 	/*
 	 * This mutex protects the USB hardware:
 	 */
 	struct mtx bus_mtx;
 	struct usb_xfer_queue intr_q;
 	struct usb_callout power_wdog;	/* power management */
 
 	device_t parent;
 	device_t bdev;			/* filled by HC driver */
 
 #if USB_HAVE_BUSDMA
 	struct usb_dma_parent_tag dma_parent_tag[1];
 	struct usb_dma_tag dma_tags[USB_BUS_DMA_TAG_MAX];
 #endif
 	struct usb_bus_methods *methods;	/* filled by HC driver */
 	struct usb_device **devices;
 
-	struct usbpf_if *uif;	/* USB Packet Filter */
+	struct ifnet *ifp;	/* only for USB Packet Filter */
 
 	usb_power_mask_t hw_power_state;	/* see USB_HW_POWER_XXX */
 	usb_size_t uframe_usage[USB_HS_MICRO_FRAMES_MAX];
 
 	uint16_t isoc_time_last;	/* in milliseconds */
 
 	uint8_t	alloc_failed;		/* Set if memory allocation failed. */
 	uint8_t	driver_added_refcount;	/* Current driver generation count */
 	enum usb_revision usbrev;	/* USB revision. See "USB_REV_XXX". */
 
 	uint8_t	devices_max;		/* maximum number of USB devices */
 	uint8_t	do_probe;		/* set if USB BUS should be re-probed */
 
 	/*
 	 * The scratch area can only be used inside the explore thread
 	 * belonging to the given serial bus.
 	 */
 	union {
 		struct usb_hw_ep_scratch hw_ep_scratch[1];
 		struct usb_temp_setup temp_setup[1];
 		uint8_t	data[255];
 	}	scratch[1];
 };
 
 #endif					/* _USB_BUS_H_ */
Index: projects/binutils-2.17/sys/dev/usb/usb_pf.c
===================================================================
--- projects/binutils-2.17/sys/dev/usb/usb_pf.c	(revision 215829)
+++ projects/binutils-2.17/sys/dev/usb/usb_pf.c	(revision 215830)
@@ -1,1862 +1,247 @@
 /*-
  * Copyright (c) 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from the Stanford/CMU enet packet filter,
  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  * Berkeley Laboratory.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <net/if.h>
+#include <net/if_types.h>
+#include <net/bpf.h>
 
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 #include <dev/usb/usb_busdma.h>
 #include <dev/usb/usb_controller.h>
 #include <dev/usb/usb_core.h>
 #include <dev/usb/usb_process.h>
 #include <dev/usb/usb_device.h>
 #include <dev/usb/usb_bus.h>
 #include <dev/usb/usb_pf.h>
 #include <dev/usb/usb_transfer.h>
 
-/*
- * All usbpf implementations are extracted from bpf(9) APIs and it's
- * specialized for USB packet filtering between the driver and the host
- * controller.
- */
-
-MALLOC_DEFINE(M_USBPF, "USBPktFilter", "USB Packet Filter");
-
-/*
- * Rotate the packet buffers in descriptor ud.  Move the store buffer into the
- * hold slot, and the free buffer ino the store slot.  Zero the length of the
- * new store buffer.  Descriptor lock should be held.
- */
-#define	USBPF_ROTATE_BUFFERS(ud)	do {				\
-	(ud)->ud_hbuf = (ud)->ud_sbuf;					\
-	(ud)->ud_hlen = (ud)->ud_slen;					\
-	(ud)->ud_sbuf = (ud)->ud_fbuf;					\
-	(ud)->ud_slen = 0;						\
-	(ud)->ud_fbuf = NULL;						\
-	usbpf_bufheld(ud);						\
-} while (0)
-
-#ifndef __i386__
-#define	USBPF_ALIGN
-#endif
-
-#ifndef USBPF_ALIGN
-#define	USBPF_EXTRACT_SHORT(p)	((u_int16_t)ntohs(*(u_int16_t *)p))
-#define	USBPF_EXTRACT_LONG(p)	(ntohl(*(u_int32_t *)p))
-#else
-#define	USBPF_EXTRACT_SHORT(p)						\
-	((u_int16_t)							\
-	    ((u_int16_t)*((u_char *)p+0)<<8|				\
-		(u_int16_t)*((u_char *)p+1)<<0))
-#define	USBPF_EXTRACT_LONG(p)						\
-	((u_int32_t)*((u_char *)p+0)<<24|				\
-	    (u_int32_t)*((u_char *)p+1)<<16|				\
-	    (u_int32_t)*((u_char *)p+2)<<8|				\
-	    (u_int32_t)*((u_char *)p+3)<<0)
-#endif
-
-/*
- * Number of scratch memory words (for USBPF_LD|USBPF_MEM and USBPF_ST).
- */
-#define	USBPF_MEMWORDS		 16
-
-/* Values for ud_state */
-#define	USBPF_IDLE		0	/* no select in progress */
-#define	USBPF_WAITING		1	/* waiting for read timeout in select */
-#define	USBPF_TIMED_OUT		2	/* read timeout has expired in select */
-
-#define	PRIUSB			26	/* interruptible */
-
-/* Frame directions */
-enum usbpf_direction {
-	USBPF_D_IN,	/* See incoming frames */
-	USBPF_D_INOUT,	/* See incoming and outgoing frames */
-	USBPF_D_OUT	/* See outgoing frames */
-};
-
-static void	usbpf_append_bytes(struct usbpf_d *, caddr_t, u_int, void *,
-		    u_int);
-static void	usbpf_attachd(struct usbpf_d *, struct usbpf_if *);
-static void	usbpf_detachd(struct usbpf_d *);
-static int	usbpf_canfreebuf(struct usbpf_d *);
-static void	usbpf_buf_reclaimed(struct usbpf_d *);
-static int	usbpf_canwritebuf(struct usbpf_d *);
-
-static	d_open_t	usbpf_open;
-static	d_read_t	usbpf_read;
-static	d_write_t	usbpf_write;
-static	d_ioctl_t	usbpf_ioctl;
-static	d_poll_t	usbpf_poll;
-static	d_kqfilter_t	usbpf_kqfilter;
-
-static struct cdevsw usbpf_cdevsw = {
-	.d_version =	D_VERSION,
-	.d_open =	usbpf_open,
-	.d_read =	usbpf_read,
-	.d_write =	usbpf_write,
-	.d_ioctl =	usbpf_ioctl,
-	.d_poll =	usbpf_poll,
-	.d_name =	"usbpf",
-	.d_kqfilter =	usbpf_kqfilter,
-};
-
-static LIST_HEAD(, usbpf_if)	usbpf_iflist;
-static struct mtx	usbpf_mtx;		/* global lock */
-static int usbpf_uifd_cnt;
-
-static int usbpf_bufsize = 4096;
-#define	USBPF_MINBUFSIZE 32
-#define	USBPF_MAXBUFSIZE 0x80000
-static int usbpf_maxbufsize = USBPF_MAXBUFSIZE;
-#define	USBPF_MAXINSNS 512
-static int usbpf_maxinsns = USBPF_MAXINSNS;
-
-static void
-usbpf_buffer_init(struct usbpf_d *ud)
+void
+usbpf_attach(struct usb_bus *ubus)
 {
+	struct ifnet *ifp;
 
-	ud->ud_bufsize = usbpf_bufsize;
-}
+	ifp = ubus->ifp = if_alloc(IFT_USB);
+	if_initname(ifp, "usbus", device_get_unit(ubus->bdev));
+	if_attach(ifp);
 
-/*
- * Free USBPF kernel buffers on device close.
- */
-static void
-usbpf_buffer_free(struct usbpf_d *ud)
-{
+	KASSERT(sizeof(struct usbpf_pkthdr) == USBPF_HDR_LEN,
+	    ("wrong USB pf header length (%zd)", sizeof(struct usbpf_pkthdr)));
 
-	if (ud->ud_sbuf != NULL)
-		free(ud->ud_sbuf, M_USBPF);
-	if (ud->ud_hbuf != NULL)
-		free(ud->ud_hbuf, M_USBPF);
-	if (ud->ud_fbuf != NULL)
-		free(ud->ud_fbuf, M_USBPF);
-
-#ifdef INVARIANTS
-	ud->ud_sbuf = ud->ud_hbuf = ud->ud_fbuf = (caddr_t)~0;
-#endif
-}
-
-static void
-usbpf_buffer_alloc(struct usbpf_d *ud)
-{
-
-	KASSERT(ud->ud_fbuf == NULL, ("%s: ud_fbuf != NULL", __func__));
-	KASSERT(ud->ud_sbuf == NULL, ("%s: ud_sbuf != NULL", __func__));
-	KASSERT(ud->ud_hbuf == NULL, ("%s: ud_hbuf != NULL", __func__));
-
-	ud->ud_fbuf = (caddr_t)malloc(ud->ud_bufsize, M_USBPF, M_WAITOK);
-	ud->ud_sbuf = (caddr_t)malloc(ud->ud_bufsize, M_USBPF, M_WAITOK);
-	ud->ud_hbuf = NULL;
-	ud->ud_slen = 0;
-	ud->ud_hlen = 0;
-}
-
-/*
- * Copy buffer storage to user space in read().
- */
-static int
-usbpf_buffer_uiomove(struct usbpf_d *ud, caddr_t buf, u_int len,
-    struct uio *uio)
-{
-
-	return (uiomove(buf, len, uio));
-}
-
-/*
- * Simple data copy to the current kernel buffer.
- */
-static void
-usbpf_buffer_append_bytes(struct usbpf_d *ud, caddr_t buf, u_int offset,
-    void *src, u_int len)
-{
-	u_char *src_bytes;
-
-	src_bytes = (u_char *)src;
-	bcopy(src_bytes, buf + offset, len);
-}
-
-/*
- * Allocate or resize buffers.
- */
-static int
-usbpf_buffer_ioctl_sblen(struct usbpf_d *ud, u_int *i)
-{
-	u_int size;
-
-	USBPFD_LOCK(ud);
-	if (ud->ud_bif != NULL) {
-		USBPFD_UNLOCK(ud);
-		return (EINVAL);
-	}
-	size = *i;
-	if (size > usbpf_maxbufsize)
-		*i = size = usbpf_maxbufsize;
-	else if (size < USBPF_MINBUFSIZE)
-		*i = size = USBPF_MINBUFSIZE;
-	ud->ud_bufsize = size;
-	USBPFD_UNLOCK(ud);
-	return (0);
-}
-
-static const u_short	usbpf_code_map[] = {
-	0x10ff,	/* 0x00-0x0f: 1111111100001000 */
-	0x3070,	/* 0x10-0x1f: 0000111000001100 */
-	0x3131,	/* 0x20-0x2f: 1000110010001100 */
-	0x3031,	/* 0x30-0x3f: 1000110000001100 */
-	0x3131,	/* 0x40-0x4f: 1000110010001100 */
-	0x1011,	/* 0x50-0x5f: 1000100000001000 */
-	0x1013,	/* 0x60-0x6f: 1100100000001000 */
-	0x1010,	/* 0x70-0x7f: 0000100000001000 */
-	0x0093,	/* 0x80-0x8f: 1100100100000000 */
-	0x0000,	/* 0x90-0x9f: 0000000000000000 */
-	0x0000,	/* 0xa0-0xaf: 0000000000000000 */
-	0x0002,	/* 0xb0-0xbf: 0100000000000000 */
-	0x0000,	/* 0xc0-0xcf: 0000000000000000 */
-	0x0000,	/* 0xd0-0xdf: 0000000000000000 */
-	0x0000,	/* 0xe0-0xef: 0000000000000000 */
-	0x0000	/* 0xf0-0xff: 0000000000000000 */
-};
-
-#define	USBPF_VALIDATE_CODE(c)	\
-    ((c) <= 0xff && (usbpf_code_map[(c) >> 4] & (1 << ((c) & 0xf))) != 0)
-
-/*
- * Return true if the 'fcode' is a valid filter program.
- * The constraints are that each jump be forward and to a valid
- * code.  The code must terminate with either an accept or reject.
- *
- * The kernel needs to be able to verify an application's filter code.
- * Otherwise, a bogus program could easily crash the system.
- */
-static int
-usbpf_validate(const struct usbpf_insn *f, int len)
-{
-	register int i;
-	register const struct usbpf_insn *p;
-
-	/* Do not accept negative length filter. */
-	if (len < 0)
-		return (0);
-
-	/* An empty filter means accept all. */
-	if (len == 0)
-		return (1);
-
-	for (i = 0; i < len; ++i) {
-		p = &f[i];
-		/*
-		 * Check that the code is valid.
-		 */
-		if (!USBPF_VALIDATE_CODE(p->code))
-			return (0);
-		/*
-		 * Check that that jumps are forward, and within
-		 * the code block.
-		 */
-		if (USBPF_CLASS(p->code) == USBPF_JMP) {
-			register u_int offset;
-
-			if (p->code == (USBPF_JMP|USBPF_JA))
-				offset = p->k;
-			else
-				offset = p->jt > p->jf ? p->jt : p->jf;
-			if (offset >= (u_int)(len - i) - 1)
-				return (0);
-			continue;
-		}
-		/*
-		 * Check that memory operations use valid addresses.
-		 */
-		if (p->code == USBPF_ST || p->code == USBPF_STX ||
-		    p->code == (USBPF_LD|USBPF_MEM) ||
-		    p->code == (USBPF_LDX|USBPF_MEM)) {
-			if (p->k >= USBPF_MEMWORDS)
-				return (0);
-			continue;
-		}
-		/*
-		 * Check for constant division by 0.
-		 */
-		if (p->code == (USBPF_ALU|USBPF_DIV|USBPF_K) && p->k == 0)
-			return (0);
-	}
-	return (USBPF_CLASS(f[len - 1].code) == USBPF_RET);
-}
-
-#ifdef _KERNEL
-#define	MINDEX(m, k) \
-{ \
-	register int len = m->m_len; \
- \
-	while (k >= len) { \
-		k -= len; \
-		m = m->m_next; \
-		if (m == 0) \
-			return (0); \
-		len = m->m_len; \
-	} \
-}
-
-static u_int16_t	m_xhalf(struct mbuf *m, usbpf_u_int32 k, int *err);
-static u_int32_t	m_xword(struct mbuf *m, usbpf_u_int32 k, int *err);
-
-static u_int32_t
-m_xword(struct mbuf *m, usbpf_u_int32 k, int *err)
-{
-	size_t len;
-	u_char *cp, *np;
-	struct mbuf *m0;
-
-	len = m->m_len;
-	while (k >= len) {
-		k -= len;
-		m = m->m_next;
-		if (m == 0)
-			goto bad;
-		len = m->m_len;
-	}
-	cp = mtod(m, u_char *) + k;
-	if (len - k >= 4) {
-		*err = 0;
-		return (USBPF_EXTRACT_LONG(cp));
-	}
-	m0 = m->m_next;
-	if (m0 == 0 || m0->m_len + len - k < 4)
-		goto bad;
-	*err = 0;
-	np = mtod(m0, u_char *);
-	switch (len - k) {
-	case 1:
-		return (((u_int32_t)cp[0] << 24) |
-		    ((u_int32_t)np[0] << 16) |
-		    ((u_int32_t)np[1] << 8)  |
-		    (u_int32_t)np[2]);
-
-	case 2:
-		return (((u_int32_t)cp[0] << 24) |
-		    ((u_int32_t)cp[1] << 16) |
-		    ((u_int32_t)np[0] << 8) |
-		    (u_int32_t)np[1]);
-
-	default:
-		return (((u_int32_t)cp[0] << 24) |
-		    ((u_int32_t)cp[1] << 16) |
-		    ((u_int32_t)cp[2] << 8) |
-		    (u_int32_t)np[0]);
-	}
-    bad:
-	*err = 1;
-	return (0);
-}
-
-static u_int16_t
-m_xhalf(struct mbuf *m, usbpf_u_int32 k, int *err)
-{
-	size_t len;
-	u_char *cp;
-	struct mbuf *m0;
-
-	len = m->m_len;
-	while (k >= len) {
-		k -= len;
-		m = m->m_next;
-		if (m == 0)
-			goto bad;
-		len = m->m_len;
-	}
-	cp = mtod(m, u_char *) + k;
-	if (len - k >= 2) {
-		*err = 0;
-		return (USBPF_EXTRACT_SHORT(cp));
-	}
-	m0 = m->m_next;
-	if (m0 == 0)
-		goto bad;
-	*err = 0;
-	return ((cp[0] << 8) | mtod(m0, u_char *)[0]);
- bad:
-	*err = 1;
-	return (0);
-}
-#endif
-
-/*
- * Execute the filter program starting at pc on the packet p
- * wirelen is the length of the original packet
- * buflen is the amount of data present
- */
-static u_int
-usbpf_filter(const struct usbpf_insn *pc, u_char *p, u_int wirelen,
-    u_int buflen)
-{
-	u_int32_t A = 0, X = 0;
-	usbpf_u_int32 k;
-	u_int32_t mem[USBPF_MEMWORDS];
-
 	/*
-	 * XXX temporarily the filter system is disabled because currently it
-	 * could not handle the some machine code properly that leads to
-	 * kernel crash by invalid usage.
+	 * XXX According to the specification of DLT_USB, it indicates packets
+	 * beginning with USB setup header.  But not sure all packets would be.
 	 */
-	return ((u_int)-1);
+	bpfattach(ifp, DLT_USB, USBPF_HDR_LEN);
 
-	if (pc == NULL)
-		/*
-		 * No filter means accept all.
-		 */
-		return ((u_int)-1);
-
-	--pc;
-	while (1) {
-		++pc;
-		switch (pc->code) {
-		default:
-#ifdef _KERNEL
-			return (0);
-#else
-			abort();
-#endif
-
-		case USBPF_RET|USBPF_K:
-			return ((u_int)pc->k);
-
-		case USBPF_RET|USBPF_A:
-			return ((u_int)A);
-
-		case USBPF_LD|USBPF_W|USBPF_ABS:
-			k = pc->k;
-			if (k > buflen || sizeof(int32_t) > buflen - k) {
-#ifdef _KERNEL
-				int merr;
-
-				if (buflen != 0)
-					return (0);
-				A = m_xword((struct mbuf *)p, k, &merr);
-				if (merr != 0)
-					return (0);
-				continue;
-#else
-				return (0);
-#endif
-			}
-#ifdef USBPF_ALIGN
-			if (((intptr_t)(p + k) & 3) != 0)
-				A = USBPF_EXTRACT_LONG(&p[k]);
-			else
-#endif
-				A = ntohl(*(int32_t *)(p + k));
-			continue;
-
-		case USBPF_LD|USBPF_H|USBPF_ABS:
-			k = pc->k;
-			if (k > buflen || sizeof(int16_t) > buflen - k) {
-#ifdef _KERNEL
-				int merr;
-
-				if (buflen != 0)
-					return (0);
-				A = m_xhalf((struct mbuf *)p, k, &merr);
-				continue;
-#else
-				return (0);
-#endif
-			}
-			A = USBPF_EXTRACT_SHORT(&p[k]);
-			continue;
-
-		case USBPF_LD|USBPF_B|USBPF_ABS:
-			k = pc->k;
-			if (k >= buflen) {
-#ifdef _KERNEL
-				struct mbuf *m;
-
-				if (buflen != 0)
-					return (0);
-				m = (struct mbuf *)p;
-				MINDEX(m, k);
-				A = mtod(m, u_char *)[k];
-				continue;
-#else
-				return (0);
-#endif
-			}
-			A = p[k];
-			continue;
-
-		case USBPF_LD|USBPF_W|USBPF_LEN:
-			A = wirelen;
-			continue;
-
-		case USBPF_LDX|USBPF_W|USBPF_LEN:
-			X = wirelen;
-			continue;
-
-		case USBPF_LD|USBPF_W|USBPF_IND:
-			k = X + pc->k;
-			if (pc->k > buflen || X > buflen - pc->k ||
-			    sizeof(int32_t) > buflen - k) {
-#ifdef _KERNEL
-				int merr;
-
-				if (buflen != 0)
-					return (0);
-				A = m_xword((struct mbuf *)p, k, &merr);
-				if (merr != 0)
-					return (0);
-				continue;
-#else
-				return (0);
-#endif
-			}
-#ifdef USBPF_ALIGN
-			if (((intptr_t)(p + k) & 3) != 0)
-				A = USBPF_EXTRACT_LONG(&p[k]);
-			else
-#endif
-				A = ntohl(*(int32_t *)(p + k));
-			continue;
-
-		case USBPF_LD|USBPF_H|USBPF_IND:
-			k = X + pc->k;
-			if (X > buflen || pc->k > buflen - X ||
-			    sizeof(int16_t) > buflen - k) {
-#ifdef _KERNEL
-				int merr;
-
-				if (buflen != 0)
-					return (0);
-				A = m_xhalf((struct mbuf *)p, k, &merr);
-				if (merr != 0)
-					return (0);
-				continue;
-#else
-				return (0);
-#endif
-			}
-			A = USBPF_EXTRACT_SHORT(&p[k]);
-			continue;
-
-		case USBPF_LD|USBPF_B|USBPF_IND:
-			k = X + pc->k;
-			if (pc->k >= buflen || X >= buflen - pc->k) {
-#ifdef _KERNEL
-				struct mbuf *m;
-
-				if (buflen != 0)
-					return (0);
-				m = (struct mbuf *)p;
-				MINDEX(m, k);
-				A = mtod(m, u_char *)[k];
-				continue;
-#else
-				return (0);
-#endif
-			}
-			A = p[k];
-			continue;
-
-		case USBPF_LDX|USBPF_MSH|USBPF_B:
-			k = pc->k;
-			if (k >= buflen) {
-#ifdef _KERNEL
-				register struct mbuf *m;
-
-				if (buflen != 0)
-					return (0);
-				m = (struct mbuf *)p;
-				MINDEX(m, k);
-				X = (mtod(m, u_char *)[k] & 0xf) << 2;
-				continue;
-#else
-				return (0);
-#endif
-			}
-			X = (p[pc->k] & 0xf) << 2;
-			continue;
-
-		case USBPF_LD|USBPF_IMM:
-			A = pc->k;
-			continue;
-
-		case USBPF_LDX|USBPF_IMM:
-			X = pc->k;
-			continue;
-
-		case USBPF_LD|USBPF_MEM:
-			A = mem[pc->k];
-			continue;
-
-		case USBPF_LDX|USBPF_MEM:
-			X = mem[pc->k];
-			continue;
-
-		case USBPF_ST:
-			mem[pc->k] = A;
-			continue;
-
-		case USBPF_STX:
-			mem[pc->k] = X;
-			continue;
-
-		case USBPF_JMP|USBPF_JA:
-			pc += pc->k;
-			continue;
-
-		case USBPF_JMP|USBPF_JGT|USBPF_K:
-			pc += (A > pc->k) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JGE|USBPF_K:
-			pc += (A >= pc->k) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JEQ|USBPF_K:
-			pc += (A == pc->k) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JSET|USBPF_K:
-			pc += (A & pc->k) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JGT|USBPF_X:
-			pc += (A > X) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JGE|USBPF_X:
-			pc += (A >= X) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JEQ|USBPF_X:
-			pc += (A == X) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_JMP|USBPF_JSET|USBPF_X:
-			pc += (A & X) ? pc->jt : pc->jf;
-			continue;
-
-		case USBPF_ALU|USBPF_ADD|USBPF_X:
-			A += X;
-			continue;
-
-		case USBPF_ALU|USBPF_SUB|USBPF_X:
-			A -= X;
-			continue;
-
-		case USBPF_ALU|USBPF_MUL|USBPF_X:
-			A *= X;
-			continue;
-
-		case USBPF_ALU|USBPF_DIV|USBPF_X:
-			if (X == 0)
-				return (0);
-			A /= X;
-			continue;
-
-		case USBPF_ALU|USBPF_AND|USBPF_X:
-			A &= X;
-			continue;
-
-		case USBPF_ALU|USBPF_OR|USBPF_X:
-			A |= X;
-			continue;
-
-		case USBPF_ALU|USBPF_LSH|USBPF_X:
-			A <<= X;
-			continue;
-
-		case USBPF_ALU|USBPF_RSH|USBPF_X:
-			A >>= X;
-			continue;
-
-		case USBPF_ALU|USBPF_ADD|USBPF_K:
-			A += pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_SUB|USBPF_K:
-			A -= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_MUL|USBPF_K:
-			A *= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_DIV|USBPF_K:
-			A /= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_AND|USBPF_K:
-			A &= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_OR|USBPF_K:
-			A |= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_LSH|USBPF_K:
-			A <<= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_RSH|USBPF_K:
-			A >>= pc->k;
-			continue;
-
-		case USBPF_ALU|USBPF_NEG:
-			A = -A;
-			continue;
-
-		case USBPF_MISC|USBPF_TAX:
-			X = A;
-			continue;
-
-		case USBPF_MISC|USBPF_TXA:
-			A = X;
-			continue;
-		}
-	}
-}
-
-static void
-usbpf_free(struct usbpf_d *ud)
-{
-
-	switch (ud->ud_bufmode) {
-	case USBPF_BUFMODE_BUFFER:
-		return (usbpf_buffer_free(ud));
-	default:
-		panic("usbpf_buf_free");
-	}
-}
-
-/*
- * Notify the buffer model that a buffer has moved into the hold position.
- */
-static void
-usbpf_bufheld(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-}
-
-/*
- * Free buffers currently in use by a descriptor.
- * Called on close.
- */
-static void
-usbpf_freed(struct usbpf_d *ud)
-{
-
-	/*
-	 * We don't need to lock out interrupts since this descriptor has
-	 * been detached from its interface and it yet hasn't been marked
-	 * free.
-	 */
-	usbpf_free(ud);
-	if (ud->ud_rfilter != NULL)
-		free((caddr_t)ud->ud_rfilter, M_USBPF);
-	if (ud->ud_wfilter != NULL)
-		free((caddr_t)ud->ud_wfilter, M_USBPF);
-	mtx_destroy(&ud->ud_mtx);
-}
-
-/*
- * Close the descriptor by detaching it from its interface,
- * deallocating its buffers, and marking it free.
- */
-static void
-usbpf_dtor(void *data)
-{
-	struct usbpf_d *ud = data;
-
-	USBPFD_LOCK(ud);
-	if (ud->ud_state == USBPF_WAITING)
-		callout_stop(&ud->ud_callout);
-	ud->ud_state = USBPF_IDLE;
-	USBPFD_UNLOCK(ud);
-	funsetown(&ud->ud_sigio);
-	mtx_lock(&usbpf_mtx);
-	if (ud->ud_bif)
-		usbpf_detachd(ud);
-	mtx_unlock(&usbpf_mtx);
-	selwakeuppri(&ud->ud_sel, PRIUSB);
-	knlist_destroy(&ud->ud_sel.si_note);
-	callout_drain(&ud->ud_callout);
-	usbpf_freed(ud);
-	free(ud, M_USBPF);
-}
-
-/*
- * Open device.  Returns ENXIO for illegal minor device number,
- * EBUSY if file is open by another process.
- */
-/* ARGSUSED */
-static	int
-usbpf_open(struct cdev *dev, int flags, int fmt, struct thread *td)
-{
-	struct usbpf_d *ud;
-	int error;
-
-	ud = malloc(sizeof(*ud), M_USBPF, M_WAITOK | M_ZERO);
-	error = devfs_set_cdevpriv(ud, usbpf_dtor);
-	if (error != 0) {
-		free(ud, M_USBPF);
-		return (error);
-	}
-
-	usbpf_buffer_init(ud);
-	ud->ud_bufmode = USBPF_BUFMODE_BUFFER;
-	ud->ud_sig = SIGIO;
-	ud->ud_direction = USBPF_D_INOUT;
-	ud->ud_pid = td->td_proc->p_pid;
-	mtx_init(&ud->ud_mtx, devtoname(dev), "usbpf cdev lock", MTX_DEF);
-	callout_init_mtx(&ud->ud_callout, &ud->ud_mtx, 0);
-	knlist_init_mtx(&ud->ud_sel.si_note, &ud->ud_mtx);
-
-	return (0);
-}
-
-static int
-usbpf_uiomove(struct usbpf_d *ud, caddr_t buf, u_int len, struct uio *uio)
-{
-
-	if (ud->ud_bufmode != USBPF_BUFMODE_BUFFER)
-		return (EOPNOTSUPP);
-	return (usbpf_buffer_uiomove(ud, buf, len, uio));
-}
-
-/*
- *  usbpf_read - read next chunk of packets from buffers
- */
-static	int
-usbpf_read(struct cdev *dev, struct uio *uio, int ioflag)
-{
-	struct usbpf_d *ud;
-	int error;
-	int non_block;
-	int timed_out;
-
-	error = devfs_get_cdevpriv((void **)&ud);
-	if (error != 0)
-		return (error);
-
-	/*
-	 * Restrict application to use a buffer the same size as
-	 * as kernel buffers.
-	 */
-	if (uio->uio_resid != ud->ud_bufsize)
-		return (EINVAL);
-
-	non_block = ((ioflag & O_NONBLOCK) != 0);
-
-	USBPFD_LOCK(ud);
-	ud->ud_pid = curthread->td_proc->p_pid;
-	if (ud->ud_bufmode != USBPF_BUFMODE_BUFFER) {
-		USBPFD_UNLOCK(ud);
-		return (EOPNOTSUPP);
-	}
-	if (ud->ud_state == USBPF_WAITING)
-		callout_stop(&ud->ud_callout);
-	timed_out = (ud->ud_state == USBPF_TIMED_OUT);
-	ud->ud_state = USBPF_IDLE;
-	/*
-	 * If the hold buffer is empty, then do a timed sleep, which
-	 * ends when the timeout expires or when enough packets
-	 * have arrived to fill the store buffer.
-	 */
-	while (ud->ud_hbuf == NULL) {
-		if (ud->ud_slen != 0) {
-			/*
-			 * A packet(s) either arrived since the previous
-			 * read or arrived while we were asleep.
-			 */
-			if (ud->ud_immediate || non_block || timed_out) {
-				/*
-				 * Rotate the buffers and return what's here
-				 * if we are in immediate mode, non-blocking
-				 * flag is set, or this descriptor timed out.
-				 */
-				USBPF_ROTATE_BUFFERS(ud);
-				break;
-			}
-		}
-
-		/*
-		 * No data is available, check to see if the usbpf device
-		 * is still pointed at a real interface.  If not, return
-		 * ENXIO so that the userland process knows to rebind
-		 * it before using it again.
-		 */
-		if (ud->ud_bif == NULL) {
-			USBPFD_UNLOCK(ud);
-			return (ENXIO);
-		}
-
-		if (non_block) {
-			USBPFD_UNLOCK(ud);
-			return (EWOULDBLOCK);
-		}
-		error = msleep(ud, &ud->ud_mtx, PRIUSB|PCATCH,
-		    "uff", ud->ud_rtout);
-		if (error == EINTR || error == ERESTART) {
-			USBPFD_UNLOCK(ud);
-			return (error);
-		}
-		if (error == EWOULDBLOCK) {
-			/*
-			 * On a timeout, return what's in the buffer,
-			 * which may be nothing.  If there is something
-			 * in the store buffer, we can rotate the buffers.
-			 */
-			if (ud->ud_hbuf)
-				/*
-				 * We filled up the buffer in between
-				 * getting the timeout and arriving
-				 * here, so we don't need to rotate.
-				 */
-				break;
-
-			if (ud->ud_slen == 0) {
-				USBPFD_UNLOCK(ud);
-				return (0);
-			}
-			USBPF_ROTATE_BUFFERS(ud);
-			break;
-		}
-	}
-	/*
-	 * At this point, we know we have something in the hold slot.
-	 */
-	USBPFD_UNLOCK(ud);
-
-	/*
-	 * Move data from hold buffer into user space.
-	 * We know the entire buffer is transferred since
-	 * we checked above that the read buffer is usbpf_bufsize bytes.
-	 *
-	 * XXXRW: More synchronization needed here: what if a second thread
-	 * issues a read on the same fd at the same time?  Don't want this
-	 * getting invalidated.
-	 */
-	error = usbpf_uiomove(ud, ud->ud_hbuf, ud->ud_hlen, uio);
-
-	USBPFD_LOCK(ud);
-	ud->ud_fbuf = ud->ud_hbuf;
-	ud->ud_hbuf = NULL;
-	ud->ud_hlen = 0;
-	usbpf_buf_reclaimed(ud);
-	USBPFD_UNLOCK(ud);
-
-	return (error);
-}
-
-static int
-usbpf_write(struct cdev *dev, struct uio *uio, int ioflag)
-{
-
-	/* NOT IMPLEMENTED */
-	return (ENOSYS);
-}
-
-static int
-usbpf_ioctl_sblen(struct usbpf_d *ud, u_int *i)
-{
-
-	if (ud->ud_bufmode != USBPF_BUFMODE_BUFFER)
-		return (EOPNOTSUPP);
-	return (usbpf_buffer_ioctl_sblen(ud, i));
-}
-
-/*
- * Reset a descriptor by flushing its packet buffer and clearing the receive
- * and drop counts.  This is doable for kernel-only buffers, but with
- * zero-copy buffers, we can't write to (or rotate) buffers that are
- * currently owned by userspace.  It would be nice if we could encapsulate
- * this logic in the buffer code rather than here.
- */
-static void
-usbpf_reset_d(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-
-	if ((ud->ud_hbuf != NULL) &&
-	    (ud->ud_bufmode != USBPF_BUFMODE_ZBUF || usbpf_canfreebuf(ud))) {
-		/* Free the hold buffer. */
-		ud->ud_fbuf = ud->ud_hbuf;
-		ud->ud_hbuf = NULL;
-		ud->ud_hlen = 0;
-		usbpf_buf_reclaimed(ud);
-	}
-	if (usbpf_canwritebuf(ud))
-		ud->ud_slen = 0;
-	ud->ud_rcount = 0;
-	ud->ud_dcount = 0;
-	ud->ud_fcount = 0;
-	ud->ud_wcount = 0;
-	ud->ud_wfcount = 0;
-	ud->ud_wdcount = 0;
-	ud->ud_zcopy = 0;
-}
-
-static int
-usbpf_setif(struct usbpf_d *ud, struct usbpf_ifreq *ufr)
-{
-	struct usbpf_if *uif;
-	struct usb_bus *theywant;
-
-	theywant = usb_bus_find(ufr->ufr_name);
-	if (theywant == NULL || theywant->uif == NULL)
-		return (ENXIO);
-
-	uif = theywant->uif;
-
-	switch (ud->ud_bufmode) {
-	case USBPF_BUFMODE_BUFFER:
-		if (ud->ud_sbuf == NULL)
-			usbpf_buffer_alloc(ud);
-		KASSERT(ud->ud_sbuf != NULL, ("%s: ud_sbuf == NULL", __func__));
-		break;
-
-	default:
-		panic("usbpf_setif: bufmode %d", ud->ud_bufmode);
-	}
-	if (uif != ud->ud_bif) {
-		if (ud->ud_bif)
-			/*
-			 * Detach if attached to something else.
-			 */
-			usbpf_detachd(ud);
-
-		usbpf_attachd(ud, uif);
-	}
-	USBPFD_LOCK(ud);
-	usbpf_reset_d(ud);
-	USBPFD_UNLOCK(ud);
-	return (0);
-}
-
-/*
- * Set d's packet filter program to fp.  If this file already has a filter,
- * free it and replace it.  Returns EINVAL for bogus requests.
- */
-static int
-usbpf_setf(struct usbpf_d *ud, struct usbpf_program *fp, u_long cmd)
-{
-	struct usbpf_insn *fcode, *old;
-	u_int wfilter, flen, size;
-
-	if (cmd == UIOCSETWF) {
-		old = ud->ud_wfilter;
-		wfilter = 1;
-	} else {
-		wfilter = 0;
-		old = ud->ud_rfilter;
-	}
-	if (fp->uf_insns == NULL) {
-		if (fp->uf_len != 0)
-			return (EINVAL);
-		USBPFD_LOCK(ud);
-		if (wfilter)
-			ud->ud_wfilter = NULL;
-		else {
-			ud->ud_rfilter = NULL;
-			if (cmd == UIOCSETF)
-				usbpf_reset_d(ud);
-		}
-		USBPFD_UNLOCK(ud);
-		if (old != NULL)
-			free((caddr_t)old, M_USBPF);
-		return (0);
-	}
-	flen = fp->uf_len;
-	if (flen > usbpf_maxinsns)
-		return (EINVAL);
-
-	size = flen * sizeof(*fp->uf_insns);
-	fcode = (struct usbpf_insn *)malloc(size, M_USBPF, M_WAITOK);
-	if (copyin((caddr_t)fp->uf_insns, (caddr_t)fcode, size) == 0 &&
-	    usbpf_validate(fcode, (int)flen)) {
-		USBPFD_LOCK(ud);
-		if (wfilter)
-			ud->ud_wfilter = fcode;
-		else {
-			ud->ud_rfilter = fcode;
-			if (cmd == UIOCSETF)
-				usbpf_reset_d(ud);
-		}
-		USBPFD_UNLOCK(ud);
-		if (old != NULL)
-			free((caddr_t)old, M_USBPF);
-
-		return (0);
-	}
-	free((caddr_t)fcode, M_USBPF);
-	return (EINVAL);
-}
-
-static int
-usbpf_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
-    struct thread *td)
-{
-	struct usbpf_d *ud;
-	int error;
-
-	error = devfs_get_cdevpriv((void **)&ud);
-	if (error != 0)
-		return (error);
-
-	/*
-	 * Refresh PID associated with this descriptor.
-	 */
-	USBPFD_LOCK(ud);
-	ud->ud_pid = td->td_proc->p_pid;
-	if (ud->ud_state == USBPF_WAITING)
-		callout_stop(&ud->ud_callout);
-	ud->ud_state = USBPF_IDLE;
-	USBPFD_UNLOCK(ud);
-
-	if (ud->ud_locked == 1) {
-		switch (cmd) {
-		case UIOCGBLEN:
-		case UIOCSBLEN:
-		case UIOCVERSION:
-			break;
-		default:
-			return (EPERM);
-		}
-	}
-
-	switch (cmd) {
-
-	default:
-		error = EINVAL;
-		break;
-
-	/*
-	 * Get buffer len [for read()].
-	 */
-	case UIOCGBLEN:
-		*(u_int *)addr = ud->ud_bufsize;
-		break;
-
-	/*
-	 * Set buffer length.
-	 */
-	case UIOCSBLEN:
-		error = usbpf_ioctl_sblen(ud, (u_int *)addr);
-		break;
-
-	/*
-	 * Set read filter.
-	 */
-	case UIOCSETF:
-		error = usbpf_setf(ud, (struct usbpf_program *)addr, cmd);
-		break;
-
-	/*
-	 * Set read timeout.
-	 */
-	case UIOCSRTIMEOUT:
-		{
-			struct timeval *tv = (struct timeval *)addr;
-
-			/*
-			 * Subtract 1 tick from tvtohz() since this isn't
-			 * a one-shot timer.
-			 */
-			if ((error = itimerfix(tv)) == 0)
-				ud->ud_rtout = tvtohz(tv) - 1;
-			break;
-		}
-
-	/*
-	 * Get read timeout.
-	 */
-	case UIOCGRTIMEOUT:
-		{
-			struct timeval *tv = (struct timeval *)addr;
-
-			tv->tv_sec = ud->ud_rtout / hz;
-			tv->tv_usec = (ud->ud_rtout % hz) * tick;
-			break;
-		}
-
-	/*
-	 * Get packet stats.
-	 */
-	case UIOCGSTATS:
-		{
-			struct usbpf_stat *us = (struct usbpf_stat *)addr;
-
-			/* XXXCSJP overflow */
-			us->us_recv = ud->ud_rcount;
-			us->us_drop = ud->ud_dcount;
-			break;
-		}
-
-	case UIOCVERSION:
-		{
-			struct usbpf_version *uv = (struct usbpf_version *)addr;
-
-			uv->uv_major = USBPF_MAJOR_VERSION;
-			uv->uv_minor = USBPF_MINOR_VERSION;
-			break;
-		}
-
-	/*
-	 * Set interface.
-	 */
-	case UIOCSETIF:
-		error = usbpf_setif(ud, (struct usbpf_ifreq *)addr);
-		break;
-
-	}
-	return (error);
-}
-
-/*
- * Support for select() and poll() system calls
- *
- * Return true iff the specific operation will not block indefinitely.
- * Otherwise, return false but make a note that a selwakeup() must be done.
- */
-static int
-usbpf_poll(struct cdev *dev, int events, struct thread *td)
-{
-
-	/* NOT IMPLEMENTED */
-	return (ENOSYS);
-}
-
-/*
- * Support for kevent() system call.  Register EVFILT_READ filters and
- * reject all others.
- */
-int
-usbpf_kqfilter(struct cdev *dev, struct knote *kn)
-{
-
-	/* NOT IMPLEMENTED */
-	return (ENOSYS);
-}
-
-/*
- * Attach file to the usbpf interface, i.e. make d listen on bp.
- */
-static void
-usbpf_attachd(struct usbpf_d *ud, struct usbpf_if *uif)
-{
-
-	USBPFIF_LOCK(uif);
-	ud->ud_bif = uif;
-	LIST_INSERT_HEAD(&uif->uif_dlist, ud, ud_next);
-
-	usbpf_uifd_cnt++;
-	USBPFIF_UNLOCK(uif);
-}
-
-/*
- * Detach a file from its interface.
- */
-static void
-usbpf_detachd(struct usbpf_d *ud)
-{
-	struct usbpf_if *uif;
-	struct usb_bus *ubus;
-
-	uif = ud->ud_bif;
-	USBPFIF_LOCK(uif);
-	USBPFD_LOCK(ud);
-	ubus = ud->ud_bif->uif_ubus;
-
-	/*
-	 * Remove d from the interface's descriptor list.
-	 */
-	LIST_REMOVE(ud, ud_next);
-
-	usbpf_uifd_cnt--;
-	ud->ud_bif = NULL;
-	USBPFD_UNLOCK(ud);
-	USBPFIF_UNLOCK(uif);
-}
-
-void
-usbpf_attach(struct usb_bus *ubus, struct usbpf_if **driverp)
-{
-	struct usbpf_if *uif;
-
-	uif = malloc(sizeof(*uif), M_USBPF, M_WAITOK | M_ZERO);
-	LIST_INIT(&uif->uif_dlist);
-	uif->uif_ubus = ubus;
-	mtx_init(&uif->uif_mtx, "usbpf interface lock", NULL, MTX_DEF);
-	KASSERT(*driverp == NULL,
-	    ("usbpf_attach: driverp already initialized"));
-	*driverp = uif;
-
-	mtx_lock(&usbpf_mtx);
-	LIST_INSERT_HEAD(&usbpf_iflist, uif, uif_next);
-	mtx_unlock(&usbpf_mtx);
-
 	if (bootverbose)
 		device_printf(ubus->parent, "usbpf attached\n");
 }
 
-/*
- * If there are processes sleeping on this descriptor, wake them up.
- */
-static __inline void
-usbpf_wakeup(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-	if (ud->ud_state == USBPF_WAITING) {
-		callout_stop(&ud->ud_callout);
-		ud->ud_state = USBPF_IDLE;
-	}
-	wakeup(ud);
-	if (ud->ud_async && ud->ud_sig && ud->ud_sigio)
-		pgsigio(&ud->ud_sigio, ud->ud_sig, 0);
-
-	selwakeuppri(&ud->ud_sel, PRIUSB);
-	KNOTE_LOCKED(&ud->ud_sel.si_note, 0);
-}
-
 void
 usbpf_detach(struct usb_bus *ubus)
 {
-	struct usbpf_if	*uif;
-	struct usbpf_d	*ud;
+	struct ifnet *ifp = ubus->ifp;
 
-	/* Locate USBPF interface information */
-	mtx_lock(&usbpf_mtx);
-	LIST_FOREACH(uif, &usbpf_iflist, uif_next) {
-		if (ubus == uif->uif_ubus)
-			break;
+	if (ifp != NULL) {
+		bpfdetach(ifp);
+		if_detach(ifp);
+		if_free(ifp);
 	}
-
-	/* Interface wasn't attached */
-	if ((uif == NULL) || (uif->uif_ubus == NULL)) {
-		mtx_unlock(&usbpf_mtx);
-		printf("usbpf_detach: not attached\n");	/* XXX */
-		return;
-	}
-
-	LIST_REMOVE(uif, uif_next);
-	mtx_unlock(&usbpf_mtx);
-
-	while ((ud = LIST_FIRST(&uif->uif_dlist)) != NULL) {
-		usbpf_detachd(ud);
-		USBPFD_LOCK(ud);
-		usbpf_wakeup(ud);
-		USBPFD_UNLOCK(ud);
-	}
-
-	mtx_destroy(&uif->uif_mtx);
-	free(uif, M_USBPF);
+	ubus->ifp = NULL;
 }
 
-/* Time stamping functions */
-#define	USBPF_T_MICROTIME	0x0000
-#define	USBPF_T_NANOTIME	0x0001
-#define	USBPF_T_BINTIME		0x0002
-#define	USBPF_T_NONE		0x0003
-#define	USBPF_T_FORMAT_MASK	0x0003
-#define	USBPF_T_NORMAL		0x0000
-#define	USBPF_T_FAST		0x0100
-#define	USBPF_T_MONOTONIC	0x0200
-#define	USBPF_T_FORMAT(t)	((t) & USBPF_T_FORMAT_MASK)
-
-#define	USBPF_TSTAMP_NONE	0
-#define	USBPF_TSTAMP_FAST	1
-#define	USBPF_TSTAMP_NORMAL	2
-
-static int
-usbpf_ts_quality(int tstype)
-{
-
-	if (tstype == USBPF_T_NONE)
-		return (USBPF_TSTAMP_NONE);
-	if ((tstype & USBPF_T_FAST) != 0)
-		return (USBPF_TSTAMP_FAST);
-
-	return (USBPF_TSTAMP_NORMAL);
-}
-
-static int
-usbpf_gettime(struct bintime *bt, int tstype)
-{
-	int quality;
-
-	quality = usbpf_ts_quality(tstype);
-	if (quality == USBPF_TSTAMP_NONE)
-		return (quality);
-	if (quality == USBPF_TSTAMP_NORMAL)
-		binuptime(bt);
-	else
-		getbinuptime(bt);
-
-	return (quality);
-}
-
-/*
- * If the buffer mechanism has a way to decide that a held buffer can be made
- * free, then it is exposed via the usbpf_canfreebuf() interface.  (1) is
- * returned if the buffer can be discarded, (0) is returned if it cannot.
- */
-static int
-usbpf_canfreebuf(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-
-	return (0);
-}
-
-/*
- * Allow the buffer model to indicate that the current store buffer is
- * immutable, regardless of the appearance of space.  Return (1) if the
- * buffer is writable, and (0) if not.
- */
-static int
-usbpf_canwritebuf(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-	return (1);
-}
-
-/*
- * Notify buffer model that an attempt to write to the store buffer has
- * resulted in a dropped packet, in which case the buffer may be considered
- * full.
- */
-static void
-usbpf_buffull(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-}
-
-/*
- * This function gets called when the free buffer is re-assigned.
- */
-static void
-usbpf_buf_reclaimed(struct usbpf_d *ud)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-
-	switch (ud->ud_bufmode) {
-	case USBPF_BUFMODE_BUFFER:
-		return;
-
-	default:
-		panic("usbpf_buf_reclaimed");
-	}
-}
-
-#define	SIZEOF_USBPF_HDR(type)	\
-    (offsetof(type, uh_hdrlen) + sizeof(((type *)0)->uh_hdrlen))
-
-static int
-usbpf_hdrlen(struct usbpf_d *ud)
-{
-	int hdrlen;
-
-	hdrlen = ud->ud_bif->uif_hdrlen;
-	hdrlen += SIZEOF_USBPF_HDR(struct usbpf_xhdr);
-	hdrlen = USBPF_WORDALIGN(hdrlen);
-
-	return (hdrlen - ud->ud_bif->uif_hdrlen);
-}
-
-static void
-usbpf_bintime2ts(struct bintime *bt, struct usbpf_ts *ts, int tstype)
-{
-	struct bintime bt2;
-	struct timeval tsm;
-	struct timespec tsn;
-
-	if ((tstype & USBPF_T_MONOTONIC) == 0) {
-		bt2 = *bt;
-		bintime_add(&bt2, &boottimebin);
-		bt = &bt2;
-	}
-	switch (USBPF_T_FORMAT(tstype)) {
-	case USBPF_T_MICROTIME:
-		bintime2timeval(bt, &tsm);
-		ts->ut_sec = tsm.tv_sec;
-		ts->ut_frac = tsm.tv_usec;
-		break;
-	case USBPF_T_NANOTIME:
-		bintime2timespec(bt, &tsn);
-		ts->ut_sec = tsn.tv_sec;
-		ts->ut_frac = tsn.tv_nsec;
-		break;
-	case USBPF_T_BINTIME:
-		ts->ut_sec = bt->sec;
-		ts->ut_frac = bt->frac;
-		break;
-	}
-}
-
-/*
- * Move the packet data from interface memory (pkt) into the
- * store buffer.  "cpfn" is the routine called to do the actual data
- * transfer.  bcopy is passed in to copy contiguous chunks, while
- * usbpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
- * pkt is really an mbuf.
- */
-static void
-catchpacket(struct usbpf_d *ud, u_char *pkt, u_int pktlen, u_int snaplen,
-    void (*cpfn)(struct usbpf_d *, caddr_t, u_int, void *, u_int),
-    struct bintime *bt)
-{
-	struct usbpf_xhdr hdr;
-	int caplen, curlen, hdrlen, totlen;
-	int do_wakeup = 0;
-	int do_timestamp;
-	int tstype;
-
-	USBPFD_LOCK_ASSERT(ud);
-
-	/*
-	 * Detect whether user space has released a buffer back to us, and if
-	 * so, move it from being a hold buffer to a free buffer.  This may
-	 * not be the best place to do it (for example, we might only want to
-	 * run this check if we need the space), but for now it's a reliable
-	 * spot to do it.
-	 */
-	if (ud->ud_fbuf == NULL && usbpf_canfreebuf(ud)) {
-		ud->ud_fbuf = ud->ud_hbuf;
-		ud->ud_hbuf = NULL;
-		ud->ud_hlen = 0;
-		usbpf_buf_reclaimed(ud);
-	}
-
-	/*
-	 * Figure out how many bytes to move.  If the packet is
-	 * greater or equal to the snapshot length, transfer that
-	 * much.  Otherwise, transfer the whole packet (unless
-	 * we hit the buffer size limit).
-	 */
-	hdrlen = usbpf_hdrlen(ud);
-	totlen = hdrlen + min(snaplen, pktlen);
-	if (totlen > ud->ud_bufsize)
-		totlen = ud->ud_bufsize;
-
-	/*
-	 * Round up the end of the previous packet to the next longword.
-	 *
-	 * Drop the packet if there's no room and no hope of room
-	 * If the packet would overflow the storage buffer or the storage
-	 * buffer is considered immutable by the buffer model, try to rotate
-	 * the buffer and wakeup pending processes.
-	 */
-	curlen = USBPF_WORDALIGN(ud->ud_slen);
-	if (curlen + totlen > ud->ud_bufsize || !usbpf_canwritebuf(ud)) {
-		if (ud->ud_fbuf == NULL) {
-			/*
-			 * There's no room in the store buffer, and no
-			 * prospect of room, so drop the packet.  Notify the
-			 * buffer model.
-			 */
-			usbpf_buffull(ud);
-			++ud->ud_dcount;
-			return;
-		}
-		USBPF_ROTATE_BUFFERS(ud);
-		do_wakeup = 1;
-		curlen = 0;
-	} else if (ud->ud_immediate || ud->ud_state == USBPF_TIMED_OUT)
-		/*
-		 * Immediate mode is set, or the read timeout has already
-		 * expired during a select call.  A packet arrived, so the
-		 * reader should be woken up.
-		 */
-		do_wakeup = 1;
-	caplen = totlen - hdrlen;
-	tstype = ud->ud_tstamp;
-	do_timestamp = tstype != USBPF_T_NONE;
-
-	/*
-	 * Append the usbpf header.  Note we append the actual header size, but
-	 * move forward the length of the header plus padding.
-	 */
-	bzero(&hdr, sizeof(hdr));
-	if (do_timestamp)
-		usbpf_bintime2ts(bt, &hdr.uh_tstamp, tstype);
-	hdr.uh_datalen = pktlen;
-	hdr.uh_hdrlen = hdrlen;
-	hdr.uh_caplen = caplen;
-	usbpf_append_bytes(ud, ud->ud_sbuf, curlen, &hdr, sizeof(hdr));
-
-	/*
-	 * Copy the packet data into the store buffer and update its length.
-	 */
-	(*cpfn)(ud, ud->ud_sbuf, curlen + hdrlen, pkt, caplen);
-	ud->ud_slen = curlen + totlen;
-
-	if (do_wakeup)
-		usbpf_wakeup(ud);
-}
-
-/*
- * Incoming linkage from device drivers.  Process the packet pkt, of length
- * pktlen, which is stored in a contiguous buffer.  The packet is parsed
- * by each process' filter, and if accepted, stashed into the corresponding
- * buffer.
- */
-static void
-usbpf_tap(struct usbpf_if *uif, u_char *pkt, u_int pktlen)
-{
-	struct bintime bt;
-	struct usbpf_d *ud;
-	u_int slen;
-	int gottime;
-
-	gottime = USBPF_TSTAMP_NONE;
-	USBPFIF_LOCK(uif);
-	LIST_FOREACH(ud, &uif->uif_dlist, ud_next) {
-		USBPFD_LOCK(ud);
-		++ud->ud_rcount;
-		slen = usbpf_filter(ud->ud_rfilter, pkt, pktlen, pktlen);
-		if (slen != 0) {
-			ud->ud_fcount++;
-			if (gottime < usbpf_ts_quality(ud->ud_tstamp))
-				gottime = usbpf_gettime(&bt, ud->ud_tstamp);
-			catchpacket(ud, pkt, pktlen, slen,
-			    usbpf_append_bytes, &bt);
-		}
-		USBPFD_UNLOCK(ud);
-	}
-	USBPFIF_UNLOCK(uif);
-}
-
 static uint32_t
 usbpf_aggregate_xferflags(struct usb_xfer_flags *flags)
 {
 	uint32_t val = 0;
 
 	if (flags->force_short_xfer == 1)
 		val |= USBPF_FLAG_FORCE_SHORT_XFER;
 	if (flags->short_xfer_ok == 1)
 		val |= USBPF_FLAG_SHORT_XFER_OK;
 	if (flags->short_frames_ok == 1)
 		val |= USBPF_FLAG_SHORT_FRAMES_OK;
 	if (flags->pipe_bof == 1)
 		val |= USBPF_FLAG_PIPE_BOF;
 	if (flags->proxy_buffer == 1)
 		val |= USBPF_FLAG_PROXY_BUFFER;
 	if (flags->ext_buffer == 1)
 		val |= USBPF_FLAG_EXT_BUFFER;
 	if (flags->manual_status == 1)
 		val |= USBPF_FLAG_MANUAL_STATUS;
 	if (flags->no_pipe_ok == 1)
 		val |= USBPF_FLAG_NO_PIPE_OK;
 	if (flags->stall_pipe == 1)
 		val |= USBPF_FLAG_STALL_PIPE;
 	return (val);
 }
 
 static uint32_t
 usbpf_aggregate_status(struct usb_xfer_flags_int *flags)
 {
 	uint32_t val = 0;
 
 	if (flags->open == 1)
 		val |= USBPF_STATUS_OPEN;
 	if (flags->transferring == 1)
 		val |= USBPF_STATUS_TRANSFERRING;
 	if (flags->did_dma_delay == 1)
 		val |= USBPF_STATUS_DID_DMA_DELAY;
 	if (flags->did_close == 1)
 		val |= USBPF_STATUS_DID_CLOSE;
 	if (flags->draining == 1)
 		val |= USBPF_STATUS_DRAINING;
 	if (flags->started == 1)
 		val |= USBPF_STATUS_STARTED;
 	if (flags->bandwidth_reclaimed == 1)
 		val |= USBPF_STATUS_BW_RECLAIMED;
 	if (flags->control_xfr == 1)
 		val |= USBPF_STATUS_CONTROL_XFR;
 	if (flags->control_hdr == 1)
 		val |= USBPF_STATUS_CONTROL_HDR;
 	if (flags->control_act == 1)
 		val |= USBPF_STATUS_CONTROL_ACT;
 	if (flags->control_stall == 1)
 		val |= USBPF_STATUS_CONTROL_STALL;
 	if (flags->short_frames_ok == 1)
 		val |= USBPF_STATUS_SHORT_FRAMES_OK;
 	if (flags->short_xfer_ok == 1)
 		val |= USBPF_STATUS_SHORT_XFER_OK;
 #if USB_HAVE_BUSDMA
 	if (flags->bdma_enable == 1)
 		val |= USBPF_STATUS_BDMA_ENABLE;
 	if (flags->bdma_no_post_sync == 1)
 		val |= USBPF_STATUS_BDMA_NO_POST_SYNC;
 	if (flags->bdma_setup == 1)
 		val |= USBPF_STATUS_BDMA_SETUP;
 #endif
 	if (flags->isochronous_xfr == 1)
 		val |= USBPF_STATUS_ISOCHRONOUS_XFR;
 	if (flags->curr_dma_set == 1)
 		val |= USBPF_STATUS_CURR_DMA_SET;
 	if (flags->can_cancel_immed == 1)
 		val |= USBPF_STATUS_CAN_CANCEL_IMMED;
 	if (flags->doing_callback == 1)
 		val |= USBPF_STATUS_DOING_CALLBACK;
 
 	return (val);
 }
 
 void
 usbpf_xfertap(struct usb_xfer *xfer, int type)
 {
 	struct usb_endpoint *ep = xfer->endpoint;
 	struct usb_page_search res;
 	struct usb_xfer_root *info = xfer->xroot;
 	struct usb_bus *bus = info->bus;
 	struct usbpf_pkthdr *up;
 	usb_frlength_t isoc_offset = 0;
 	int i;
 	char *buf, *ptr, *end;
 
-	/*
-	 * NB: usbpf_uifd_cnt isn't protected by USBPFIF_LOCK() because it's
-	 * not harmful.
-	 */
-	if (usbpf_uifd_cnt == 0)
+	if (!bpf_peers_present(bus->ifp->if_bpf))
 		return;
 
 	/*
 	 * XXX TODO
 	 * Allocating the buffer here causes copy operations twice what's
 	 * really inefficient. Copying usbpf_pkthdr and data is for USB packet
 	 * read filter to pass a virtually linear buffer.
 	 */
 	buf = ptr = malloc(sizeof(struct usbpf_pkthdr) + (USB_PAGE_SIZE * 5),
-	    M_USBPF, M_NOWAIT);
+	    M_TEMP, M_NOWAIT);
 	if (buf == NULL) {
 		printf("usbpf_xfertap: out of memory\n");	/* XXX */
 		return;
 	}
 	end = buf + sizeof(struct usbpf_pkthdr) + (USB_PAGE_SIZE * 5);
 
 	bzero(ptr, sizeof(struct usbpf_pkthdr));
 	up = (struct usbpf_pkthdr *)ptr;
 	up->up_busunit = htole32(device_get_unit(bus->bdev));
 	up->up_type = type;
 	up->up_xfertype = ep->edesc->bmAttributes & UE_XFERTYPE;
 	up->up_address = xfer->address;
 	up->up_endpoint = xfer->endpointno;
 	up->up_flags = htole32(usbpf_aggregate_xferflags(&xfer->flags));
 	up->up_status = htole32(usbpf_aggregate_status(&xfer->flags_int));
 	switch (type) {
 	case USBPF_XFERTAP_SUBMIT:
 		up->up_length = htole32(xfer->sumlen);
 		up->up_frames = htole32(xfer->nframes);
 		break;
 	case USBPF_XFERTAP_DONE:
 		up->up_length = htole32(xfer->actlen);
 		up->up_frames = htole32(xfer->aframes);
 		break;
 	default:
 		panic("wrong usbpf type (%d)", type);
 	}
 
 	up->up_error = htole32(xfer->error);
 	up->up_interval = htole32(xfer->interval);
 	ptr += sizeof(struct usbpf_pkthdr);
 
 	for (i = 0; i < up->up_frames; i++) {
 		if (ptr + sizeof(u_int32_t) >= end)
 			goto done;
 		*((u_int32_t *)ptr) = htole32(xfer->frlengths[i]);
 		ptr += sizeof(u_int32_t);
 
 		if (ptr + xfer->frlengths[i] >= end)
 			goto done;
 		if (xfer->flags_int.isochronous_xfr == 1) {
 			usbd_get_page(&xfer->frbuffers[0], isoc_offset, &res);
 			isoc_offset += xfer->frlengths[i];
 		} else
 			usbd_get_page(&xfer->frbuffers[i], 0, &res);
 		bcopy(res.buffer, ptr, xfer->frlengths[i]);
 		ptr += xfer->frlengths[i];
 	}
 
-	usbpf_tap(bus->uif, buf, ptr - buf);
+	bpf_tap(bus->ifp->if_bpf, buf, ptr - buf);
 done:
-	free(buf, M_USBPF);
+	free(buf, M_TEMP);
 }
-
-static void
-usbpf_append_bytes(struct usbpf_d *ud, caddr_t buf, u_int offset, void *src,
-    u_int len)
-{
-
-	USBPFD_LOCK_ASSERT(ud);
-
-	switch (ud->ud_bufmode) {
-	case USBPF_BUFMODE_BUFFER:
-		return (usbpf_buffer_append_bytes(ud, buf, offset, src, len));
-	default:
-		panic("usbpf_buf_append_bytes");
-	}
-}
-
-static void
-usbpf_drvinit(void *unused)
-{
-	struct cdev *dev;
-
-	mtx_init(&usbpf_mtx, "USB packet filter global lock", NULL,
-	    MTX_DEF);
-	LIST_INIT(&usbpf_iflist);
-
-	dev = make_dev(&usbpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "usbpf");
-}
-
-SYSINIT(usbpf_dev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, usbpf_drvinit, NULL);
Index: projects/binutils-2.17/sys/dev/usb/usb_pf.h
===================================================================
--- projects/binutils-2.17/sys/dev/usb/usb_pf.h	(revision 215829)
+++ projects/binutils-2.17/sys/dev/usb/usb_pf.h	(revision 215830)
@@ -1,319 +1,98 @@
 /*-
  * Copyright (c) 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from the Stanford/CMU enet packet filter,
  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  * Berkeley Laboratory.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _DEV_USB_PF_H
 #define	_DEV_USB_PF_H
 
-#ifdef _KERNEL
-#include <sys/callout.h>
-#include <sys/selinfo.h>
-#include <sys/queue.h>
-#include <sys/conf.h>
-#endif
-
-typedef	int32_t	  usbpf_int32;
-typedef	u_int32_t usbpf_u_int32;
-typedef	int64_t	  usbpf_int64;
-typedef	u_int64_t usbpf_u_int64;
-
-struct usbpf_if;
-
-/*
- * Alignment macros.  USBPF_WORDALIGN rounds up to the next
- * even multiple of USBPF_ALIGNMENT.
- */
-#define	USBPF_ALIGNMENT sizeof(long)
-#define	USBPF_WORDALIGN(x) (((x)+(USBPF_ALIGNMENT-1))&~(USBPF_ALIGNMENT-1))
-
-/*
- * The instruction encodings.
- */
-
-/* instruction classes */
-#define	USBPF_CLASS(code) ((code) & 0x07)
-#define		USBPF_LD	0x00
-#define		USBPF_LDX	0x01
-#define		USBPF_ST	0x02
-#define		USBPF_STX	0x03
-#define		USBPF_ALU	0x04
-#define		USBPF_JMP	0x05
-#define		USBPF_RET	0x06
-#define		USBPF_MISC	0x07
-
-/* ld/ldx fields */
-#define	USBPF_SIZE(code)	((code) & 0x18)
-#define		USBPF_W		0x00
-#define		USBPF_H		0x08
-#define		USBPF_B		0x10
-#define	USBPF_MODE(code)	((code) & 0xe0)
-#define		USBPF_IMM 	0x00
-#define		USBPF_ABS	0x20
-#define		USBPF_IND	0x40
-#define		USBPF_MEM	0x60
-#define		USBPF_LEN	0x80
-#define		USBPF_MSH	0xa0
-
-/* alu/jmp fields */
-#define	USBPF_OP(code)	((code) & 0xf0)
-#define		USBPF_ADD	0x00
-#define		USBPF_SUB	0x10
-#define		USBPF_MUL	0x20
-#define		USBPF_DIV	0x30
-#define		USBPF_OR	0x40
-#define		USBPF_AND	0x50
-#define		USBPF_LSH	0x60
-#define		USBPF_RSH	0x70
-#define		USBPF_NEG	0x80
-#define		USBPF_JA	0x00
-#define		USBPF_JEQ	0x10
-#define		USBPF_JGT	0x20
-#define		USBPF_JGE	0x30
-#define		USBPF_JSET	0x40
-#define	USBPF_SRC(code)	((code) & 0x08)
-#define		USBPF_K		0x00
-#define		USBPF_X		0x08
-
-/* ret - USBPF_K and USBPF_X also apply */
-#define	USBPF_RVAL(code)	((code) & 0x18)
-#define		USBPF_A		0x10
-
-/* misc */
-#define	USBPF_MISCOP(code) ((code) & 0xf8)
-#define		USBPF_TAX	0x00
-#define		USBPF_TXA	0x80
-
-/*
- * The instruction data structure.
- */
-struct usbpf_insn {
-	u_short		code;
-	u_char		jt;
-	u_char		jf;
-	usbpf_u_int32	k;
-};
-
-#ifdef _KERNEL
-
-/*
- * Descriptor associated with each open uff file.
- */
-
-struct usbpf_d {
-	LIST_ENTRY(usbpf_d) ud_next;	/* Linked list of descriptors */
-	/*
-	 * Buffer slots: two memory buffers store the incoming packets.
-	 *   The model has three slots.  Sbuf is always occupied.
-	 *   sbuf (store) - Receive interrupt puts packets here.
-	 *   hbuf (hold) - When sbuf is full, put buffer here and
-	 *                 wakeup read (replace sbuf with fbuf).
-	 *   fbuf (free) - When read is done, put buffer here.
-	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
-	 */
-	caddr_t		ud_sbuf;	/* store slot */
-	caddr_t		ud_hbuf;	/* hold slot */
-	caddr_t		ud_fbuf;	/* free slot */
-	int		ud_slen;	/* current length of store buffer */
-	int		ud_hlen;	/* current length of hold buffer */
-
-	int		ud_bufsize;	/* absolute length of buffers */
-
-	struct usbpf_if *ud_bif;	/* interface descriptor */
-	u_long		ud_rtout;	/* Read timeout in 'ticks' */
-	struct usbpf_insn *ud_rfilter;	/* read filter code */
-	struct usbpf_insn *ud_wfilter;	/* write filter code */
-	void		*ud_bfilter;	/* binary filter code */
-	u_int64_t	ud_rcount;	/* number of packets received */
-	u_int64_t	ud_dcount;	/* number of packets dropped */
-
-	u_char		ud_promisc;	/* true if listening promiscuously */
-	u_char		ud_state;	/* idle, waiting, or timed out */
-	u_char		ud_immediate;	/* true to return on packet arrival */
-	int		ud_hdrcmplt;	/* false to fill in src lladdr automatically */
-	int		ud_direction;	/* select packet direction */
-	int		ud_tstamp;	/* select time stamping function */
-	int		ud_feedback;	/* true to feed back sent packets */
-	int		ud_async;	/* non-zero if packet reception should generate signal */
-	int		ud_sig;		/* signal to send upon packet reception */
-	struct sigio *	ud_sigio;	/* information for async I/O */
-	struct selinfo	ud_sel;		/* bsd select info */
-	struct mtx	ud_mtx;		/* mutex for this descriptor */
-	struct callout	ud_callout;	/* for USBPF timeouts with select */
-	struct label	*ud_label;	/* MAC label for descriptor */
-	u_int64_t	ud_fcount;	/* number of packets which matched filter */
-	pid_t		ud_pid;		/* PID which created descriptor */
-	int		ud_locked;	/* true if descriptor is locked */
-	u_int		ud_bufmode;	/* Current buffer mode. */
-	u_int64_t	ud_wcount;	/* number of packets written */
-	u_int64_t	ud_wfcount;	/* number of packets that matched write filter */
-	u_int64_t	ud_wdcount;	/* number of packets dropped during a write */
-	u_int64_t	ud_zcopy;	/* number of zero copy operations */
-	u_char		ud_compat32;	/* 32-bit stream on LP64 system */
-};
-
-#define	USBPFD_LOCK(ud)		mtx_lock(&(ud)->ud_mtx)
-#define	USBPFD_UNLOCK(ud)	mtx_unlock(&(ud)->ud_mtx)
-#define	USBPFD_LOCK_ASSERT(ud)	mtx_assert(&(ud)->ud_mtx, MA_OWNED)
-
-/*
- * Descriptor associated with each attached hardware interface.
- */
-struct usbpf_if {
-	LIST_ENTRY(usbpf_if) uif_next; /* list of all interfaces */
-	LIST_HEAD(, usbpf_d) uif_dlist;	/* descriptor list */
-	u_int uif_hdrlen;		/* length of link header */
-	struct usb_bus *uif_ubus;	/* corresponding interface */
-	struct mtx	uif_mtx;	/* mutex for interface */
-};
-
-#define	USBPFIF_LOCK(uif)	mtx_lock(&(uif)->uif_mtx)
-#define	USBPFIF_UNLOCK(uif)	mtx_unlock(&(uif)->uif_mtx)
-
-#endif
-
-/*
- * Structure prepended to each packet.
- */
-struct usbpf_ts {
-	usbpf_int64	ut_sec;		/* seconds */
-	usbpf_u_int64	ut_frac;	/* fraction */
-};
-struct usbpf_xhdr {
-	struct usbpf_ts	uh_tstamp;	/* time stamp */
-	usbpf_u_int32	uh_caplen;	/* length of captured portion */
-	usbpf_u_int32	uh_datalen;	/* original length of packet */
-	u_short		uh_hdrlen;	/* length of uff header (this struct
-					   plus alignment padding) */
-};
-
-#define	USBPF_BUFMODE_BUFFER	1	/* Kernel buffers with read(). */
-#define	USBPF_BUFMODE_ZBUF	2	/* Zero-copy buffers. */
-
 struct usbpf_pkthdr {	/* fixed-size header written at the start of each tapped transfer record */
 	int		up_busunit;	/* Host controller unit number */
 	u_char		up_address;	/* USB device address */
 	u_char		up_endpoint;	/* USB endpoint */
 	u_char		up_type;	/* Tap point: SUBMIT / DONE (presumably USBPF_XFERTAP_*) */
 	u_char		up_xfertype;	/* Transfer type */
 	u_int32_t	up_flags;	/* Transfer flags (USBPF_FLAG_* bits) */
 #define	USBPF_FLAG_FORCE_SHORT_XFER	(1 << 0)
 #define	USBPF_FLAG_SHORT_XFER_OK	(1 << 1)
 #define	USBPF_FLAG_SHORT_FRAMES_OK	(1 << 2)
 #define	USBPF_FLAG_PIPE_BOF		(1 << 3)
 #define	USBPF_FLAG_PROXY_BUFFER		(1 << 4)
 #define	USBPF_FLAG_EXT_BUFFER		(1 << 5)
 #define	USBPF_FLAG_MANUAL_STATUS	(1 << 6)
 #define	USBPF_FLAG_NO_PIPE_OK		(1 << 7)
 #define	USBPF_FLAG_STALL_PIPE		(1 << 8)
 	u_int32_t	up_status;	/* Transfer status (USBPF_STATUS_* bits) */
 #define	USBPF_STATUS_OPEN		(1 << 0)
 #define	USBPF_STATUS_TRANSFERRING	(1 << 1)
 #define	USBPF_STATUS_DID_DMA_DELAY	(1 << 2)
 #define	USBPF_STATUS_DID_CLOSE		(1 << 3)
 #define	USBPF_STATUS_DRAINING		(1 << 4)
 #define	USBPF_STATUS_STARTED		(1 << 5)
 #define	USBPF_STATUS_BW_RECLAIMED	(1 << 6)
 #define	USBPF_STATUS_CONTROL_XFR	(1 << 7)
 #define	USBPF_STATUS_CONTROL_HDR	(1 << 8)
 #define	USBPF_STATUS_CONTROL_ACT	(1 << 9)
 #define	USBPF_STATUS_CONTROL_STALL	(1 << 10)
 #define	USBPF_STATUS_SHORT_FRAMES_OK	(1 << 11)
 #define	USBPF_STATUS_SHORT_XFER_OK	(1 << 12)
 #if USB_HAVE_BUSDMA
 #define	USBPF_STATUS_BDMA_ENABLE	(1 << 13)
 #define	USBPF_STATUS_BDMA_NO_POST_SYNC	(1 << 14)
 #define	USBPF_STATUS_BDMA_SETUP		(1 << 15)
 #endif
 #define	USBPF_STATUS_ISOCHRONOUS_XFR	(1 << 16)
 #define	USBPF_STATUS_CURR_DMA_SET	(1 << 17)
 #define	USBPF_STATUS_CAN_CANCEL_IMMED	(1 << 18)
 #define	USBPF_STATUS_DOING_CALLBACK	(1 << 19)
 	u_int32_t	up_length;	/* Total data length (sumlen on SUBMIT, actlen on DONE) */
 	u_int32_t	up_frames;	/* Number of USB frames (nframes on SUBMIT, aframes on DONE) */
 	u_int32_t	up_error;	/* usb_error_t */
 	u_int32_t	up_interval;	/* for interrupt and isoc */
 	/* Reserved padding so that sizeof(struct usbpf_pkthdr) == 128 bytes */
 	u_char		up_reserved[96];
 };
 
-struct usbpf_version {
-	u_short		uv_major;
-	u_short		uv_minor;
-};
-#define	USBPF_MAJOR_VERSION	1
-#define	USBPF_MINOR_VERSION	1
+#define	USBPF_HDR_LEN		128
 
-#define	USBPF_IFNAMSIZ	32
-struct usbpf_ifreq {
-	/* bus name, e.g. "usbus0" */
-	char		ufr_name[USBPF_IFNAMSIZ];
-};
-
-/*
- *  Structure for UIOCSETF.
- */
-struct usbpf_program {
-	u_int			uf_len;
-	struct usbpf_insn	*uf_insns;
-};
-
-/*
- * Struct returned by UIOCGSTATS.
- */
-struct usbpf_stat {
-	u_int us_recv;		/* number of packets received */
-	u_int us_drop;		/* number of packets dropped */
-};
-
-#define	UIOCGBLEN	_IOR('U', 102, u_int)
-#define	UIOCSBLEN	_IOWR('U', 102, u_int)
-#define	UIOCSETF	_IOW('U', 103, struct usbpf_program)
-#define	UIOCSETIF	_IOW('U', 108, struct usbpf_ifreq)
-#define	UIOCSRTIMEOUT	_IOW('U', 109, struct timeval)
-#define	UIOCGRTIMEOUT	_IOR('U', 110, struct timeval)
-#define	UIOCGSTATS	_IOR('U', 111, struct usbpf_stat)
-#define	UIOCVERSION	_IOR('U', 113, struct usbpf_version)
-#define	UIOCSETWF	_IOW('U', 123, struct usbpf_program)
-
 #define	USBPF_XFERTAP_SUBMIT	0
 #define	USBPF_XFERTAP_DONE	1
 
 #ifdef _KERNEL
-void	usbpf_attach(struct usb_bus *, struct usbpf_if **);
+void	usbpf_attach(struct usb_bus *);
 void	usbpf_detach(struct usb_bus *);
 void	usbpf_xfertap(struct usb_xfer *, int);
 #endif
 
 #endif
Index: projects/binutils-2.17/sys/dev/usb/usbdevs
===================================================================
--- projects/binutils-2.17/sys/dev/usb/usbdevs	(revision 215829)
+++ projects/binutils-2.17/sys/dev/usb/usbdevs	(revision 215830)
@@ -1,3419 +1,3419 @@
 $FreeBSD$
 /* $NetBSD: usbdevs,v 1.392 2004/12/29 08:38:44 imp Exp $ */
 
 /*-
  * Copyright (c) 1998-2004 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Lennart Augustsson (lennart@augustsson.net) at
  * Carlstedt Research & Technology.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * List of known USB vendors
  *
  * USB.org publishes a VID list of USB-IF member companies at
  * http://www.usb.org/developers/tools
  * Note that it does not show companies that have obtained a Vendor ID
  * without becoming full members.
  *
  * Please note that these IDs do not do anything. Adding an ID here and
  * regenerating the usbdevs.h and usbdevs_data.h only makes a symbolic name
  * available to the source code and does not change any functionality, nor
  * does it make your device available to a specific driver.
  * It will however make the descriptive string available if a device does not
  * provide the string itself.
  *
  * After adding a vendor ID VNDR and a product ID PRDCT you will have the
  * following extra defines:
  * #define USB_VENDOR_VNDR              0x????
  * #define USB_PRODUCT_VNDR_PRDCT       0x????
  *
  * You may have to add these defines to the respective probe routines to
  * make the device recognised by the appropriate device driver.
  */
 
 vendor UNKNOWN1		0x0053	Unknown vendor
 vendor UNKNOWN2		0x0105	Unknown vendor
 vendor EGALAX2		0x0123	eGalax, Inc.
 vendor CHIPSBANK	0x0204	Chipsbank Microelectronics Co.
 vendor HUMAX		0x02ad	HUMAX
 vendor LTS		0x0386	LTS
 vendor BWCT		0x03da	Bernd Walter Computer Technology
 vendor AOX		0x03e8	AOX
 vendor THESYS		0x03e9	Thesys
 vendor DATABROADCAST	0x03ea	Data Broadcasting
 vendor ATMEL		0x03eb	Atmel
 vendor IWATSU		0x03ec	Iwatsu America
 vendor MITSUMI		0x03ee	Mitsumi
 vendor HP		0x03f0	Hewlett Packard
 vendor GENOA		0x03f1	Genoa
 vendor OAK		0x03f2	Oak
 vendor ADAPTEC		0x03f3	Adaptec
 vendor DIEBOLD		0x03f4	Diebold
 vendor SIEMENSELECTRO	0x03f5	Siemens Electromechanical
 vendor EPSONIMAGING	0x03f8	Epson Imaging
 vendor KEYTRONIC	0x03f9	KeyTronic
 vendor OPTI		0x03fb	OPTi
 vendor ELITEGROUP	0x03fc	Elitegroup
 vendor XILINX		0x03fd	Xilinx
 vendor FARALLON		0x03fe	Farallon Communications
 vendor NATIONAL		0x0400	National Semiconductor
 vendor NATIONALREG	0x0401	National Registry
 vendor ACERLABS		0x0402	Acer Labs
 vendor FTDI		0x0403	Future Technology Devices
 vendor NCR		0x0404	NCR
 vendor SYNOPSYS2	0x0405	Synopsys
 vendor FUJITSUICL	0x0406	Fujitsu-ICL
 vendor FUJITSU2		0x0407	Fujitsu Personal Systems
 vendor QUANTA		0x0408	Quanta
 vendor NEC		0x0409	NEC
 vendor KODAK		0x040a	Eastman Kodak
 vendor WELTREND		0x040b	Weltrend
 vendor VIA		0x040d	VIA
 vendor MCCI		0x040e	MCCI
 vendor MELCO		0x0411	Melco
 vendor LEADTEK		0x0413	Leadtek
 vendor WINBOND		0x0416	Winbond
 vendor PHOENIX		0x041a	Phoenix
 vendor CREATIVE		0x041e	Creative Labs
 vendor NOKIA		0x0421	Nokia
 vendor ADI		0x0422	ADI Systems
 vendor CATC		0x0423	Computer Access Technology
 vendor SMC2		0x0424	Standard Microsystems
 vendor MOTOROLA_HK	0x0425	Motorola HK
 vendor GRAVIS		0x0428	Advanced Gravis Computer
 vendor CIRRUSLOGIC	0x0429	Cirrus Logic
 vendor INNOVATIVE	0x042c	Innovative Semiconductors
 vendor MOLEX		0x042f	Molex
 vendor SUN		0x0430	Sun Microsystems
 vendor UNISYS		0x0432	Unisys
 vendor TAUGA		0x0436	Taugagreining HF
 vendor AMD		0x0438	Advanced Micro Devices
 vendor LEXMARK		0x043d	Lexmark International
 vendor LG		0x043e	LG Electronics
 vendor NANAO		0x0440	NANAO
 vendor GATEWAY		0x0443	Gateway 2000
 vendor NMB		0x0446	NMB
 vendor ALPS		0x044e	Alps Electric
 vendor THRUST		0x044f	Thrustmaster
 vendor TI		0x0451	Texas Instruments
 vendor ANALOGDEVICES	0x0456	Analog Devices
 vendor SIS		0x0457	Silicon Integrated Systems Corp.
 vendor KYE		0x0458	KYE Systems
 vendor DIAMOND2		0x045a	Diamond (Supra)
 vendor RENESAS		0x045b	Renesas
 vendor MICROSOFT	0x045e	Microsoft
 vendor PRIMAX		0x0461	Primax Electronics
 vendor MGE		0x0463	MGE UPS Systems
 vendor AMP		0x0464	AMP
 vendor CHERRY		0x046a	Cherry Mikroschalter
 vendor MEGATRENDS	0x046b	American Megatrends
 vendor LOGITECH		0x046d	Logitech
 vendor BTC		0x046e	Behavior Tech. Computer
 vendor PHILIPS		0x0471	Philips
 vendor SUN2		0x0472	Sun Microsystems (offical)
 vendor SANYO		0x0474	Sanyo Electric
 vendor SEAGATE		0x0477	Seagate
 vendor CONNECTIX	0x0478	Connectix
 vendor SEMTECH		0x047a	Semtech
 vendor KENSINGTON	0x047d	Kensington
 vendor LUCENT		0x047e	Lucent
 vendor PLANTRONICS	0x047f	Plantronics
 vendor KYOCERA		0x0482	Kyocera Wireless Corp.
 vendor STMICRO		0x0483	STMicroelectronics
 vendor FOXCONN		0x0489	Foxconn
 vendor MEIZU		0x0492	Meizu Electronics
 vendor YAMAHA		0x0499	YAMAHA
 vendor COMPAQ		0x049f	Compaq
 vendor HITACHI		0x04a4	Hitachi
 vendor ACERP		0x04a5	Acer Peripherals
 vendor DAVICOM		0x04a6	Davicom
 vendor VISIONEER	0x04a7	Visioneer
 vendor CANON		0x04a9	Canon
 vendor NIKON		0x04b0	Nikon
 vendor PAN		0x04b1	Pan International
 vendor IBM		0x04b3	IBM
 vendor CYPRESS		0x04b4	Cypress Semiconductor
 vendor ROHM		0x04b5	ROHM
 vendor COMPAL		0x04b7	Compal
 vendor EPSON		0x04b8	Seiko Epson
 vendor RAINBOW		0x04b9	Rainbow Technologies
 vendor IODATA		0x04bb	I-O Data
 vendor TDK		0x04bf	TDK
 vendor 3COMUSR		0x04c1	U.S. Robotics
 vendor METHODE		0x04c2	Methode Electronics Far East
 vendor MAXISWITCH	0x04c3	Maxi Switch
 vendor LOCKHEEDMER	0x04c4	Lockheed Martin Energy Research
 vendor FUJITSU		0x04c5	Fujitsu
 vendor TOSHIBAAM	0x04c6	Toshiba America
 vendor MICROMACRO	0x04c7	Micro Macro Technologies
 vendor KONICA		0x04c8	Konica
 vendor LITEON		0x04ca	Lite-On Technology
 vendor FUJIPHOTO	0x04cb	Fuji Photo Film
 vendor PHILIPSSEMI	0x04cc	Philips Semiconductors
 vendor TATUNG		0x04cd	Tatung Co. Of America
 vendor SCANLOGIC	0x04ce	ScanLogic
 vendor MYSON		0x04cf	Myson Technology
 vendor DIGI2		0x04d0	Digi
 vendor ITTCANON		0x04d1	ITT Canon
 vendor ALTEC		0x04d2	Altec Lansing
 vendor LSI		0x04d4	LSI
 vendor MENTORGRAPHICS	0x04d6	Mentor Graphics
 vendor ITUNERNET	0x04d8	I-Tuner Networks
 vendor HOLTEK		0x04d9	Holtek Semiconductor, Inc.
 vendor PANASONIC	0x04da	Panasonic (Matsushita)
 vendor HUANHSIN		0x04dc	Huan Hsin
 vendor SHARP		0x04dd	Sharp
 vendor IIYAMA		0x04e1	Iiyama
 vendor SHUTTLE		0x04e6	Shuttle Technology
 vendor ELO		0x04e7	Elo TouchSystems
 vendor SAMSUNG		0x04e8	Samsung Electronics
 vendor NORTHSTAR	0x04eb	Northstar
 vendor TOKYOELECTRON	0x04ec	Tokyo Electron
 vendor ANNABOOKS	0x04ed	Annabooks
 vendor JVC		0x04f1	JVC
 vendor CHICONY		0x04f2	Chicony Electronics
 vendor ELAN		0x04f3	Elan
 vendor NEWNEX		0x04f7	Newnex
 vendor BROTHER		0x04f9	Brother Industries
 vendor DALLAS		0x04fa	Dallas Semiconductor
 vendor AIPTEK2		0x04fc	AIPTEK International
 vendor PFU		0x04fe	PFU
 vendor FUJIKURA		0x0501	Fujikura/DDK
 vendor ACER		0x0502	Acer
 vendor 3COM		0x0506	3Com
 vendor HOSIDEN		0x0507	Hosiden Corporation
 vendor AZTECH		0x0509	Aztech Systems
 vendor BELKIN		0x050d	Belkin Components
 vendor KAWATSU		0x050f	Kawatsu Semiconductor
 vendor FCI		0x0514	FCI
 vendor LONGWELL		0x0516	Longwell
 vendor COMPOSITE	0x0518	Composite
 vendor STAR		0x0519	Star Micronics
 vendor APC		0x051d	American Power Conversion
 vendor SCIATLANTA	0x051e	Scientific Atlanta
 vendor TSM		0x0520	TSM
 vendor CONNECTEK	0x0522	Advanced Connectek USA
 vendor NETCHIP		0x0525	NetChip Technology
 vendor ALTRA		0x0527	ALTRA
 vendor ATI		0x0528	ATI Technologies
 vendor AKS		0x0529	Aladdin Knowledge Systems
 vendor TEKOM		0x052b	Tekom
 vendor CANONDEV		0x052c	Canon
 vendor WACOMTECH	0x0531	Wacom
 vendor INVENTEC		0x0537	Inventec
 vendor SHYHSHIUN	0x0539	Shyh Shiun Terminals
 vendor PREHWERKE	0x053a	Preh Werke Gmbh & Co. KG
 vendor SYNOPSYS		0x053f	Synopsys
 vendor UNIACCESS	0x0540	Universal Access
 vendor VIEWSONIC	0x0543	ViewSonic
 vendor XIRLINK		0x0545	Xirlink
 vendor ANCHOR		0x0547	Anchor Chips
 vendor SONY		0x054c	Sony
 vendor FUJIXEROX	0x0550	Fuji Xerox
 vendor VISION		0x0553	VLSI Vision
 vendor ASAHIKASEI	0x0556	Asahi Kasei Microsystems
 vendor ATEN		0x0557	ATEN International
 vendor SAMSUNG2		0x055d	Samsung Electronics
 vendor MUSTEK		0x055f	Mustek Systems
 vendor TELEX		0x0562	Telex Communications
 vendor CHINON		0x0564	Chinon
 vendor PERACOM		0x0565	Peracom Networks
 vendor ALCOR2		0x0566	Alcor Micro
 vendor XYRATEX		0x0567	Xyratex
 vendor WACOM		0x056a	WACOM
 vendor ETEK		0x056c	e-TEK Labs
 vendor EIZO		0x056d	EIZO
 vendor ELECOM		0x056e	Elecom
 vendor CONEXANT		0x0572	Conexant
 vendor HAUPPAUGE	0x0573	Hauppauge Computer Works
 vendor BAFO		0x0576	BAFO/Quality Computer Accessories
 vendor YEDATA		0x057b	Y-E Data
 vendor AVM		0x057c	AVM
 vendor QUICKSHOT	0x057f	Quickshot
 vendor ROLAND		0x0582	Roland
 vendor ROCKFIRE		0x0583	Rockfire
 vendor RATOC		0x0584	RATOC Systems
 vendor ZYXEL		0x0586	ZyXEL Communication
 vendor INFINEON		0x058b	Infineon
 vendor MICREL		0x058d	Micrel
 vendor ALCOR		0x058f	Alcor Micro
 vendor OMRON		0x0590	OMRON
 vendor ZORAN		0x0595	Zoran Microelectronics
 vendor NIIGATA		0x0598	Niigata
 vendor IOMEGA		0x059b	Iomega
 vendor ATREND		0x059c	A-Trend Technology
 vendor AID		0x059d	Advanced Input Devices
 vendor LACIE		0x059f	LaCie
 vendor FUJIFILM		0x05a2	Fuji Film
 vendor ARC		0x05a3	ARC
 vendor ORTEK		0x05a4	Ortek
 vendor CISCOLINKSYS3	0x05a6	Cisco-Linksys
 vendor BOSE		0x05a7	Bose
 vendor OMNIVISION	0x05a9	OmniVision
 vendor INSYSTEM		0x05ab	In-System Design
 vendor APPLE		0x05ac	Apple Computer
 vendor YCCABLE		0x05ad	Y.C. Cable
 vendor DIGITALPERSONA	0x05ba	DigitalPersona
 vendor 3G		0x05bc	3G Green Green Globe
 vendor RAFI		0x05bd	RAFI
 vendor TYCO		0x05be	Tyco
 vendor KAWASAKI		0x05c1	Kawasaki
 vendor DIGI		0x05c5	Digi International
 vendor QUALCOMM2	0x05c6	Qualcomm
 vendor QTRONIX		0x05c7	Qtronix
 vendor FOXLINK		0x05c8	Foxlink
 vendor RICOH		0x05ca	Ricoh
 vendor ELSA		0x05cc	ELSA
 vendor SCIWORX		0x05ce	sci-worx
 vendor BRAINBOXES	0x05d1	Brainboxes Limited
 vendor ULTIMA		0x05d8	Ultima
 vendor AXIOHM		0x05d9	Axiohm Transaction Solutions
 vendor MICROTEK		0x05da	Microtek
 vendor SUNTAC		0x05db	SUN Corporation
 vendor LEXAR		0x05dc	Lexar Media
 vendor ADDTRON		0x05dd	Addtron
 vendor SYMBOL		0x05e0	Symbol Technologies
 vendor SYNTEK		0x05e1	Syntek
 vendor GENESYS		0x05e3	Genesys Logic
 vendor FUJI		0x05e5	Fuji Electric
 vendor KEITHLEY		0x05e6	Keithley Instruments
 vendor EIZONANAO	0x05e7	EIZO Nanao
 vendor KLSI		0x05e9	Kawasaki LSI
 vendor FFC		0x05eb	FFC
 vendor ANKO		0x05ef	Anko Electronic
 vendor PIENGINEERING	0x05f3	P.I. Engineering
 vendor AOC		0x05f6	AOC International
 vendor CHIC		0x05fe	Chic Technology
 vendor BARCO		0x0600	Barco Display Systems
 vendor BRIDGE		0x0607	Bridge Information
 vendor SOLIDYEAR	0x060b	Solid Year
 vendor BIORAD		0x0614	Bio-Rad Laboratories
 vendor MACALLY		0x0618	Macally
 vendor ACTLABS		0x061c	Act Labs
 vendor ALARIS		0x0620	Alaris
 vendor APEX		0x0624	Apex
 vendor CREATIVE3	0x062a	Creative Labs
 vendor VIVITAR		0x0636	Vivitar
 vendor GUNZE		0x0637	Gunze Electronics USA
 vendor AVISION		0x0638	Avision
 vendor TEAC		0x0644	TEAC
 vendor SGI		0x065e	Silicon Graphics
 vendor SANWASUPPLY	0x0663	Sanwa Supply
 vendor MEGATEC		0x0665	Megatec
 vendor LINKSYS		0x066b	Linksys
 vendor ACERSA		0x066e	Acer Semiconductor America
 vendor SIGMATEL		0x066f	Sigmatel
 vendor DRAYTEK		0x0675	DrayTek
 vendor AIWA		0x0677	Aiwa
 vendor ACARD		0x0678	ACARD Technology
 vendor PROLIFIC		0x067b	Prolific Technology
 vendor SIEMENS		0x067c	Siemens
 vendor AVANCELOGIC	0x0680	Avance Logic
 vendor SIEMENS2		0x0681	Siemens
 vendor MINOLTA		0x0686	Minolta
 vendor CHPRODUCTS	0x068e	CH Products
 vendor HAGIWARA		0x0693	Hagiwara Sys-Com
 vendor CTX		0x0698	Chuntex
 vendor ASKEY		0x069a	Askey Computer
 vendor SAITEK		0x06a3	Saitek
 vendor ALCATELT		0x06b9	Alcatel Telecom
 vendor AGFA		0x06bd	AGFA-Gevaert
 vendor ASIAMD		0x06be	Asia Microelectronic Development
 vendor BIZLINK		0x06c4	Bizlink International
 vendor KEYSPAN		0x06cd	Keyspan / InnoSys Inc.
 vendor AASHIMA		0x06d6	Aashima Technology
 vendor LIEBERT		0x06da	Liebert
 vendor MULTITECH	0x06e0	MultiTech
 vendor ADS		0x06e1	ADS Technologies
 vendor ALCATELM		0x06e4	Alcatel Microelectronics
 vendor SIRIUS		0x06ea	Sirius Technologies
 vendor GUILLEMOT	0x06f8	Guillemot
 vendor BOSTON		0x06fd	Boston Acoustics
 vendor SMC		0x0707	Standard Microsystems
 vendor PUTERCOM		0x0708	Putercom
 vendor MCT		0x0711	MCT
 vendor IMATION		0x0718	Imation
 vendor TECLAST		0x071b	Teclast
 vendor SONYERICSSON	0x0731	Sony Ericsson
 vendor EICON		0x0734	Eicon Networks
 vendor SYNTECH		0x0745	Syntech Information
 vendor DIGITALSTREAM	0x074e	Digital Stream
 vendor AUREAL		0x0755	Aureal Semiconductor
 vendor MIDIMAN		0x0763	Midiman
 vendor CYBERPOWER	0x0764	Cyber Power Systems, Inc.
 vendor SURECOM		0x0769	Surecom Technology
 vendor HIDGLOBAL	0x076b	HID Global
 vendor LINKSYS2		0x077b	Linksys
 vendor GRIFFIN		0x077d	Griffin Technology
 vendor SANDISK		0x0781	SanDisk
 vendor JENOPTIK		0x0784	Jenoptik
 vendor LOGITEC		0x0789	Logitec
 vendor NOKIA2		0x078b	Nokia
 vendor BRIMAX		0x078e	Brimax
 vendor AXIS		0x0792	Axis Communications
 vendor ABL		0x0794	ABL Electronics
 vendor SAGEM		0x079b	Sagem
 vendor SUNCOMM		0x079c	Sun Communications, Inc.
 vendor ALFADATA		0x079d	Alfadata Computer
 vendor NATIONALTECH	0x07a2	National Technical Systems
 vendor ONNTO		0x07a3	Onnto
 vendor BE		0x07a4	Be
 vendor ADMTEK		0x07a6	ADMtek
 vendor COREGA		0x07aa	Corega
 vendor FREECOM		0x07ab	Freecom
 vendor MICROTECH	0x07af	Microtech
 vendor GENERALINSTMNTS	0x07b2	General Instruments (Motorola)
 vendor OLYMPUS		0x07b4	Olympus
 vendor ABOCOM		0x07b8	AboCom Systems
 vendor KEISOKUGIKEN	0x07c1	Keisokugiken
 vendor ONSPEC		0x07c4	OnSpec
 vendor APG		0x07c5	APG Cash Drawer
 vendor BUG		0x07c8	B.U.G.
 vendor ALLIEDTELESYN	0x07c9	Allied Telesyn International
 vendor AVERMEDIA	0x07ca	AVerMedia Technologies
 vendor SIIG		0x07cc	SIIG
 vendor CASIO		0x07cf	CASIO
 vendor DLINK2		0x07d1	D-Link
 vendor APTIO		0x07d2	Aptio Products
 vendor ARASAN		0x07da	Arasan Chip Systems
 vendor ALLIEDCABLE	0x07e6	Allied Cable
 vendor STSN		0x07ef	STSN
 vendor CENTURY		0x07f7	Century Corp
 vendor NEWLINK		0x07ff	NEWlink
 vendor ZOOM		0x0803	Zoom Telephonics
 vendor PCS		0x0810	Personal Communication Systems
 vendor ALPHASMART	0x081e	AlphaSmart, Inc.
 vendor BROADLOGIC	0x0827	BroadLogic
 vendor HANDSPRING	0x082d	Handspring
 vendor PALM		0x0830	Palm Computing
 vendor SOURCENEXT	0x0833	SOURCENEXT
 vendor ACTIONSTAR	0x0835	Action Star Enterprise
 vendor SAMSUNG_TECHWIN	0x0839	Samsung Techwin
 vendor ACCTON		0x083a	Accton Technology
 vendor DIAMOND		0x0841	Diamond
 vendor NETGEAR		0x0846	BayNETGEAR
 vendor TOPRE		0x0853	Topre Corporation
 vendor ACTIVEWIRE	0x0854	ActiveWire
 vendor BBELECTRONICS	0x0856	B&B Electronics
 vendor PORTGEAR		0x085a	PortGear
 vendor NETGEAR2		0x0864	Netgear
 vendor SYSTEMTALKS	0x086e	System Talks
 vendor METRICOM		0x0870	Metricom
 vendor ADESSOKBTEK	0x087c	ADESSO/Kbtek America
 vendor JATON		0x087d	Jaton
 vendor APT		0x0880	APT Technologies
 vendor BOCARESEARCH	0x0885	Boca Research
 vendor ANDREA		0x08a8	Andrea Electronics
 vendor BURRBROWN	0x08bb	Burr-Brown Japan
 vendor 2WIRE		0x08c8	2Wire
 vendor AIPTEK		0x08ca	AIPTEK International
 vendor SMARTBRIDGES	0x08d1	SmartBridges
 vendor FUJITSUSIEMENS	0x08d4	Fujitsu-Siemens
 vendor BILLIONTON	0x08dd	Billionton Systems
 vendor GEMALTO		0x08e6	Gemalto SA
 vendor EXTENDED		0x08e9	Extended Systems
 vendor MSYSTEMS		0x08ec	M-Systems
 vendor DIGIANSWER	0x08fd	Digianswer
 vendor AUTHENTEC	0x08ff	AuthenTec
 vendor AUDIOTECHNICA	0x0909	Audio-Technica
 vendor TRUMPION		0x090a	Trumpion Microelectronics
 vendor FEIYA		0x090c	Feiya
 vendor ALATION		0x0910	Alation Systems
 vendor GLOBESPAN	0x0915	Globespan
 vendor CONCORDCAMERA	0x0919	Concord Camera
 vendor GARMIN		0x091e	Garmin International
 vendor GOHUBS		0x0921	GoHubs
 vendor XEROX		0x0924	Xerox
 vendor BIOMETRIC	0x0929	American Biometric Company
 vendor TOSHIBA		0x0930	Toshiba
 vendor PLEXTOR		0x093b	Plextor
 vendor INTREPIDCS	0x093c	Intrepid
 vendor YANO		0x094f	Yano
 vendor KINGSTON		0x0951	Kingston Technology
 vendor BLUEWATER	0x0956	BlueWater Systems
 vendor AGILENT		0x0957	Agilent Technologies
 vendor GUDE		0x0959	Gude ADS
 vendor PORTSMITH	0x095a	Portsmith
 vendor ACERW		0x0967	Acer
 vendor ADIRONDACK	0x0976	Adirondack Wire & Cable
 vendor BECKHOFF		0x0978	Beckhoff
 vendor MINDSATWORK	0x097a	Minds At Work
 vendor POINTCHIPS	0x09a6	PointChips
 vendor INTERSIL		0x09aa	Intersil
 vendor ALTIUS		0x09b3	Altius Solutions
 vendor ARRIS		0x09c1	Arris Interactive
 vendor ACTIVCARD	0x09c3	ACTIVCARD
 vendor ACTISYS		0x09c4	ACTiSYS
 vendor NOVATEL2		0x09d7	Novatel Wireless
 vendor AFOURTECH	0x09da	A-FOUR TECH
 vendor AIMEX		0x09dc	AIMEX
 vendor ADDONICS		0x09df	Addonics Technologies
 vendor AKAI		0x09e8	AKAI professional M.I.
 vendor ARESCOM		0x09f5	ARESCOM
 vendor BAY		0x09f9	Bay Associates
 vendor ALTERA		0x09fb	Altera
 vendor CSR		0x0a12	Cambridge Silicon Radio
 vendor TREK		0x0a16	Trek Technology
 vendor ASAHIOPTICAL	0x0a17	Asahi Optical
 vendor BOCASYSTEMS	0x0a43	Boca Systems
 vendor SHANTOU		0x0a46	ShanTou
 vendor MEDIAGEAR	0x0a48	MediaGear
 vendor BROADCOM		0x0a5c	Broadcom
 vendor GREENHOUSE	0x0a6b	GREENHOUSE
 vendor GEOCAST		0x0a79	Geocast Network Systems
 vendor IDQUANTIQUE	0x0aba	id Quantique
 vendor ZYDAS		0x0ace	Zydas Technology Corporation
 vendor NEODIO		0x0aec	Neodio
 vendor OPTION		0x0af0	Option N.V.
 vendor ASUS		0x0b05	ASUSTeK Computer
 vendor TODOS		0x0b0c	Todos Data System
 vendor SIIG2		0x0b39	SIIG
 vendor TEKRAM		0x0b3b	Tekram Technology
 vendor HAL		0x0b41	HAL Corporation
 vendor EMS		0x0b43	EMS Production
 vendor NEC2		0x0b62	NEC
 vendor ADLINK		0x0b63	ADLINK Technoligy, Inc.
 vendor ATI2		0x0b6f	ATI
 vendor ZEEVO		0x0b7a	Zeevo, Inc.
 vendor KURUSUGAWA	0x0b7e	Kurusugawa Electronics, Inc.
 vendor SMART		0x0b8c	Smart Technologies
 vendor ASIX		0x0b95	ASIX Electronics
 vendor O2MICRO		0x0b97	O2 Micro, Inc.
 vendor USR		0x0baf	U.S. Robotics
 vendor AMBIT		0x0bb2	Ambit Microsystems
 vendor HTC		0x0bb4	HTC
 vendor REALTEK		0x0bda	Realtek
 vendor MEI		0x0bed	MEI
 vendor ADDONICS2	0x0bf6	Addonics Technology
 vendor FSC		0x0bf8	Fujitsu Siemens Computers
 vendor AGATE		0x0c08	Agate Technologies
 vendor DMI		0x0c0b	DMI
 vendor CHICONY2		0x0c45	Chicony
 vendor REINERSCT	0x0c4b	Reiner-SCT
 vendor SEALEVEL		0x0c52	Sealevel System
 vendor LUWEN		0x0c76	Luwen
 vendor KYOCERA2		0x0c88	Kyocera Wireless Corp.
 vendor ZCOM		0x0cde	Z-Com
 vendor ATHEROS2		0x0cf3	Atheros Communications
 vendor TANGTOP		0x0d3d	Tangtop
 vendor SMC3		0x0d5c	Standard Microsystems
 vendor ADDON		0x0d7d	Add-on Technology
 vendor ACDC		0x0d7e	American Computer & Digital Components
 vendor CMEDIA		0x0d8c	CMEDIA
 vendor CONCEPTRONIC	0x0d8e	Conceptronic
 vendor SKANHEX		0x0d96	Skanhex Technology, Inc.
 vendor MSI		0x0db0	Micro Star International
 vendor ELCON		0x0db7	ELCON Systemtechnik
 vendor NETAC		0x0dd8	Netac
 vendor SITECOMEU	0x0df6	Sitecom Europe
 vendor MOBILEACTION	0x0df7	Mobile Action
 vendor AMIGO		0x0e0b	Amigo Technology
 vendor SPEEDDRAGON	0x0e55	Speed Dragon Multimedia
 vendor HAWKING		0x0e66	Hawking
 vendor FOSSIL		0x0e67	Fossil, Inc
 vendor GMATE		0x0e7e	G.Mate, Inc
 vendor OTI		0x0ea0	Ours Technology
 vendor YISO		0x0eab	Yiso Wireless Co.
 vendor PILOTECH		0x0eaf	Pilotech
 vendor NOVATECH		0x0eb0	NovaTech
 vendor ITEGNO		0x0eba	iTegno
 vendor WINMAXGROUP	0x0ed1	WinMaxGroup
 vendor TOD		0x0ede	TOD
 vendor EGALAX		0x0eef	eGalax, Inc.
 vendor AIRPRIME		0x0f3d	AirPrime, Inc.
 vendor MICROTUNE	0x0f4d	Microtune
 vendor VTECH		0x0f88	VTech
 vendor FALCOM		0x0f94	Falcom Wireless Communications GmbH
 vendor RIM		0x0fca	Research In Motion
 vendor DYNASTREAM	0x0fcf	Dynastream Innovations
 vendor QUALCOMM		0x1004	Qualcomm
 vendor APACER		0x1005	Apacer
 vendor MOTOROLA4	0x100d	Motorola
 vendor AIRPLUS		0x1011  Airplus
 vendor DESKNOTE		0x1019	Desknote
 vendor GIGABYTE		0x1044	GIGABYTE
 vendor WESTERN		0x1058	Western Digital
 vendor MOTOROLA		0x1063	Motorola
 vendor CCYU		0x1065	CCYU Technology
 vendor CURITEL		0x106c	Curitel Communications Inc
 vendor SILABS2		0x10a6	SILABS2
 vendor USI		0x10ab	USI
 vendor PLX		0x10b5	PLX
 vendor ASANTE		0x10bd	Asante
 vendor SILABS		0x10c4	Silicon Labs
 vendor SILABS3		0x10c5	Silicon Labs
 vendor SILABS4		0x10ce	Silicon Labs
 vendor ACTIONS		0x10d6  Actions
 vendor ANALOG		0x1110	Analog Devices
 vendor TENX		0x1130	Ten X Technology, Inc.
 vendor ISSC		0x1131	Integrated System Solution Corp.
 vendor JRC		0x1145	Japan Radio Company
 vendor SPHAIRON		0x114b	Sphairon Access Systems GmbH
 vendor DELORME		0x1163	DeLorme
 vendor SERVERWORKS	0x1166	ServerWorks
 vendor DLINK3		0x1186	Dlink
 vendor ACERCM		0x1189	Acer Communications & Multimedia
 vendor SIERRA		0x1199	Sierra Wireless
 vendor SANWA		0x11ad	Sanwa Electric Instrument Co., Ltd.
 vendor TOPFIELD		0x11db	Topfield Co., Ltd
 vendor SIEMENS3		0x11f5	Siemens
 vendor NETINDEX		0x11f6	NetIndex
 vendor ALCATEL		0x11f7	Alcatel
 vendor UNKNOWN3		0x1233	Unknown vendor
 vendor TSUNAMI		0x1241	Tsunami
 vendor PHEENET		0x124a	Pheenet
 vendor TARGUS		0x1267	Targus
 vendor TWINMOS		0x126f	TwinMOS
 vendor TENDA		0x1286	Tenda
 vendor CREATIVE2	0x1292	Creative Labs
 vendor BELKIN2		0x1293	Belkin Components
 vendor CYBERTAN		0x129b	CyberTAN Technology
 vendor HUAWEI		0x12d1	Huawei Technologies
 vendor ARANEUS		0x12d8	Araneus Information Systems
 vendor TAPWAVE		0x12ef	Tapwave
 vendor AINCOMM		0x12fd	Aincomm
 vendor MOBILITY		0x1342	Mobility
 vendor DICKSMITH	0x1371	Dick Smith Electronics
 vendor NETGEAR3		0x1385	Netgear
 vendor BALTECH		0x13ad	Baltech
 vendor CISCOLINKSYS	0x13b1	Cisco-Linksys
 vendor SHARK		0x13d2	Shark
 vendor AZUREWAVE	0x13d3	AzureWave
 vendor EMTEC		0x13fe	Emtec
 vendor NOVATEL		0x1410	Novatel Wireless
 vendor MERLIN		0x1416	Merlin
 vendor WISTRONNEWEB	0x1435	Wistron NeWeb
 vendor RADIOSHACK	0x1453	Radio Shack
 vendor HUAWEI3COM	0x1472	Huawei-3Com
 vendor ABOCOM2		0x1482	AboCom Systems
 vendor SILICOM		0x1485	Silicom
 vendor RALINK		0x148f	Ralink Technology
 vendor IMAGINATION	0x149a	Imagination Technologies
 vendor CONCEPTRONIC2	0x14b2	Conceptronic
 vendor SUPERTOP		0x14cd	Super Top
 vendor PLANEX3		0x14ea	Planex Communications
 vendor SILICONPORTALS	0x1527	Silicon Portals
 vendor UBIQUAM		0x1529	UBIQUAM Co., Ltd.
 vendor JMICRON		0x152d	JMicron
 vendor UBLOX		0x1546	U-blox
 vendor PNY		0x154b	PNY
 vendor OWEN		0x1555	Owen
 vendor OQO		0x1557	OQO
 vendor UMEDIA		0x157e	U-MEDIA Communications
 vendor FIBERLINE	0x1582	Fiberline
 vendor SPARKLAN		0x15a9	SparkLAN
 vendor AMIT2		0x15c5	AMIT
 vendor SOHOWARE		0x15e8	SOHOware
 vendor UMAX		0x1606	UMAX Data Systems
 vendor INSIDEOUT	0x1608	Inside Out Networks
 vendor AMOI		0x1614	Amoi Electronics
 vendor GOODWAY		0x1631	Good Way Technology
 vendor ENTREGA		0x1645	Entrega
 vendor ACTIONTEC	0x1668	Actiontec Electronics
 vendor CLIPSAL		0x166a	Clipsal
 vendor CISCOLINKSYS2	0x167b  Cisco-Linksys
 vendor ATHEROS		0x168c	Atheros Communications
 vendor GIGASET		0x1690	Gigaset
 vendor GLOBALSUN	0x16ab	Global Sun Technology
 vendor ANYDATA		0x16d5	AnyDATA Corporation
 vendor JABLOTRON	0x16d6	Jablotron
 vendor CMOTECH		0x16d8	C-motech
 vendor AXESSTEL		0x1726  Axesstel Co., Ltd.
 vendor LINKSYS4		0x1737	Linksys
 vendor SENAO		0x1740	Senao
 vendor ASUS2		0x1761	ASUS
 vendor SWEEX2		0x177f	Sweex
 vendor METAGEEK		0x1781	MetaGeek
 vendor WAVESENSE	0x17f4	WaveSense
 vendor VAISALA		0x1843	Vaisala
 vendor AMIT		0x18c5	AMIT
 vendor GOOGLE		0x18d1	Google
 vendor QCOM		0x18e8	Qcom
 vendor ELV		0x18ef	ELV
 vendor LINKSYS3		0x1915	Linksys
 vendor QUALCOMMINC	0x19d2	Qualcomm, Incorporated
 vendor WCH2		0x1a86	QinHeng Electronics
 vendor STELERA		0x1a8d	Stelera Wireless
 vendor MATRIXORBITAL	0x1b3d	Matrix Orbital 
 vendor OVISLINK		0x1b75	OvisLink
 vendor TCTMOBILE	0x1bbb  TCT Mobile
 vendor TELIT		0x1bc7  Telit
 vendor LONGCHEER	0x1c9e	Longcheer Holdings, Ltd.
 vendor MPMAN		0x1cae	MpMan
 vendor DRESDENELEKTRONIK 0x1cf1 dresden elektronik
 vendor NEOTEL		0x1d09	Neotel
 vendor PEGATRON		0x1d4d	Pegatron
 vendor QISDA		0x1da5  Qisda
 vendor METAGEEK2	0x1dd5	MetaGeek
 vendor ALINK		0x1e0e  Alink
 vendor AIRTIES		0x1eda	AirTies
 vendor DLINK		0x2001	D-Link
 vendor PLANEX2		0x2019	Planex Communications
 vendor HAUPPAUGE2	0x2040	Hauppauge Computer Works
 vendor TLAYTECH		0x20b9	Tlay Tech
 vendor ENCORE		0x203d	Encore
 vendor PARA		0x20b8	PARA Industrial
 vendor ERICSSON		0x2282	Ericsson
 vendor MOTOROLA2	0x22b8	Motorola
 vendor TRIPPLITE	0x2478	Tripp-Lite
 vendor HIROSE		0x2631	Hirose Electric
 vendor NHJ		0x2770	NHJ
 vendor PLANEX		0x2c02	Planex Communications
 vendor VIDZMEDIA	0x3275	VidzMedia Pte Ltd
 vendor AEI		0x3334	AEI
 vendor HANK		0x3353	Hank Connection
 vendor PQI		0x3538	PQI
 vendor DAISY		0x3579	Daisy Technology
 vendor NI		0x3923	National Instruments
 vendor MICRONET		0x3980	Micronet Communications
 vendor IODATA2		0x40bb	I-O Data
 vendor IRIVER		0x4102	iRiver
 vendor DELL		0x413c	Dell
 vendor WCH		0x4348	QinHeng Electronics
 vendor ACEECA		0x4766	Aceeca
 vendor AVERATEC		0x50c2	Averatec
 vendor SWEEX		0x5173	Sweex
 vendor PROLIFIC2	0x5372	Prolific Technologies
 vendor ONSPEC2		0x55aa	OnSpec Electronic Inc.
 vendor ZINWELL		0x5a57	Zinwell
 vendor SITECOM		0x6189	Sitecom
 vendor ARKMICRO		0x6547	Arkmicro Technologies Inc.
 vendor 3COM2		0x6891	3Com
 vendor EDIMAX		0x7392	Edimax
 vendor INTEL		0x8086	Intel
 vendor INTEL2		0x8087	Intel
 vendor ALLWIN		0x8516	ALLWIN Tech
 vendor SITECOM2		0x9016	Sitecom
 vendor MOSCHIP		0x9710	MosChip Semiconductor
 vendor MARVELL		0x9e88	Marvell Technology Group Ltd.
 vendor 3COM3		0xa727	3Com
 vendor DATAAPEX		0xdaae	DataApex
 vendor HP2		0xf003	Hewlett Packard
 vendor USRP		0xfffe	GNU Radio USRP
 
 /*
  * List of known products.  Grouped by vendor.
  */
 
 /* 3Com products */
 product 3COM HOMECONN		0x009d	HomeConnect Camera
 product 3COM 3CREB96		0x00a0	Bluetooth USB Adapter
 product 3COM 3C19250		0x03e8	3C19250 Ethernet Adapter
 product 3COM 3CRSHEW696		0x0a01	3CRSHEW696 Wireless Adapter
 product 3COM 3C460		0x11f8	HomeConnect 3C460
 product 3COM USR56K		0x3021	U.S.Robotics 56000 Voice FaxModem Pro
 product 3COM 3C460B		0x4601	HomeConnect 3C460B
 product 3COM2 3CRUSB10075	0xa727	3CRUSB10075
 product 3COM3 AR5523_1		0x6893	AR5523
 product 3COM3 AR5523_2		0x6895	AR5523
 product 3COM3 AR5523_3		0x6897	AR5523
 
 product 3COMUSR OFFICECONN	0x0082	3Com OfficeConnect Analog Modem
 product 3COMUSR USRISDN		0x008f	3Com U.S. Robotics Pro ISDN TA
 product 3COMUSR HOMECONN	0x009d	3Com HomeConnect Camera
 product 3COMUSR USR56K		0x3021	U.S. Robotics 56000 Voice FaxModem Pro
 
 /* AboCom products */
 product ABOCOM XX1		0x110c	XX1
 product ABOCOM XX2		0x200c	XX2
 product ABOCOM RT2770		0x2770	RT2770
 product ABOCOM RT2870		0x2870	RT2870
 product ABOCOM RT3070		0x3070	RT3070
 product ABOCOM RT3071		0x3071	RT3071
 product ABOCOM RT3072		0x3072	RT3072
 product ABOCOM2 RT2870_1	0x3c09	RT2870
 product ABOCOM URE450		0x4000	URE450 Ethernet Adapter
 product ABOCOM UFE1000		0x4002	UFE1000 Fast Ethernet Adapter
 product ABOCOM DSB650TX_PNA	0x4003	1/10/100 Ethernet Adapter
 product ABOCOM XX4		0x4004	XX4
 product ABOCOM XX5		0x4007	XX5
 product ABOCOM XX6		0x400b	XX6
 product ABOCOM XX7		0x400c	XX7
 product ABOCOM RTL8151		0x401a	RTL8151
 product ABOCOM XX8		0x4102	XX8
 product ABOCOM XX9		0x4104	XX9
 product ABOCOM UF200		0x420a	UF200 Ethernet
 product ABOCOM WL54		0x6001	WL54
 product ABOCOM XX10		0xabc1	XX10
 product ABOCOM BWU613		0xb000	BWU613
 product ABOCOM HWU54DM		0xb21b	HWU54DM
 product ABOCOM RT2573_2		0xb21c	RT2573
 product ABOCOM RT2573_3		0xb21d	RT2573
 product ABOCOM RT2573_4		0xb21e	RT2573
 product ABOCOM WUG2700		0xb21f	WUG2700
 
 /* Accton products */
 product ACCTON USB320_EC	0x1046	USB320-EC Ethernet Adapter
 product ACCTON 2664W		0x3501	2664W
 product ACCTON 111		0x3503	T-Sinus 111 Wireless Adapter
 product ACCTON SMCWUSBG_NF	0x4505	SMCWUSB-G (no firmware)
 product ACCTON SMCWUSBG		0x4506	SMCWUSB-G
 product ACCTON SMCWUSBTG2_NF	0x4507	SMCWUSBT-G2 (no firmware)
 product ACCTON SMCWUSBTG2	0x4508	SMCWUSBT-G2
 product ACCTON PRISM_GT		0x4521	PrismGT USB 2.0 WLAN
 product ACCTON SS1001		0x5046	SpeedStream Ethernet Adapter
 product ACCTON RT2870_2		0x6618	RT2870
 product ACCTON RT3070		0x7511	RT3070
 product ACCTON RT2770		0x7512	RT2770
 product ACCTON RT2870_3		0x7522	RT2870
 product ACCTON RT2870_5		0x8522	RT2870
 product ACCTON RT3070_4		0xa512	RT3070
 product ACCTON RT2870_4		0xa618	RT2870
 product	ACCTON RT3070_1		0xa701	RT3070
 product	ACCTON RT3070_2		0xa702	RT3070
 product ACCTON RT2870_1		0xb522	RT2870
 product	ACCTON RT3070_3		0xc522	RT3070
 product	ACCTON RT3070_5		0xd522	RT3070
 product ACCTON ZD1211B		0xe501	ZD1211B
 
 /* Aceeca products */
 product ACEECA MEZ1000		0x0001	MEZ1000 RDA
 
 /* Acer Communications & Multimedia (oemd by Surecom) */
 product ACERCM EP1427X2		0x0893	EP-1427X-2 Ethernet Adapter
 
 /* Acer Labs products */
 product ACERLABS M5632		0x5632	USB 2.0 Data Link
 
 /* Acer Peripherals, Inc. products */
 product ACERP ACERSCAN_C310U	0x12a6	Acerscan C310U
 product ACERP ACERSCAN_320U	0x2022	Acerscan 320U
 product ACERP ACERSCAN_640U	0x2040	Acerscan 640U
 product ACERP ACERSCAN_620U	0x2060	Acerscan 620U
 product ACERP ACERSCAN_4300U	0x20b0	Benq 3300U/4300U
 product ACERP ACERSCAN_640BT	0x20be	Acerscan 640BT
 product ACERP ACERSCAN_1240U	0x20c0	Acerscan 1240U
 product ACERP S81		0x4027	BenQ S81 phone
 product ACERP H10		0x4068	AWL400 Wireless Adapter
 product ACERP ATAPI		0x6003	ATA/ATAPI Adapter
 product ACERP AWL300		0x9000	AWL300 Wireless Adapter
 product ACERP AWL400		0x9001	AWL400 Wireless Adapter
 
 /* Acer Warp products */
 product ACERW WARPLINK		0x0204	Warplink
 
 /* Actions products */
 product ACTIONS MP4		0x1101  Actions MP4 Player
 
 /* Actiontec, Inc. products */
 product ACTIONTEC PRISM_25	0x0408	Prism2.5 Wireless Adapter
 product ACTIONTEC PRISM_25A	0x0421	Prism2.5 Wireless Adapter A
 product ACTIONTEC FREELAN	0x6106	ROPEX FreeLan 802.11b
 product ACTIONTEC UAT1		0x7605	UAT1 Wireless Ethernet Adapter
 
 /* ACTiSYS products */
 product ACTISYS IR2000U		0x0011	ACT-IR2000U FIR
 
 /* ActiveWire, Inc. products */
 product ACTIVEWIRE IOBOARD	0x0100	I/O Board
 product ACTIVEWIRE IOBOARD_FW1	0x0101	I/O Board, rev. 1 firmware
 
 /* Adaptec products */
 product ADAPTEC AWN8020		0x0020	AWN-8020 WLAN
 
 /* Addtron products */
 product ADDTRON AWU120		0xff31	AWU-120
 
 /* ADLINK Technology products */
 product ADLINK ND6530		0x6530	ND-6530 USB-Serial
 
 /* ADMtek products */
 product ADMTEK PEGASUSII_4	0x07c2	AN986A Ethernet
 product ADMTEK PEGASUS		0x0986	AN986 Ethernet
 product ADMTEK PEGASUSII	0x8511	AN8511 Ethernet
 product ADMTEK PEGASUSII_2	0x8513	AN8513 Ethernet
 product ADMTEK PEGASUSII_3	0x8515	AN8515 Ethernet
 
 /* ADDON products */
 /* PNY OEMs these */
 product ADDON ATTACHE		0x1300	USB 2.0 Flash Drive
 product ADDON ATTACHE		0x1300	USB 2.0 Flash Drive
 product ADDON A256MB		0x1400	Attache 256MB USB 2.0 Flash Drive
 product ADDON DISKPRO512	0x1420	USB 2.0 Flash Drive (DANE-ELEC zMate 512MB USB flash drive)
 
 /* Addonics products */
 product ADDONICS2 CABLE_205	0xa001	Cable 205
 
 /* ADS products */
 product ADS UBS10BT		0x0008	UBS-10BT Ethernet
 product ADS UBS10BTX		0x0009	UBS-10BT Ethernet
 
 /* AEI products */
 product AEI FASTETHERNET	0x1701	Fast Ethernet
 
 /* Agate Technologies products */
 product AGATE QDRIVE		0x0378	Q-Drive
 
 /* AGFA products */
 product AGFA SNAPSCAN1212U	0x0001	SnapScan 1212U
 product AGFA SNAPSCAN1236U	0x0002	SnapScan 1236U
 product AGFA SNAPSCANTOUCH	0x0100	SnapScan Touch
 product AGFA SNAPSCAN1212U2	0x2061	SnapScan 1212U
 product AGFA SNAPSCANE40	0x208d	SnapScan e40
 product AGFA SNAPSCANE50	0x208f	SnapScan e50
 product AGFA SNAPSCANE20	0x2091	SnapScan e20
 product AGFA SNAPSCANE25	0x2095	SnapScan e25
 product AGFA SNAPSCANE26	0x2097	SnapScan e26
 product AGFA SNAPSCANE52	0x20fd	SnapScan e52
 
 /* Ain Communication Technology products */
 product AINCOMM AWU2000B	0x1001	AWU2000B Wireless Adapter
 
 /* AIPTEK products */
 product AIPTEK POCKETCAM3M	0x2011	PocketCAM 3Mega
 product AIPTEK2 PENCAM_MEGA_1_3 0x504a	PenCam Mega 1.3
 product AIPTEK2 SUNPLUS_TECH	0x0c15	Sunplus Technology Inc.
 
 /* AirPlus products */
 product AIRPLUS MCD650		0x3198	MCD650 modem
 
 /* AirPrime products */
 product AIRPRIME PC5220		0x0112	CDMA Wireless PC Card
 
 /* AirTies products */
 product AIRTIES RT3070		0x2310	RT3070
 
 /* AKS products */
 product AKS USBHASP		0x0001	USB-HASP 0.06
 
 /* Alcatel products */
 product ALCATEL OT535		0x02df	One Touch 535/735
 
 /* Alcor Micro, Inc. products */
 product ALCOR2 KBD_HUB		0x2802	Kbd Hub
 
 product ALCOR SDCR_6335		0x6335	SD/MMC Card Reader
 product ALCOR SDCR_6362		0x6362	SD/MMC Card Reader
 product ALCOR TRANSCEND		0x6387	Transcend JetFlash Drive
 product ALCOR MA_KBD_HUB	0x9213	MacAlly Kbd Hub
 product ALCOR AU9814		0x9215	AU9814 Hub
 product ALCOR UMCR_9361		0x9361	USB Multimedia Card Reader
 product ALCOR SM_KBD		0x9410	MicroConnectors/StrongMan Keyboard
 product ALCOR NEC_KBD_HUB	0x9472	NEC Kbd Hub
 product ALCOR AU9720 		0x9720	USB2 - RS-232
 product ALCOR AU6390	0x6390	AU6390 USB-IDE converter
 
 /* Alink products */
 product ALINK DWM652U5		0xce16	DWM-652
 product ALINK 3G		0x9000	3G modem
 product ALINK 3GU		0x9200	3G modem
 
 /* Altec Lansing products */
 product ALTEC ADA70		0x0070	ADA70 Speakers
 product ALTEC ASC495		0xff05	ASC495 Speakers
 
 /* Allied Telesyn International products */
 product ALLIEDTELESYN ATUSB100	0xb100	AT-USB100
 
 /* ALLWIN Tech products */
 product ALLWIN RT2070		0x2070	RT2070
 product ALLWIN RT2770		0x2770	RT2770
 product ALLWIN RT2870		0x2870	RT2870
 product ALLWIN RT3070		0x3070	RT3070
 product ALLWIN RT3071		0x3071	RT3071
 product ALLWIN RT3072		0x3072	RT3072
 product ALLWIN RT3572		0x3572	RT3572
 
 /* AlphaSmart, Inc. products */
 product ALPHASMART DANA_KB	0xdbac	AlphaSmart Dana Keyboard
 product ALPHASMART DANA_SYNC	0xdf00	AlphaSmart Dana HotSync
 
 /* Amoi products */
 product AMOI H01		0x0800	H01 3G modem
 product AMOI H01A		0x7002	H01A 3G modem
 product AMOI H02		0x0802	H02 3G modem
 
 /* American Power Conversion products */
 product APC UPS			0x0002	Uninterruptible Power Supply
 
 /* Ambit Microsystems products */
 product AMBIT WLAN		0x0302	WLAN
 product AMBIT NTL_250		0x6098	NTL 250 cable modem
 
 /* Apacer products */
 product APACER HT202		0xb113	USB 2.0 Flash Drive
 
 /* American Power Conversion products */
 product APC UPS			0x0002	Uninterruptible Power Supply
 
 /* Amigo Technology products */
 product AMIGO RT2870_1		0x9031	RT2870
 product AMIGO RT2870_2		0x9041	RT2870
 
 /* AMIT products */
 product AMIT CGWLUSB2GO		0x0002	CG-WLUSB2GO
 product AMIT CGWLUSB2GNR	0x0008	CG-WLUSB2GNR
 product AMIT RT2870_1		0x0012	RT2870
 
 /* AMIT(2) products */
 product AMIT2 RT2870		0x0008	RT2870
 
 /* Anchor products */
 product ANCHOR SERIAL		0x2008	Serial
 product ANCHOR EZUSB		0x2131	EZUSB
 product ANCHOR EZLINK		0x2720	EZLINK
 
 /* AnyData products */
 product ANYDATA ADU_620UW	0x6202	CDMA 2000 EV-DO USB Modem
 product ANYDATA ADU_E100X	0x6501	CDMA 2000 1xRTT/EV-DO USB Modem
 product ANYDATA ADU_500A	0x6502	CDMA 2000 EV-DO USB Modem
 
 /* AOX, Inc. products */
 product AOX USB101		0x0008	Ethernet
 
 /* American Power Conversion products */
 product APC UPS			0x0002	Uninterruptible Power Supply
 
 /* Apple Computer products */
 product APPLE IMAC_KBD		0x0201	USB iMac Keyboard
 product APPLE KBD		0x0202	USB Keyboard M2452
 product APPLE EXT_KBD		0x020c	Apple Extended USB Keyboard
 product APPLE KBD_TP_ANSI	0x0223	Apple Internal Keyboard/Trackpad (Wellspring/ANSI)
 product APPLE KBD_TP_ISO	0x0224	Apple Internal Keyboard/Trackpad (Wellspring/ISO)
 product APPLE KBD_TP_JIS	0x0225	Apple Internal Keyboard/Trackpad (Wellspring/JIS)
 product APPLE KBD_TP_ANSI2	0x0230	Apple Internal Keyboard/Trackpad (Wellspring2/ANSI)
 product APPLE KBD_TP_ISO2	0x0231	Apple Internal Keyboard/Trackpad (Wellspring2/ISO)
 product APPLE KBD_TP_JIS2	0x0232	Apple Internal Keyboard/Trackpad (Wellspring2/JIS)
 product APPLE MOUSE		0x0301	Mouse M4848
 product APPLE OPTMOUSE		0x0302	Optical mouse
 product APPLE MIGHTYMOUSE	0x0304	Mighty Mouse
 product APPLE KBD_HUB		0x1001	Hub in Apple USB Keyboard
 product APPLE EXT_KBD_HUB	0x1003	Hub in Apple Extended USB Keyboard
 product APPLE SPEAKERS		0x1101	Speakers
 product APPLE IPOD		0x1201	iPod
 product APPLE IPOD2G		0x1202	iPod 2G
 product APPLE IPOD3G		0x1203	iPod 3G
 product APPLE IPOD_04		0x1204	iPod '04'
 product APPLE IPODMINI		0x1205	iPod Mini
 product APPLE IPOD_06		0x1206	iPod '06'
 product APPLE IPOD_07		0x1207	iPod '07'
 product APPLE IPOD_08		0x1208	iPod '08'
 product APPLE IPODVIDEO		0x1209	iPod Video
 product APPLE IPODNANO		0x120a	iPod Nano
 product APPLE IPHONE		0x1290	iPhone
 product APPLE IPOD_TOUCH	0x1291	iPod Touch
 product APPLE IPHONE_3G		0x1292	iPhone 3G
 product APPLE IPHONE_3GS	0x1294	iPhone 3GS
 product APPLE IPHONE_4		0x1297	iPhone 4
 product APPLE IPAD		0x129a	iPad
 product APPLE ETHERNET		0x1402	Ethernet A1277
 
 /* Arkmicro Technologies */
 product ARKMICRO ARK3116	0x0232	ARK3116 Serial
 
 /* Asahi Optical products */
 product ASAHIOPTICAL OPTIO230	0x0004	Digital camera
 product ASAHIOPTICAL OPTIO330	0x0006	Digital camera
 
 /* Asante products */
 product ASANTE EA		0x1427	Ethernet
 
 /* ASIX Electronics products */
 product ASIX AX88172		0x1720	10/100 Ethernet
 product ASIX AX88178		0x1780	AX88178
 product ASIX AX88772		0x7720	AX88772
 product ASIX AX88772A		0x772a	AX88772A USB 2.0 10/100 Ethernet
 
 /* ASUS products */
 product ASUS2 USBN11		0x0b05	USB-N11
 product ASUS WL167G		0x1707	WL-167g Wireless Adapter
 product ASUS WL159G		0x170c	WL-159g
 product ASUS A9T_WIFI		0x171b	A9T wireless
 product ASUS P5B_WIFI		0x171d	P5B wireless
 product ASUS RT2573_1		0x1723	RT2573
 product ASUS RT2573_2		0x1724	RT2573
 product ASUS LCM		0x1726	LCM display
 product ASUS RT2870_1		0x1731	RT2870
 product ASUS RT2870_2		0x1732	RT2870
 product ASUS RT2870_3		0x1742	RT2870
 product ASUS RT2870_4		0x1760	RT2870
 product ASUS RT2870_5		0x1761	RT2870
 product	ASUS USBN13		0x1784	USB-N13
 product	ASUS RT3070_1		0x1790	RT3070
 product ASUS A730W		0x4202	ASUS MyPal A730W
 product ASUS P535		0x420f	ASUS P535 PDA
 product	ASUS GMSC		0x422f	ASUS Generic Mass Storage
 product ASUS RT2570		0x1706	RT2500USB Wireless Adapter
 
 /* ATen products */
 product ATEN UC1284		0x2001	Parallel printer
 product ATEN UC10T		0x2002	10Mbps Ethernet
 product ATEN UC110T		0x2007	UC-110T Ethernet
 product ATEN UC232A		0x2008	Serial
 product ATEN UC210T		0x2009	UC-210T Ethernet
 product ATEN DSB650C		0x4000	DSB-650C
 
 /* Atheros Communications products */
 product ATHEROS AR5523		0x0001	AR5523
 product ATHEROS AR5523_NF	0x0002	AR5523 (no firmware)
 product ATHEROS2 AR5523_1	0x0001	AR5523
 product ATHEROS2 AR5523_1_NF	0x0002	AR5523 (no firmware)
 product ATHEROS2 AR5523_2	0x0003	AR5523
 product ATHEROS2 AR5523_2_NF	0x0004	AR5523 (no firmware)
 product ATHEROS2 AR5523_3	0x0005	AR5523
 product ATHEROS2 AR5523_3_NF	0x0006	AR5523 (no firmware)
 
 /* Atmel Comp. products */
 product ATMEL STK541		0x2109	Zigbee Controller
 product ATMEL UHB124		0x3301	UHB124 hub
 product ATMEL DWL120		0x7603	DWL-120 Wireless Adapter
 product ATMEL BW002		0x7605	BW002 Wireless Adapter
 product ATMEL WL1130USB		0x7613	WL-1130 USB
 product ATMEL AT76C505A		0x7614	AT76c505a Wireless Adapter
 
 /* AuthenTec products */
 product AUTHENTEC AES1610	0x1600	AES1610 Fingerprint Sensor
 
 /* Avision products */
 product AVISION 1200U		0x0268	1200U scanner
 
 /* Axesstel products */
 product AXESSTEL DATAMODEM	0x1000  Data Modem
 
 /* AzureWave products */
 product AZUREWAVE RT2870_1	0x3247	RT2870
 product AZUREWAVE RT2870_2	0x3262	RT2870
 product AZUREWAVE RT3070_1	0x3273	RT3070
 product	AZUREWAVE RT3070_2	0x3284	RT3070
 product	AZUREWAVE RT3070_3	0x3305	RT3070
 
 /* Baltech products */
 product BALTECH CARDREADER	0x9999	Card reader
 
 /* B&B Electronics products */
 product BBELECTRONICS USOTL4	0xAC01	RS-422/485
 
 /* Belkin products */
 /*product BELKIN F5U111		0x????	F5U111 Ethernet*/
 product BELKIN F5D6050		0x0050	F5D6050 802.11b Wireless Adapter
 product BELKIN FBT001V		0x0081	FBT001v2 Bluetooth
 product BELKIN FBT003V		0x0084	FBT003v2 Bluetooth
 product BELKIN F5U103		0x0103	F5U103 Serial
 product BELKIN F5U109		0x0109	F5U109 Serial
 product BELKIN USB2SCSI		0x0115	USB to SCSI
 product BELKIN F8T012		0x0121	F8T012xx1 Bluetooth USB Adapter
 product BELKIN USB2LAN		0x0121	USB to LAN
 product BELKIN F5U208		0x0208	F5U208 VideoBus II
 product BELKIN F5U237		0x0237	F5U237 USB 2.0 7-Port Hub
 product BELKIN F5U257		0x0257	F5U257 Serial
 product BELKIN F5U409		0x0409	F5U409 Serial
 product BELKIN F6C550AVR	0x0551	F6C550-AVR UPS
 product BELKIN F5U120		0x1203	F5U120-PC Hub
 product BELKIN ZD1211B		0x4050	ZD1211B
 product BELKIN F5D5055		0x5055	F5D5055
 product BELKIN F5D7050		0x7050	F5D7050 Wireless Adapter
 product BELKIN F5D7051		0x7051	F5D7051 54g USB Network Adapter
 product BELKIN F5D7050A		0x705a	F5D7050A Wireless Adapter
 /* Also sold as 'Ativa 802.11g wireless card' */
 product BELKIN F5D7050_V4000	0x705c	F5D7050 v4000 Wireless Adapter
 product BELKIN F5D7050E		0x705e	F5D7050E Wireless Adapter
 product BELKIN RT2870_1		0x8053	RT2870
 product BELKIN RT2870_2		0x805c	RT2870
 product BELKIN F5D8053V3	0x815c	F5D8053 v3
 product BELKIN F5D8055		0x825a	F5D8055
 product BELKIN F5D9050V3	0x905b	F5D9050 ver 3 Wireless Adapter
 product BELKIN2 F5U002		0x0002	F5U002 Parallel printer
 product BELKIN F6D4050V1	0x935a	F6D4050 v1
 
 /* Billionton products */
 product BILLIONTON USB100	0x0986	USB100N 10/100 FastEthernet
 product BILLIONTON USBLP100	0x0987	USB100LP
 product BILLIONTON USBEL100	0x0988	USB100EL
 product BILLIONTON USBE100	0x8511	USBE100
 product BILLIONTON USB2AR	0x90ff	USB2AR Ethernet
 
 /* Broadcom products */
 product BROADCOM BCM2033	0x2033	BCM2033 Bluetooth USB dongle
 
 /* Brother Industries products */
 product BROTHER HL1050		0x0002	HL-1050 laser printer
 product BROTHER MFC8600_9650	0x0100	MFC8600/9650 multifunction device
 
 /* Behavior Technology Computer products */
 product BTC BTC6100		0x5550	6100C Keyboard
 product BTC BTC7932		0x6782	Keyboard with mouse port
 
 /* Canon, Inc. products */
 product CANON N656U		0x2206	CanoScan N656U
 product CANON N1220U		0x2207	CanoScan N1220U
 product CANON D660U		0x2208	CanoScan D660U
 product CANON N676U		0x220d	CanoScan N676U
 product CANON N1240U		0x220e	CanoScan N1240U
 product CANON LIDE25		0x2220	CanoScan LIDE 25
 product CANON S10		0x3041	PowerShot S10
 product CANON S100		0x3045	PowerShot S100
 product CANON S200		0x3065	PowerShot S200
 product CANON REBELXT		0x30ef	Digital Rebel XT
 
 /* CATC products */
 product CATC NETMATE		0x000a	Netmate Ethernet
 product CATC NETMATE2		0x000c	Netmate2 Ethernet
 product CATC CHIEF		0x000d	USB Chief Bus & Protocol Analyzer
 product CATC ANDROMEDA		0x1237	Andromeda hub
 
 /* CASIO products */
 product CASIO QV_DIGICAM	0x1001	QV DigiCam
 product CASIO EXS880		0x1105	Exilim EX-S880
 product CASIO BE300		0x2002	BE-300 PDA
 product CASIO NAMELAND		0x4001	CASIO Nameland EZ-USB
 
 /* CCYU products */
 product CCYU ED1064		0x2136	EasyDisk ED1064
 
 /* Century products */
 product CENTURY EX35QUAT	0x011e	Century USB Disk Enclosure
 product CENTURY EX35SW4_SB4	0x011f	Century USB Disk Enclosure
 
 /* Cherry products */
 product CHERRY MY3000KBD	0x0001	My3000 keyboard
 product CHERRY MY3000HUB	0x0003	My3000 hub
 product CHERRY CYBOARD		0x0004	CyBoard Keyboard
 
 /* Chic Technology products */
 product CHIC MOUSE1		0x0001	mouse
 product CHIC CYPRESS		0x0003	Cypress USB Mouse
 
 /* Chicony products */
 product CHICONY KB8933		0x0001	KB-8933 keyboard
 product CHICONY KU0325		0x0116	KU-0325 keyboard
 product CHICONY CNF7129		0xb071	Notebook Web Camera
 product CHICONY2 TWINKLECAM	0x600d	TwinkleCam USB camera
 
 /* CH Products */
 product CHPRODUCTS PROTHROTTLE	0x00f1	Pro Throttle
 product CHPRODUCTS PROPEDALS	0x00f2	Pro Pedals
 product CHPRODUCTS FIGHTERSTICK 0x00f3	Fighterstick
 product CHPRODUCTS FLIGHTYOKE	0x00ff	Flight Sim Yoke
 
 /* Cisco-Linksys products */
 product CISCOLINKSYS WUSB54AG	0x000c	WUSB54AG Wireless Adapter
 product CISCOLINKSYS WUSB54G	0x000d	WUSB54G Wireless Adapter
 product CISCOLINKSYS WUSB54GP	0x0011	WUSB54GP Wireless Adapter
 product CISCOLINKSYS USB200MV2	0x0018	USB200M v2
 product CISCOLINKSYS HU200TS	0x001a	HU200TS Wireless Adapter
 product CISCOLINKSYS WUSB54GC	0x0020	WUSB54GC
 product CISCOLINKSYS WUSB54GR	0x0023	WUSB54GR
 product CISCOLINKSYS WUSBF54G	0x0024	WUSBF54G
 product	CISCOLINKSYS2 RT3070	0x4001	RT3070
 product	CISCOLINKSYS3 RT3070	0x0101	RT3070
 
 /* Clipsal products */
 product CLIPSAL 5500PCU		0x0303	5500PCU C-Bus
 
 /* CMOTECH products */
 product CMOTECH CNU510		0x5141	CDMA Technologies USB modem
 product CMOTECH CNU550		0x5543	CDMA 2000 1xRTT/1xEVDO USB modem
 product CMOTECH CGU628		0x6006	CGU-628
 product CMOTECH CDMA_MODEM1	0x6280	CDMA Technologies USB modem
 product CMOTECH DISK		0xf000	disk mode
 
 /* Compaq products */
 product COMPAQ IPAQPOCKETPC	0x0003	iPAQ PocketPC
 product COMPAQ PJB100		0x504a	Personal Jukebox PJB100
 product COMPAQ IPAQLINUX	0x505a	iPAQ Linux
 
 /* Composite Corp products look the same as "TANGTOP" */
 product COMPOSITE USBPS2	0x0001	USB to PS2 Adaptor
 
 /* Conceptronic products */
 product CONCEPTRONIC PRISM_GT	0x3762	PrismGT USB 2.0 WLAN
 product CONCEPTRONIC C11U	0x7100	C11U
 product CONCEPTRONIC WL210	0x7110	WL-210
 product CONCEPTRONIC AR5523_1	0x7801	AR5523
 product CONCEPTRONIC AR5523_1_NF	0x7802	AR5523 (no firmware)
 product CONCEPTRONIC AR5523_2	0x7811	AR5523
 product CONCEPTRONIC AR5523_2_NF	0x7812	AR5523 (no firmware)
 product CONCEPTRONIC2 C54RU	0x3c02	C54RU WLAN
 product CONCEPTRONIC2 C54RU2	0x3c22	C54RU
 product CONCEPTRONIC2 RT3070_1	0x3c08	RT3070
 product CONCEPTRONIC2 RT3070_2	0x3c11	RT3070
 product CONCEPTRONIC2 VIGORN61	0x3c25	VIGORN61
 product CONCEPTRONIC2 RT2870_1	0x3c06	RT2870
 product CONCEPTRONIC2 RT2870_2	0x3c07	RT2870
 product CONCEPTRONIC2 RT2870_7	0x3c09	RT2870
 product CONCEPTRONIC2 RT2870_8	0x3c12	RT2870
 product CONCEPTRONIC2 RT2870_3	0x3c23	RT2870
 product CONCEPTRONIC2 RT2870_4	0x3c25	RT2870
 product CONCEPTRONIC2 RT2870_5	0x3c27	RT2870
 product CONCEPTRONIC2 RT2870_6	0x3c28	RT2870
 
 /* Connectix products */
 product CONNECTIX QUICKCAM	0x0001	QuickCam
 
 /* Corega products */
 product COREGA ETHER_USB_T	0x0001	Ether USB-T
 product COREGA FETHER_USB_TX	0x0004	FEther USB-TX
 product COREGA WLAN_USB_USB_11	0x000c	WirelessLAN USB-11
 product COREGA FETHER_USB_TXS	0x000d	FEther USB-TXS
 product COREGA WLANUSB		0x0012	Wireless LAN Stick-11
 product COREGA FETHER_USB2_TX	0x0017	FEther USB2-TX
 product COREGA WLUSB_11_KEY	0x001a	ULUSB-11 Key
 product COREGA CGUSBRS232R	0x002a	CG-USBRS232R
 product COREGA CGWLUSB2GL	0x002d	CG-WLUSB2GL
 product COREGA CGWLUSB2GPX	0x002e	CG-WLUSB2GPX
 product COREGA RT2870_1		0x002f	RT2870
 product COREGA RT2870_2		0x003c	RT2870
 product COREGA RT2870_3		0x003f	RT2870
 product COREGA RT3070		0x0041	RT3070
 product COREGA CGWLUSB300GNM	0x0042	CG-WLUSB300GNM
 
 product COREGA WLUSB_11_STICK	0x7613	WLAN USB Stick 11
 product COREGA FETHER_USB_TXC	0x9601	FEther USB-TXC
 
 /* Creative products */
 product CREATIVE NOMAD_II	0x1002	Nomad II MP3 player
 product CREATIVE NOMAD_IIMG	0x4004	Nomad II MG
 product CREATIVE NOMAD		0x4106	Nomad
 product CREATIVE2 VOIP_BLASTER	0x0258	Voip Blaster
 product CREATIVE3 OPTICAL_MOUSE	0x0001	Notebook Optical Mouse
 
 /* Cambridge Silicon Radio Ltd. products */
 product CSR BT_DONGLE		0x0001	Bluetooth USB dongle
 product CSR CSRDFU		0xffff	USB Bluetooth Device in DFU State
 
 /* Chipsbank Microelectronics Co., Ltd */
 product CHIPSBANK USBMEMSTICK	0x6025	CBM2080 Flash drive controller
 product CHIPSBANK USBMEMSTICK1	0x6026	CBM1180 Flash drive controller
 
 /* CTX products */
 product CTX EX1300		0x9999	Ex1300 hub
 
 /* Curitel products */
 product CURITEL HX550C		0x1101	CDMA 2000 1xRTT USB modem (HX-550C)
 product CURITEL HX57XB		0x2101	CDMA 2000 1xRTT USB modem (HX-570/575B/PR-600)
 product CURITEL PC5740		0x3701	Broadband Wireless modem
 product CURITEL UM175		0x3714	EVDO modem
 
 /* CyberPower products */
 product CYBERPOWER 1500CAVRLCD	0x0501	1500CAVRLCD
 
 /* CyberTAN Technology products */
 product CYBERTAN TG54USB	0x1666	TG54USB
 product CYBERTAN RT2870		0x1828	RT2870
 
 /* Cypress Semiconductor products */
 product CYPRESS MOUSE		0x0001	mouse
 product CYPRESS THERMO		0x0002	thermometer
 product CYPRESS WISPY1A		0x0bad	MetaGeek Wi-Spy
 product CYPRESS KBDHUB		0x0101	Keyboard/Hub
 product CYPRESS FMRADIO		0x1002	FM Radio
 product CYPRESS IKARILASER	0x121f	Ikari Laser SteelSeries ApS
 
 product CYPRESS USBRS232	0x5500	USB-RS232 Interface
 product CYPRESS SLIM_HUB	0x6560	Slim Hub
 product CYPRESS XX6830XX	0x6830	PATA Storage Device
 product CYPRESS SILVERSHIELD	0xfd13	Gembird Silver Shield PM
 
 /* Daisy Technology products */
 product DAISY DMC		0x6901	USB MultiMedia Reader
 
 /* Dallas Semiconductor products */
 product DALLAS J6502		0x4201	J-6502 speakers
 
 /* DataApex products */
 product DATAAPEX MULTICOM	0xead6	MultiCom
 
 /* Dell products */
 product DELL PORT		0x0058	Port Replicator
 product DELL AIO926		0x5115	Photo AIO Printer 926
 product DELL BC02		0x8000	BC02 Bluetooth USB Adapter
 product DELL PRISM_GT_1		0x8102	PrismGT USB 2.0 WLAN
 product DELL TM350		0x8103	TrueMobile 350 Bluetooth USB Adapter
 product DELL PRISM_GT_2		0x8104	PrismGT USB 2.0 WLAN
 product DELL U5700		0x8114	Dell 5700 3G
 product DELL U5500		0x8115	Dell 5500 3G
 product DELL U5505		0x8116	Dell 5505 3G
 product DELL U5700_2		0x8117	Dell 5700 3G
 product DELL U5510		0x8118	Dell 5510 3G
 product DELL U5700_3		0x8128	Dell 5700 3G
 product DELL U5700_4		0x8129	Dell 5700 3G
 product DELL U5720		0x8133	Dell 5720 3G
 product DELL U5720_2		0x8134	Dell 5720 3G
 product DELL U740		0x8135	Dell U740 CDMA
 product DELL U5520		0x8136	Dell 5520 3G
 product DELL U5520_2		0x8137	Dell 5520 3G
 product DELL U5520_3		0x8138	Dell 5520 3G
 product DELL U5730		0x8180	Dell 5730 3G
 product DELL U5730_2		0x8181	Dell 5730 3G
 product DELL U5730_3		0x8182	Dell 5730 3G
 product DELL DW700		0x9500	Dell DW700 GPS
 
 /* DeLorme Publishing products */
 product DELORME EARTHMATE	0x0100	Earthmate GPS
 
 /* Desknote products */
 product DESKNOTE UCR_61S2B	0x0c55	UCR-61S2B
 
 /* Diamond products */
 product DIAMOND RIO500USB	0x0001	Rio 500 USB
 
 /* Dick Smith Electronics (really C-Net) products */
 product DICKSMITH RT2573	0x9022	RT2573
 product DICKSMITH CWD854F	0x9032	C-Net CWD-854 rev F
 
 /* Digi International products */
 product DIGI ACCELEPORT2	0x0002	AccelePort USB 2
 product DIGI ACCELEPORT4	0x0004	AccelePort USB 4
 product DIGI ACCELEPORT8	0x0008	AccelePort USB 8
 
 /* Digianswer A/S products */
 product DIGIANSWER ZIGBEE802154	0x000a	ZigBee/802.15.4 MAC
 
 /* D-Link products */
 /*product DLINK DSBS25		0x0100	DSB-S25 serial*/
 product DLINK DUBE100		0x1a00	10/100 Ethernet
 product DLINK DSB650TX4		0x200c	10/100 Ethernet
 product DLINK DWL120E		0x3200	DWL-120 rev E
 product DLINK DWL122		0x3700	DWL-122
 product DLINK DWLG120		0x3701	DWL-G120
 product DLINK DWL120F		0x3702	DWL-120 rev F
 product DLINK DWLAG132		0x3a00	DWL-AG132
 product DLINK DWLAG132_NF	0x3a01	DWL-AG132 (no firmware)
 product DLINK DWLG132		0x3a02	DWL-G132
 product DLINK DWLG132_NF	0x3a03	DWL-G132 (no firmware)
 product DLINK DWLAG122		0x3a04	DWL-AG122
 product DLINK DWLAG122_NF	0x3a05	DWL-AG122 (no firmware)
 product DLINK DWLG122		0x3c00	DWL-G122 b1 Wireless Adapter
 product DLINK DUBE100B1		0x3c05	DUB-E100 rev B1
 product DLINK RT2870		0x3c09	RT2870
 product DLINK RT3072		0x3c0a	RT3072
 product DLINK DSB650C		0x4000	10Mbps Ethernet
 product DLINK DSB650TX1		0x4001	10/100 Ethernet
 product DLINK DSB650TX		0x4002	10/100 Ethernet
 product DLINK DSB650TX_PNA	0x4003	1/10/100 Ethernet
 product DLINK DSB650TX3		0x400b	10/100 Ethernet
 product DLINK DSB650TX2		0x4102	10/100 Ethernet
 product DLINK DSB650		0xabc1	10/100 Ethernet
 product DLINK DUBH7		0xf103	DUB-H7 USB 2.0 7-Port Hub
 product DLINK2 DWA120		0x3a0c	DWA-120
 product DLINK2 DWA120_NF	0x3a0d	DWA-120 (no firmware)
 product DLINK2 DWLG122C1	0x3c03	DWL-G122 c1
 product DLINK2 WUA1340		0x3c04	WUA-1340
 product DLINK2 DWA111		0x3c06	DWA-111
 product DLINK2 RT2870_1		0x3c09	RT2870
 product DLINK2 DWA110		0x3c07	DWA-110
 product DLINK2 RT3072		0x3c0a	RT3072
 product DLINK2 RT3072_1		0x3c0b	RT3072
 product DLINK2 RT3070_1		0x3c0d	RT3070
 product DLINK2 RT3070_2		0x3c0e	RT3070
 product DLINK2 RT3070_3		0x3c0f	RT3070
 product DLINK2 RT2870_2		0x3c11	RT2870
 product DLINK2 DWA130		0x3c13	DWA-130
 product DLINK2 RT3070_4		0x3c15	RT3070
 product DLINK2 RT3070_5		0x3c16	RT3070
 product DLINK3 DWM652		0x3e04	DWM-652
 
 /* DMI products */
 product DMI CFSM_RW		0xa109	CF/SM Reader/Writer
 product DMI DISK		0x2bcf	Generic Disk
 
 /* DrayTek products */
 product DRAYTEK VIGOR550	0x0550	Vigor550
 
 /* dresden elektronik products */
 product DRESDENELEKTRONIK SENSORTERMINALBOARD  0x0001 SensorTerminalBoard
 product DRESDENELEKTRONIK WIRELESSHANDHELDTERMINAL  0x0004 Wireless Handheld Terminal
 
 /* Dynastream Innovations */
 product DYNASTREAM ANTDEVBOARD	0x1003	ANT dev board
 product DYNASTREAM ANT2USB	0x1004	ANT2USB
 product DYNASTREAM ANTDEVBOARD2	0x1006	ANT dev board
 
 /* Edimax products */
 product EDIMAX EW7318USG	0x7318	USB Wireless dongle
 product EDIMAX RT2870_1		0x7711	RT2870
 product EDIMAX EW7717		0x7717	EW-7717
 product EDIMAX EW7718		0x7718	EW-7718
 
 /* eGalax Products */
 product EGALAX TPANEL		0x0001	Touch Panel
 product EGALAX TPANEL2		0x0002	Touch Panel
 product EGALAX2 TPANEL		0x0001	Touch Panel
 
 /* Eicon Networks */
 product EICON DIVA852		0x4905	Diva 852 ISDN TA
 
 /* EIZO products */
 product EIZO HUB		0x0000	hub
 product EIZO MONITOR		0x0001	monitor
 
 /* ELCON Systemtechnik products */
 product ELCON PLAN		0x0002	Goldpfeil P-LAN
 
 /* Elecom products */
 product ELECOM MOUSE29UO	0x0002	mouse 29UO
 product ELECOM LDUSBTX0		0x200c	LD-USB/TX
 product ELECOM LDUSBTX1		0x4002	LD-USB/TX
 product ELECOM LDUSBLTX		0x4005	LD-USBL/TX
 product ELECOM LDUSBTX2		0x400b	LD-USB/TX
 product ELECOM LDUSB20		0x4010	LD-USB20
 product ELECOM UCSGT		0x5003	UC-SGT
 product ELECOM UCSGT0		0x5004	UC-SGT
 product ELECOM LDUSBTX3		0xabc1	LD-USB/TX
 
 /* Elsa products */
 product ELSA MODEM1		0x2265	ELSA Modem Board
 product ELSA USB2ETHERNET	0x3000	Microlink USB2Ethernet
 
 /* ELV products */
 product ELV USBI2C		0xe00f	USB-I2C interface
 
 /* EMS products */
 product EMS DUAL_SHOOTER	0x0003	PSX gun controller converter
 
 /* Encore products */
 product ENCORE RT3070_1		0x1480	RT3070
 product ENCORE RT3070_2		0x14a1	RT3070
 product ENCORE RT3070_3		0x14a9	RT3070
 
 /* Entrega products */
 product ENTREGA 1S		0x0001	1S serial
 product ENTREGA 2S		0x0002	2S serial
 product ENTREGA 1S25		0x0003	1S25 serial
 product ENTREGA 4S		0x0004	4S serial
 product ENTREGA E45		0x0005	E45 Ethernet
 product ENTREGA CENTRONICS	0x0006	Parallel Port
 product ENTREGA XX1		0x0008	Ethernet
 product ENTREGA 1S9		0x0093	1S9 serial
 product ENTREGA EZUSB		0x8000	EZ-USB
 /*product ENTREGA SERIAL	0x8001	DB25 Serial*/
 product ENTREGA 2U4S		0x8004	2U4S serial/usb hub
 product ENTREGA XX2		0x8005	Ethernet
 /*product ENTREGA SERIAL_DB9	0x8093	DB9 Serial*/
 
 /* Epson products */
 product EPSON PRINTER1		0x0001	USB Printer
 product EPSON PRINTER2		0x0002	ISD USB Smart Cable for Mac
 product EPSON PRINTER3		0x0003	ISD USB Smart Cable
 product EPSON PRINTER5		0x0005	USB Printer
 product EPSON 636		0x0101	Perfection 636U / 636Photo scanner
 product EPSON 610		0x0103	Perfection 610 scanner
 product EPSON 1200		0x0104	Perfection 1200U / 1200Photo scanner
 product EPSON 1600		0x0107	Expression 1600 scanner
 product EPSON 1640		0x010a	Perfection 1640SU scanner
 product EPSON 1240		0x010b	Perfection 1240U / 1240Photo scanner
 product EPSON 640U		0x010c	Perfection 640U scanner
 product EPSON 1250		0x010f	Perfection 1250U / 1250Photo scanner
 product EPSON 1650		0x0110	Perfection 1650 scanner
 product EPSON GT9700F		0x0112	GT-9700F scanner
 product EPSON GT9300UF		0x011b	GT-9300UF scanner
 product EPSON 3200		0x011c	Perfection 3200 scanner
 product EPSON 1260		0x011d	Perfection 1260 scanner
 product EPSON 1660		0x011e	Perfection 1660 scanner
 product EPSON 1670		0x011f	Perfection 1670 scanner
 product EPSON 1270		0x0120	Perfection 1270 scanner
 product EPSON 2480		0x0121	Perfection 2480 scanner
 product EPSON 3590		0x0122	Perfection 3590 scanner
 product EPSON 4990		0x012a	Perfection 4990 Photo scanner
 product EPSON CRESSI_EDY	0x0521	Cressi Edy diving computer
 product EPSON STYLUS_875DC	0x0601	Stylus Photo 875DC Card Reader
 product EPSON STYLUS_895	0x0602	Stylus Photo 895 Card Reader
 product EPSON CX5400		0x0808	CX5400 scanner
 product EPSON 3500		0x080e	CX-3500/3600/3650 MFP
 product EPSON RX425		0x080f	Stylus Photo RX425 scanner
 product EPSON DX3800		0x0818	CX3700/CX3800/DX38x0 MFP scanner
 product EPSON 4800		0x0819	CX4700/CX4800/DX48x0 MFP scanner
 product EPSON 4200		0x0820	CX4100/CX4200/DX4200 MFP scanner
 product EPSON 5000		0x082b  CX4900/CX5000/DX50x0 MFP scanner
 product EPSON 6000		0x082e  CX5900/CX6000/DX60x0 MFP scanner
 product EPSON DX4000		0x082f  DX4000 MFP scanner
 product EPSON DX7400		0x0838	CX7300/CX7400/DX7400 MFP scanner
 product EPSON DX8400		0x0839	CX8300/CX8400/DX8400 MFP scanner
 product EPSON SX100		0x0841	SX100/NX100 MFP scanner
 product EPSON NX300		0x0848	NX300 MFP scanner
 product EPSON SX200		0x0849	SX200/SX205 MFP scanner
 product EPSON SX400		0x084a	SX400/NX400/TX400 MFP scanner
 
 /* e-TEK Labs products */
 product ETEK 1COM		0x8007	Serial
 
 /* Extended Systems products */
 product EXTENDED XTNDACCESS	0x0100	XTNDAccess IrDA
 
 /* FEIYA products */
 product FEIYA 5IN1		0x1132	5-in-1 Card Reader
 
 /* Fiberline */
 product FIBERLINE WL430U	0x6003	WL-430U
 
 /* Fossil, Inc products */
 product FOSSIL WRISTPDA		0x0002	Wrist PDA
 
 /* Foxconn products */
 product FOXCONN PIRELLI_DP_L10	0xe000	Pirelli DP-L10
 
 /* Freecom products */
 product FREECOM DVD		0xfc01	DVD drive
 product FREECOM HDD		0xfc05	Classic SL Hard Drive
 
 /* Fujitsu Siemens Computers products */
 product FSC E5400		0x1009	PrismGT USB 2.0 WLAN
 
 /* Future Technology Devices products */
 product FTDI SERIAL_8U100AX	0x8372	8U100AX Serial
 product FTDI SERIAL_8U232AM	0x6001	8U232AM Serial
 product FTDI SERIAL_8U232AM4	0x6004	8U232AM Serial
 product FTDI SERIAL_2232C	0x6010	FT2232C Dual port Serial
 product FTDI SERIAL_2232D	0x9e90	FT2232D Dual port Serial
 product FTDI SERIAL_4232H	0x6011	FT4232H Quad port Serial
 /* Gude Analog- und Digitalsysteme products also uses FTDI's id: */
 product FTDI TACTRIX_OPENPORT_13M 0xcc48 OpenPort 1.3 Mitsubishi
 product FTDI TACTRIX_OPENPORT_13S 0xcc49 OpenPort 1.3 Subaru
 product FTDI TACTRIX_OPENPORT_13U 0xcc4a OpenPort 1.3 Universal
 product FTDI GAMMASCOUT		0xd678	Gamma-Scout
 product FTDI KBS		0xe6c8	Pyramid KBS USB LCD
 product FTDI EISCOU		0xe888	Expert ISDN Control USB
 product FTDI UOPTBR		0xe889	USB-RS232 OptoBridge
 product FTDI EMCU2D		0xe88a	Expert mouseCLOCK USB II
 product FTDI PCMSFU		0xe88b	Precision Clock MSF USB
 product FTDI EMCU2H		0xe88c	Expert mouseCLOCK USB II HBG
 product FTDI MAXSTREAM		0xee18	Maxstream PKG-U
 product FTDI USB_UIRT		0xf850	USB-UIRT
 product FTDI USBSERIAL		0xfa00	Matrix Orbital USB Serial
 product FTDI MX2_3		0xfa01	Matrix Orbital MX2 or MX3
 product FTDI MX4_5		0xfa02	Matrix Orbital MX4 or MX5
 product FTDI LK202		0xfa03	Matrix Orbital VK/LK202 Family
 product FTDI LK204		0xfa04	Matrix Orbital VK/LK204 Family
 product FTDI CFA_632		0xfc08	Crystalfontz CFA-632 USB LCD
 product FTDI CFA_634		0xfc09	Crystalfontz CFA-634 USB LCD
 product FTDI CFA_633		0xfc0b	Crystalfontz CFA-633 USB LCD
 product FTDI CFA_631		0xfc0c	Crystalfontz CFA-631 USB LCD
 product FTDI CFA_635		0xfc0d	Crystalfontz CFA-635 USB LCD
 product FTDI SEMC_DSS20		0xfc82	SEMC DSS-20 SyncStation
 /* Commerzielle und Technische Informationssysteme GmbH products */
 product FTDI CTI_USB_NANO_485	0xf60b	CTI USB-Nano 485
 product FTDI CTI_USB_MINI_485	0xf608	CTI USB-Mini 485
 
 /* Fuji photo products */
 product FUJIPHOTO MASS0100	0x0100	Mass Storage
 
 /* Fujitsu protducts */
 product FUJITSU AH_F401U	0x105b	AH-F401U Air H device
 
 /* Fujitsu-Siemens protducts */
 product FUJITSUSIEMENS SCR	0x0009	Fujitsu-Siemens SCR USB Reader
 
 /* Garmin products */
 product GARMIN IQUE_3600	0x0004	iQue 3600
 
 /* Gemalto products */
 product GEMALTO PROXPU		0x5501	Prox-PU/CU
 
 /* General Instruments (Motorola) products */
 product GENERALINSTMNTS SB5100	0x5100	SURFboard SB5100 Cable modem
 
 /* Genesys Logic products */
 product GENESYS GL620USB	0x0501	GL620USB Host-Host interface
 product GENESYS GL650		0x0604	GL650 HUB
 product GENESYS GL606		0x0606	USB 2.0 HUB
 product GENESYS GL641USB	0x0700	GL641USB CompactFlash Card Reader
 product GENESYS GL641USB2IDE_2	0x0701	GL641USB USB-IDE Bridge No 2
 product GENESYS GL641USB2IDE	0x0702	GL641USB USB-IDE Bridge
 product GENESYS GL641USB_2	0x0760	GL641USB 6-in-1 Card Reader
 
 /* GIGABYTE products */
 product GIGABYTE GN54G		0x8001	GN-54G
 product GIGABYTE GNBR402W	0x8002	GN-BR402W
 product GIGABYTE GNWLBM101	0x8003	GN-WLBM101
 product GIGABYTE GNWBKG		0x8007	GN-WBKG
 product GIGABYTE GNWB01GS	0x8008	GN-WB01GS
 product GIGABYTE GNWI05GS	0x800a	GN-WI05GS
 
 /* Gigaset products */
 product GIGASET WLAN		0x0701	WLAN
 product GIGASET SMCWUSBTG	0x0710	SMCWUSBT-G
 product GIGASET SMCWUSBTG_NF	0x0711	SMCWUSBT-G (no firmware)
 product GIGASET AR5523		0x0712	AR5523
 product GIGASET AR5523_NF	0x0713	AR5523 (no firmware)
 product GIGASET RT2573		0x0722	RT2573
 product GIGASET RT3070_1	0x0740	RT3070
 product GIGASET RT3070_2	0x0744	RT3070
 product GIGABYTE RT2870_1	0x800b	RT2870
 product GIGABYTE GNWB31N	0x800c	GN-WB31N
 product GIGABYTE GNWB32L	0x800d	GN-WB32L
 
 /* Global Sun Technology product */
 product GLOBALSUN AR5523_1	0x7801	AR5523
 product GLOBALSUN AR5523_1_NF	0x7802	AR5523 (no firmware)
 product GLOBALSUN AR5523_2	0x7811	AR5523
 product GLOBALSUN AR5523_2_NF	0x7812	AR5523 (no firmware)
 
 /* Globespan products */
 product GLOBESPAN PRISM_GT_1	0x2000	PrismGT USB 2.0 WLAN
 product GLOBESPAN PRISM_GT_2	0x2002	PrismGT USB 2.0 WLAN
 
 /* G.Mate, Inc products */
 product GMATE YP3X00		0x1001	YP3X00 PDA
 
 /* GoHubs products */
 product GOHUBS GOCOM232		0x1001	GoCOM232 Serial
 
 /* Good Way Technology products */
 product GOODWAY GWUSB2E		0x6200	GWUSB2E
 product GOODWAY RT2573		0xc019	RT2573
 
 /* Google products */
 product GOOGLE NEXUSONE		0x4e11	Nexus One
 
 /* Gravis products */
 product GRAVIS GAMEPADPRO	0x4001	GamePad Pro
 
 /* GREENHOUSE products */
 product GREENHOUSE KANA21	0x0001	CF-writer with MP3
 
 /* Griffin Technology */
 product GRIFFIN IMATE		0x0405	iMate, ADB Adapter
 
 /* Guillemot Corporation */
 product GUILLEMOT DALEADER	0xa300	DA Leader
 product GUILLEMOT HWGUSB254	0xe000	HWGUSB2-54 WLAN
 product GUILLEMOT HWGUSB254LB	0xe010	HWGUSB2-54-LB
 product GUILLEMOT HWGUSB254V2AP	0xe020	HWGUSB2-54V2-AP
 product GUILLEMOT HWNU300	0xe030	HWNU-300
 
 /* Hagiwara products */
 product HAGIWARA FGSM		0x0002	FlashGate SmartMedia Card Reader
 product HAGIWARA FGCF		0x0003	FlashGate CompactFlash Card Reader
 product HAGIWARA FG		0x0005	FlashGate
 
 /* HAL Corporation products */
 product HAL IMR001		0x0011	Crossam2+USB IR commander
 
 /* Handspring, Inc. */
 product HANDSPRING VISOR	0x0100	Handspring Visor
 product HANDSPRING TREO		0x0200	Handspring Treo
 product HANDSPRING TREO600	0x0300	Handspring Treo 600
 
 /* Hauppauge Computer Works */
 product HAUPPAUGE WINTV_USB_FM	0x4d12	WinTV USB FM
 product HAUPPAUGE2 NOVAT500	0x9580	NovaT 500Stick
 
 /* Hawking Technologies products */
 product HAWKING RT2870_1	0x0001	RT2870
 product HAWKING RT2870_2	0x0003	RT2870
 product HAWKING HWUN2		0x0009	HWUN2
 product HAWKING RT3070		0x000b	RT3070
 product HAWKING UF100		0x400c	10/100 USB Ethernet
 
 /* HID Global GmbH products */
 product HIDGLOBAL CM2020	0x0596	Omnikey Cardman 2020
 product HIDGLOBAL CM6020	0x1784	Omnikey Cardman 6020
 
 /* Hitachi, Ltd. products */
 product HITACHI DVDCAM_DZ_MV100A	0x0004	DVD-CAM DZ-MV100A Camcorder
 product HITACHI DVDCAM_USB	0x001e	DVDCAM USB HS Interface
 
 /* HP products */
 product HP 895C			0x0004	DeskJet 895C
 product HP 4100C		0x0101	Scanjet 4100C
 product HP S20			0x0102	Photosmart S20
 product HP 880C			0x0104	DeskJet 880C
 product HP 4200C		0x0105	ScanJet 4200C
 product HP CDWRITERPLUS		0x0107	CD-Writer Plus
 product HP KBDHUB		0x010c	Multimedia Keyboard Hub
 product HP G55XI		0x0111	OfficeJet G55xi
 product HP HN210W		0x011c	HN210W 802.11b WLAN
 product HP 49GPLUS		0x0121	49g+ graphing calculator
 product HP 6200C		0x0201	ScanJet 6200C
 product HP S20b			0x0202	PhotoSmart S20
 product HP 815C			0x0204	DeskJet 815C
 product HP 3300C		0x0205	ScanJet 3300C
 product HP CDW8200		0x0207	CD-Writer Plus 8200e
 product HP MMKEYB		0x020c	Multimedia keyboard
 product HP 1220C		0x0212	DeskJet 1220C
 product HP 810C			0x0304	DeskJet 810C/812C
 product HP 4300C		0x0305	Scanjet 4300C
 product HP CDW4E		0x0307	CD-Writer+ CD-4e
 product HP G85XI		0x0311	OfficeJet G85xi
 product HP 1200			0x0317	LaserJet 1200
 product HP 5200C		0x0401	Scanjet 5200C
 product HP 830C			0x0404	DeskJet 830C
 product HP 3400CSE		0x0405	ScanJet 3400cse
 product HP 6300C		0x0601	Scanjet 6300C
 product HP 840C			0x0604	DeskJet 840c
 product HP 2200C		0x0605	ScanJet 2200C
 product HP 5300C		0x0701	Scanjet 5300C
 product HP 4400C		0x0705	Scanjet 4400C
 product	HP 4470C		0x0805	Scanjet 4470C
 product HP 82x0C		0x0b01	Scanjet 82x0C
 product HP 2300D 		0x0b17	Laserjet 2300d
 product HP 970CSE		0x1004	Deskjet 970Cse
 product HP 5400C		0x1005	Scanjet 5400C
 product HP 2215			0x1016	iPAQ 22xx/Jornada 548
 product HP 568J			0x1116	Jornada 568
 product HP 930C			0x1204	DeskJet 930c
 product HP P2000U		0x1801	Inkjet P-2000U
 product HP HS2300		0x1e1d  HS2300 HSDPA (aka MC8775)
 product HP 640C			0x2004	DeskJet 640c
 product HP 4670V		0x3005	ScanJet 4670v
 product HP P1100		0x3102	Photosmart P1100
 product HP LD220		0x3524	LD220 POS Display
 product HP OJ4215		0x3d11	OfficeJet 4215
 product HP HN210E		0x811c	Ethernet HN210E
 product HP2 C500		0x6002	PhotoSmart C500
 product HP EV2200		0x1b1d  ev2200 HSDPA (aka MC5720)
 product HP HS2300		0x1e1d  hs2300 HSDPA (aka MC8775)
 
 /* HTC products */
 product HTC WINMOBILE		0x00ce	HTC USB Sync
 product HTC PPC6700MODEM	0x00cf	PPC6700 Modem
 product HTC SMARTPHONE		0x0a51	SmartPhone USB Sync
 product HTC WIZARD		0x0bce	HTC Wizard USB Sync
 product HTC LEGENDSYNC		0x0c97	HTC Legend USB Sync
 product HTC LEGEND		0x0ff9	HTC Legend
 product HTC LEGENDINTERNET	0x0ffe	HTC Legend Internet Sharing
 
 /* HUAWEI products */
 product HUAWEI MOBILE		0x1001	Huawei Mobile
-product HUAWEI E220		0x1003	Huawei HSDPA modem
-product HUAWEI E220BIS		0x1004	Huawei HSDPA modem
+product HUAWEI E220		0x1003	HSDPA modem
+product HUAWEI E220BIS		0x1004	HSDPA modem
 product HUAWEI E1401		0x1401	3G modem
 product HUAWEI E1402		0x1402	3G modem
 product HUAWEI E1403		0x1403	3G modem
 product HUAWEI E1404		0x1404	3G modem
 product HUAWEI E1405		0x1405	3G modem
 product HUAWEI E1406		0x1406	3G modem
 product HUAWEI E1407		0x1407	3G modem
 product HUAWEI E1408		0x1408	3G modem
 product HUAWEI E1409		0x1409	3G modem
 product HUAWEI E140A		0x140a	3G modem
 product HUAWEI E140B		0x140b	3G modem
-product HUAWEI E180V		0x140c	Huawei Mobile E180V
+product HUAWEI E180V		0x140c	E180V
 product HUAWEI E140D		0x140d	3G modem
 product HUAWEI E140E		0x140e	3G modem
 product HUAWEI E140F		0x140f	3G modem
 product HUAWEI E1410		0x1410	3G modem
 product HUAWEI E1411		0x1411	3G modem
 product HUAWEI E1412		0x1412	3G modem
 product HUAWEI E1413		0x1413	3G modem
 product HUAWEI E1414		0x1414	3G modem
 product HUAWEI E1415		0x1415	3G modem
 product HUAWEI E1416		0x1416	3G modem
 product HUAWEI E1417		0x1417	3G modem
 product HUAWEI E1418		0x1418	3G modem
 product HUAWEI E1419		0x1419	3G modem
 product HUAWEI E141A		0x141a	3G modem
 product HUAWEI E141B		0x141b	3G modem
 product HUAWEI E141C		0x141c	3G modem
 product HUAWEI E141D		0x141d	3G modem
 product HUAWEI E141E		0x141e	3G modem
 product HUAWEI E141F		0x141f	3G modem
 product HUAWEI E1420		0x1420	3G modem
 product HUAWEI E1421		0x1421	3G modem
 product HUAWEI E1422		0x1422	3G modem
 product HUAWEI E1423		0x1423	3G modem
 product HUAWEI E1424		0x1424	3G modem
 product HUAWEI E1425		0x1425	3G modem
 product HUAWEI E1426		0x1426	3G modem
 product HUAWEI E1427		0x1427	3G modem
 product HUAWEI E1428		0x1428	3G modem
 product HUAWEI E1429		0x1429	3G modem
 product HUAWEI E142A		0x142a	3G modem
 product HUAWEI E142B		0x142b	3G modem
 product HUAWEI E142C		0x142c	3G modem
 product HUAWEI E142D		0x142d	3G modem
 product HUAWEI E142E		0x142e	3G modem
 product HUAWEI E142F		0x142f	3G modem
 product HUAWEI E1430		0x1430	3G modem
 product HUAWEI E1431		0x1431	3G modem
 product HUAWEI E1432		0x1432	3G modem
 product HUAWEI E1433		0x1433	3G modem
 product HUAWEI E1434		0x1434	3G modem
 product HUAWEI E1435		0x1435	3G modem
 product HUAWEI E1436		0x1436	3G modem
 product HUAWEI E1437		0x1437	3G modem
 product HUAWEI E1438		0x1438	3G modem
 product HUAWEI E1439		0x1439	3G modem
 product HUAWEI E143A		0x143a	3G modem
 product HUAWEI E143B		0x143b	3G modem
 product HUAWEI E143C		0x143c	3G modem
 product HUAWEI E143D		0x143d	3G modem
 product HUAWEI E143E		0x143e	3G modem
 product HUAWEI E143F		0x143f	3G modem
 product HUAWEI E1752		0x1446	3G modem
 product HUAWEI K3765		0x1465	3G modem
-product HUAWEI E14AC		0x14ac	3G modem
-product HUAWEI K3765_INIT	0x1520	HUAWEI Mobile K3765 Initial
+product HUAWEI E1820		0x14ac	E1820 HSPA+ USB Slider
+product HUAWEI K3765_INIT	0x1520	K3765 Initial
 
 /* HUAWEI 3com products */
 product HUAWEI3COM WUB320G	0x0009	Aolynk WUB320g
 
 /* IBM Corporation */
 product IBM USBCDROMDRIVE	0x4427	USB CD-ROM Drive
 
 /* Imagination Technologies products */
 product IMAGINATION DBX1	0x2107	DBX1 DSP core
 
 /* Inside Out Networks products */
 product INSIDEOUT EDGEPORT4	0x0001	EdgePort/4 serial ports
 
 /* In-System products */
 product INSYSTEM F5U002		0x0002	Parallel printer
 product INSYSTEM ATAPI		0x0031	ATAPI Adapter
 product INSYSTEM ISD110		0x0200	IDE Adapter ISD110
 product INSYSTEM ISD105		0x0202	IDE Adapter ISD105
 product INSYSTEM USBCABLE	0x081a	USB cable
 product INSYSTEM STORAGE_V2	0x5701	USB Storage Adapter V2
 
 /* Intel products */
 product INTEL EASYPC_CAMERA	0x0110	Easy PC Camera
 product INTEL TESTBOARD		0x9890	82930 test board
 product INTEL2 IRMH        	0x0020	Integrated Rate Matching Hub
 
 /* Intersil products */
 product INTERSIL PRISM_GT	0x1000	PrismGT USB 2.0 WLAN
 product INTERSIL PRISM_2X	0x3642	Prism2.x or Atmel WLAN
 
 /* Interpid Control Systems products */
 product INTREPIDCS VALUECAN	0x0601	ValueCAN CAN bus interface
 product INTREPIDCS NEOVI	0x0701	NeoVI Blue vehicle bus interface
 
 /* I/O DATA products */
 product IODATA IU_CD2		0x0204	DVD Multi-plus unit iU-CD2
 product IODATA DVR_UEH8		0x0206	DVD Multi-plus unit DVR-UEH8
 product IODATA USBSSMRW		0x0314	USB-SSMRW SD-card
 product IODATA USBSDRW		0x031e	USB-SDRW SD-card
 product IODATA USBETT		0x0901	USB ETT
 product IODATA USBETTX		0x0904	USB ETTX
 product IODATA USBETTXS		0x0913	USB ETTX
 product IODATA USBWNB11A	0x0919	USB WN-B11
 product IODATA USBWNB11		0x0922	USB Airport WN-B11
 product IODATA ETGUS2		0x0930	ETG-US2
 product IODATA RT3072_1		0x0944	RT3072
 product IODATA RT3072_2		0x0945	RT3072
 product IODATA RT3072_3		0x0947	RT3072
 product IODATA RT3072_4		0x0948	RT3072
 product IODATA USBRSAQ		0x0a03	Serial USB-RSAQ1
 product IODATA USBRSAQ5		0x0a0e  Serial USB-RSAQ5
 product IODATA2 USB2SC		0x0a09	USB2.0-SCSI Bridge USB2-SC
 
 /* Iomega products */
 product IOMEGA ZIP100		0x0001	Zip 100
 product IOMEGA ZIP250		0x0030	Zip 250
 
 /* Integrated System Solution Corp. products */
 product ISSC ISSCBTA		0x1001	Bluetooth USB Adapter
 
 /* iTegno products */
 product ITEGNO WM1080A		0x1080	WM1080A GSM/GPRS modem
 product ITEGNO WM2080A		0x2080	WM2080A CDMA modem
 
 /* Ituner networks products */
 product ITUNERNET USBLCD2X20	0x0002	USB-LCD 2x20
 product ITUNERNET USBLCD4X20	0xc001	USB-LCD 4x20
 
 /* Jablotron products */
 product JABLOTRON PC60B		0x0001	PC-60B
 
 /* Jaton products */
 product JATON EDA		0x5704	Ethernet
 
 /* JMicron products */
 product JMICRON JM20336		0x2336	USB to SATA Bridge
 product JMICRON JM20337		0x2338	USB to ATA/ATAPI Bridge
 
 /* JVC products */
 product JVC GR_DX95		0x000a	GR-DX95
 product JVC MP_PRX1		0x3008	MP-PRX1 Ethernet
 
 /* JRC products */
 product JRC AH_J3001V_J3002V	0x0001	AirH PHONE AH-J3001V/J3002V
 
 /* Kawatsu products */
 product KAWATSU MH4000P		0x0003	MiniHub 4000P
 
 /* Keisokugiken Corp. products */
 product KEISOKUGIKEN USBDAQ	0x0068	HKS-0200 USBDAQ
 
 /* Kensington products */
 product KENSINGTON ORBIT	0x1003	Orbit USB/PS2 trackball
 product KENSINGTON TURBOBALL	0x1005	TurboBall
 
 /* Keyspan products */
 product KEYSPAN USA28_NF	0x0101	USA-28 serial Adapter (no firmware)
 product KEYSPAN USA28X_NF	0x0102	USA-28X serial Adapter (no firmware)
 product KEYSPAN USA19_NF	0x0103	USA-19 serial Adapter (no firmware)
 product KEYSPAN USA18_NF	0x0104	USA-18 serial Adapter (no firmware)
 product KEYSPAN USA18X_NF	0x0105	USA-18X serial Adapter (no firmware)
 product KEYSPAN USA19W_NF	0x0106	USA-19W serial Adapter (no firmware)
 product KEYSPAN USA19		0x0107	USA-19 serial Adapter
 product KEYSPAN USA19W		0x0108	USA-19W serial Adapter
 product KEYSPAN USA49W_NF	0x0109	USA-49W serial Adapter (no firmware)
 product KEYSPAN USA49W		0x010a	USA-49W serial Adapter
 product KEYSPAN USA19QI_NF	0x010b	USA-19QI serial Adapter (no firmware)
 product KEYSPAN USA19QI		0x010c	USA-19QI serial Adapter
 product KEYSPAN USA19Q_NF	0x010d	USA-19Q serial Adapter (no firmware)
 product KEYSPAN USA19Q		0x010e	USA-19Q serial Adapter
 product KEYSPAN USA28		0x010f	USA-28 serial Adapter
 product KEYSPAN USA28XXB	0x0110	USA-28X/XB serial Adapter
 product KEYSPAN USA18		0x0111	USA-18 serial Adapter
 product KEYSPAN USA18X		0x0112	USA-18X serial Adapter
 product KEYSPAN USA28XB_NF	0x0113	USA-28XB serial Adapter (no firmware)
 product KEYSPAN USA28XA_NF	0x0114	USA-28XB serial Adapter (no firmware)
 product KEYSPAN USA28XA		0x0115	USA-28XA serial Adapter
 product KEYSPAN USA18XA_NF	0x0116	USA-18XA serial Adapter (no firmware)
 product KEYSPAN USA18XA		0x0117	USA-18XA serial Adapter
 product KEYSPAN USA19QW_NF	0x0118	USA-19WQ serial Adapter (no firmware)
 product KEYSPAN USA19QW		0x0119	USA-19WQ serial Adapter
 product KEYSPAN USA19HA		0x0121	USA-19HS serial Adapter
 product KEYSPAN UIA10		0x0201	UIA-10 remote control
 product KEYSPAN UIA11		0x0202	UIA-11 remote control
 
 /* Kingston products */
 product KINGSTON XX1		0x0008	Ethernet
 product KINGSTON KNU101TX	0x000a	KNU101TX USB Ethernet
 
 /* Kawasaki products */
 product KLSI DUH3E10BT		0x0008	USB Ethernet
 product KLSI DUH3E10BTN		0x0009	USB Ethernet
 
 /* Kodak products */
 product KODAK DC220		0x0100	Digital Science DC220
 product KODAK DC260		0x0110	Digital Science DC260
 product KODAK DC265		0x0111	Digital Science DC265
 product KODAK DC290		0x0112	Digital Science DC290
 product KODAK DC240		0x0120	Digital Science DC240
 product KODAK DC280		0x0130	Digital Science DC280
 
 /* Konica Corp. Products */
 product KONICA CAMERA		0x0720	Digital Color Camera
 
 /* KYE products */
 product KYE NICHE		0x0001	Niche mouse
 product KYE NETSCROLL		0x0003	Genius NetScroll mouse
 product KYE FLIGHT2000		0x1004	Flight 2000 joystick
 product KYE VIVIDPRO		0x2001	ColorPage Vivid-Pro scanner
 
 /* Kyocera products */
 product KYOCERA FINECAM_S3X	0x0100	Finecam S3x
 product KYOCERA FINECAM_S4	0x0101	Finecam S4
 product KYOCERA FINECAM_S5	0x0103	Finecam S5
 product KYOCERA FINECAM_L3	0x0105	Finecam L3
 product KYOCERA AHK3001V	0x0203	AH-K3001V
 product KYOCERA2 CDMA_MSM_K	0x17da	Qualcomm Kyocera CDMA Technologies MSM
 product KYOCERA2 KPC680		0x180a	Qualcomm Kyocera CDMA Technologies MSM
 
 /* LaCie products */
 product LACIE HD		0xa601	Hard Disk
 product LACIE CDRW		0xa602	CD R/W
 
 /* Leadtek products */
 product LEADTEK 9531		0x2101	9531 GPS
 
 /* Lexar products */
 product LEXAR JUMPSHOT		0x0001	jumpSHOT CompactFlash Reader
 product LEXAR CF_READER		0xb002	USB CF Reader
 
 /* Lexmark products */
 product LEXMARK S2450		0x0009	Optra S 2450
 
 /* Liebert products */
 product LIEBERT POWERSURE_PXT	0xffff	PowerSure Personal XT
 
 /* Linksys products */
 product LINKSYS MAUSB2		0x0105	Camedia MAUSB-2
 product LINKSYS USB10TX1	0x200c	USB10TX
 product LINKSYS USB10T		0x2202	USB10T Ethernet
 product LINKSYS USB100TX	0x2203	USB100TX Ethernet
 product LINKSYS USB100H1	0x2204	USB100H1 Ethernet/HPNA
 product LINKSYS USB10TA		0x2206	USB10TA Ethernet
 product LINKSYS USB10TX2	0x400b	USB10TX
 product LINKSYS2 WUSB11		0x2219	WUSB11 Wireless Adapter
 product LINKSYS2 USB200M	0x2226	USB 2.0 10/100 Ethernet
 product LINKSYS3 WUSB11v28	0x2233	WUSB11 v2.8 Wireless Adapter
 product LINKSYS4 USB1000	0x0039	USB1000
 product LINKSYS4 WUSB100	0x0070	WUSB100
 product LINKSYS4 WUSB600N	0x0071	WUSB600N
 product LINKSYS4 WUSB54GCV2	0x0073	WUSB54GC v2
 product LINKSYS4 WUSB54GCV3	0x0077	WUSB54GC v3
 product LINKSYS4 RT3070		0x0078	RT3070
 product LINKSYS4 WUSB600NV2	0x0079	WUSB600N v2
 
 /* Logitech products */
 product LOGITECH M2452		0x0203	M2452 keyboard
 product LOGITECH M4848		0x0301	M4848 mouse
 product LOGITECH PAGESCAN	0x040f	PageScan
 product LOGITECH QUICKCAMWEB	0x0801	QuickCam Web
 product LOGITECH QUICKCAMPRO	0x0810	QuickCam Pro
 product LOGITECH QUICKCAMEXP	0x0840	QuickCam Express
 product LOGITECH QUICKCAM	0x0850	QuickCam
 product LOGITECH QUICKCAMPRO3	0x0990	QuickCam Pro 9000
 product LOGITECH N43		0xc000	N43
 product LOGITECH N48		0xc001	N48 mouse
 product LOGITECH MBA47		0xc002	M-BA47 mouse
 product LOGITECH WMMOUSE	0xc004	WingMan Gaming Mouse
 product LOGITECH BD58		0xc00c	BD58 mouse
 product LOGITECH UN58A		0xc030	iFeel Mouse
 product LOGITECH UN53B		0xc032	iFeel MouseMan
 product LOGITECH WMPAD		0xc208	WingMan GamePad Extreme
 product LOGITECH WMRPAD		0xc20a	WingMan RumblePad
 product LOGITECH WMJOY		0xc281	WingMan Force joystick
 product LOGITECH BB13		0xc401	USB-PS/2 Trackball
 product LOGITECH RK53		0xc501	Cordless mouse
 product LOGITECH RB6		0xc503	Cordless keyboard
 product LOGITECH MX700		0xc506	Cordless optical mouse
 product LOGITECH QUICKCAMPRO2	0xd001	QuickCam Pro
 
 /* Logitec Corp. products */
 product LOGITEC LDR_H443SU2	0x0033	DVD Multi-plus unit LDR-H443SU2
 product LOGITEC LDR_H443U2	0x00b3	DVD Multi-plus unit LDR-H443U2
 product LOGITEC LAN_GTJU2A	0x0160	LAN-GTJ/U2A Ethernet
 product LOGITEC RT2870_1	0x0162	RT2870
 product LOGITEC RT2870_2	0x0163	RT2870
 product LOGITEC RT2870_3	0x0164	RT2870
 
 /* Longcheer Holdings, Ltd. products */
 product LONGCHEER WM66		0x6061	Longcheer WM66 HSDPA
 product LONGCHEER W14		0x9603	Mobilcom W14
 product LONGCHEER DISK		0xf000	Driver disk
 
 
 /* Lucent products */
 product LUCENT EVALKIT		0x1001	USS-720 evaluation kit
 
 /* Luwen products */
 product LUWEN EASYDISK		0x0005	EasyDisc
 
 /* Macally products */
 product MACALLY MOUSE1		0x0101	mouse
 
 /* Marvell Technology Group, Ltd. products */
 product MARVELL SHEEVAPLUG	0x9e8f	SheevaPlug serial interface
   
 /* Matrix Orbital products */
 product MATRIXORBITAL MOUA	0x0153	Martrix Orbital MOU-Axxxx LCD displays
 
 /* MCT Corp. */
 product MCT HUB0100		0x0100	Hub
 product MCT DU_H3SP_USB232	0x0200	D-Link DU-H3SP USB BAY Hub
 product MCT USB232		0x0210	USB-232 Interface
 product MCT SITECOM_USB232	0x0230	Sitecom USB-232 Products
 
 /* Meizu Electronics */
 product MEIZU M6_SL		0x0140	MiniPlayer M6 (SL)
 
 /* Melco, Inc products */
 product MELCO LUATX1		0x0001	LUA-TX Ethernet
 product MELCO LUATX5		0x0005	LUA-TX Ethernet
 product MELCO LUA2TX5		0x0009	LUA2-TX Ethernet
 product MELCO LUAKTX		0x0012	LUA-KTX Ethernet
 product MELCO DUBPXXG		0x001c	DUB-PxxG
 product MELCO LUAU2KTX		0x003d	LUA-U2-KTX Ethernet
 product MELCO KG54YB		0x005e	WLI-U2-KG54-YB WLAN
 product MELCO KG54		0x0066	WLI-U2-KG54 WLAN
 product MELCO KG54AI		0x0067	WLI-U2-KG54-AI WLAN
 product MELCO LUA3U2AGT		0x006e	LUA3-U2-AGT
 product MELCO NINWIFI		0x008b	Nintendo Wi-Fi
 product MELCO PCOPRS1		0x00b3	PC-OP-RS1 RemoteStation
 product MELCO SG54HP		0x00d8	WLI-U2-SG54HP
 product MELCO G54HP		0x00d9	WLI-U2-G54HP
 product MELCO KG54L		0x00da	WLI-U2-KG54L
 product MELCO WLIUCG300N	0x00e8	WLI-UC-G300N
 product MELCO SG54HG		0x00f4	WLI-U2-SG54HG
 product MELCO WLRUCG		0x0116	WLR-UC-G
 product MELCO WLRUCGAOSS	0x0119	WLR-UC-G-AOSS
 product MELCO WLIUCAG300N	0x012e	WLI-UC-AG300N
 product MELCO RT2870_1		0x0148	RT2870
 product MELCO RT2870_2		0x0150	RT2870
 product MELCO WLIUCGN		0x015d	WLI-UC-GN
 
 /* Merlin products */
 product MERLIN V620             0x1110  Merlin V620
 
 /* MetaGeek products */
 product METAGEEK WISPY1B	0x083e	MetaGeek Wi-Spy
 product METAGEEK WISPY24X	0x083f	MetaGeek Wi-Spy 2.4x
 product METAGEEK2 WISPYDBX	0x5000	MetaGeek Wi-Spy DBx
 
 /* Metricom products */
 product METRICOM RICOCHET_GS	0x0001	Ricochet GS
 
 /* MGE UPS Systems */
 product MGE UPS1		0x0001	MGE UPS SYSTEMS PROTECTIONCENTER 1
 product MGE UPS2		0xffff	MGE UPS SYSTEMS PROTECTIONCENTER 2
 
 /* MEI products */
 product MEI CASHFLOW_SC		0x1100	Cashflow-SC Cash Acceptor
 product MEI S2000		0x1101	Seies 2000 Combo Acceptor
 
 /* Micro Star International products */
 product MSI BT_DONGLE		0x1967	Bluetooth USB dongle
 product MSI RT3070_1		0x3820	RT3070
 product MSI RT3070_2		0x3821	RT3070
 product MSI RT3070_8		0x3822	RT3070
 product MSI RT3070_3		0x3870	RT3070
 product MSI RT3070_9		0x3871	RT3070
 product MSI UB11B		0x6823	UB11B
 product MSI RT2570		0x6861	RT2570
 product MSI RT2570_2		0x6865	RT2570
 product MSI RT2570_3		0x6869	RT2570
 product MSI RT2573_1		0x6874	RT2573
 product MSI RT2573_2		0x6877	RT2573
 product MSI RT3070_4		0x6899	RT3070
 product MSI RT3070_5		0x821a	RT3070
 product MSI RT3070_10		0x822a	RT3070
 product MSI RT3070_6		0x870a	RT3070
 product MSI RT3070_11		0x871a	RT3070
 product MSI RT3070_7		0x899a	RT3070
 product MSI RT2573_3		0xa861	RT2573
 product MSI RT2573_4		0xa874	RT2573
 
 /* Microsoft products */
 product MICROSOFT SIDEPREC	0x0008	SideWinder Precision Pro
 product MICROSOFT INTELLIMOUSE	0x0009	IntelliMouse
 product MICROSOFT NATURALKBD	0x000b	Natural Keyboard Elite
 product MICROSOFT DDS80		0x0014	Digital Sound System 80
 product MICROSOFT SIDEWINDER	0x001a	Sidewinder Precision Racing Wheel
 product MICROSOFT INETPRO	0x001c	Internet Keyboard Pro
 product MICROSOFT TBEXPLORER	0x0024	Trackball Explorer
 product MICROSOFT INTELLIEYE	0x0025	IntelliEye mouse
 product MICROSOFT INETPRO2	0x002b	Internet Keyboard Pro
 product MICROSOFT INTELLIMOUSE5	0x0039	IntelliMouse 1.1 5-Button Mouse
 product MICROSOFT WHEELMOUSE	0x0040	Wheel Mouse Optical
 product MICROSOFT MN510		0x006e	MN510 Wireless
 product MICROSOFT 700WX		0x0079	Palm 700WX
 product MICROSOFT MN110		0x007a	10/100 USB NIC
 product MICROSOFT WLINTELLIMOUSE 0x008c	Wireless Optical IntelliMouse
 product MICROSOFT WLNOTEBOOK	0x00b9	Wireless Optical Mouse (Model 1023)
 product MICROSOFT COMFORT3000	0x00d1	Comfort Optical Mouse 3000 (Model 1043)
 product MICROSOFT WLNOTEBOOK3	0x00d2	Wireless Optical Mouse 3000 (Model 1049)
 product MICROSOFT NATURAL4000	0x00db	Natural Ergonomic Keyboard 4000
 product MICROSOFT WLNOTEBOOK2	0x00e1	Wireless Optical Mouse 3000 (Model 1056)
 product MICROSOFT XBOX360	0x0292	XBOX 360 WLAN
 
 /* Microtech products */
 product MICROTECH SCSIDB25	0x0004	USB-SCSI-DB25
 product MICROTECH SCSIHD50	0x0005	USB-SCSI-HD50
 product MICROTECH DPCM		0x0006	USB CameraMate
 product MICROTECH FREECOM	0xfc01	Freecom USB-IDE
 
 /* Microtek products */
 product MICROTEK 336CX		0x0094	Phantom 336CX - C3 scanner
 product MICROTEK X6U		0x0099	ScanMaker X6 - X6U
 product MICROTEK C6		0x009a	Phantom C6 scanner
 product MICROTEK 336CX2		0x00a0	Phantom 336CX - C3 scanner
 product MICROTEK V6USL		0x00a3	ScanMaker V6USL
 product MICROTEK V6USL2		0x80a3	ScanMaker V6USL
 product MICROTEK V6UL		0x80ac	ScanMaker V6UL
 
 /* Microtune, Inc. products */
 product MICROTUNE BT_DONGLE	0x1000	Bluetooth USB dongle
 
 /* Midiman products */
 product MIDIMAN MIDISPORT2X2	0x1001	Midisport 2x2
 
 /* MindsAtWork products */
 product MINDSATWORK WALLET	0x0001	Digital Wallet
 
 /* Minolta Co., Ltd. */
 product MINOLTA 2300		0x4001	Dimage 2300
 product MINOLTA S304		0x4007	Dimage S304
 product MINOLTA X		0x4009	Dimage X
 product MINOLTA 5400		0x400e	Dimage 5400
 product MINOLTA F300		0x4011	Dimage F300
 product MINOLTA E223		0x4017	Dimage E223
 
 /* Mitsumi products */
 product MITSUMI CDRRW		0x0000	CD-R/RW Drive
 product MITSUMI BT_DONGLE	0x641f	Bluetooth USB dongle
 product MITSUMI FDD		0x6901	USB FDD
 
 /* Mobile Action products */
 product MOBILEACTION MA620	0x0620	MA-620 Infrared Adapter
 
 /* Mobility products */
 product MOBILITY EA		0x0204	Ethernet
 product MOBILITY EASIDOCK	0x0304	EasiDock Ethernet
 
 /* MosChip products */
 product MOSCHIP MCS7703		0x7703	MCS7703 Serial Port Adapter
 product MOSCHIP MCS7830		0x7830	MCS7830 Ethernet
 
 /* Motorola products */
 product MOTOROLA MC141555	0x1555	MC141555 hub controller
 product MOTOROLA SB4100		0x4100	SB4100 USB Cable Modem
 product MOTOROLA2 T720C		0x2822	T720c
 product MOTOROLA2 A41XV32X	0x2a22	A41x/V32x Mobile Phones
 product MOTOROLA2 E398		0x4810	E398 Mobile Phone
 product MOTOROLA2 USBLAN	0x600c	USBLAN
 product MOTOROLA2 USBLAN2	0x6027	USBLAN
 product MOTOROLA4 RT2770	0x9031	RT2770
 product MOTOROLA4 RT3070	0x9032	RT3070
 
 /* MultiTech products */
 product MULTITECH ATLAS		0xf101	MT5634ZBA-USB modem
 
 /* Mustek products */
 product MUSTEK 1200CU		0x0001	1200 CU scanner
 product MUSTEK 600CU		0x0002	600 CU scanner
 product MUSTEK 1200USB		0x0003	1200 USB scanner
 product MUSTEK 1200UB		0x0006	1200 UB scanner
 product MUSTEK 1200USBPLUS	0x0007	1200 USB Plus scanner
 product MUSTEK 1200CUPLUS	0x0008	1200 CU Plus scanner
 product MUSTEK BEARPAW1200F	0x0010	BearPaw 1200F scanner
 product MUSTEK BEARPAW2400TA	0x0218	BearPaw 2400TA scanner
 product MUSTEK BEARPAW1200TA	0x021e	BearPaw 1200TA scanner
 product MUSTEK 600USB		0x0873	600 USB scanner
 product MUSTEK MDC800		0xa800	MDC-800 digital camera
 
 /* M-Systems products */
 product MSYSTEMS DISKONKEY	0x0010	DiskOnKey
 product MSYSTEMS DISKONKEY2	0x0011	DiskOnKey
 
 /* Myson products */
 product MYSON HEDEN_8813	0x8813	USB-IDE
 product MYSON HEDEN		0x8818	USB-IDE
 product MYSON HUBREADER		0x8819	COMBO Card reader with USB HUB
 product MYSON STARREADER	0x9920	USB flash card adapter
 
 /* National Semiconductor */
 product NATIONAL BEARPAW1200	0x1000	BearPaw 1200
 product NATIONAL BEARPAW2400	0x1001	BearPaw 2400
 
 /* NEC products */
 product NEC HUB_0050		0x0050	USB 2.0 7-Port Hub
 product NEC HUB_005A		0x005a	USB 2.0 4-Port Hub
 product NEC HUB			0x55aa	hub
 product NEC HUB_B		0x55ab	hub
 
 /* NEODIO products */
 product NEODIO ND3260		0x3260	8-in-1 Multi-format Flash Controller
 product NEODIO ND5010		0x5010	Multi-format Flash Controller
 
 /* Neotel products */
 product NEOTEL PRIME		0x4000	Prime USB modem
 
 /* Netac products */
 product NETAC CF_CARD		0x1060	USB-CF-Card
 product NETAC ONLYDISK		0x0003	OnlyDisk
 
 /* NetChip Technology Products */
 product NETCHIP TURBOCONNECT	0x1080	Turbo-Connect
 product NETCHIP CLIK_40		0xa140	USB Clik! 40
 product NETCHIP ETHERNETGADGET	0xa4a2	Linux Ethernet/RNDIS gadget on pxa210/25x/26x
 
 /* Netgear products */
 product NETGEAR EA101		0x1001	Ethernet
 product NETGEAR EA101X		0x1002	Ethernet
 product NETGEAR FA101		0x1020	Ethernet 10/100, USB1.1
 product NETGEAR FA120		0x1040	USB 2.0 Ethernet
 product NETGEAR WG111V2_2	0x4240	PrismGT USB 2.0 WLAN
 product NETGEAR WG111V3		0x4260	WG111v3
 product NETGEAR WG111U		0x4300	WG111U
 product NETGEAR WG111U_NF	0x4301	WG111U (no firmware)
 product NETGEAR WG111V2		0x6a00	WG111V2
 product NETGEAR2 MA101		0x4100	MA101
 product NETGEAR2 MA101B		0x4102	MA101 Rev B
 product NETGEAR3 WG111T		0x4250	WG111T
 product NETGEAR3 WG111T_NF	0x4251	WG111T (no firmware)
 product NETGEAR3 WPN111		0x5f00	WPN111
 product NETGEAR3 WPN111_NF	0x5f01	WPN111 (no firmware)
 product NETGEAR3 WPN111_2	0x5f02	WPN111
 
 /* NetIndex products */
 product NETINDEX WS002IN	0x2001	Willcom WS002IN
 
 /* NEWlink */
 product NEWLINK USB2IDEBRIDGE	0x00ff	USB 2.0 Hard Drive Enclosure
 
 /* Nikon products */
 product NIKON E990		0x0102	Digital Camera E990
 product NIKON LS40		0x4000	CoolScan LS40 ED
 product NIKON D300		0x041a  Digital Camera D300
 
 /* NovaTech Products */
 product NOVATECH NV902		0x9020	NovaTech NV-902W
 product NOVATECH RT2573		0x9021	RT2573
 
 /* Nokia products */
 product NOKIA N958GB		0x0070	Nokia N95 8GBc
 product NOKIA2 CA42		0x1234	CA-42 cable
 
 /* Novatel Wireless products */
 product NOVATEL V640		0x1100	Merlin V620
 product NOVATEL CDMA_MODEM	0x1110	Novatel Wireless Merlin CDMA
 product NOVATEL V620		0x1110	Merlin V620
 product NOVATEL V740		0x1120	Merlin V740
 product NOVATEL V720		0x1130	Merlin V720
 product NOVATEL U740		0x1400	Merlin U740
 product NOVATEL U740_2		0x1410	Merlin U740
 product NOVATEL U870		0x1420	Merlin U870
 product NOVATEL XU870		0x1430	Merlin XU870
 product NOVATEL X950D		0x1450	Merlin X950D
 product NOVATEL ES620		0x2100	Expedite ES620
 product NOVATEL E725		0x2120	Expedite E725
 product NOVATEL ES620_2		0x2130	Expedite ES620
 product NOVATEL ES620		0x2100	ES620 CDMA
 product NOVATEL U720		0x2110	Merlin U720
 product NOVATEL EU730		0x2400	Expedite EU730
 product NOVATEL EU740		0x2410	Expedite EU740
 product NOVATEL EU870D		0x2420	Expedite EU870D
 product NOVATEL U727		0x4100	Merlin U727 CDMA
 product NOVATEL MC950D		0x4400	Novatel MC950D HSUPA
 product NOVATEL ZEROCD		0x5010	Novatel ZeroCD
 product NOVATEL ZEROCD2		0x5030	Novatel ZeroCD
 product NOVATEL U727_2		0x5100	Merlin U727 CDMA
 product NOVATEL U760		0x6000	Novatel U760
 product NOVATEL MC760		0x6002	Novatel MC760
 product NOVATEL2 FLEXPACKGPS	0x0100	NovAtel FlexPack GPS receiver
 
 /* Merlin products */
 product	MERLIN V620		0x1110	Merlin V620
 
 /* O2Micro products */
 product O2MICRO OZ776_HUB	0x7761	OZ776 hub
 product O2MICRO OZ776_CCID_SC	0x7772	OZ776 CCID SC Reader
 
 /* Olympus products */
 product OLYMPUS C1		0x0102	C-1 Digital Camera
 product OLYMPUS C700		0x0105	C-700 Ultra Zoom
 
 /* OmniVision Technologies, Inc. products */
 product OMNIVISION OV511	0x0511	OV511 Camera
 product OMNIVISION OV511PLUS	0xa511	OV511+ Camera
 
 /* OnSpec Electronic, Inc. */
 product ONSPEC SDS_HOTFIND_D	0x0400	SDS-infrared.com Hotfind-D Infrared Camera
 product ONSPEC MDCFE_B_CF_READER	0xa000	MDCFE-B USB CF Reader
 product ONSPEC CFMS_RW		0xa001	SIIG/Datafab Memory Stick+CF Reader/Writer
 product ONSPEC READER		0xa003	Datafab-based Reader
 product ONSPEC CFSM_READER	0xa005	PNY/Datafab CF+SM Reader
 product ONSPEC CFSM_READER2	0xa006	Simple Tech/Datafab CF+SM Reader
 product ONSPEC MDSM_B_READER	0xa103	MDSM-B reader
 product ONSPEC CFSM_COMBO	0xa109	USB to CF + SM Combo (LC1)
 product ONSPEC UCF100		0xa400	FlashLink UCF-100 CompactFlash Reader
 product ONSPEC2 IMAGEMATE_SDDR55	0xa103	ImageMate SDDR55
 
 /* Option products */
 product OPTION VODAFONEMC3G	0x5000	Vodafone Mobile Connect 3G datacard
 product OPTION GT3G		0x6000	GlobeTrotter 3G datacard
 product OPTION GT3GQUAD		0x6300	GlobeTrotter 3G QUAD datacard
 product OPTION GT3GPLUS		0x6600	GlobeTrotter 3G+ datacard
 product OPTION GTICON322	0xd033	GlobeTrotter Icon322 storage
 product OPTION GTMAX36		0x6701	GlobeTrotter Max 3.6 Modem
 product OPTION GTHSDPA		0x6971	GlobeTrotter HSDPA
 product OPTION GTMAXHSUPA	0x7001	GlobeTrotter HSUPA
 product OPTION GTMAXHSUPAE	0x6901	GlobeTrotter HSUPA PCIe
 product OPTION GTMAX380HSUPAE	0x7211	GlobeTrotter 380HSUPA PCIe
 product OPTION GT3G_1		0x6050	3G modem
 product OPTION GT3G_2		0x6100	3G modem
 product OPTION GT3G_3		0x6150	3G modem
 product OPTION GT3G_4		0x6200	3G modem
 product OPTION GT3G_5		0x6250	3G modem
 product OPTION GT3G_6		0x6350	3G modem
 product OPTION E6500		0x6500	3G modem
 product OPTION E6501		0x6501	3G modem
 product OPTION E6601		0x6601	3G modem
 product OPTION E6721		0x6721	3G modem
 product OPTION E6741		0x6741	3G modem
 product OPTION E6761		0x6761	3G modem
 product OPTION E6800		0x6800	3G modem
 product OPTION E7021		0x7021	3G modem
 product OPTION E7041		0x7041	3G modem
 product OPTION E7061		0x7061	3G modem
 product OPTION E7100		0x7100	3G modem
 product OPTION GTM380		0x7201	3G modem
 product OPTION GE40X		0x7601	Globetrotter HSUPA
 product OPTION GSICON72		0x6911	GlobeSurfer iCON
 product OPTION GSICONHSUPA	0x7251	Globetrotter HSUPA
 product OPTION ICON401		0x7401	GlobeSurfer iCON 401
 product OPTION GTHSUPA		0x7011	Globetrotter HSUPA
 product OPTION GMT382		0x7501	Globetrotter HSUPA
 product OPTION GE40X_1		0x7301	Globetrotter HSUPA
 product OPTION GE40X_2		0x7361	Globetrotter HSUPA
 product OPTION GE40X_3		0x7381	Globetrotter HSUPA
 product OPTION ICONEDGE		0xc031	GlobeSurfer iCON EDGE
 product OPTION MODHSXPA		0xd013	Globetrotter HSUPA
 product OPTION ICON321		0xd031	Globetrotter HSUPA
 product OPTION ICON505		0xd055	Globetrotter iCON 505
 product OPTION ICON452		0x7901	Globetrotter iCON 452
 
 /* OvisLink product */
 product OVISLINK RT3072		0x3072	RT3072
 
 /* OQO */
 product OQO WIFI01		0x0002	model 01 WiFi interface
 product OQO BT01		0x0003	model 01 Bluetooth interface
 product OQO ETHER01PLUS		0x7720	model 01+ Ethernet
 product OQO ETHER01		0x8150	model 01 Ethernet interface
 
 /* Ours Technology Inc. */
 product OTI DKU5		0x6858	DKU-5 Serial
 
 /* Owen.ru products */
 product OWEN AC4		0x0004	AC4 USB-RS485 converter
 
 /* Palm Computing, Inc. products */
 product PALM SERIAL		0x0080	USB Serial
 product PALM M500		0x0001	Palm m500
 product PALM M505		0x0002	Palm m505
 product PALM M515		0x0003	Palm m515
 product PALM I705		0x0020	Palm i705
 product PALM TUNGSTEN_Z		0x0031	Palm Tungsten Z
 product PALM M125		0x0040	Palm m125
 product PALM M130		0x0050	Palm m130
 product PALM TUNGSTEN_T		0x0060	Palm Tungsten T
 product PALM ZIRE31		0x0061	Palm Zire 31
 product PALM ZIRE		0x0070	Palm Zire
 
 /* Panasonic products */
 product PANASONIC LS120CAM	0x0901	LS-120 Camera
 product PANASONIC KXL840AN	0x0d01	CD-R Drive KXL-840AN
 product PANASONIC KXLRW32AN	0x0d09	CD-R Drive KXL-RW32AN
 product PANASONIC KXLCB20AN	0x0d0a	CD-R Drive KXL-CB20AN
 product PANASONIC KXLCB35AN	0x0d0e	DVD-ROM & CD-R/RW
 product PANASONIC SDCAAE	0x1b00	MultiMediaCard
 product PANASONIC TYTP50P6S	0x3900	TY-TP50P6-S 50in Touch Panel
 
 /* PARA Industrial products */
 product PARA RT3070		0x8888	RT3070
 
 /* Pegatron products */
 product PEGATRON RT2870		0x0002	RT2870
 product PEGATRON RT3070		0x000c	RT3070
 product PEGATRON RT3070_2	0x000e	RT3070
 product PEGATRON RT3070_3	0x0010	RT3070
 
 /* Peracom products */
 product PERACOM SERIAL1		0x0001	Serial
 product PERACOM ENET		0x0002	Ethernet
 product PERACOM ENET3		0x0003	At Home Ethernet
 product PERACOM ENET2		0x0005	Ethernet
 
 /* Philips products */
 product PHILIPS DSS350		0x0101	DSS 350 Digital Speaker System
 product PHILIPS DSS		0x0104	DSS XXX Digital Speaker System
 product PHILIPS HUB		0x0201	hub
 product PHILIPS PCA646VC	0x0303	PCA646VC PC Camera
 product PHILIPS PCVC680K	0x0308	PCVC680K Vesta Pro PC Camera
 product PHILIPS DSS150		0x0471	DSS 150 Digital Speaker System
 product PHILIPS ACE1001		0x066a	AKTAKOM ACE-1001 cable
 product PHILIPS SPE3030CC	0x083a	USB 2.0 External Disk
 product PHILIPS SNU5600		0x1236	SNU5600
 product PHILIPS UM10016		0x1552	ISP 1581 Hi-Speed USB MPEG2 Encoder Reference Kit
 product PHILIPS DIVAUSB		0x1801	DIVA USB mp3 player
 product PHILIPS RT2870		0x200f	RT2870
 
 /* Philips Semiconductor products */
 product PHILIPSSEMI HUB1122	0x1122	HUB
 
 /* Megatec */
 product MEGATEC UPS		0x5161	Phoenixtec protocol based UPS
 
 /* P.I. Engineering products */
 product PIENGINEERING PS2USB	0x020b	PS2 to Mac USB Adapter
 
 /* Planex Communications products */
 product PLANEX GW_US11H		0x14ea	GW-US11H WLAN
 product PLANEX2 GW_US11S	0x3220	GW-US11S WLAN
 product PLANEX2 GW_US54GXS	0x5303	GW-US54GXS WLAN
 product PLANEX2 GWUS54HP	0xab01	GW-US54HP
 product PLANEX2 GWUS300MINIS	0xab24	GW-US300MiniS
 product PLANEX2	RT3070		0xab25	RT3070
 product PLANEX2 GWUS54MINI2	0xab50	GW-US54Mini2
 product PLANEX2 GWUS54SG	0xc002	GW-US54SG
 product PLANEX2 GWUS54GZL	0xc007	GW-US54GZL
 product PLANEX2 GWUS54GD	0xed01	GW-US54GD
 product PLANEX2 GWUSMM		0xed02	GW-USMM
 product PLANEX2 RT2870		0xed06	RT2870
 product PLANEX2 GWUSMICRON	0xed14	GW-USMicroN
 product PLANEX3 GWUS54GZ	0xab10	GW-US54GZ
 product PLANEX3 GU1000T		0xab11	GU-1000T
 product PLANEX3 GWUS54MINI	0xab13	GW-US54Mini
 
 /* Plextor Corp. */
 product PLEXTOR 40_12_40U	0x0011	PlexWriter 40/12/40U
 
 /* PLX products */
 product PLX TESTBOARD		0x9060	test board
 product PLX CA42		0xac70	CA-42
 
 /* PNY products */
 product PNY ATTACHE2		0x0010	USB 2.0 Flash Drive
 
 /* PortGear products */
 product PORTGEAR EA8		0x0008	Ethernet
 product PORTGEAR EA9		0x0009	Ethernet
 
 /* Portsmith products */
 product PORTSMITH EEA		0x3003	Express Ethernet
 
 /* Primax products */
 product PRIMAX G2X300		0x0300	G2-200 scanner
 product PRIMAX G2E300		0x0301	G2E-300 scanner
 product PRIMAX G2300		0x0302	G2-300 scanner
 product PRIMAX G2E3002		0x0303	G2E-300 scanner
 product PRIMAX 9600		0x0340	Colorado USB 9600 scanner
 product PRIMAX 600U		0x0341	Colorado 600u scanner
 product PRIMAX 6200		0x0345	Visioneer 6200 scanner
 product PRIMAX 19200		0x0360	Colorado USB 19200 scanner
 product PRIMAX 1200U		0x0361	Colorado 1200u scanner
 product PRIMAX G600		0x0380	G2-600 scanner
 product PRIMAX 636I		0x0381	ReadyScan 636i
 product PRIMAX G2600		0x0382	G2-600 scanner
 product PRIMAX G2E600		0x0383	G2E-600 scanner
 product PRIMAX COMFORT		0x4d01	Comfort
 product PRIMAX MOUSEINABOX	0x4d02	Mouse-in-a-Box
 product PRIMAX PCGAUMS1		0x4d04	Sony PCGA-UMS1
 product PRIMAX HP_RH304AA	0x4d17	HP RH304AA mouse
 
 /* Prolific products */
 product PROLIFIC PL2301		0x0000	PL2301 Host-Host interface
 product PROLIFIC PL2302		0x0001	PL2302 Host-Host interface
 product PROLIFIC RSAQ2		0x04bb	PL2303 Serial (IODATA USB-RSAQ2)
 product PROLIFIC ALLTRONIX_GPRS	0x0609	Alltronix ACM003U00 modem
 product PROLIFIC ALDIGA_AL11U	0x0611	AlDiga AL-11U modem
 product PROLIFIC MICROMAX_610U	0x0612	Micromax 610U
 product PROLIFIC DCU11		0x1234	DCU-11 Phone Cable
 product PROLIFIC PL2303		0x2303	PL2303 Serial (ATEN/IOGEAR UC232A)
 product PROLIFIC PL2305		0x2305	Parallel printer
 product PROLIFIC ATAPI4		0x2307	ATAPI-4 Controller
 product PROLIFIC PL2501		0x2501	PL2501 Host-Host interface
 product PROLIFIC PL2506		0x2506	PL2506 USB to IDE Bridge
 product PROLIFIC HCR331		0x331a	HCR331 Hybrid Card Reader
 product PROLIFIC PHAROS		0xaaa0	Prolific Pharos
 product PROLIFIC RSAQ3		0xaaa2	PL2303 Serial Adapter (IODATA USB-RSAQ3)
 product PROLIFIC2 PL2303	0x2303	PL2303 Serial Adapter
 
 /* Putercom products */
 product PUTERCOM UPA100		0x047e	USB-1284 BRIDGE
 
 /* Qcom products */
 product QCOM RT2573		0x6196	RT2573
 product QCOM RT2573_2		0x6229	RT2573
 product QCOM RT2573_3		0x6238	RT2573
 product QCOM RT2870		0x6259	RT2870
 
 /* Qisda products */
 product QISDA H21_1		0x4512	3G modem
 product QISDA H21_2		0x4523	3G modem
 product QISDA H20_1		0x4515	3G modem
 product QISDA H20_2		0x4519	3G modem
 
 /* Qualcomm products */
 product QUALCOMM CDMA_MSM	0x6000	CDMA Technologies MSM phone
 product QUALCOMM2 MF330		0x6613	MF330
 product QUALCOMM2 RWT_FCT	0x3100	RWT FCT-CDMA 2000 1xRTT modem
 product QUALCOMM2 CDMA_MSM	0x3196	CDMA Technologies MSM modem
 product QUALCOMM2 AC8700	0x6000	AC8700
 product QUALCOMMINC CDMA_MSM	0x0001	CDMA Technologies MSM modem
 product QUALCOMMINC E0002	0x0002	3G modem
 product QUALCOMMINC E0003	0x0003	3G modem
 product QUALCOMMINC E0004	0x0004	3G modem
 product QUALCOMMINC E0005	0x0005	3G modem
 product QUALCOMMINC E0006	0x0006	3G modem
 product QUALCOMMINC E0007	0x0007	3G modem
 product QUALCOMMINC E0008	0x0008	3G modem
 product QUALCOMMINC E0009	0x0009	3G modem
 product QUALCOMMINC E000A	0x000a	3G modem
 product QUALCOMMINC E000B	0x000b	3G modem
 product QUALCOMMINC E000C	0x000c	3G modem
 product QUALCOMMINC E000D	0x000d	3G modem
 product QUALCOMMINC E000E	0x000e	3G modem
 product QUALCOMMINC E000F	0x000f	3G modem
 product QUALCOMMINC E0010	0x0010	3G modem
 product QUALCOMMINC E0011	0x0011	3G modem
 product QUALCOMMINC E0012	0x0012	3G modem
 product QUALCOMMINC E0013	0x0013	3G modem
 product QUALCOMMINC E0014	0x0014	3G modem
 product QUALCOMMINC MF628	0x0015	3G modem
 product QUALCOMMINC MF633R	0x0016	ZTE WCDMA modem
 product QUALCOMMINC E0017	0x0017	3G modem
 product QUALCOMMINC E0018	0x0018	3G modem
 product QUALCOMMINC E0019	0x0019	3G modem
 product QUALCOMMINC E0020	0x0020	3G modem
 product QUALCOMMINC E0021	0x0021	3G modem
 product QUALCOMMINC E0022	0x0022	3G modem
 product QUALCOMMINC E0023	0x0023	3G modem
 product QUALCOMMINC E0024	0x0024	3G modem
 product QUALCOMMINC E0025	0x0025	3G modem
 product QUALCOMMINC E0026	0x0026	3G modem
 product QUALCOMMINC E0027	0x0027	3G modem
 product QUALCOMMINC E0028	0x0028	3G modem
 product QUALCOMMINC E0029	0x0029	3G modem
 product QUALCOMMINC E0030	0x0030	3G modem
 product QUALCOMMINC MF626	0x0031	3G modem
 product QUALCOMMINC E0032	0x0032	3G modem
 product QUALCOMMINC E0033	0x0033	3G modem
 product QUALCOMMINC E0037	0x0037	3G modem
 product QUALCOMMINC E0039	0x0039	3G modem
 product QUALCOMMINC E0042	0x0042	3G modem
 product QUALCOMMINC E0043	0x0043	3G modem
 product QUALCOMMINC E0048	0x0048	3G modem
 product QUALCOMMINC E0049	0x0049	3G modem
 product QUALCOMMINC E0051	0x0051	3G modem
 product QUALCOMMINC E0052	0x0052	3G modem
 product QUALCOMMINC ZTE_STOR2	0x0053	USB ZTE Storage
 product QUALCOMMINC E0054	0x0054	3G modem
 product QUALCOMMINC E0055	0x0055	3G modem
 product QUALCOMMINC E0057	0x0057	3G modem
 product QUALCOMMINC E0058	0x0058	3G modem
 product QUALCOMMINC E0059	0x0059	3G modem
 product QUALCOMMINC E0060	0x0060	3G modem
 product QUALCOMMINC E0061	0x0061	3G modem
 product QUALCOMMINC E0062	0x0062	3G modem
 product QUALCOMMINC E0063	0x0063	3G modem
 product QUALCOMMINC E0064	0x0064	3G modem
 product QUALCOMMINC E0066	0x0066	3G modem
 product QUALCOMMINC E0069	0x0069	3G modem
 product QUALCOMMINC E0070	0x0070	3G modem
 product QUALCOMMINC E0073	0x0073	3G modem
 product QUALCOMMINC E0076	0x0076	3G modem
 product QUALCOMMINC E0078	0x0078	3G modem
 product QUALCOMMINC E0082	0x0082	3G modem
 product QUALCOMMINC E0086	0x0086	3G modem
 product QUALCOMMINC ZTE_STOR	0x2000	USB ZTE Storage
 product QUALCOMMINC E2002	0x2002	3G modem
 product QUALCOMMINC E2003	0x2003	3G modem
 product QUALCOMMINC AC8710	0xfff1	3G modem
 product QUALCOMMINC AC2726	0xfff5	3G modem
 product QUALCOMMINC AC8700	0xfffe	CDMA 1xEVDO USB modem
 
 /* Quanta products */
 product QUANTA RW6815_1		0x00ce	HP iPAQ rw6815
 product QUANTA RT3070		0x0304	RT3070
 product QUANTA Q101_STOR	0x1000	USB Q101 Storage
 product QUANTA Q101		0xea02	HSDPA modem
 product QUANTA Q111		0xea03	HSDPA modem
 product QUANTA GLX		0xea04	HSDPA modem
 product QUANTA GKE		0xea05	HSDPA modem
 product QUANTA GLE		0xea06	HSDPA modem
 product QUANTA RW6815R		0xf003	HP iPAQ rw6815 RNDIS
 
 /* Qtronix products */
 product QTRONIX 980N		0x2011	Scorpion-980N keyboard
 
 /* Quickshot products */
 product QUICKSHOT STRIKEPAD	0x6238	USB StrikePad
 
 /* Radio Shack */
 product RADIOSHACK USBCABLE	0x4026	USB to Serial Cable
 
 /* Rainbow Technologies products */
 product RAINBOW IKEY2000	0x1200	i-Key 2000
 
 /* Ralink Technology products */
 product RALINK RT2570		0x1706	RT2500USB Wireless Adapter
 product RALINK RT2070		0x2070	RT2070
 product RALINK RT2570_2		0x2570	RT2500USB Wireless Adapter
 product RALINK RT2573		0x2573	RT2501USB Wireless Adapter
 product RALINK RT2671		0x2671	RT2601USB Wireless Adapter
 product RALINK RT2770		0x2770	RT2770
 product RALINK RT2870		0x2870	RT2870
 product RALINK RT3070		0x3070	RT3070
 product RALINK RT3071		0x3071	RT3071
 product RALINK RT3072		0x3072	RT3072
 product RALINK RT3370		0x3370	RT3370
 product RALINK RT3572		0x3572	RT3572
 product RALINK RT8070		0x8070	RT8070
 product RALINK RT2570_3		0x9020	RT2500USB Wireless Adapter
 product RALINK RT2573_2		0x9021	RT2501USB Wireless Adapter
 
 /* RATOC Systems products */
 product RATOC REXUSB60		0xb000	USB serial adapter REX-USB60
 product RATOC REXUSB60F		0xb020	USB serial adapter REX-USB60F
 
 /* Realtek products */
 /* Green House and CompUSA OEM this part */
 product REALTEK USB20CRW	0x0158	USB20CRW Card Reader
 product REALTEK USBKR100	0x8150	USBKR100 USB Ethernet
 product REALTEK RTL8187		0x8187	RTL8187 Wireless Adapter
 product REALTEK RTL8187B_0	0x8189	RTL8187B Wireless Adapter
 product REALTEK RTL8187B_1	0x8197	RTL8187B Wireless Adapter
 product REALTEK RTL8187B_2	0x8198	RTL8187B Wireless Adapter
 
 /* Ricoh products */
 product RICOH VGPVCC2		0x1830	VGP-VCC2 Camera
 product RICOH VGPVCC3		0x1832	VGP-VCC3 Camera
 product RICOH VGPVCC2_2		0x1833	VGP-VCC2 Camera
 product RICOH VGPVCC2_3		0x1834	VGP-VCC2 Camera
 product RICOH VGPVCC7		0x183a	VGP-VCC7 Camera
 product RICOH VGPVCC8		0x183b	VGP-VCC8 Camera
 
 /* Reiner-SCT products */
 product REINERSCT CYBERJACK_ECOM	0x0100	e-com cyberJack
 
 /* Roland products */
 product ROLAND UM1		0x0009	UM-1 MIDI I/F
 product ROLAND UM880N		0x0014	EDIROL UM-880 MIDI I/F (native)
 product ROLAND UM880G		0x0015	EDIROL UM-880 MIDI I/F (generic)
 
 /* Rockfire products */
 product ROCKFIRE GAMEPAD	0x2033	gamepad 203USB
 
 /* RATOC Systems products */
 product RATOC REXUSB60		0xb000	REX-USB60
 product RATOC REXUSB60F		0xb020	REX-USB60F
 
 /* Sagem products */
 product SAGEM USBSERIAL		0x0027	USB-Serial Controller
 product SAGEM XG760A		0x004a	XG-760A
 product SAGEM XG76NA		0x0062	XG-76NA
 
 /* Samsung products */
 product SAMSUNG ML6060		0x3008	ML-6060 laser printer
 product SAMSUNG YP_U2		0x5050	YP-U2 MP3 Player
 product SAMSUNG YP_U4		0x5092	YP-U4 MP3 Player
 product SAMSUNG I500		0x6601	I500 Palm USB Phone
 product SAMSUNG I330		0x8001	I330 phone cradle
 product SAMSUNG2 RT2870_1	0x2018	RT2870
 
 /* Samsung Techwin products */
 product SAMSUNG_TECHWIN DIGIMAX_410	0x000a	Digimax 410
 
 /* SanDisk products */
 product SANDISK SDDR05A		0x0001	ImageMate SDDR-05a
 product SANDISK SDDR31		0x0002	ImageMate SDDR-31
 product SANDISK SDDR05		0x0005	ImageMate SDDR-05
 product SANDISK SDDR12		0x0100	ImageMate SDDR-12
 product SANDISK SDDR09		0x0200	ImageMate SDDR-09
 product SANDISK SDDR75		0x0810	ImageMate SDDR-75
 product SANDISK SDCZ2_256	0x7104	Cruzer Mini 256MB
 product SANDISK SDCZ4_128	0x7112	Cruzer Micro 128MB
 product SANDISK SDCZ4_256	0x7113	Cruzer Micro 256MB
 
 /* Sanwa Electric Instrument Co., Ltd. products */
 product SANWA KB_USB2 		0x0701	KB-USB2 multimeter cable
 
 /* Sanyo Electric products */
 product SANYO SCP4900 		0x0701	Sanyo SCP-4900 USB Phone
 
 /* ScanLogic products */
 product SCANLOGIC SL11R		0x0002	SL11R IDE Adapter
 product SCANLOGIC 336CX		0x0300	Phantom 336CX - C3 scanner
 
 /* Senao products */
 product SENAO RT2870_3		0x0605	RT2870
 product SENAO RT2870_4		0x0615	RT2870
 product SENAO NUB8301		0x2000	NUB-8301
 product SENAO RT2870_1		0x9701	RT2870
 product SENAO RT2870_2		0x9702	RT2870
 product SENAO RT3070		0x9703	RT3070
 product SENAO RT3071		0x9705	RT3071
 product SENAO RT3072_1		0x9706	RT3072
 product SENAO RT3072_2		0x9707	RT3072
 product SENAO RT3072_3		0x9708	RT3072
 product SENAO RT3072_4		0x9709	RT3072
 product SENAO RT3072_5		0x9801	RT3072
 
 /* ShanTou products */
 product SHANTOU ST268		0x0268	ST268
 product SHANTOU DM9601		0x9601	DM 9601
 
 /* Shark products */
 product SHARK PA		0x0400	Pocket Adapter
 
 /* Sharp products */
 product SHARP SL5500		0x8004	Zaurus SL-5500 PDA
 product SHARP SLA300		0x8005	Zaurus SL-A300 PDA
 product SHARP SL5600		0x8006	Zaurus SL-5600 PDA
 product SHARP SLC700		0x8007	Zaurus SL-C700 PDA
 product SHARP SLC750		0x9031	Zaurus SL-C750 PDA
 product SHARP WZERO3ES		0x9123	W-ZERO3 ES Smartphone
 product SHARP WZERO3ADES	0x91ac	Advanced W-ZERO3 ES Smartphone
 product SHARP WILLCOM03		0x9242	WILLCOM03
 
 /* Shuttle Technology products */
 product SHUTTLE EUSB		0x0001	E-USB Bridge
 product SHUTTLE EUSCSI		0x0002	eUSCSI Bridge
 product SHUTTLE SDDR09		0x0003	ImageMate SDDR09
 product SHUTTLE EUSBCFSM	0x0005	eUSB SmartMedia / CompactFlash Adapter
 product SHUTTLE ZIOMMC		0x0006	eUSB MultiMediaCard Adapter
 product SHUTTLE HIFD		0x0007	Sony Hifd
 product SHUTTLE EUSBATAPI	0x0009	eUSB ATA/ATAPI Adapter
 product SHUTTLE CF		0x000a	eUSB CompactFlash Adapter
 product SHUTTLE EUSCSI_B	0x000b	eUSCSI Bridge
 product SHUTTLE EUSCSI_C	0x000c	eUSCSI Bridge
 product SHUTTLE CDRW		0x0101	CD-RW Device
 product SHUTTLE EUSBORCA	0x0325	eUSB ORCA Quad Reader
 
 /* Siemens products */
 product SIEMENS SPEEDSTREAM	0x1001	SpeedStream
 product SIEMENS SPEEDSTREAM22	0x1022	SpeedStream 1022
 product SIEMENS2 WLL013		0x001b	WLL013
 product SIEMENS2 ES75		0x0034  GSM module MC35
 product SIEMENS2 WL54G		0x3c06	54g USB Network Adapter
 product SIEMENS3 SX1		0x0001	SX1
 product SIEMENS3 X65		0x0003	X65
 product SIEMENS3 X75		0x0004	X75
 product SIEMENS3 EF81 		0x0005	EF81
 
 /* Sierra Wireless products */
 product SIERRA EM5625		0x0017	EM5625
 product SIERRA MC5720_2		0x0018	MC5720
 product SIERRA MC5725		0x0020	MC5725
 product SIERRA AIRCARD580	0x0112	Sierra Wireless AirCard 580
 product SIERRA AIRCARD595	0x0019	Sierra Wireless AirCard 595
 product SIERRA AC595U		0x0120	Sierra Wireless AirCard 595U
 product SIERRA AC597E		0x0021	Sierra Wireless AirCard 597E
 product SIERRA EM5725		0x0022	EM5725
 product SIERRA C597		0x0023	Sierra Wireless Compass 597
 product SIERRA MC5727		0x0024	MC5727
 product SIERRA T598		0x0025	T598
 product SIERRA T11		0x0026	T11
 product SIERRA AC402		0x0027	AC402
 product SIERRA MC5728		0x0028	MC5728
 product SIERRA E0029		0x0029	E0029
 product SIERRA AIRCARD580	0x0112	Sierra Wireless AirCard 580
 product SIERRA AC595U		0x0120	Sierra Wireless AirCard 595U
 product SIERRA MC5720		0x0218	MC5720 Wireless Modem
 product SIERRA MINI5725		0x0220	Sierra Wireless miniPCI 5275
 product SIERRA MC5727_2		0x0224	MC5727
 product SIERRA MC8755_2		0x6802	MC8755
 product SIERRA MC8765		0x6803	MC8765
 product SIERRA MC8755		0x6804	MC8755
 product SIERRA MC8765_2		0x6805	MC8765
 product SIERRA MC8755_4		0x6808	MC8755
 product SIERRA MC8765_3		0x6809	MC8765
 product SIERRA AC875U		0x6812	AC875U HSDPA USB Modem
 product SIERRA MC8755_3		0x6813	MC8755 HSDPA
 product SIERRA MC8775_2		0x6815	MC8775
 product SIERRA MC8775		0x6816	MC8775
 product SIERRA AC875		0x6820	Sierra Wireless AirCard 875
 product SIERRA AC875U_2		0x6821	AC875U
 product SIERRA AC875E		0x6822	AC875E
 product SIERRA MC8780		0x6832	MC8780
 product SIERRA MC8781		0x6833	MC8781
 product SIERRA MC8780_2		0x6834	MC8780
 product SIERRA MC8781_2		0x6835	MC8781
 product SIERRA MC8780_3		0x6838	MC8780
 product SIERRA MC8781_3		0x6839	MC8781
 product SIERRA MC8785		0x683A	MC8785
 product SIERRA MC8785_2		0x683B	MC8785
 product SIERRA MC8790		0x683C	MC8790
 product SIERRA MC8791		0x683D	MC8791
 product SIERRA MC8792		0x683E	MC8792
 product SIERRA AC880		0x6850	Sierra Wireless AirCard 880
 product SIERRA AC881		0x6851	Sierra Wireless AirCard 881
 product SIERRA AC880E		0x6852	Sierra Wireless AirCard 880E
 product SIERRA AC881E		0x6853	Sierra Wireless AirCard 881E
 product SIERRA AC880U		0x6855	Sierra Wireless AirCard 880U
 product SIERRA AC881U		0x6856	Sierra Wireless AirCard 881U
 product SIERRA AC885E		0x6859	AC885E
 product SIERRA AC885E_2		0x685A	AC885E
 product SIERRA AC885U		0x6880	Sierra Wireless AirCard 885U
 product SIERRA C888		0x6890	C888
 product SIERRA C22		0x6891	C22
 product SIERRA E6892		0x6892	E6892
 product SIERRA E6893		0x6893	E6893
 product SIERRA MC8700		0x68A3	MC8700
 product SIERRA AIRCARD875	0x6820	Aircard 875 HSDPA
 product SIERRA TRUINSTALL	0x0fff	Aircard Tru Installer
 
 /* Sigmatel products */
 product SIGMATEL WBT_3052	0x4200	WBT-3052 IrDA/USB Bridge
 product SIGMATEL I_BEAD100	0x8008	i-Bead 100 MP3 Player
 
 /* SIIG products */
 /* Also: Omnidirectional Control Technology products */
 product SIIG DIGIFILMREADER	0x0004	DigiFilm-Combo Reader
 product SIIG WINTERREADER	0x0330	WINTERREADER Reader
 product SIIG2 USBTOETHER	0x0109	USB TO Ethernet
 product SIIG2 US2308		0x0421	Serial
 
 /* Silicom products */
 product SILICOM U2E		0x0001	U2E
 product SILICOM GPE		0x0002	Psion Gold Port Ethernet
 
 /* SI Labs */
 product SILABS	VSTABI		0x0f91	Vstabi
 product SILABS	ARKHAM_DS101_M	0x1101	Arkham DS101 Monitor
 product SILABS	ARKHAM_DS101_A	0x1601	Arkham DS101 Adapter
 product SILABS	BSM7DUSB	0x800a	BSM7-D-USB
 product SILABS	POLOLU		0x803b	Pololu Serial
 product SILABS	CYGNAL_DEBUG	0x8044	Cygnal Debug Adapter
 product SILABS	SB_PARAMOUNT_ME	0x8043	Software Bisque Paramount ME
 product SILABS	SAEL		0x8053	SA-EL USB
 product SILABS	GSM2228		0x8054	Enfora GSM2228 USB
 product SILABS	ARGUSISP	0x8066	Argussoft ISP
 product SILABS	IMS_USB_RS422	0x806f	IMS USB-RS422
 product SILABS	CRUMB128	0x807a	Crumb128 board
 product SILABS	DEGREE		0x80ca	Degree Controls Inc
 product SILABS	TRACIENT	0x80dd	Tracient RFID
 product SILABS	TRAQMATE	0x80ed	Track Systems Traqmate
 product SILABS	SUUNTO		0x80f6	Suunto Sports Instrument
 product SILABS	ARYGON_MIFARE	0x8115	Arygon Mifare RFID reader
 product SILABS	BURNSIDE	0x813d	Burnside Telecon Deskmobile
 product SILABS	TAMSMASTER	0x813f	Tams Master Easy Control
 product SILABS	WMRBATT		0x814a	WMR RIGblaster Plug&Play
 product SILABS	WMRRIGBLASTER	0x814a	WMR RIGblaster Plug&Play
 product SILABS	WMRRIGTALK	0x814b	WMR RIGtalk RT1
 product SILABS	HELICOM		0x815e	Helicomm IP-Link 1220-DVM
 product SILABS	AVIT_USB_TTL	0x818b	AVIT Research USB-TTL
 product SILABS	MJS_TOSLINK	0x819f	MJS USB-TOSLINk
 product SILABS	WAVIT		0x81a6	ThinkOptics WavIt
 product SILABS	MSD_DASHHAWK	0x81ac	MSD DashHawk
 product SILABS	INSYS_MODEM	0x81ad	INSYS Modem
 product SILABS	LIPOWSKY_JTAG	0x81c8	Lipowsky Baby-JTAG
 product SILABS	LIPOWSKY_LIN	0x81e2	Lipowsky Baby-LIN
 product SILABS	AEROCOMM	0x81e7	Aerocomm Radio
 product SILABS	ZEPHYR_BIO	0x81e8	Zephyr Bioharness
 product SILABS	EMS_C1007	0x81f2	EMS C1007 HF RFID controller
 product SILABS	LIPOWSKY_HARP	0x8218	Lipowsky HARP-1
 product SILABS	C2_EDGE_MODEM	0x822b	Commander 2 EDGE(GSM) Modem
 product SILABS	CYGNAL_GPS	0x826b	Cygnal Fasttrax GPS
 product SILABS	TELEGESYS_ETRX2	0x8293	Telegesys ETRX2USB
 product SILABS	PROCYON_AVS	0x82f9	Procyon AVS
 product SILABS	MC35PU		0x8341	MC35pu
 product SILABS	CYGNAL		0x8382	Cygnal
 product SILABS	AMBER_AMB2560	0x83a8	Amber Wireless AMB2560
 product SILABS	KYOCERA_GPS	0x8411	Kyocera GPS
 product SILABS	BEI_VCP		0x846e	BEI USB Sensor (VCP)
 product SILABS	CP2102		0xea60	SILABS USB UART
 product SILABS	CP210X_2	0xea61	CP210x Serial
 product SILABS	INFINITY_MIC	0xea71	Infinity GPS-MIC-1 Radio Monophone
 product SILABS	USBSCOPE50	0xf001	USBscope50
 product SILABS	USBWAVE12	0xf002	USBwave12
 product SILABS	USBPULSE100	0xf003	USBpulse100
 product SILABS	USBCOUNT50	0xf004	USBcount50
 product SILABS2 DCU11CLONE	0xaa26	DCU-11 clone
 product SILABS3 GPRS_MODEM	0xea61	GPRS Modem
 product SILABS4 100EU_MODEM	0xea61	GPRS Modem 100EU
 
 /* Silicon Portals Inc. */
 product SILICONPORTALS YAPPH_NF	0x0200	YAP Phone (no firmware)
 product SILICONPORTALS YAPPHONE	0x0201	YAP Phone
 
 /* Sirius Technologies products */
 product SIRIUS ROADSTER		0x0001	NetComm Roadster II 56 USB
 
 /* Sitecom products */
 product SITECOM LN029		0x182d	USB 2.0 Ethernet
 product SITECOM SERIAL		0x2068	USB to serial cable (v2)
 product SITECOM2 WL022		0x182d	WL-022
 
 /* Sitecom Europe products */
 product SITECOMEU RT2870_1	0x0017	RT2870
 product SITECOMEU WL168V1	0x000d	WL-168 v1
 product SITECOMEU WL168V4	0x0028	WL-168 v4
 product SITECOMEU RT2870_2	0x002b	RT2870
 product SITECOMEU RT2870_3	0x002c	RT2870
 product SITECOMEU RT2870_4	0x002d	RT2870
 product SITECOMEU RT2770	0x0039	RT2770
 product SITECOMEU RT3070_2	0x003b	RT3070
 product SITECOMEU RT3070_3	0x003c	RT3070
 product SITECOMEU RT3070_4	0x003d	RT3070
 product SITECOMEU RT3070	0x003e	RT3070
 product SITECOMEU WL608		0x003f	WL-608
 product SITECOMEU RT3071	0x0040	RT3071
 product SITECOMEU RT3072_1	0x0041	RT3072
 product SITECOMEU RT3072_2	0x0042	RT3072
 product SITECOMEU RT3072_3	0x0047	RT3072
 product SITECOMEU RT3072_4	0x0048	RT3072
 product SITECOMEU RT3072_5	0x004a	RT3072
 product SITECOMEU RT3072_6	0x004d	RT3072
 product SITECOMEU LN028		0x061c	LN-028
 product SITECOMEU WL113		0x9071	WL-113
 product SITECOMEU ZD1211B	0x9075	ZD1211B
 product SITECOMEU WL172		0x90ac	WL-172
 product SITECOMEU WL113R2	0x9712	WL-113 rev 2
 
 /* Skanhex Technology products */
 product SKANHEX MD_7425		0x410a	MD 7425 Camera
 product SKANHEX SX_520Z		0x5200	SX 520z Camera
 
 /* Smart Technologies products */
 product SMART PL2303		0x2303	Serial adapter
 
 /* SmartBridges products */
 product SMARTBRIDGES SMARTLINK	0x0001	SmartLink USB Ethernet
 product SMARTBRIDGES SMARTNIC	0x0003	smartNIC 2 PnP Ethernet
 
 /* SMC products */
 product SMC 2102USB		0x0100	10Mbps Ethernet
 product SMC 2202USB		0x0200	10/100 Ethernet
 product SMC 2206USB		0x0201	EZ Connect USB Ethernet
 product SMC 2862WG		0xee13	EZ Connect Wireless Adapter
 product SMC2 2020HUB		0x2020	USB Hub
 product SMC2 2514HUB		0x2514	USB Hub
 product SMC3 2662WUSB		0xa002	2662W-AR Wireless
 
 /* SOHOware products */
 product SOHOWARE NUB100		0x9100	10/100 USB Ethernet
 product SOHOWARE NUB110		0x9110	10/100 USB Ethernet
 
 /* SOLID YEAR products */
 product SOLIDYEAR KEYBOARD	0x2101	Solid Year USB keyboard
 
 /* SONY products */
 product SONY DSC		0x0010	DSC cameras
 product SONY MS_NW_MS7		0x0025	Memorystick NW-MS7
 product SONY PORTABLE_HDD_V2	0x002b	Portable USB Harddrive V2
 product SONY MSACUS1		0x002d	Memorystick MSAC-US1
 product SONY HANDYCAM		0x002e	Handycam
 product SONY MSC		0x0032	MSC memory stick slot
 product SONY CLIE_35		0x0038	Sony Clie v3.5
 product SONY MS_PEG_N760C	0x0058	PEG N760c Memorystick
 product SONY CLIE_40		0x0066	Sony Clie v4.0
 product SONY MS_MSC_U03		0x0069	Memorystick MSC-U03
 product SONY CLIE_40_MS		0x006d	Sony Clie v4.0 Memory Stick slot
 product SONY CLIE_S360		0x0095	Sony Clie s360
 product SONY CLIE_41_MS		0x0099	Sony Clie v4.1 Memory Stick slot
 product SONY CLIE_41		0x009a	Sony Clie v4.1
 product SONY CLIE_NX60		0x00da	Sony Clie nx60
 product SONY CLIE_TH55		0x0144	Sony Clie th55
 product SONY CLIE_TJ37		0x0169	Sony Clie tj37
 product SONY RF_RECEIVER	0x01db	Sony RF mouse/kbd Receiver VGP-WRC1
 product SONY QN3		0x0437	Sony QN3 CMD-Jxx phone cable
 
 /* Sony Ericsson products */
 product SONYERICSSON DCU10	0x0528	DCU-10 Phone Data Cable
 product SONYERICSSON DATAPILOT	0x2003	Datapilot Phone Cable
 
 /* SOURCENEXT products */
 product SOURCENEXT KEIKAI8	0x039f	KeikaiDenwa 8
 product SOURCENEXT KEIKAI8_CHG	0x012e	KeikaiDenwa 8 with charger
 
 /* SparkLAN products */
 product SPARKLAN RT2573		0x0004	RT2573
 product SPARKLAN RT2870_1	0x0006	RT2870
 product SPARKLAN RT3070		0x0010	RT3070
 
 /* Speed Dragon Multimedia products */
 product SPEEDDRAGON MS3303H	0x110b	MS3303H Serial
 
 /* Sphairon Access Systems GmbH products */
 product SPHAIRON UB801R		0x0110	UB801R
 
 /* Stelera Wireless products */
 product STELERA ZEROCD		0x1000	Zerocd Installer
 product STELERA C105		0x1002	Stelera/Bandrish C105 USB
 product STELERA E1003		0x1003	3G modem
 product STELERA E1004		0x1004	3G modem
 product STELERA E1005		0x1005	3G modem
 product STELERA E1006		0x1006	3G modem
 product STELERA E1007		0x1007	3G modem
 product STELERA E1008		0x1008	3G modem
 product STELERA E1009		0x1009	3G modem
 product STELERA E100A		0x100a	3G modem
 product STELERA E100B		0x100b	3G modem
 product STELERA E100C		0x100c	3G modem
 product STELERA E100D		0x100d	3G modem
 product STELERA E100E		0x100e	3G modem
 product STELERA E100F		0x100f	3G modem
 product STELERA E1010		0x1010	3G modem
 product STELERA E1011		0x1011	3G modem
 product STELERA E1012		0x1012	3G modem
 
 /* MpMan products */
 product MPMAN MPF400_1		0x36d0	MPF400 Music Player 1Go
 product MPMAN MPF400_2		0x25a8	MPF400 Music Player 2Go
 
 /* STMicroelectronics products */
 product STMICRO BIOCPU		0x2016	Biometric Coprocessor
 product STMICRO COMMUNICATOR	0x7554	USB Communicator
 
 /* STSN products */
 product STSN STSN0001		0x0001	Internet Access Device
 
 /* SUN Corporation products */
 product SUNTAC DS96L		0x0003	SUNTAC U-Cable type D2
 product SUNTAC PS64P1		0x0005	SUNTAC U-Cable type P1
 product SUNTAC VS10U		0x0009	SUNTAC Slipper U
 product SUNTAC IS96U		0x000a	SUNTAC Ir-Trinity
 product SUNTAC AS64LX		0x000b	SUNTAC U-Cable type A3
 product SUNTAC AS144L4		0x0011	SUNTAC U-Cable type A4
 
 /* Sun Microsystems products */
 product SUN KEYBOARD_TYPE_6	0x0005	Type 6 USB keyboard
 product SUN KEYBOARD_TYPE_7	0x00a2	Type 7 USB keyboard
 /* XXX The above is a North American PC style keyboard possibly */
 product SUN MOUSE		0x0100	Type 6 USB mouse
 product SUN KBD_HUB		0x100e	Kbd Hub
 
 /* Super Top products */
 product	SUPERTOP IDE		0x6600	USB-IDE
 
 /* Syntech products */
 product SYNTECH CPT8001C	0x0001	CPT-8001C Barcode scanner
 product	SYNTECH CYPHERLAB100	0x1000	CipherLab USB Barcode Scanner
 
 /* Teclast products */
 product TECLAST TLC300		0x3203	USB Media Player
 
 /* Supra products */
 product DIAMOND2 SUPRAEXPRESS56K 0x07da	Supra Express 56K modem
 product DIAMOND2 SUPRA2890	0x0b4a	SupraMax 2890 56K Modem
 product DIAMOND2 RIO600USB	0x5001	Rio 600 USB
 product DIAMOND2 RIO800USB	0x5002	Rio 800 USB
 
 /* Surecom Technology products */
 product SURECOM EP9001G2A	0x11f2	EP-9001-G rev 2A
 product SURECOM RT2570		0x11f3	RT2570
 product SURECOM RT2573		0x31f3	RT2573
 
 /* Sweex products */
 product SWEEX ZD1211		0x1809	ZD1211
 product SWEEX2 LW153		0x0153	LW153
 product SWEEX2 LW303		0x0302	LW303
 product SWEEX2 LW313		0x0313	LW313
 
 /* System TALKS, Inc. */
 product SYSTEMTALKS SGCX2UL	0x1920	SGC-X2UL
 
 /* Tapwave products */
 product TAPWAVE ZODIAC		0x0100	Zodiac
 
 /* Taugagreining products */
 product TAUGA CAMERAMATE	0x0005	CameraMate (DPCM_USB)
 
 /* TCTMobile products */
 product TCTMOBILE X060S		0x0000	X060S 3G modem
 product TCTMOBILE X080S		0xf000	X080S 3G modem
 
 /* TDK products */
 product TDK UPA9664		0x0115	USB-PDC Adapter UPA9664
 product TDK UCA1464		0x0116	USB-cdmaOne Adapter UCA1464
 product TDK UHA6400		0x0117	USB-PHS Adapter UHA6400
 product TDK UPA6400		0x0118	USB-PHS Adapter UPA6400
 product TDK BT_DONGLE		0x0309	Bluetooth USB dongle
 
 /* TEAC products */
 product TEAC FD05PUB		0x0000	FD-05PUB floppy
 
 /* Tekram Technology products */
 product TEKRAM QUICKWLAN	0x1630	QuickWLAN
 product TEKRAM ZD1211_1		0x5630	ZD1211
 product TEKRAM ZD1211_2		0x6630	ZD1211
 
 /* Telex Communications products */
 product TELEX MIC1		0x0001	Enhanced USB Microphone
 
 /* Telit products */
 product TELIT UC864E		0x1003	UC864E 3G modem
 product TELIT UC864G		0x1004	UC864G 3G modem
 
 /* Ten X Technology, Inc. */
 product TENX UAUDIO0		0xf211	USB audio headset
 
 /* Texas Instruments products */
 product TI UTUSB41		0x1446	UT-USB41 hub
 product TI TUSB2046		0x2046	TUSB2046 hub
 
 /* Thrustmaster products */
 product THRUST FUSION_PAD	0xa0a3	Fusion Digital Gamepad
 
 /* TLayTech products */
 product TLAYTECH TEU800		0x1682	TEU800 3G modem
 
 /* Topre Corporation products */
 product TOPRE HHKB		0x0100	HHKB Professional
 
 /* Toshiba Corporation products */
 product TOSHIBA POCKETPC_E740	0x0706	PocketPC e740
 product TOSHIBA RT3070		0x0a07	RT3070
 product TOSHIBA G450		0x0d45	G450 modem
 product TOSHIBA HSDPA		0x1302	G450 modem
 
 /* Trek Technology products */
 product TREK THUMBDRIVE		0x1111	ThumbDrive
 product TREK MEMKEY		0x8888	IBM USB Memory Key
 product TREK THUMBDRIVE_8MB	0x9988	ThumbDrive_8MB
 
 /* Tripp-Lite products */
 product TRIPPLITE U209		0x2008	Serial
 
 /* Trumpion products */
 product TRUMPION T33520		0x1001	T33520 USB Flash Card Controller
 product TRUMPION C3310		0x1100	Comotron C3310 MP3 player
 product TRUMPION MP3		0x1200	MP3 player
 
 /* TwinMOS */
 product TWINMOS G240		0xa006	G240
 product TWINMOS MDIV		0x1325	Memory Disk IV
 
 /* Ubiquam products */
 product UBIQUAM UALL		0x3100	CDMA 1xRTT USB Modem (U-100/105/200/300/520)
 
 /* Ultima products */
 product ULTIMA 1200UBPLUS	0x4002	1200 UB Plus scanner
 
 /* UMAX products */
 product UMAX ASTRA1236U		0x0002	Astra 1236U Scanner
 product UMAX ASTRA1220U		0x0010	Astra 1220U Scanner
 product UMAX ASTRA2000U		0x0030	Astra 2000U Scanner
 product UMAX ASTRA2100U		0x0130	Astra 2100U Scanner
 product UMAX ASTRA2200U		0x0230	Astra 2200U Scanner
 product UMAX ASTRA3400		0x0060	Astra 3400 Scanner
 
 /* U-MEDIA Communications products */
 product UMEDIA TEW444UBEU	0x3006	TEW-444UB EU
 product UMEDIA TEW444UBEU_NF	0x3007	TEW-444UB EU (no firmware)
 product UMEDIA TEW429UB_A	0x300a	TEW-429UB_A
 product UMEDIA TEW429UB		0x300b	TEW-429UB
 product UMEDIA TEW429UBC1	0x300d	TEW-429UB C1
 product UMEDIA RT2870_1		0x300e	RT2870
 product UMEDIA ALL0298V2	0x3204	ALL0298 v2
 product UMEDIA AR5523_2		0x3205	AR5523
 product UMEDIA AR5523_2_NF	0x3206	AR5523 (no firmware)
 
 /* Universal Access products */
 product UNIACCESS PANACHE	0x0101	Panache Surf USB ISDN Adapter
 
 /* USI products */
 product USI MC60		0x10c5	MC60 Serial
 
 /* U.S. Robotics products */
 product USR USR5422		0x0118	USR5422 WLAN
 product USR USR5423		0x0121	USR5423 WLAN
 
 /* VIA Technologies products */
 product VIA USB2IDEBRIDGE	0x6204	USB 2.0 IDE Bridge
 
 /* Vaisala products */
 product VAISALA CABLE		0x0200	USB Interface cable
 
 /* VidzMedia products */
 product VIDZMEDIA MONSTERTV	0x4fb1	MonsterTV P2H
 
 /* Vision products */
 product VISION VC6452V002	0x0002	CPiA Camera
 
 /* Visioneer products */
 product VISIONEER 7600		0x0211	OneTouch 7600
 product VISIONEER 5300		0x0221	OneTouch 5300
 product VISIONEER 3000		0x0224	Scanport 3000
 product VISIONEER 6100		0x0231	OneTouch 6100
 product VISIONEER 6200		0x0311	OneTouch 6200
 product VISIONEER 8100		0x0321	OneTouch 8100
 product VISIONEER 8600		0x0331	OneTouch 8600
 
 /* Vivitar products */
 product VIVITAR 35XX		0x0003	Vivicam 35Xx
 
 /* VTech products */
 product VTECH RT2570		0x3012	RT2570
 product VTECH ZD1211B		0x3014	ZD1211B
 
 /* Wacom products */
 product WACOM CT0405U		0x0000	CT-0405-U Tablet
 product WACOM GRAPHIRE		0x0010	Graphire
 product WACOM GRAPHIRE3_4X5	0x0013	Graphire 3 4x5
 product WACOM INTUOSA5		0x0021	Intuos A5
 product WACOM GD0912U		0x0022	Intuos 9x12 Graphics Tablet
 
 /* WaveSense products */
 product WAVESENSE JAZZ		0xaaaa	Jazz blood glucose meter
 
 /* WCH products */
 product WCH CH341SER		0x5523	CH341/CH340 USB-Serial Bridge
 product WCH2 CH341SER		0x7523	CH341/CH340 USB-Serial Bridge
 
 /* Western Digital products */
 product WESTERN COMBO		0x0200	Firewire USB Combo
 product WESTERN EXTHDD		0x0400	External HDD
 product WESTERN HUB		0x0500	USB HUB
 product WESTERN MYBOOK		0x0901	MyBook External HDD
 product WESTERN MYPASSWORD	0x0704	MyPassword External HDD
 
 /* Winbond Electronics */
 product WINBOND UH104		0x5518	4-port USB Hub
 
 /* WinMaxGroup products */
 product WINMAXGROUP FLASH64MC	0x6660	USB Flash Disk 64M-C
 
 /* Wistron NeWeb products */
 product WISTRONNEWEB UR045G	0x0427	PrismGT USB 2.0 WLAN
 product WISTRONNEWEB UR055G	0x0711	UR055G
 product WISTRONNEWEB AR5523_1	0x0826	AR5523
 product WISTRONNEWEB AR5523_1_NF	0x0827	AR5523 (no firmware)
 product WISTRONNEWEB AR5523_2	0x082a	AR5523
 product WISTRONNEWEB AR5523_2_NF	0x0829	AR5523 (no firmware)
 
 /* Xerox products */
 product XEROX WCM15		0xffef	WorkCenter M15
 
 /* Xirlink products */
 product XIRLINK PCCAM		0x8080	IBM PC Camera
 
 /* Xyratex products */
 product XYRATEX PRISM_GT_1	0x2000	PrismGT USB 2.0 WLAN
 product XYRATEX PRISM_GT_2	0x2002	PrismGT USB 2.0 WLAN
 
 /* Yamaha products */
 product YAMAHA UX256		0x1000	UX256 MIDI I/F
 product YAMAHA UX96		0x1008	UX96 MIDI I/F
 product YAMAHA RTA54I		0x4000	NetVolante RTA54i Broadband&ISDN Router
 product YAMAHA RTA55I		0x4004	NetVolante RTA55i Broadband VoIP Router
 product YAMAHA RTW65B		0x4001	NetVolante RTW65b Broadband Wireless Router
 product YAMAHA RTW65I		0x4002	NetVolante RTW65i Broadband&ISDN Wireless Router
 
 /* Yano products */
 product YANO U640MO		0x0101	U640MO-03
 product YANO FW800HD		0x05fc	METALWEAR-HDD
 
 /* Y.C. Cable products */
 product YCCABLE PL2303		0x0fba	PL2303 Serial
 
 /* Y-E Data products */
 product YEDATA FLASHBUSTERU	0x0000	Flashbuster-U
 
 /* Yiso Wireless Co. products */
 product	YISO C893		0xc893	CDMA 2000 1xEVDO PC Card
 
 /* Z-Com products */
 product ZCOM M4Y750		0x0001	M4Y-750
 product ZCOM XI725		0x0002	XI-725/726
 product ZCOM XI735		0x0005	XI-735
 product ZCOM XG703A		0x0008	PrismGT USB 2.0 WLAN
 product ZCOM ZD1211		0x0011	ZD1211
 product ZCOM AR5523		0x0012	AR5523
 product ZCOM AR5523_NF		0x0013	AR5523 driver (no firmware)
 product ZCOM XM142		0x0015	XM-142
 product ZCOM ZD1211B		0x001a	ZD1211B
 product ZCOM RT2870_1		0x0022	RT2870
 product ZCOM RT2870_2		0x0025	RT2870
 
 /* Zinwell products */
 product ZINWELL RT2570		0x0260	RT2570
 product ZINWELL RT2870_1	0x0280	RT2870
 product ZINWELL RT2870_2	0x0282	RT2870
 product ZINWELL RT3072_1	0x0283	RT3072
 product ZINWELL RT3072_2	0x0284	RT3072
 product ZINWELL RT3070		0x5257	RT3070
 
 /* Zoom Telephonics, Inc. products */
 product ZOOM 2986L		0x9700	2986L Fax modem
 
 /* Zoran Microelectronics products */
 product ZORAN EX20DSC		0x4343	Digital Camera EX-20 DSC
 
 /* Zydas Technology Corporation products */
 product ZYDAS ZD1211		0x1211	ZD1211 WLAN abg
 product ZYDAS ZD1211B		0x1215	ZD1211B
 
 /* ZyXEL Communication Co. products */
 product ZYXEL OMNI56K		0x1500	Omni 56K Plus
 product ZYXEL 980N		0x2011	Scorpion-980N keyboard
 product ZYXEL ZYAIRG220		0x3401	ZyAIR G-220
 product ZYXEL G200V2		0x3407	G-200 v2
 product ZYXEL AG225H		0x3409	AG-225H
 product ZYXEL M202		0x340a	M-202
 product ZYXEL G220V2		0x340f	G-220 v2
 product ZYXEL G202		0x3410	G-202
 product ZYXEL RT2870_1		0x3416	RT2870
 product ZYXEL RT2870_2		0x341a	RT2870
Index: projects/binutils-2.17/sys/i386/i386/pmap.c
===================================================================
--- projects/binutils-2.17/sys/i386/i386/pmap.c	(revision 215829)
+++ projects/binutils-2.17/sys/i386/i386/pmap.c	(revision 215830)
@@ -1,5208 +1,5207 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_cpu.h"
 #include "opt_pmap.h"
 #include "opt_msgbuf.h"
 #include "opt_smp.h"
 #include "opt_xbox.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sf_buf.h>
 #include <sys/sx.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 
 #ifdef XBOX
 #include <machine/xbox.h>
 #endif
 
 /*
  * CPU_ENABLE_SSE is defined for I686_CPU builds unless CPU_DISABLE_SSE
  * has been defined.
  */
 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
 #define CPU_ENABLE_SSE
 #endif
 
 /*
  * Fallback value (200) for PMAP_SHPGPERPROC when it has not already been
  * defined; it is the initializer of the "shpgperproc" variable below.
  */
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 /*
  * PMAP_INLINE: when DIAGNOSTIC is not defined it requests inlining, using
  * the gnu_inline attribute if the compiler defines __GNUC_GNU_INLINE__ and
  * plain "extern inline" otherwise.  When DIAGNOSTIC is defined it expands
  * to nothing.
  */
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
 #else
 #define PMAP_INLINE	extern inline
 #endif
 #else
 #define PMAP_INLINE
 #endif
 
 /*
  * PV_STATS is defined unconditionally here, so PV_STAT(x) always executes
  * "x".  Removing the PV_STATS definition turns PV_STAT(x) into a no-op.
  */
 #define PV_STATS
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 /*
  * Map a physical address to its pv_table slot: pa_index() shifts the
  * address right by PDRSHIFT and pa_to_pvh() yields the address of the
  * pv_table element at that index.
  */
 #define	pa_index(pa)	((pa) >> PDRSHIFT)
 #define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
 
 /*
  * Get PDEs and PTEs for user/kernel address space
  *
  * pmap_pde(m, v) yields a pointer to the entry of pmap "m"'s pm_pdir that
  * is selected by virtual address "v"; pdir_pde(m, v) yields the entry
  * itself from the page directory array "m".
  */
 #define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
 #define pdir_pde(m, v) ((m)[(vm_offset_t)(v) >> PDRSHIFT])
 
 /*
  * Flag tests on the entry that "pte" points to: each macro reads the entry
  * as an int and is non-zero when the named bit (PG_V, PG_W, PG_M or PG_A)
  * is set.  The argument is parenthesized so that pointer expressions such
  * as "pte + 1" are dereferenced as a whole rather than being split by
  * operator precedence.
  */
 #define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
 #define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
 #define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
 #define pmap_pte_u(pte)		((*(int *)(pte) & PG_A) != 0)
 #define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)
 
 /*
  * pmap_pte_set_w() atomically sets PG_W when "v" is non-zero and clears it
  * otherwise.  pmap_pte_set_prot() clears the PG_PROT bits and then ORs in
  * "v" using two plain read-modify-write steps; note that it evaluates
  * "pte" twice, so the argument must not have side effects.
  */
 #define pmap_pte_set_w(pte, v)	((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
     atomic_clear_int((u_int *)(pte), PG_W))
 #define pmap_pte_set_prot(pte, v) ((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
 
 /*
  * Statically allocated pmap object (presumably what "kernel_pmap" refers
  * to -- confirm against the pmap header).
  */
 struct pmap kernel_pmap_store;
 /*
  * List of pmaps.  pmap_bootstrap() initializes it, inserts kernel_pmap and
  * creates allpmaps_lock as a spin mutex that is held around the insertion.
  */
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps;
 static struct mtx allpmaps_lock;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 int pgeflag = 0;		/* PG_G or-in */
 int pseflag = 0;		/* PG_PS or-in */
 
 /*
  * nkpt starts at NKPT, and kernel_vm_end starts NKPT * NBPDR bytes above
  * KERNBASE.  KERNend and KPTphys are defined elsewhere; pmap_bootstrap()
  * uses KPTphys as the physical base of the NKPT pages it enters in KPTD.
  */
 static int nkpt = NKPT;
 vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR;
 extern u_int32_t KERNend;
 extern u_int32_t KPTphys;
 
 /* The following two objects exist only in PAE kernels. */
 #ifdef PAE
 pt_entry_t pg_nx;
 static uma_zone_t pdptzone;
 #endif
 
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 
 static int pat_works = 1;
-TUNABLE_INT("vm.pmap.pat_works", &pat_works);
-SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RDTUN, &pat_works, 1,
+SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
     "Is page attribute table fully functional?");
 
 static int pg_ps_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
     "Are large page mappings enabled?");
 
 #define	PAT_INDEX_SIZE	8
 static int pat_index[PAT_INDEX_SIZE];	/* cache mode to PAT index conversion */
 
 /*
  * Data for the pv entry allocation mechanism
  */
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table;	/* indexed via pa_to_pvh() */
 static int shpgperproc = PMAP_SHPGPERPROC;
 
 struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
 int pv_maxchunks;			/* How many chunks we have KVA for */
 vm_offset_t pv_vafree;			/* freelist stored in the PTE */
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  *
  * pmap_bootstrap() fills one struct sysmaps per sysmaps_pcpu[] slot: it
  * initializes "lock" and reserves one page of KVA for each of the two
  * windows, storing the window's address in CADDRn and the pointer to the
  * PTE that maps it in CMAPn.  Per the comment in pmap_bootstrap(),
  * CMAP1/CMAP2 are used for zeroing and copying pages and CMAP3 is used
  * for the idle process page zeroing.
  */
 struct sysmaps {
 	struct	mtx lock;
 	pt_entry_t *CMAP1;
 	pt_entry_t *CMAP2;
 	caddr_t	CADDR1;
 	caddr_t	CADDR2;
 };
 static struct sysmaps sysmaps_pcpu[MAXCPU];
 /*
  * Global (not per-CPU) windows, also reserved by pmap_bootstrap().  The
  * CMAPn/CADDRn pairs follow the same PTE-pointer/address convention as
  * above; ptvmmap is used for reading arbitrary physical pages via
  * /dev/mem and msgbufp maps the system message buffer.
  */
 pt_entry_t *CMAP1 = 0;
 static pt_entry_t *CMAP3;
 static pd_entry_t *KPTD;
 caddr_t CADDR1 = 0, ptvmmap = 0;
 static caddr_t CADDR3;
 struct msgbuf *msgbufp = 0;
 
 /*
  * Crashdump maps.
  */
 static caddr_t crashdumpmap;
 
 /*
  * PMAP1/PMAP2 receive PTE pointers and PADDR1/PADDR2 the matching one-page
  * KVA windows in pmap_bootstrap(), which describes them as used for
  * pmap_pte_quick.  The counters below are exported read-only under the
  * "debug" sysctl tree with the descriptions given in each SYSCTL_INT.
  */
 static pt_entry_t *PMAP1 = 0, *PMAP2;
 static pt_entry_t *PADDR1 = 0, *PADDR2;
 #ifdef SMP
 static int PMAP1cpu;
 static int PMAP1changedcpu;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
 	   &PMAP1changedcpu, 0,
 	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
 #endif
 static int PMAP1changed;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
 	   &PMAP1changed, 0,
 	   "Number of times pmap_pte_quick changed PMAP1");
 static int PMAP1unchanged;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
 	   &PMAP1unchanged, 0,
 	   "Number of times pmap_pte_quick didn't change PMAP1");
 /* Initialized in pmap_bootstrap() as an MTX_DEF mutex named "PMAP2". */
 static struct mtx PMAP2mutex;
 
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
 static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 static int	pmap_pvh_wired_mappings(struct md_page *pvh, int count);
 
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
 static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     vm_page_t *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
     vm_page_t *free);
 static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
     vm_page_t *free);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
 					vm_offset_t va);
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
 static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
     pd_entry_t newpde);
 static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
 #ifdef PAE
 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
 #endif
 static void pmap_set_pg(void);
 
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
 /*
  * If you get an error here, then you set KVA_PAGES wrong! See the
  * description of KVA_PAGES in sys/i386/include/pmap.h. It must be a
  * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
  */
 CTASSERT(KERNBASE % (1 << 24) == 0);
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *
  *	On the i386 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  *
  *	"firstaddr" is added to KERNBASE to form the initial value of
  *	virtual_avail.  The function then initializes the kernel pmap and
  *	the allpmaps list, reserves the temporary-mapping windows (CMAP*,
  *	crashdumpmap, ptvmmap, msgbufp, KPTmap, PMAP1/PMAP2) starting at
  *	that address, advances virtual_avail past them, clears
  *	PTD[1..NKPT-1] (skipped on an Xbox when XBOX is configured), and
  *	finally calls pmap_init_pat() and pmap_set_pg().
  */
 void
 pmap_bootstrap(vm_paddr_t firstaddr)
 {
 	vm_offset_t va;
 	pt_entry_t *pte, *unused;
 	struct sysmaps *sysmaps;
 	int i;
 
 	/*
 	 * Initialize the first available kernel virtual address.  However,
 	 * using "firstaddr" may waste a few pages of the kernel virtual
 	 * address space, because locore may not have mapped every physical
 	 * page that it allocated.  Preferably, locore would provide a first
 	 * unused virtual address in addition to "firstaddr".
 	 */
 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
 #ifdef PAE
 	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
 #endif
 	kernel_pmap->pm_root = NULL;
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	LIST_INIT(&allpmaps);
 
 	/*
 	 * Request a spin mutex so that changes to allpmaps cannot be
 	 * preempted by smp_rendezvous_cpus().  Otherwise,
 	 * pmap_update_pde_kernel() could access allpmaps while it is
 	 * being changed.
 	 */
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 *
 	 * SYSMAP(c, p, v, n) stores the current "va" in "v" (cast to type
 	 * "c") and the current "pte" in "p", then advances "va" by "n"
 	 * pages and "pte" by "n" entries.  It relies on the local variables
 	 * "va" and "pte" and expands to several statements, so it must not
 	 * be used as the unbraced body of an if/else.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
 
 	/* Start reserving at virtual_avail; "pte" comes from vtopte(va). */
 	va = virtual_avail;
 	pte = vtopte(va);
 
 	/*
 	 * CMAP1/CMAP2 are used for zeroing and copying pages.
 	 * CMAP3 is used for the idle process page zeroing.
 	 */
 	for (i = 0; i < MAXCPU; i++) {
 		sysmaps = &sysmaps_pcpu[i];
 		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
 		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
 		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
 	}
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
 
 	/*
 	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
 	 */
 	SYSMAP(caddr_t, unused, ptvmmap, 1)
 
 	/*
 	 * msgbufp is used to map the system message buffer.
 	 */
 	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
 
 	/*
 	 * KPTmap is used by pmap_kextract().
 	 *
 	 * KPTmap is first initialized by locore.  However, that initial
 	 * KPTmap can only support NKPT page table pages.  Here, a larger
 	 * KPTmap is created that can support KVA_PAGES page table pages.
 	 */
 	SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES)
 
 	/*
 	 * Enter the first NKPT page table pages, which are physically
 	 * contiguous starting at KPTphys, as valid read/write entries
 	 * (plus pgeflag).
 	 */
 	for (i = 0; i < NKPT; i++)
 		KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V;
 
 	/*
 	 * Adjust the start of the KPTD and KPTmap so that the implementation
 	 * of pmap_kextract() and pmap_growkernel() can be made simpler.
 	 */
 	KPTD -= KPTDI;
 	KPTmap -= i386_btop(KPTDI << PDRSHIFT);
 
 	/*
 	 * ptemap is used for pmap_pte_quick
 	 */
 	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
 	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
 
 	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
 
 	/* Everything below "va" is now reserved; publish the new start. */
 	virtual_avail = va;
 
 	/*
 	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
 	 * physical memory region that is used by the ACPI wakeup code.  This
 	 * mapping must not have PG_G set. 
 	 *
 	 * Only PTD[1] through PTD[NKPT - 1] are cleared below; PTD[0] is
 	 * left untouched.
 	 */
 #ifdef XBOX
 	/* FIXME: This is gross, but needed for the XBOX. Since we are in such
 	 * an early stage, we cannot yet neatly map video memory ... :-(
 	 * Better fixes are very welcome! */
 	if (!arch_i386_is_xbox)
 #endif
 	for (i = 1; i < NKPT; i++)
 		PTD[i] = 0;
 
 	/* Initialize the PAT MSR if present. */
 	pmap_init_pat();
 
 	/* Turn on PG_G on kernel page(s) */
 	pmap_set_pg();
 }
 
 /*
  * Setup the PAT MSR.
  *
  * Builds a local table that maps each PAT_* caching mode to a PAT index,
  * programs MSR_PAT to match (only when the CPU advertises CPUID_PAT), and
  * copies the table into the global pat_index[] array.  Caching modes that
  * do not get a dedicated PAT entry stay mapped to index 3.  pat_works is
  * cleared when the CPU has no PAT, or when it is an Intel CPU other than
  * family 6 with model >= 0xe.
  */
 void
 pmap_init_pat(void)
 {
 	int pat_table[PAT_INDEX_SIZE];
 	uint64_t pat_msr;
 	u_long cr0, cr4;
 	int i;
 
 	/* Set default PAT index table. */
 	for (i = 0; i < PAT_INDEX_SIZE; i++)
 		pat_table[i] = -1;	/* -1 marks a mode with no index */
 	pat_table[PAT_WRITE_BACK] = 0;
 	pat_table[PAT_WRITE_THROUGH] = 1;
 	pat_table[PAT_UNCACHEABLE] = 3;
 	pat_table[PAT_WRITE_COMBINING] = 3;
 	pat_table[PAT_WRITE_PROTECTED] = 3;
 	pat_table[PAT_UNCACHED] = 3;
 
 	/* Bail if this CPU doesn't implement PAT. */
 	if ((cpu_feature & CPUID_PAT) == 0) {
 		/* Publish the defaults; the MSR is left untouched. */
 		for (i = 0; i < PAT_INDEX_SIZE; i++)
 			pat_index[i] = pat_table[i];
 		pat_works = 0;
 		return;
 	}
 
 	/*
 	 * Due to some Intel errata, we can only safely use the lower 4
 	 * PAT entries.
 	 *
 	 *   Intel Pentium III Processor Specification Update
 	 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
 	 * or Mode C Paging)
 	 *
 	 *   Intel Pentium IV  Processor Specification Update
 	 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
 	 */
 	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
 	    !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe))
 		pat_works = 0;
 
 	/* Initialize default PAT entries. */
 	pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
 	    PAT_VALUE(1, PAT_WRITE_THROUGH) |
 	    PAT_VALUE(2, PAT_UNCACHED) |
 	    PAT_VALUE(3, PAT_UNCACHEABLE) |
 	    PAT_VALUE(4, PAT_WRITE_BACK) |
 	    PAT_VALUE(5, PAT_WRITE_THROUGH) |
 	    PAT_VALUE(6, PAT_UNCACHED) |
 	    PAT_VALUE(7, PAT_UNCACHEABLE);
 
 	if (pat_works) {
 		/*
 		 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
 		 * Program 5 and 6 as WP and WC.
 		 * Leave 4 and 7 as WB and UC.
 		 */
 		pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
 		pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
 		    PAT_VALUE(6, PAT_WRITE_COMBINING);
 		pat_table[PAT_UNCACHED] = 2;
 		pat_table[PAT_WRITE_PROTECTED] = 5;
 		pat_table[PAT_WRITE_COMBINING] = 6;
 	} else {
 		/*
 		 * Just replace PAT Index 2 with WC instead of UC-.
 		 * PAT_UNCACHED and PAT_WRITE_PROTECTED keep using index 3.
 		 */
 		pat_msr &= ~PAT_MASK(2);
 		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
 		pat_table[PAT_WRITE_COMBINING] = 2;
 	}
 
 	/*
 	 * The MSR is rewritten with PGE and the caches disabled, and with
 	 * the caches and TLB flushed both before and after the write.  The
 	 * original CR0/CR4 values are saved so they can be restored below.
 	 */
 
 	/* Disable PGE. */
 	cr4 = rcr4();
 	load_cr4(cr4 & ~CR4_PGE);
 
 	/* Disable caches (CD = 1, NW = 0). */
 	cr0 = rcr0();
 	load_cr0((cr0 & ~CR0_NW) | CR0_CD);
 
 	/* Flushes caches and TLBs. */
 	wbinvd();
 	invltlb();
 
 	/* Update PAT and index table. */
 	wrmsr(MSR_PAT, pat_msr);
 	for (i = 0; i < PAT_INDEX_SIZE; i++)
 		pat_index[i] = pat_table[i];
 
 	/* Flush caches and TLBs again. */
 	wbinvd();
 	invltlb();
 
 	/* Restore caches and PGE. */
 	load_cr0(cr0);
 	load_cr4(cr4);
 }
 
 /*
  * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
  *
  * Nothing is done when pgeflag is zero.  With pseflag set, pgeflag is OR-ed
  * into every page directory entry covering
  * [KERNBASE + KERNLOAD, KERNBASE + KERNend); otherwise it is OR-ed into
  * every nonzero PTE from btext up to KERNBASE + KERNend.  The TLB is
  * flushed after every step.
  */
 static void
 pmap_set_pg(void)
 {
 	pt_entry_t *ptep;
 	vm_offset_t addr, limit;
 
 	if (pgeflag == 0)
 		return;
 
 	limit = KERNBASE + KERNend;
 
 	if (!pseflag) {
 		/* Small pages: tag each populated PTE individually. */
 		for (addr = (vm_offset_t)btext; addr < limit;
 		    addr += PAGE_SIZE) {
 			ptep = vtopte(addr);
 			if (*ptep)
 				*ptep |= pgeflag;
 			invltlb();	/* Play it safe, invltlb() every time */
 		}
 		return;
 	}
 
 	/* Large pages: tag the page directory entries themselves. */
 	for (addr = KERNBASE + KERNLOAD; addr < limit; addr += NBPDR) {
 		pdir_pde(PTD, addr) |= pgeflag;
 		invltlb();	/* Play it safe, invltlb() every time */
 	}
 }
 
 /*
  * Initialize a vm_page's machine-dependent fields: the caching mode
  * defaults to write-back and the pv list starts out empty.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 
 	m->md.pat_mode = PAT_WRITE_BACK;
 	TAILQ_INIT(&m->md.pv_list);
 }
 
 #ifdef PAE
 /*
  * Backing allocator installed on the PDPT zone.  Marks the slab as
  * UMA_SLAB_KERNEL and hands back memory from kmem_alloc_contig() on
  * kernel_map, passing 0x0 and 0xffffffff as the low/high bounds
  * (presumably physical address limits -- confirm against
  * kmem_alloc_contig()).
  */
 static void *
 pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
 {
 	void *mem;
 
 	/* Inform UMA that this allocator uses kernel_map/object. */
 	*flags = UMA_SLAB_KERNEL;
 	mem = (void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
 	    0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT);
 	return (mem);
 }
 #endif
 
 /*
  * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
  * Requirements:
  *  - Must deal with pages in order to ensure that none of the PG_* bits
  *    are ever set, PG_V in particular.
  *  - Assumes we can write to ptes without pte_store() atomic ops, even
  *    on PAE systems.  This should be ok.
  *  - Assumes nothing will ever test these addresses for 0 to indicate
  *    no mapping instead of correctly checking PG_V.
  *  - Assumes a vm_offset_t will fit in a pte (true for i386).
  * Because PG_V is never set, there can be no mappings to invalidate.
  *
  * pmap_ptelist_alloc() pops the first va off the list rooted at *head and
  * returns it, or returns 0 when the list is empty.
  */
 static vm_offset_t
 pmap_ptelist_alloc(vm_offset_t *head)
 {
 	pt_entry_t *link;
 	vm_offset_t first;
 
 	first = *head;
 	if (first != 0) {
 		/* The popped page's PTE holds the next free va. */
 		link = vtopte(first);
 		*head = *link;
 		if (*head & PG_V)
 			panic("pmap_ptelist_alloc: va with PG_V set!");
 		*link = 0;
 	}
 	return (first);		/* 0 means out of memory */
 }
 
 /*
  * Push va onto the kva freelist rooted at *head: the previous head is saved
  * in va's PTE and va becomes the new head.  Panics if va has the PG_V bit
  * set, since such a value must never be stored in a PTE.
  */
 static void
 pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
 {
 	pt_entry_t *link;
 
 	if ((va & PG_V) != 0)
 		panic("pmap_ptelist_free: freeing va with PG_V set!");
 	link = vtopte(va);
 	*link = *head;		/* virtual! PG_V is 0 though */
 	*head = va;
 }
 
 /*
  * Build a kva freelist at *head from the npages pages that start at base.
  * The pages are pushed from last to first, so the finished list hands them
  * out in ascending address order.
  */
 static void
 pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
 {
 	int idx;
 
 	*head = 0;
 	idx = npages;
 	while (--idx >= 0)
 		pmap_ptelist_free(head, (vm_offset_t)base + idx * PAGE_SIZE);
 }
 
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  *
  *	In order, this routine: fills in the vm_page entries for the
  *	kernel's NKPT page table pages, sizes the pv entry pool (honouring
  *	the vm.pmap.shpgperproc and vm.pmap.pv_entries tunables), enables
  *	the erratum 383 workaround where needed, decides whether superpage
  *	mappings are used, allocates the superpage pv head table, reserves
  *	KVA for pv chunks, and (PAE only) creates the PDPT zone.
  */
 void
 pmap_init(void)
 {
 	vm_page_t mpte;
 	vm_size_t s;
 	int i, pv_npg;
 
 	/*
 	 * Initialize the vm page array entries for the kernel pmap's
 	 * page table pages.
 	 */
 	for (i = 0; i < NKPT; i++) {
 		mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
 		KASSERT(mpte >= vm_page_array &&
 		    mpte < &vm_page_array[vm_page_array_size],
 		    ("pmap_init: page table page is out of range"));
 		/* pindex is the page directory slot this page table fills. */
 		mpte->pindex = i + KPTDI;
 		mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
 	}
 
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
 	 *
 	 * The computed default for pv_entry_max can be overridden by the
 	 * vm.pmap.pv_entries tunable; the result is rounded up to a multiple
 	 * of _NPCPV and the high water mark is set to 90% of it.
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
 	 * If the kernel is running in a virtual machine on an AMD Family 10h
 	 * processor, then it must assume that MCA is enabled by the virtual
 	 * machine monitor.
 	 */
 	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10)
 		workaround_erratum383 = 1;
 
 	/*
 	 * Are large page mappings supported and enabled?
 	 * Without PSE support the tunable is forced off; otherwise, when
 	 * enabled, NBPDR is registered as the second supported page size.
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
 	if (pseflag == 0)
 		pg_ps_enabled = 0;
 	else if (pg_ps_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("pmap_init: can't assign to pagesizes[1]"));
 		pagesizes[1] = NBPDR;
 	}
 
 	/*
 	 * Calculate the size of the pv head table for superpages.
 	 * The empty loop advances i past the last (start, end) pair in
 	 * phys_avail[], so phys_avail[(i - 2) + 1] is the end address of
 	 * the last pair.
 	 */
 	for (i = 0; phys_avail[i + 1]; i += 2);
 	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
 
 	/*
 	 * Allocate memory for the pv head table for superpages.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 
 	/*
 	 * Reserve one page of KVA per potential pv chunk and thread all of
 	 * those pages onto the pv_vafree list.
 	 */
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
 	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
 	    PAGE_SIZE * pv_maxchunks);
 	if (pv_chunkbase == NULL)
 		panic("pmap_init: not enough kvm for pv chunks");
 	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
 #ifdef PAE
 	/*
 	 * The PDPT zone uses its item size minus one as the alignment
 	 * argument and pmap_pdpt_allocf() as its page allocator.
 	 */
 	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
 	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
 	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
 	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
 #endif
 }
 
 
 /* Read-only sysctls exposing the pv entry sizing chosen in pmap_init(). */
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
 	"Max number of PV entries");
 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
 	"Page share factor per proc");
 
 /* Sub-node grouping the superpage (2/4MB) statistics counters below. */
 SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
     "2/4MB page mapping counters");
 
 /* Each counter is a file-local u_long exported read-only. */
 static u_long pmap_pde_demotions;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_pde_demotions, 0, "2/4MB page demotions");
 
 static u_long pmap_pde_mappings;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_pde_mappings, 0, "2/4MB page mappings");
 
 static u_long pmap_pde_p_failures;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
     &pmap_pde_p_failures, 0, "2/4MB page promotion failures");
 
 static u_long pmap_pde_promotions;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
     &pmap_pde_promotions, 0, "2/4MB page promotions");
 
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
 
 /*
  * Translate a caching mode into the cache-control bits to be set in a PTE
  * (is_pde false) or PDE (is_pde true).
  *
  * The mode is looked up in pat_index[] to obtain a 3-bit PAT index, whose
  * bits are then spread over the PWT, PCD and PAT flags.  Panics when the
  * mode is out of range or has no PAT index assigned.
  */
 int
 pmap_cache_bits(int mode, boolean_t is_pde)
 {
 	int bits, idx;
 
 	if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
 		panic("Unknown caching mode %d\n", mode);
 
 	/* Map the caching mode to a PAT index. */
 	idx = pat_index[mode];
 
 	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
 	bits = (idx & 0x1) ? PG_NC_PWT : 0;
 	if (idx & 0x2)
 		bits |= PG_NC_PCD;
 	if (idx & 0x4) {
 		/* The PAT bit is different for PTE's and PDE's. */
 		bits |= is_pde ? PG_PDE_PAT : PG_PTE_PAT;
 	}
 	return (bits);
 }
 
 /*
  * Store newpde into the page directory slot for va in every pmap on the
  * allpmaps list, under allpmaps_lock.  Asserts that one of the visited
  * pmaps is the one whose page directory is currently loaded.
  *
  * The caller is responsible for maintaining TLB consistency.
  */
 static void
 pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
 {
 	pmap_t pm;
 	boolean_t saw_current;
 
 	saw_current = FALSE;
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_FOREACH(pm, &allpmaps, pm_list) {
 		/* Same directory frame as the active recursive mapping? */
 		if ((pm->pm_pdir[PTDPTDI] & PG_FRAME) ==
 		    (PTDpde[0] & PG_FRAME))
 			saw_current = TRUE;
 		pde_store(pmap_pde(pm, va), newpde);
 	}
 	mtx_unlock_spin(&allpmaps_lock);
 	KASSERT(saw_current,
 	    ("pmap_kenter_pde: current page table is not in allpmaps"));
 }
 
 /*
  * After changing the page size for the specified virtual address in the page
  * table, flush the corresponding entries from the processor's TLB.  Only the
  * calling processor's TLB is affected.
  *
  * The calling thread must be pinned to a processor.
  */
 static void
 pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 {
 	u_long saved_cr4;
 
 	if ((newpde & PG_PS) == 0) {
 		/* Demotion: flush a specific 2/4MB page mapping. */
 		invlpg(va);
 		return;
 	}
 	if ((newpde & PG_G) == 0) {
 		/*
 		 * Promotion: flush every 4KB page mapping from the TLB
 		 * because there are too many to flush individually.
 		 */
 		invltlb();
 		return;
 	}
 
 	/*
 	 * Promotion: flush every 4KB page mapping from the TLB,
 	 * including any global (PG_G) mappings.
 	 */
 	saved_cr4 = rcr4();
 	load_cr4(saved_cr4 & ~CR4_PGE);
 	/*
 	 * Although preemption at this point could be detrimental to
 	 * performance, it would not lead to an error.  PG_G is simply
 	 * ignored if CR4.PGE is clear.  Moreover, in case this block
 	 * is re-entered, the load_cr4() either above or below will
 	 * modify CR4.PGE flushing the TLB.
 	 */
 	load_cr4(saved_cr4 | CR4_PGE);
 }
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
  * N.B.: Before calling any of the following TLB invalidation functions,
  * the calling processor must ensure that all stores updating a non-
  * kernel page table are globally performed.  Otherwise, another
  * processor could cache an old, pre-update entry without being
  * invalidated.  This can happen one of two ways: (1) The pmap becomes
  * active on another processor after its pm_active field is checked by
  * one of the following functions but before a store updating the page
  * table is globally performed. (2) The pmap becomes active on another
  * processor before its pm_active field is checked but due to
  * speculative loads one of the following functions still reads the
  * pmap as inactive on the other processor.
  *
  * The kernel page table is exempt because its pm_active field is
  * immutable.  The kernel page table is always active on every
  * processor.
  *
  * pmap_invalidate_page() invalidates the mapping for a single va, locally
  * and on every other processor on which the pmap is active.
  */
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	cpumask_t others, self;
 
 	sched_pin();
 	if (pmap != kernel_pmap && pmap->pm_active != all_cpus) {
 		/* Only touch the processors that have this pmap active. */
 		self = PCPU_GET(cpumask);
 		others = PCPU_GET(other_cpus);
 		if ((pmap->pm_active & self) != 0)
 			invlpg(va);
 		if ((pmap->pm_active & others) != 0)
 			smp_masked_invlpg(pmap->pm_active & others, va);
 	} else {
 		/* Active everywhere: flush here and broadcast. */
 		invlpg(va);
 		smp_invlpg(va);
 	}
 	sched_unpin();
 }
 
 /*
  * Invalidate the mappings for every page in [sva, eva), locally and on every
  * other processor on which the pmap is active.  The thread is pinned for
  * the duration.
  */
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	cpumask_t others, self;
 	vm_offset_t va;
 
 	sched_pin();
 	if (pmap != kernel_pmap && pmap->pm_active != all_cpus) {
 		/* Only touch the processors that have this pmap active. */
 		self = PCPU_GET(cpumask);
 		others = PCPU_GET(other_cpus);
 		if ((pmap->pm_active & self) != 0) {
 			for (va = sva; va < eva; va += PAGE_SIZE)
 				invlpg(va);
 		}
 		if ((pmap->pm_active & others) != 0)
 			smp_masked_invlpg_range(pmap->pm_active & others,
 			    sva, eva);
 	} else {
 		/* Active everywhere: flush here and broadcast. */
 		for (va = sva; va < eva; va += PAGE_SIZE)
 			invlpg(va);
 		smp_invlpg_range(sva, eva);
 	}
 	sched_unpin();
 }
 
 /*
  * Flush the whole TLB, locally and on every other processor on which the
  * pmap is active.  The thread is pinned for the duration.
  */
 void
 pmap_invalidate_all(pmap_t pmap)
 {
 	cpumask_t others, self;
 
 	sched_pin();
 	if (pmap != kernel_pmap && pmap->pm_active != all_cpus) {
 		/* Only touch the processors that have this pmap active. */
 		self = PCPU_GET(cpumask);
 		others = PCPU_GET(other_cpus);
 		if ((pmap->pm_active & self) != 0)
 			invltlb();
 		if ((pmap->pm_active & others) != 0)
 			smp_masked_invltlb(pmap->pm_active & others);
 	} else {
 		/* Active everywhere: flush here and broadcast. */
 		invltlb();
 		smp_invltlb();
 	}
 	sched_unpin();
 }
 
 /*
  * Write back and invalidate the caches: wbinvd on the calling processor,
  * followed by smp_cache_flush() (presumably the cross-processor
  * equivalent -- confirm against its definition).  The thread stays pinned
  * across both steps.
  */
 void
 pmap_invalidate_cache(void)
 {
 
 	sched_pin();
 	wbinvd();
 	smp_cache_flush();
 	sched_unpin();
 }
 
 /*
  * Argument block passed to the pmap_update_pde_*() rendezvous callbacks.
  * It is filled in by pmap_update_pde() and describes one page directory
  * entry update.
  */
 struct pde_action {
 	cpumask_t store;	/* processor that updates the PDE */
 	cpumask_t invalidate;	/* processors that invalidate their TLB */
 	vm_offset_t va;		/* virtual address whose PDE is changing */
 	pd_entry_t *pde;	/* PDE to store to (used for user pmaps) */
 	pd_entry_t newpde;	/* new PDE value */
 };
 
 static void
 pmap_update_pde_kernel(void *arg)
 {
 	struct pde_action *act = arg;
 	pd_entry_t *pde;
 	pmap_t pmap;
 
 	if (act->store == PCPU_GET(cpumask))
 		/*
 		 * Elsewhere, this operation requires allpmaps_lock for
 		 * synchronization.  Here, it does not because it is being
 		 * performed in the context of an all_cpus rendezvous.
 		 */
 		LIST_FOREACH(pmap, &allpmaps, pm_list) {
 			pde = pmap_pde(pmap, act->va);
 			pde_store(pde, act->newpde);
 		}
 }
 
 /*
  * Rendezvous action for a user PDE update.  Only the processor named by
  * act->store writes the new value through act->pde; all others do nothing.
  */
 static void
 pmap_update_pde_user(void *arg)
 {
 	struct pde_action *action;
 
 	action = arg;
 	if (action->store != PCPU_GET(cpumask))
 		return;
 	pde_store(action->pde, action->newpde);
 }
 
 /*
  * Rendezvous teardown for a PDE update.  Each processor listed in
  * act->invalidate flushes its own TLB for the changed mapping.
  */
 static void
 pmap_update_pde_teardown(void *arg)
 {
 	struct pde_action *action;
 
 	action = arg;
 	if ((action->invalidate & PCPU_GET(cpumask)) == 0)
 		return;
 	pmap_update_pde_invalidate(action->va, action->newpde);
 }
 
 /*
  * Change the page size for the specified virtual address in a way that
  * prevents any possibility of the TLB ever having two entries that map the
  * same virtual address using different page sizes.  This is the recommended
  * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
  * machine check exception for a TLB state that is improperly diagnosed as a
  * hardware error.
  *
  * pmap:   pmap being modified (kernel_pmap is treated as active on all CPUs)
  * va:     virtual address covered by the PDE
  * pde:    PDE to update; only used when pmap is not kernel_pmap
  * newpde: value to store
  */
 static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 	struct pde_action act;
 	cpumask_t active, cpumask;
 
 	sched_pin();
 	cpumask = PCPU_GET(cpumask);
 	if (pmap == kernel_pmap)
 		active = all_cpus;
 	else
 		active = pmap->pm_active;
 	if ((active & PCPU_GET(other_cpus)) != 0) {
 		/*
 		 * The pmap is active on at least one other processor: run
 		 * the store and the TLB invalidations inside a rendezvous
 		 * of this processor and all processors using the pmap.
 		 * This processor performs the store (act.store); every
 		 * active processor invalidates in the teardown callback.
 		 */
 		act.store = cpumask;
 		act.invalidate = active;
 		act.va = va;
 		act.pde = pde;
 		act.newpde = newpde;
 		smp_rendezvous_cpus(cpumask | active,
 		    smp_no_rendevous_barrier, pmap == kernel_pmap ?
 		    pmap_update_pde_kernel : pmap_update_pde_user,
 		    pmap_update_pde_teardown, &act);
 	} else {
 		/*
 		 * No other processor is using the pmap: update the PDE
 		 * directly and invalidate locally if the pmap is active here.
 		 */
 		if (pmap == kernel_pmap)
 			pmap_kenter_pde(va, newpde);
 		else
 			pde_store(pde, newpde);
 		if ((active & cpumask) != 0)
 			pmap_update_pde_invalidate(va, newpde);
 	}
 	sched_unpin();
 }
 #else /* !SMP */
 /*
  * Normal, non-SMP, 486+ invalidation functions.
  * We inline these within pmap.c for speed.
  *
  * pmap_invalidate_page() flushes the TLB entry for va when the pmap is the
  * kernel pmap or is marked active; otherwise it does nothing.
  */
 PMAP_INLINE void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	invlpg(va);
 }
 
 /*
  * Flush the TLB entry of every page in [sva, eva) when the pmap is the
  * kernel pmap or is marked active; otherwise do nothing.
  */
 PMAP_INLINE void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t va;
 
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	for (va = sva; va < eva; va += PAGE_SIZE)
 		invlpg(va);
 }
 
 /*
  * Flush the whole TLB when the pmap is the kernel pmap or is marked active;
  * otherwise do nothing.
  */
 PMAP_INLINE void
 pmap_invalidate_all(pmap_t pmap)
 {
 
 	if (pmap != kernel_pmap && pmap->pm_active == 0)
 		return;
 	invltlb();
 }
 
 /*
  * Uniprocessor variant: write back and invalidate this processor's caches
  * with a single wbinvd.
  */
 PMAP_INLINE void
 pmap_invalidate_cache(void)
 {
 
 	wbinvd();
 }
 
 /*
  * Uniprocessor variant of pmap_update_pde(): store newpde (into every pmap
  * for the kernel pmap, or through pde otherwise) and then flush the local
  * TLB if the pmap is the kernel pmap or is marked active.
  */
 static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 
 	if (pmap != kernel_pmap) {
 		pde_store(pde, newpde);
 		/* An inactive user pmap has nothing cached to flush. */
 		if (pmap->pm_active == 0)
 			return;
 	} else
 		pmap_kenter_pde(va, newpde);
 	pmap_update_pde_invalidate(va, newpde);
 }
 #endif /* !SMP */
 
 /*
  * Make sure the cache holds no stale data for the page-aligned virtual
  * range [sva, eva).  Three strategies are used, in order of preference:
  * nothing at all when the CPU self-snoops, a per-line clflush loop when
  * clflush is available and the range is under 2MB, and a full cache
  * invalidation otherwise.
  */
 void
 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 {
 
 	KASSERT((sva & PAGE_MASK) == 0,
 	    ("pmap_invalidate_cache_range: sva not page-aligned"));
 	KASSERT((eva & PAGE_MASK) == 0,
 	    ("pmap_invalidate_cache_range: eva not page-aligned"));
 
 	/* If "Self Snoop" is supported, do nothing. */
 	if (cpu_feature & CPUID_SS)
 		return;
 
 	if ((cpu_feature & CPUID_CLFSH) == 0 ||
 	    eva - sva >= 2 * 1024 * 1024) {
 		/*
 		 * No targeted cache flush methods are supported by CPU,
 		 * or the supplied range is bigger than 2MB.
 		 * Globally invalidate cache.
 		 */
 		pmap_invalidate_cache();
 		return;
 	}
 
 	/*
 	 * Otherwise, do per-cache line flush.  Use the mfence
 	 * instruction to insure that previous stores are
 	 * included in the write-back.  The processor
 	 * propagates flush to other processors in the cache
 	 * coherence domain.
 	 */
 	mfence();
 	while (sva < eva) {
 		clflush(sva);
 		sva += cpu_clflush_line_size;
 	}
 	mfence();
 }
 
 /*
  * Are we current address space or kernel?  N.B. We return FALSE when
  * a pmap's page table is in use because a kernel thread is borrowing
  * it.  The borrowed page table can change spontaneously, making any
  * dependence on its continued use subject to a race condition.
  *
  * Returns nonzero for the kernel pmap, or for the current thread's own
  * pmap when its page directory is the one currently loaded.
  */
 static __inline int
 pmap_is_current(pmap_t pmap)
 {
 
 	if (pmap == kernel_pmap)
 		return (1);
 	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))
 		return (0);
 	return ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) ==
 	    (PTDpde[0] & PG_FRAME));
 }
 
 /*
  * Return a pointer to the page table entry for va in the given pmap, the
  * PDE itself when va is mapped by a superpage, or 0 when the PDE is empty.
  *
  * If the given pmap is not the current or kernel pmap, the returned pte must
  * be released by passing it to pmap_pte_release(): in that case the page
  * table page is mapped at PADDR2 and PMAP2mutex is left held on return.
  */
 pt_entry_t *
 pmap_pte(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *pdep;
 	pd_entry_t ptpage;
 
 	pdep = pmap_pde(pmap, va);
 	if (*pdep & PG_PS)
 		return (pdep);
 	if (*pdep == 0)
 		return (0);
 
 	/* are we current address space or kernel? */
 	if (pmap_is_current(pmap))
 		return (vtopte(va));
 
 	/* Foreign pmap: borrow the PMAP2/PADDR2 window. */
 	mtx_lock(&PMAP2mutex);
 	ptpage = *pdep & PG_FRAME;
 	if ((*PMAP2 & PG_FRAME) != ptpage) {
 		*PMAP2 = ptpage | PG_RW | PG_V | PG_A | PG_M;
 		pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
 	}
 	return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
 }
 
 /*
  * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
  * being NULL.  PMAP2mutex is dropped only when the pte lies within the page
  * mapped at PADDR2; any other pointer is ignored.
  */
 static __inline void
 pmap_pte_release(pt_entry_t *pte)
 {
 	pt_entry_t *page_base;
 
 	page_base = (pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK);
 	if (page_base == PADDR2)
 		mtx_unlock(&PMAP2mutex);
 }
 
 /*
  * Invalidate the local TLB entry for the page containing caddr.  This is a
  * pointer-taking convenience wrapper around invlpg().
  */
 static __inline void
 invlcaddr(void *caddr)
 {
 
 	invlpg((u_int)caddr);
 }
 
 /*
  * Super fast pmap_pte routine best used when scanning
  * the pv lists.  This eliminates many coarse-grained
  * invltlb calls.  Note that many of the pv list
  * scans are across different pmaps.  It is very wasteful
  * to do an entire invltlb for checking a single mapping.
  *
  * If the given pmap is not the current pmap, vm_page_queue_mtx
  * must be held and curthread pinned to a CPU.
  *
  * Returns the PDE itself for a superpage mapping, a pointer to the PTE
  * otherwise, or 0 when the PDE is empty.  For a foreign pmap the page
  * table page is made visible through the PMAP1/PADDR1 window.
  */
 static pt_entry_t *
 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t newpf;
 	pd_entry_t *pde;
 
 	pde = pmap_pde(pmap, va);
 	if (*pde & PG_PS)
 		return (pde);
 	if (*pde != 0) {
 		/* are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (vtopte(va));
 		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 		newpf = *pde & PG_FRAME;
 		if ((*PMAP1 & PG_FRAME) != newpf) {
 			/* Window points elsewhere: remap it and flush. */
 			*PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			invlcaddr(PADDR1);
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			/*
 			 * Window already maps the right page, but it was
 			 * last set up on another CPU; this CPU's TLB entry
 			 * for PADDR1 may be stale, so flush it locally.
 			 */
 			PMAP1cpu = PCPU_GET(cpuid);
 			invlcaddr(PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;	/* window reused as is */
 		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
 	}
 	return (0);
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
  *		with the given map/virtual_address pair.
  *
  *		Returns 0 when the page directory entry for va is
  *		empty.  The pmap lock is held for the lookup.
  */
 vm_paddr_t
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	vm_paddr_t pa;
 	pt_entry_t *ptep;
 	pd_entry_t pde;
 
 	pa = 0;
 	PMAP_LOCK(pmap);
 	pde = pmap->pm_pdir[va >> PDRSHIFT];
 	if ((pde & PG_PS) != 0) {
 		/* Superpage: the frame comes straight from the PDE. */
 		pa = (pde & PG_PS_FRAME) | (va & PDRMASK);
 	} else if (pde != 0) {
 		ptep = pmap_pte(pmap, va);
 		pa = (*ptep & PG_FRAME) | (va & PAGE_MASK);
 		pmap_pte_release(ptep);
 	}
 	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
 /*
  *	Routine:	pmap_extract_and_hold
  *	Function:
  *		Atomically extract and hold the physical page
  *		with the given pmap and virtual address pair
  *		if that mapping permits the given protection.
  *
  *	Returns the held page, or NULL when va is unmapped or when
  *	write access is requested (VM_PROT_WRITE in prot) and the
  *	mapping is not writable.
  *
  *	The lookup is restarted from "retry" whenever
  *	vm_page_pa_tryrelock() returns nonzero.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pd_entry_t pde;
 	pt_entry_t pte;
 	vm_page_t m;
 	vm_paddr_t pa;
 
 	pa = 0;
 	m = NULL;
 	PMAP_LOCK(pmap);
 retry:
 	pde = *pmap_pde(pmap, va);
 	if (pde != 0) {
 		if (pde & PG_PS) {
 			/* Superpage mapping: the frame comes from the PDE. */
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
 				if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
 				       (va & PDRMASK), &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 				    (va & PDRMASK));
 				vm_page_hold(m);
 			}
 		} else {
 			/*
 			 * The thread only has to stay pinned while the PTE
 			 * is read through pmap_pte_quick()'s per-CPU window.
 			 * Unpin as soon as the value has been copied, so
 			 * that the "goto retry" below cannot leave an extra
 			 * pin behind: every retry comes back through here
 			 * and pins again.
 			 */
 			sched_pin();
 			pte = *pmap_pte_quick(pmap, va);
 			sched_unpin();
 			if (pte != 0 &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
 				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 				vm_page_hold(m);
 			}
 		}
 	}
 	PA_UNLOCK_COND(pa);
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 /*
  * Add a wired page to the kva: map va to pa read/write, valid, and with
  * pgeflag set.  No TLB invalidation is performed.
  * Note: not SMP coherent.
  *
  * This function may be used before pmap_bootstrap() is called.
  */
 PMAP_INLINE void
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
 
 	pte_store(vtopte(va), pa | PG_RW | PG_V | pgeflag);
 }
 
 /*
  * Like pmap_kenter(), but also applies the PTE cache-control bits that
  * pmap_cache_bits() returns for the given caching mode.
  */
 static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
 	pt_entry_t newpte;
 
 	newpte = pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0);
 	pte_store(vtopte(va), newpte);
 }
 
 /*
  * Remove a page from the kernel pagetables by clearing the PTE for va.
  * No TLB invalidation is performed.
  * Note: not SMP coherent.
  *
  * This function may be used before pmap_bootstrap() is called.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 
 	pte_clear(vtopte(va));
 }
 
 /*
  *	Used to map a range of physical addresses into kernel
  *	virtual address space.
  *
  *	The value passed in '*virt' is a suggested virtual address for
  *	the mapping. Architectures which can support a direct-mapped
  *	physical to virtual region can return the appropriate address
  *	within that region, leaving '*virt' unchanged. Other
  *	architectures should map the pages starting at '*virt' and
  *	update '*virt' with the first usable address after the mapped
  *	region.
  *
  *	This implementation maps [start, end) page by page starting at
  *	'*virt', advances '*virt' past the mapping, and returns the
  *	address at which the mapping begins.  'prot' is not used.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	vm_offset_t first, va;
 
 	first = *virt;
 	for (va = first; start < end; va += PAGE_SIZE, start += PAGE_SIZE)
 		pmap_kenter(va, start);
 	pmap_invalidate_range(kernel_pmap, first, va);
 	*virt = va;
 	return (first);
 }
 
 
 /*
  * Add a list of wired pages to the kva
  * this routine is only used for temporary
  * kernel mappings that do not need to have
  * page modification or references recorded.
  * Note that old mappings are simply written
  * over.  The page *must* be wired.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  *
  * PTEs that already map the requested frame with the requested cache bits
  * are left alone.  The range is invalidated only if at least one of the
  * overwritten PTEs was valid.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	pt_entry_t *cur, *last, replaced, want;
 	vm_page_t pg;
 
 	replaced = 0;
 	cur = vtopte(sva);
 	last = cur + count;
 	for (; cur < last; cur++) {
 		pg = *ma++;
 		want = VM_PAGE_TO_PHYS(pg) |
 		    pmap_cache_bits(pg->md.pat_mode, 0);
 		if ((*cur & (PG_FRAME | PG_PTE_CACHE)) == want)
 			continue;
 		/* Accumulate the old bits so PG_V can be tested once. */
 		replaced |= *cur;
 		pte_store(cur, want | pgeflag | PG_RW | PG_V);
 	}
 	if (__predict_false((replaced & PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
 		    PAGE_SIZE);
 }
 
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  *
  * Clears the PTEs of count consecutive pages starting at sva and then
  * invalidates the whole range.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	vm_offset_t va;
 	int left;
 
 	va = sva;
 	for (left = count; left > 0; left--) {
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 /*
  * Free every page on a delayed-free list.  The list is singly linked
  * through each page's "right" field, which is read before the page is
  * freed.
  */
 static __inline void
 pmap_free_zero_pages(vm_page_t free)
 {
 	vm_page_t next, pg;
 
 	for (pg = free; pg != NULL; pg = next) {
 		next = pg->right;
 		/* Preserve the page's PG_ZERO setting. */
 		vm_page_free_toq(pg);
 	}
 }
 
 /*
  * Schedule the specified unused page table page to be freed.  Specifically,
  * add the page to the specified list of pages that will be released to the
  * physical memory manager after the TLB has been updated.
  *
  * The page's PG_ZERO flag is set or cleared according to set_PG_ZERO, and
  * the page is pushed onto the front of *free via its "right" field.
  */
 static __inline void
 pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
 {
 
 	if (!set_PG_ZERO)
 		m->flags &= ~PG_ZERO;
 	else
 		m->flags |= PG_ZERO;
 	m->right = *free;
 	*free = m;
 }
 
 /*
  * Inserts the specified page table page into the specified pmap's collection
  * of idle page table pages.  Each of a pmap's page table pages is responsible
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
  * The collection is a splay tree keyed by pindex and rooted at pm_root; the
  * inserted page becomes the new root.  Panics if a page with the same pindex
  * is already present.  The pmap lock must be held.
  */
 static void
 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
 {
 	vm_page_t top;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	top = pmap->pm_root;
 	if (top != NULL) {
 		/* Bring the closest existing node to the root, then split. */
 		top = vm_page_splay(mpte->pindex, top);
 		if (mpte->pindex == top->pindex)
 			panic("pmap_insert_pt_page: pindex already inserted");
 		if (mpte->pindex < top->pindex) {
 			mpte->left = top->left;
 			mpte->right = top;
 			top->left = NULL;
 		} else {
 			mpte->right = top->right;
 			mpte->left = top;
 			top->right = NULL;
 		}
 	} else {
 		/* Empty tree: the new page is the only node. */
 		mpte->left = NULL;
 		mpte->right = NULL;
 	}
 	pmap->pm_root = mpte;
 }
 
 /*
  * Looks for a page table page mapping the specified virtual address in the
  * specified pmap's collection of idle page table pages.  Returns NULL if there
  * is no page table page corresponding to the specified virtual address.
  *
  * When the root is not already the wanted page, the tree is splayed around
  * the wanted pindex and pm_root is updated, whether or not a match is found.
  * The pmap lock must be held.
  */
 static vm_page_t
 pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
 {
 	vm_page_t node;
 	vm_pindex_t want;
 
 	want = va >> PDRSHIFT;
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	node = pmap->pm_root;
 	if (node == NULL || node->pindex == want)
 		return (node);
 	node = vm_page_splay(want, node);
 	pmap->pm_root = node;
 	if (node->pindex != want)
 		node = NULL;
 	return (node);
 }
 
 /*
  * Removes the specified page table page from the specified pmap's collection
  * of idle page table pages.  The specified page table page must be a member of
  * the pmap's collection.
  *
  * The page is first splayed to the root (if it is not there already); its
  * two subtrees are then joined to form the new tree.  The pmap lock must be
  * held.
  */
 static void
 pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
 {
 	vm_page_t newroot;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if (pmap->pm_root != mpte)
 		vm_page_splay(mpte->pindex, pmap->pm_root);
 	if (mpte->left != NULL) {
 		/* Splay the left subtree, then hang the right one off it. */
 		newroot = vm_page_splay(mpte->pindex, mpte->left);
 		newroot->right = mpte->right;
 	} else
 		newroot = mpte->right;
 	pmap->pm_root = newroot;
 }
 
 /*
  * Drop one reference (wire_count) on the page table page m.  When the count
  * reaches zero the page is handed to _pmap_unwire_pte_hold() and that
  * call's result is returned; otherwise 0 is returned.
  */
 static __inline int
 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 {
 
 	if (--m->wire_count != 0)
 		return (0);
 	return (_pmap_unwire_pte_hold(pmap, m, free));
 }
 
 /*
  * Slow path of pmap_unwire_pte_hold(), called once the wire count of page
  * table page m has dropped to zero.  Clears the page directory entry that
  * pointed at m, adjusts the resident and wired counters, invalidates the
  * affected TLB entry and queues m on the caller's delayed-free list.
  * Always returns 1.
  */
 static int 
 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 {
 	vm_offset_t pteva;
 
 	/*
 	 * unmap the page table page
 	 */
 	pmap->pm_pdir[m->pindex] = 0;
 	--pmap->pm_stats.resident_count;
 
 	/*
 	 * This is a release store so that the ordinary store unmapping
 	 * the page table page is globally performed before TLB shoot-
 	 * down is begun.
 	 */
 	atomic_subtract_rel_int(&cnt.v_wire_count, 1);
 
 	/*
 	 * Invalidate a single page, not the whole TLB, so that the
 	 * unmapping takes effect immediately.  The address is
 	 * VM_MAXUSER_ADDRESS plus the page offset of m's pindex
 	 * (presumably the page table page's slot in the recursive
 	 * page table mapping -- confirm).
 	 */
 	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
 	pmap_invalidate_page(pmap, pteva);
 
 	/*
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
 	 */
 	pmap_add_delayed_free_list(m, free, TRUE);
 
 	return (1);
 }
 
 /*
  * After removing a page table entry, this routine is used to
  * conditionally free the page, and manage the hold/wire counts.
  *
  * Addresses at or above VM_MAXUSER_ADDRESS are ignored (returns 0).
  * Otherwise the page table page backing va is looked up through the page
  * directory and unwired; the result of that unwire is returned.
  */
 static int
 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 {
 	vm_page_t ptpage;
 
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (0);
 	ptpage = PHYS_TO_VM_PAGE(*pmap_pde(pmap, va) & PG_FRAME);
 	return (pmap_unwire_pte_hold(pmap, ptpage, free));
 }
 
 /*
  * Initialize the pmap for the swapper process.
  *
  * The pmap is pointed at the boot-time page directory (IdlePTD, and
  * IdlePDPT under PAE) at its KERNBASE-relative virtual address rather than
  * being given a directory of its own.  It is also installed as this CPU's
  * curpmap.
  */
 void
 pmap_pinit0(pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
 	/*
 	 * Since the page table directory is shared with the kernel pmap,
 	 * which is already included in the list "allpmaps", this pmap does
 	 * not need to be inserted into that list.
 	 */
 	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 #ifdef PAE
 	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 #endif
 	pmap->pm_root = NULL;		/* no idle page table pages yet */
 	pmap->pm_active = 0;		/* not active on any CPU */
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  *
  * Returns 1 on success, or 0 if kernel virtual address space for the page
  * directory could not be allocated (the pmap lock is destroyed again in
  * that case).  A pmap whose pm_pdir is already set keeps its existing
  * directory KVA (and PDPT under PAE); only the backing pages are
  * allocated anew.
  */
 int
 pmap_pinit(pmap_t pmap)
 {
 	vm_page_t m, ptdpg[NPGPTD];
 	vm_paddr_t pa;
 	static int color;
 	int i;
 
 	PMAP_LOCK_INIT(pmap);
 
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	if (pmap->pm_pdir == NULL) {
 		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 		    NBPTD);
 
 		if (pmap->pm_pdir == NULL) {
 			PMAP_LOCK_DESTROY(pmap);
 			return (0);
 		}
 #ifdef PAE
 		pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 		KASSERT(((vm_offset_t)pmap->pm_pdpt &
 		    ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
 		    ("pmap_pinit: pdpt misaligned"));
 		KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 		    ("pmap_pinit: pdpt above 4g"));
 #endif
 		pmap->pm_root = NULL;
 	}
 	KASSERT(pmap->pm_root == NULL,
 	    ("pmap_pinit: pmap has reserved page table page(s)"));
 
 	/*
 	 * allocate the page directory page(s)
 	 *
 	 * The loop index only advances on success; after a failed
 	 * allocation VM_WAIT is invoked and the same slot is tried again.
 	 */
 	for (i = 0; i < NPGPTD;) {
 		m = vm_page_alloc(NULL, color++,
 		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 		    VM_ALLOC_ZERO);
 		if (m == NULL)
 			VM_WAIT;
 		else {
 			ptdpg[i++] = m;
 		}
 	}
 
 	/* Map the new directory pages at the pmap's pm_pdir KVA. */
 	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 
 	/* Zero only the pages that are not flagged PG_ZERO. */
 	for (i = 0; i < NPGPTD; i++) {
 		if ((ptdpg[i]->flags & PG_ZERO) == 0)
 			bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
 	}
 
 	/*
 	 * The list insertion and the copy of the kernel entries are done
 	 * together under allpmaps_lock.
 	 */
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	/* Copy the kernel page table directory entries. */
 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/* install self-referential address mapping entry(s) */
 	for (i = 0; i < NPGPTD; i++) {
 		pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 		pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 #ifdef PAE
 		pmap->pm_pdpt[i] = pa | PG_V;
 #endif
 	}
 
 	pmap->pm_active = 0;
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
 	return (1);
 }
 
 /*
  * this routine is called if the page table page is not
  * mapped correctly.
  *
  * Allocates a page table page for page directory index "ptepindex" and
  * installs it in the pmap's page directory.  "flags" must contain
  * exactly one of M_NOWAIT and M_WAITOK.
  *
  * Returns the new page, or NULL if no page could be allocated.  With
  * M_WAITOK the pmap lock and the page queues lock are dropped around
  * VM_WAIT before NULL is returned, so the caller has to re-examine the
  * page directory and retry.
  */
 static vm_page_t
 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
 {
 	vm_paddr_t ptepa;
 	vm_page_t m;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 		if (flags & M_WAITOK) {
 			/*
 			 * Release both locks while waiting and take them
 			 * back in the opposite order of release.
 			 */
 			PMAP_UNLOCK(pmap);
 			vm_page_unlock_queues();
 			VM_WAIT;
 			vm_page_lock_queues();
 			PMAP_LOCK(pmap);
 		}
 
 		/*
 		 * Indicate the need to retry.  While waiting, the page table
 		 * page may have been allocated.
 		 */
 		return (NULL);
 	}
 	/* VM_ALLOC_ZERO is only a preference; zero the page if needed. */
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
 
 	/* The page table page itself counts as a resident page. */
 	pmap->pm_stats.resident_count++;
 
 	ptepa = VM_PAGE_TO_PHYS(m);
 	pmap->pm_pdir[ptepindex] =
 		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 
 	return (m);
 }
 
 /*
  * Return the page table page that maps "va", taking an additional
  * wire_count reference on it if it already exists and allocating it
  * through _pmap_allocpte() otherwise.  "flags" must contain exactly one
  * of M_NOWAIT and M_WAITOK.  With M_WAITOK the lookup is repeated until
  * a page is obtained; with M_NOWAIT a failed allocation returns NULL.
  */
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
 	unsigned pdidx;
 	pd_entry_t pde;
 	vm_page_t ptpage;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 
 	/* Index of the page directory entry covering va. */
 	pdidx = va >> PDRSHIFT;
 	for (;;) {
 		pde = pmap->pm_pdir[pdidx];
 
 		/*
 		 * A superpage mapping is demoted first, which supports
 		 * switching from a 4MB page to a normal 4K page.  The
 		 * entry is re-read because demotion rewrites it.
 		 */
 		if ((pde & PG_PS) != 0) {
 			(void)pmap_demote_pde(pmap, &pmap->pm_pdir[pdidx],
 			    va);
 			pde = pmap->pm_pdir[pdidx];
 		}
 
 		/* Page table page present: just take another reference. */
 		if (pde != 0) {
 			ptpage = PHYS_TO_VM_PAGE(pde & PG_FRAME);
 			ptpage->wire_count++;
 			return (ptpage);
 		}
 
 		/*
 		 * Not mapped (or deallocated): allocate one.  A NULL result
 		 * is final for M_NOWAIT callers and a cue to look again
 		 * for M_WAITOK callers.
 		 */
 		ptpage = _pmap_allocpte(pmap, pdidx, flags);
 		if (ptpage != NULL || (flags & M_WAITOK) == 0)
 			return (ptpage);
 	}
 }
 
 
 /***************************************************
  * Pmap allocation/deallocation routines.
  ***************************************************/
 
 #ifdef SMP
 /*
  * Deal with a SMP shootdown of other users of the pmap that we are
  * trying to dispose of.  This can be a bit hairy.
  */
 /* Address of the pm_active mask from which a CPU clears its own bit. */
 static cpumask_t *lazymask;
 /* Physical address that each CPU compares against its %cr3. */
 static u_int lazyptd;
 /* Set to 1 by pmap_lazyfix_action(); pmap_lazyfix() spins while it is 0. */
 static volatile u_int lazywait;
 
 void pmap_lazyfix_action(void);
 
 /*
  * Remote half of pmap_lazyfix(): if this CPU is still running on the
  * page tables identified by "lazyptd", switch to the %cr3 saved in the
  * current pcb, then clear this CPU's bit in *lazymask and signal
  * completion through "lazywait".
  * NOTE(review): presumably invoked from the IPI_LAZYPMAP handler; the
  * dispatch is not visible in this file section -- confirm.
  */
 void
 pmap_lazyfix_action(void)
 {
 	cpumask_t mymask = PCPU_GET(cpumask);
 
 #ifdef COUNT_IPIS
 	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
 #endif
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 	atomic_clear_int(lazymask, mymask);
 	/* Release store: the waiter sees the cleared bit before the flag. */
 	atomic_store_rel_int(&lazywait, 1);
 }
 
 /*
  * Local counterpart of pmap_lazyfix_action(), used by pmap_lazyfix()
  * when the CPU to be fixed is the current one: no IPI and no "lazywait"
  * handshake are needed.  "mymask" is the current CPU's mask bit.
  */
 static void
 pmap_lazyfix_self(cpumask_t mymask)
 {
 
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 	atomic_clear_int(lazymask, mymask);
 }
 
 
 /*
  * Loop until no CPU is marked active in "pmap".  Each iteration handles
  * the CPU with the lowest set bit in pm_active, either directly (if it
  * is the current CPU) or by sending it IPI_LAZYPMAP and spinning until
  * it reports completion.  All of this is done under smp_ipi_mtx, which
  * also protects the lazymask/lazyptd/lazywait globals.
  */
 static void
 pmap_lazyfix(pmap_t pmap)
 {
 	cpumask_t mymask, mask;
 	u_int spins;
 
 	while ((mask = pmap->pm_active) != 0) {
 		/* Spin budget for this iteration's wait loop. */
 		spins = 50000000;
 		mask = mask & -mask;	/* Find least significant set bit */
 		mtx_lock_spin(&smp_ipi_mtx);
 		/* Physical address that %cr3 holds while this pmap is in use. */
 #ifdef PAE
 		lazyptd = vtophys(pmap->pm_pdpt);
 #else
 		lazyptd = vtophys(pmap->pm_pdir);
 #endif
 		mymask = PCPU_GET(cpumask);
 		if (mask == mymask) {
 			lazymask = &pmap->pm_active;
 			pmap_lazyfix_self(mymask);
 		} else {
 			/*
 			 * Publish the target mask and reset the completion
 			 * flag before the IPI is sent.
 			 */
 			atomic_store_rel_int((u_int *)&lazymask,
 			    (u_int)&pmap->pm_active);
 			atomic_store_rel_int(&lazywait, 0);
 			ipi_selected(mask, IPI_LAZYPMAP);
 			/* Wait for the remote CPU, but not forever. */
 			while (lazywait == 0) {
 				ia32_pause();
 				if (--spins == 0)
 					break;
 			}
 		}
 		mtx_unlock_spin(&smp_ipi_mtx);
 		/* Budget exhausted: report it and try again. */
 		if (spins == 0)
 			printf("pmap_lazyfix: spun for 50000000\n");
 	}
 }
 
 #else	/* SMP */
 
 /*
  * Cleaning up on uniprocessor is easy.  For various reasons, we're
  * unlikely to have to even execute this code, including the fact
  * that the cleanup is deferred until the parent does a wait(2), which
  * means that another userland process has run.
  *
  * If the CPU is still running on this pmap's page tables, switch to the
  * %cr3 saved in the current pcb and clear the CPU's bit in pm_active.
  *
  * The value compared against %cr3 has to be the one that is loaded into
  * it: the physical address of the page directory pointer table under
  * PAE and of the page directory otherwise, as in the SMP variant of
  * this routine.  Comparing against the page directory under PAE would
  * never match and would turn this function into a no-op.
  */
 static void
 pmap_lazyfix(pmap_t pmap)
 {
 	u_int cr3;
 
 #ifdef PAE
 	cr3 = vtophys(pmap->pm_pdpt);
 #else
 	cr3 = vtophys(pmap->pm_pdir);
 #endif
 	if (cr3 == rcr3()) {
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 		pmap->pm_active &= ~(PCPU_GET(cpumask));
 	}
 }
 #endif	/* SMP */
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * The page directory pages are unmapped and freed and the pmap lock is
  * destroyed.  Neither pm_pdir nor (under PAE) pm_pdpt is reset or
  * released here.
  */
 void
 pmap_release(pmap_t pmap)
 {
 	vm_page_t m, ptdpg[NPGPTD];
 	int i;
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
 	KASSERT(pmap->pm_root == NULL,
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
 	/* Make sure no CPU is still using these page tables. */
 	pmap_lazyfix(pmap);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Recover the page directory pages from the self-referential
 	 * entries before those entries are cleared below.
 	 */
 	for (i = 0; i < NPGPTD; i++)
 		ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] &
 		    PG_FRAME);
 
 	/* Clear the self-referential and the copied kernel entries. */
 	bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
 	    sizeof(*pmap->pm_pdir));
 
 	/* Remove the kernel mapping of the page directory pages. */
 	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 
 	for (i = 0; i < NPGPTD; i++) {
 		m = ptdpg[i];
 #ifdef PAE
 		KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 		    ("pmap_release: got wrong ptd page"));
 #endif
 		/* Drop the wiring and free the page as a zeroed page. */
 		m->wire_count--;
 		atomic_subtract_int(&cnt.v_wire_count, 1);
 		vm_page_free_zero(m);
 	}
 	PMAP_LOCK_DESTROY(pmap);
 }
 
 /*
  * Sysctl handler for vm.kvm_size: reports the distance between KERNBASE
  * and VM_MAX_KERNEL_ADDRESS.
  */
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long span;
 
 	span = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 	return (sysctl_handle_long(oidp, &span, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_size, "IU", "Size of KVM");
 
 /*
  * Sysctl handler for vm.kvm_free: reports the distance between the
  * current kernel_vm_end and VM_MAX_KERNEL_ADDRESS.
  */
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long remaining;
 
 	remaining = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 	return (sysctl_handle_long(oidp, &remaining, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "IU", "Amount of KVM free");
 
 /*
  * Grow the kernel page tables, if needed, so that they cover addresses
  * up to "addr" (rounded up to a page directory boundary and limited to
  * the end of kernel_map).  kernel_vm_end is advanced one page directory
  * entry at a time; a new page table page is allocated wherever the
  * entry is still empty.  Panics if such a page cannot be allocated.
  * The caller must hold the kernel map's system mutex.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	vm_page_t ptpage;
 	pd_entry_t pde;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	addr = roundup2(addr, NBPDR);
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
 	while (kernel_vm_end < addr) {
 		if (pdir_pde(PTD, kernel_vm_end) == 0) {
 			/* No page table page here yet: add one. */
 			ptpage = vm_page_alloc(NULL,
 			    kernel_vm_end >> PDRSHIFT,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (ptpage == NULL)
 				panic("pmap_growkernel: no memory to grow kernel");
 
 			nkpt++;
 
 			if ((ptpage->flags & PG_ZERO) == 0)
 				pmap_zero_page(ptpage);
 			pde = (pd_entry_t) (VM_PAGE_TO_PHYS(ptpage) | PG_V |
 			    PG_RW | PG_A | PG_M);
 			pdir_pde(KPTD, kernel_vm_end) = pgeflag | pde;
 
 			pmap_kenter_pde(kernel_vm_end, pde);
 		}
 
 		/*
 		 * Step to the next page directory boundary.  The "- 1"
 		 * keeps the comparison meaningful if the sum wrapped to 0.
 		 */
 		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
 			break;
 		}
 	}
 }
 
 
 /***************************************************
  * page management routines.
  ***************************************************/
 
 /*
  * Compile-time checks relied upon below: pv_to_chunk() finds a chunk by
  * masking an entry's address down to a page boundary, and pc_freemask[]
  * is written out for exactly 11 bitmap words.
  */
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 11);
 
 /*
  * Return the pv_chunk that contains "pv", found by clearing the
  * PAGE_MASK bits of the entry's address.
  */
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 	uintptr_t base;
 
 	base = (uintptr_t)pv & ~(uintptr_t)PAGE_MASK;
 	return ((struct pv_chunk *)base);
 }
 
 /* The pmap owning a pv entry is recorded in the chunk that contains it. */
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 #define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
 #define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
 
 /*
  * Value of each pc_map[] word when every pv entry covered by that word
  * is free (a set bit means "free"): ten full 32-bit words plus 16 bits
  * of the last word, 336 bits in all.
  */
 static uint32_t pc_freemask[11] = {
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE10
 };
 
 /* Read-only vm.pmap.pv_entry_count: number of pv entries in use. */
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 
 /*
  * Optional statistics, compiled in only when PV_STATS is defined.  The
  * counters are updated through PV_STAT() in the pv entry routines and
  * exported read-only under vm.pmap.
  */
 #ifdef PV_STATS
 /* pv chunk counters: live chunks, allocations, frees, failed "try" allocations. */
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 	"Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 	"Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 	"Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 	"Number of times tried to get a chunk page but failed.");
 
 /* pv entry counters: frees, allocations, and free entries left in chunks. */
 static long pv_entry_frees, pv_entry_allocs;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 	"Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 	"Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 	"Current number of spare pv entries");
 
 /* Number of pmap_collect() passes requested for each page queue. */
 static int pmap_collect_inactive, pmap_collect_active;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
 	"Current number times pmap_collect called on inactive queue");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
 	"Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
  * another pv entry chunk.  This is normally called to
  * unmap inactive pages, and if necessary, active pages.
  *
  * Every page on queue "vpq" that is neither held nor busy has its 4KB
  * mappings destroyed and the corresponding pv entries freed, except
  * for mappings whose pmap cannot be locked safely.  "locked_pmap" is
  * the pmap the caller already holds locked.
  */
 static void
 pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 {
 	pd_entry_t *pde;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	pv_entry_t next_pv, pv;
 	vm_offset_t va;
 	vm_page_t m, free;
 
 	/*
 	 * Stay on this CPU for the whole scan.
 	 * NOTE(review): presumably a requirement of pmap_pte_quick() --
 	 * confirm against its definition.
 	 */
 	sched_pin();
 	TAILQ_FOREACH(m, &vpq->pl, pageq) {
 		if (m->hold_count || m->busy)
 			continue;
 		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 			va = pv->pv_va;
 			pmap = PV_PMAP(pv);
 			/*
 			 * Avoid deadlock and lock recursion.  Pmaps at a
 			 * higher address than locked_pmap are locked
 			 * unconditionally; others (except locked_pmap
 			 * itself) are only try-locked, and the mapping is
 			 * skipped if that fails.
 			 */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
 			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
 				continue;
 			pmap->pm_stats.resident_count--;
 			pde = pmap_pde(pmap, va);
 			KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
 			    " a 4mpage in page %p's pv list", m));
 			pte = pmap_pte_quick(pmap, va);
 			/* Fetch and clear the PTE in one step. */
 			tpte = pte_load_clear(pte);
 			KASSERT((tpte & PG_W) == 0,
 			    ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
 			/* Carry the referenced/modified state over to the page. */
 			if (tpte & PG_A)
 				vm_page_flag_set(m, PG_REFERENCED);
 			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
 			/*
 			 * Page table pages released by pmap_unuse_pt() are
 			 * collected on "free" and only freed after the TLB
 			 * invalidation.
 			 */
 			free = NULL;
 			pmap_unuse_pt(pmap, va, &free);
 			pmap_invalidate_page(pmap, va);
 			pmap_free_zero_pages(free);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			free_pv_entry(pmap, pv);
 			if (pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 		}
 		/* No 4KB and no superpage mappings left: not writeable. */
 		if (TAILQ_EMPTY(&m->md.pv_list) &&
 		    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
 			vm_page_flag_clear(m, PG_WRITEABLE);
 	}
 	sched_unpin();
 }
 
 
 /*
  * free the pv_entry back to the free list
  *
  * The entry's bit is set in its chunk's bitmap and the chunk is moved
  * to the head of the pmap's chunk list.  If that leaves the whole chunk
  * free, the chunk's page is unmapped and freed and its kernel virtual
  * address is returned to "pv_vafree" instead.  The caller must hold the
  * page queues lock and the pmap lock.
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	vm_page_t m;
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_frees++);
 	PV_STAT(pv_entry_spare++);
 	pv_entry_count--;
 	pc = pv_to_chunk(pv);
 	/* Locate the entry's bit: word "field", bit "bit" of pc_map[]. */
 	idx = pv - &pc->pc_pventry[0];
 	field = idx / 32;
 	bit = idx % 32;
 	pc->pc_map[field] |= 1ul << bit;
 	/* move to head of list */
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 	/* Any word that differs from its all-free value: chunk still in use. */
 	for (idx = 0; idx < _NPCM; idx++)
 		if (pc->pc_map[idx] != pc_freemask[idx]) {
 			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 			return;
 		}
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 	pmap_qremove((vm_offset_t)pc, 1);
 	vm_page_unwire(m, 0);
 	vm_page_free(m);
 	pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 }
 
 /*
  * get a new pv_entry, allocating a block from the system
  * when needed.
  *
  * If "try" is non-zero and no new chunk page can be obtained, NULL is
  * returned instead of reclaiming entries.  Otherwise pmap_collect() is
  * run on the inactive and then on the active page queue, and the kernel
  * panics if a chunk still cannot be obtained after that.  The caller
  * must hold the pmap lock and the page queues lock.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, int try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
 	static vm_pindex_t colour;
 	struct vpgqueues *pq;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PV_STAT(pv_entry_allocs++);
 	pv_entry_count++;
 	/* Rate-limited warning, at most once per "printinterval". */
 	if (pv_entry_count > pv_entry_high_water)
 		if (ratecheck(&lastprint, &printinterval))
 			printf("Approaching the limit on PV entries, consider "
 			    "increasing either the vm.pmap.shpgperproc or the "
 			    "vm.pmap.pv_entry_max tunable.\n");
 	/* "pq" records which page queue, if any, was reclaimed from last. */
 	pq = NULL;
 retry:
 	/* Only the first chunk on the list is examined for a free entry. */
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		/* Find the first bitmap word with a free (set) bit. */
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = bsfl(pc->pc_map[field]);
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 32 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			for (field = 0; field < _NPCM; field++)
 				if (pc->pc_map[field] != 0) {
 					PV_STAT(pv_entry_spare--);
 					return (pv);	/* not full, return */
 				}
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 			PV_STAT(pv_entry_spare--);
 			return (pv);
 		}
 	}
 	/*
 	 * Access to the ptelist "pv_vafree" is synchronized by the page
 	 * queues lock.  If "pv_vafree" is currently non-empty, it will
 	 * remain non-empty until pmap_ptelist_alloc() completes.
 	 *
 	 * Once the active queue has been scanned, the page is requested
 	 * with VM_ALLOC_SYSTEM rather than VM_ALLOC_NORMAL.
 	 */
 	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq ==
 	    &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		if (try) {
 			/* Undo the optimistic count made on entry. */
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		/*
 		 * Reclaim pv entries: At first, destroy mappings to
 		 * inactive pages.  After that, if a pv chunk entry
 		 * is still needed, destroy mappings to active pages.
 		 */
 		if (pq == NULL) {
 			PV_STAT(pmap_collect_inactive++);
 			pq = &vm_page_queues[PQ_INACTIVE];
 		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
 			PV_STAT(pmap_collect_active++);
 			pq = &vm_page_queues[PQ_ACTIVE];
 		} else
 			panic("get_pv_entry: increase vm.pmap.shpgperproc");
 		pmap_collect(pmap, pq);
 		goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
 	colour++;
 	/* Map the new page at a KVA taken from pv_vafree and set it up. */
 	pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
 	pmap_qenter((vm_offset_t)pc, &m, 1);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
 	for (field = 1; field < _NPCM; field++)
 		pc->pc_map[field] = pc_freemask[field];
 	/* Entry 0 of the new chunk is the one handed to the caller. */
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
 	return (pv);
 }
 
 /*
  * Search the pv list headed by "pvh" for the entry describing the
  * mapping (pmap, va).  If one is found it is unlinked from the list and
  * returned; it is not freed.  NULL is returned if there is no such
  * entry.  The caller must hold the page queues lock.
  */
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t cur;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	TAILQ_FOREACH(cur, &pvh->pv_list, pv_list) {
 		if (PV_PMAP(cur) != pmap || cur->pv_va != va)
 			continue;
 		TAILQ_REMOVE(&pvh->pv_list, cur, pv_list);
 		return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Convert the pv bookkeeping of a superpage mapping into per-page form
  * after the mapping at "va" (physical address "pa", which must be
  * superpage aligned) has been demoted: the superpage's pv entry is
  * reused for the first 4KB page and new pv entries are created for all
  * the following pages.  The caller must hold the page queues lock.
  */
 static void
 pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_demote_pde: pa is not 4mpage aligned"));
 
 	/*
 	 * Transfer the 4mpage's pv entry for this mapping to the first
 	 * page's pv list.
 	 */
 	pvh = pa_to_pvh(pa);
 	va = trunc_4mpage(va);
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 	/* Instantiate the remaining NPTEPG - 1 pv entries. */
 	va_last = va + NBPDR - PAGE_SIZE;
 	do {
 		/* Step to the next vm_page and the next 4KB address together. */
 		m++;
 		KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 		    ("pmap_pv_demote_pde: page %p is not managed", m));
 		va += PAGE_SIZE;
 		pmap_insert_entry(pmap, va, m);
 	} while (va < va_last);
 }
 
 /*
  * Convert the per-page pv bookkeeping of the 4KB mappings starting at
  * the superpage containing "va" into a single superpage pv entry.  "pa"
  * is the superpage-aligned physical address.  The first page's pv entry
  * is reused for the superpage and the entries of all following pages
  * are freed.  The caller must hold the page queues lock.
  */
 static void
 pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_promote_pde: pa is not 4mpage aligned"));
 
 	/*
 	 * Transfer the first page's pv entry for this mapping to the
 	 * 4mpage's pv list.  Aside from avoiding the cost of a call
 	 * to get_pv_entry(), a transfer avoids the possibility that
 	 * get_pv_entry() calls pmap_collect() and that pmap_collect()
 	 * removes one of the mappings that is being promoted.
 	 */
 	m = PHYS_TO_VM_PAGE(pa);
 	va = trunc_4mpage(va);
 	pv = pmap_pvh_remove(&m->md, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
 	/* Free the remaining NPTEPG - 1 pv entries. */
 	va_last = va + NBPDR - PAGE_SIZE;
 	do {
 		/* Step to the next vm_page and the next 4KB address together. */
 		m++;
 		va += PAGE_SIZE;
 		pmap_pvh_free(&m->md, pmap, va);
 	} while (va < va_last);
 }
 
 /*
  * Unlink the pv entry for (pmap, va) from the list headed by "pvh" and
  * give it back through free_pv_entry().  The entry is expected to
  * exist; this is asserted.
  */
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t entry;
 
 	entry = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(entry != NULL, ("pmap_pvh_free: pv not found"));
 	free_pv_entry(pmap, entry);
 }
 
 /*
  * Free the pv entry that records the 4KB mapping of page "m" at "va" in
  * "pmap".  If the page is then left with neither 4KB nor superpage pv
  * entries, its PG_WRITEABLE flag is cleared.  The caller must hold the
  * page queues lock.
  */
 static void
 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 {
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	pmap_pvh_free(&m->md, pmap, va);
 	if (TAILQ_EMPTY(&m->md.pv_list) &&
 	    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
 		vm_page_flag_clear(m, PG_WRITEABLE);
 }
 
 /*
  * Create a pv entry recording that page "m" is mapped at "va" in
  * "pmap" and append it to the page's pv list.  The entry is obtained
  * with get_pv_entry(pmap, FALSE), i.e. without the "try" option.  The
  * caller must hold the pmap lock and the page queues lock.
  */
 static void
 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	entry = get_pv_entry(pmap, FALSE);
 	entry->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
 }
 
 /*
  * Conditionally create a pv entry for the mapping of page "m" at "va".
  * Nothing is done and FALSE is returned if the pv entry count has
  * reached the high water mark or if get_pv_entry() in "try" mode cannot
  * supply an entry; otherwise the entry is appended to the page's pv
  * list and TRUE is returned.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	if (pv_entry_count >= pv_entry_high_water)
 		return (FALSE);
 	entry = get_pv_entry(pmap, TRUE);
 	if (entry == NULL)
 		return (FALSE);
 	entry->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
 	return (TRUE);
 }
 
 /*
  * Conditionally create the pv entry for a superpage mapping of physical
  * address "pa" at "va".  A single entry is created and appended to the
  * pv list returned by pa_to_pvh(pa).  FALSE is returned, and nothing is
  * changed, if the pv entry count has reached the high water mark or if
  * get_pv_entry() in "try" mode cannot supply an entry.
  */
 static boolean_t
 pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *head;
 	pv_entry_t entry;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	if (pv_entry_count >= pv_entry_high_water)
 		return (FALSE);
 	entry = get_pv_entry(pmap, TRUE);
 	if (entry == NULL)
 		return (FALSE);
 	entry->pv_va = va;
 	head = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&head->pv_list, entry, pv_list);
 	return (TRUE);
 }
 
 /*
  * Fill all NPTEPG entries of the page table page starting at "firstpte"
  * with mappings to consecutive physical pages: the first entry receives
  * "newpte" and each following entry the previous value plus PAGE_SIZE.
  */
 static void
 pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
 {
 	int i;
 
 	for (i = 0; i < NPTEPG; i++) {
 		firstpte[i] = newpte;
 		newpte += PAGE_SIZE;
 	}
 }
 
 /*
  * Tries to demote a 2- or 4MB page mapping.  If demotion fails, the
  * 2- or 4MB page mapping is invalidated.
  *
  * "pde" points at the page directory entry that maps "va"; it must have
  * both PG_PS and PG_V set.  Returns TRUE if the entry now references a
  * page table page, and FALSE if the superpage mapping was removed
  * instead.  The pmap must be locked by the caller.
  */
 static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
 	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldpde = *pde;
 	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 	/* Reuse a page table page saved for this address, if there is one. */
 	mpte = pmap_lookup_pt_page(pmap, va);
 	if (mpte != NULL)
 		pmap_remove_pt_page(pmap, mpte);
 	else {
 		KASSERT((oldpde & PG_W) == 0,
 		    ("pmap_demote_pde: page table page for a wired mapping"
 		    " is missing"));
 
 		/*
 		 * Invalidate the 2- or 4MB page mapping and return
 		 * "failure" if the mapping was never accessed or the
 		 * allocation of the new page table page fails.
 		 */
 		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
 		    va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
 		    VM_ALLOC_WIRED)) == NULL) {
 			free = NULL;
 			pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
 			pmap_invalidate_page(pmap, trunc_4mpage(va));
 			pmap_free_zero_pages(free);
 			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
 			    " in pmap %p", va, pmap);
 			return (FALSE);
 		}
 		/* A newly allocated user page table page counts as resident. */
 		if (va < VM_MAXUSER_ADDRESS)
 			pmap->pm_stats.resident_count++;
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 
 	/*
 	 * If the page mapping is in the kernel's address space, then the
 	 * KPTmap can provide access to the page table page.  Otherwise,
 	 * temporarily map the page table page (mpte) into the kernel's
 	 * address space at either PADDR1 or PADDR2.
 	 *
 	 * PADDR1 is used when the thread is pinned and holds the page
 	 * queues lock; otherwise PADDR2 is used under PMAP2mutex, which
 	 * stays held until the new PDE has been stored below.
 	 */
 	if (va >= KERNBASE)
 		firstpte = &KPTmap[i386_btop(trunc_4mpage(va))];
 	else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) {
 		if ((*PMAP1 & PG_FRAME) != mptepa) {
 			*PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M;
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			invlcaddr(PADDR1);
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			PMAP1cpu = PCPU_GET(cpuid);
 			invlcaddr(PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;
 		firstpte = PADDR1;
 	} else {
 		mtx_lock(&PMAP2mutex);
 		if ((*PMAP2 & PG_FRAME) != mptepa) {
 			*PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M;
 			pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
 		}
 		firstpte = PADDR2;
 	}
 	/* The new PDE keeps only PG_U from the old one. */
 	newpde = mptepa | PM_M_PLACEHOLDER;
 	KASSERT((oldpde & PG_A) != 0,
 	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pde: oldpde is missing PG_M"));
 	/*
 	 * Template for the 4KB PTEs: the old PDE without PG_PS, with the
 	 * PAT bit moved from its PDE position to its PTE position.
 	 */
 	newpte = oldpde & ~PG_PS;
 	if ((newpte & PG_PDE_PAT) != 0)
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
 	 * If the page table page is new, initialize it.
 	 */
 	if (mpte->wire_count == 1) {
 		mpte->wire_count = NPTEPG;
 		pmap_fill_ptp(firstpte, newpte);
 	}
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 	    ("pmap_demote_pde: firstpte and newpte map different physical"
 	    " addresses"));
 
 	/*
 	 * If the mapping has changed attributes, update the page table
 	 * entries.
 	 */ 
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
 		pmap_fill_ptp(firstpte, newpte);
 	
 	/*
 	 * Demote the mapping.  This pmap is locked.  The old PDE has
 	 * PG_A set.  If the old PDE has PG_RW set, it also has PG_M
 	 * set.  Thus, there is no danger of a race with another
 	 * processor changing the setting of PG_A and/or PG_M between
 	 * the read above and the store below. 
 	 */
 	if (workaround_erratum383)
 		pmap_update_pde(pmap, va, pde, newpde);
 	else if (pmap == kernel_pmap)
 		pmap_kenter_pde(va, newpde);
 	else
 		pde_store(pde, newpde);	
 	if (firstpte == PADDR2)
 		mtx_unlock(&PMAP2mutex);
 
 	/*
 	 * Invalidate the recursive mapping of the page table page.
 	 */
 	pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
 
 	/*
 	 * Demote the pv entry.  This depends on the earlier demotion
 	 * of the mapping.  Specifically, the (re)creation of a per-
 	 * page pv entry might trigger the execution of pmap_collect(),
 	 * which might reclaim a newly (re)created per-page pv entry
 	 * and destroy the associated mapping.  In order to destroy
 	 * the mapping, the PDE must have already changed from mapping
 	 * the 2mpage to referencing the page table page.
 	 */
 	if ((oldpde & PG_MANAGED) != 0)
 		pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
 
 	pmap_pde_demotions++;
 	CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x"
 	    " in pmap %p", va, pmap);
 	return (TRUE);
 }
 
 /*
  * pmap_remove_pde: do the things to unmap a superpage in a process
  *
  * "pdq" points at the page directory entry and "sva" is the superpage
  * aligned virtual address it maps.  For a user pmap, a page table page
  * that was kept for this address is handed back through "free".  The
  * pmap must be locked by the caller.
  */
 static void
 pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     vm_page_t *free)
 {
 	struct md_page *pvh;
 	pd_entry_t oldpde;
 	vm_offset_t eva, va;
 	vm_page_t m, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PDRMASK) == 0,
 	    ("pmap_remove_pde: sva is not 4mpage aligned"));
 	/* Fetch and clear the PDE in one step. */
 	oldpde = pte_load_clear(pdq);
 	/* The statistics are kept in units of 4KB pages. */
 	if (oldpde & PG_W)
 		pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
 
 	/*
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
 	if (oldpde & PG_G)
 		pmap_invalidate_page(kernel_pmap, sva);
 	pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 	if (oldpde & PG_MANAGED) {
 		pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + NBPDR;
 		/*
 		 * Propagate the superpage's modified and referenced state
 		 * to every 4KB page it covered.
 		 */
 		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 		    va < eva; va += PAGE_SIZE, m++) {
 			if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
 			if (oldpde & PG_A)
 				vm_page_flag_set(m, PG_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_flag_clear(m, PG_WRITEABLE);
 		}
 	}
 	if (pmap == kernel_pmap) {
 		/*
 		 * NOTE(review): the PDE was cleared above, while
 		 * pmap_demote_pde() asserts PG_PS and PG_V on entry --
 		 * confirm how this path behaves with INVARIANTS.
 		 */
 		if (!pmap_demote_pde(pmap, pdq, sva))
 			panic("pmap_remove_pde: failed demotion");
 	} else {
 		/* Release the page table page kept for this address. */
 		mpte = pmap_lookup_pt_page(pmap, sva);
 		if (mpte != NULL) {
 			pmap_remove_pt_page(pmap, mpte);
 			pmap->pm_stats.resident_count--;
 			KASSERT(mpte->wire_count == NPTEPG,
 			    ("pmap_remove_pde: pte page wire count error"));
 			mpte->wire_count = 0;
 			pmap_add_delayed_free_list(mpte, free, FALSE);
 			atomic_subtract_int(&cnt.v_wire_count, 1);
 		}
 	}
 }
 
 /*
  * pmap_remove_pte: do the things to unmap a page in a process
  *
  * "ptq" points at the page table entry that maps "va".  The entry is
  * cleared, the pmap statistics and (for managed pages) the vm_page
  * state and pv list are updated, and the reference on the page table
  * page is dropped.  Returns the result of pmap_unuse_pt(); pmap_remove()
  * stops scanning the current page table page when it is non-zero.  The
  * caller must hold the page queues lock and the pmap lock.
  */
 static int
 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
 {
 	pt_entry_t oldpte;
 	vm_page_t m;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Fetch and clear the PTE in one step. */
 	oldpte = pte_load_clear(ptq);
 	if (oldpte & PG_W)
 		pmap->pm_stats.wired_count -= 1;
 	/*
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
 	if (oldpte & PG_G)
 		pmap_invalidate_page(kernel_pmap, va);
 	pmap->pm_stats.resident_count -= 1;
 	if (oldpte & PG_MANAGED) {
 		m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
 		/* Carry the modified/referenced state over to the page. */
 		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		if (oldpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 		pmap_remove_entry(pmap, m, va);
 	}
 	return (pmap_unuse_pt(pmap, va, free));
 }
 
 /*
  * Remove the single 4KB mapping at "va" from a process address space
  * and invalidate its TLB entry.  Nothing happens if no page table entry
  * can be found for "va" or if the entry is zero.  The caller must hold
  * the page queues lock and the pmap lock and must be pinned to its CPU.
  */
 static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 {
 	pt_entry_t *ptep;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	ptep = pmap_pte_quick(pmap, va);
 	if (ptep != NULL && *ptep != 0) {
 		pmap_remove_pte(pmap, ptep, va, free);
 		pmap_invalidate_page(pmap, va);
 	}
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	The range is [sva, eva).  Page table pages released along the
  *	way are collected on a local list and freed only after the TLB
  *	invalidation and after all locks have been dropped.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
 	pt_entry_t *pte;
 	vm_page_t free = NULL;
 	int anyvalid;
 
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	/* Set when a non-global mapping was removed and needs a flush. */
 	anyvalid = 0;
 
 	vm_page_lock_queues();
 	sched_pin();
 	PMAP_LOCK(pmap);
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if ((sva + PAGE_SIZE == eva) && 
 	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 		pmap_remove_page(pmap, sva, &free);
 		goto out;
 	}
 
 	for (; sva < eva; sva = pdnxt) {
 		unsigned pdirindex;
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
 		/* The addition wrapped around: finish after this pass. */
 		if (pdnxt < sva)
 			pdnxt = eva;
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		pdirindex = sva >> PDRSHIFT;
 		ptpaddr = pmap->pm_pdir[pdirindex];
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
 			/*
 			 * Are we removing the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + NBPDR == pdnxt && eva >= pdnxt) {
 				/*
 				 * The TLB entry for a PG_G mapping is
 				 * invalidated by pmap_remove_pde().
 				 */
 				if ((ptpaddr & PG_G) == 0)
 					anyvalid = 1;
 				pmap_remove_pde(pmap,
 				    &pmap->pm_pdir[pdirindex], sva, &free);
 				continue;
 			} else if (!pmap_demote_pde(pmap,
 			    &pmap->pm_pdir[pdirindex], sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
 			}
 		}
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (pdnxt > eva)
 			pdnxt = eva;
 
 		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 		    sva += PAGE_SIZE) {
 			if (*pte == 0)
 				continue;
 
 			/*
 			 * The TLB entry for a PG_G mapping is invalidated
 			 * by pmap_remove_pte().
 			 */
 			if ((*pte & PG_G) == 0)
 				anyvalid = 1;
 			/*
 			 * A non-zero result ends the scan of this page
 			 * table page; the outer loop then continues at
 			 * pdnxt.
 			 */
 			if (pmap_remove_pte(pmap, pte, sva, &free))
 				break;
 		}
 	}
 out:
 	sched_unpin();
 	if (anyvalid)
 		pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  *
  *		The page must not be fictitious.  Superpage mappings
  *		that include the page are demoted first, so that only
  *		4KB mappings remain to be removed.
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	pd_entry_t *pde;
 	vm_offset_t va;
 	vm_page_t free;
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_remove_all: page %p is fictitious", m));
 	free = NULL;
 	vm_page_lock_queues();
 	sched_pin();
 	/*
 	 * Pass 1: demote every superpage mapping of the page.  Whether
 	 * demotion succeeds or fails, the pv entry leaves this list, so
 	 * the loop terminates.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, va);
 		(void)pmap_demote_pde(pmap, pde, va);
 		PMAP_UNLOCK(pmap);
 	}
 	/* Pass 2: destroy every 4KB mapping of the page. */
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pmap->pm_stats.resident_count--;
 		pde = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
 		    " a 4mpage in page %p's pv list", m));
 		pte = pmap_pte_quick(pmap, pv->pv_va);
 		tpte = pte_load_clear(pte);
 		if (tpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, &free);
 		pmap_invalidate_page(pmap, pv->pv_va);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	/* No mappings are left, so the page cannot be written through one. */
 	vm_page_flag_clear(m, PG_WRITEABLE);
 	sched_unpin();
 	vm_page_unlock_queues();
 	/* Free the collected page table pages after all locks are dropped. */
 	pmap_free_zero_pages(free);
 }
 
 /*
  * pmap_protect_pde: do the things to protect a 4mpage in a process
  *
  * Applies the protection "prot" to the 2- or 4MB page mapping described
  * by "*pde", whose first virtual address is "sva" ("sva" must be
  * superpage aligned).  Write permission is revoked when "prot" lacks
  * VM_PROT_WRITE and, under PAE, pg_nx is set when "prot" lacks
  * VM_PROT_EXECUTE.  The pmap lock must be held by the caller.
  *
  * Returns TRUE when a non-global mapping was changed, in which case the
  * caller is responsible for the TLB invalidation.  A changed PG_G
  * mapping is invalidated here and does not cause TRUE to be returned.
  */
 static boolean_t
 pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
 {
 	pd_entry_t newpde, oldpde;
 	vm_offset_t eva, va;
 	vm_page_t m;
 	boolean_t anychanged;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PDRMASK) == 0,
 	    ("pmap_protect_pde: sva is not 4mpage aligned"));
 	anychanged = FALSE;
 retry:
 	oldpde = newpde = *pde;
 
 	/*
 	 * A managed mapping that is both writable and modified dirties
 	 * every 4KB page that it covers.  The test depends only on the
 	 * PDE, so it is made once, before the loop, rather than once per
 	 * page; the set of pages passed to vm_page_dirty() is unchanged.
 	 */
 	if ((oldpde & (PG_MANAGED | PG_M | PG_RW)) ==
 	    (PG_MANAGED | PG_M | PG_RW)) {
 		eva = sva + NBPDR;
 		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 		    va < eva; va += PAGE_SIZE, m++)
 			vm_page_dirty(m);
 	}
 	if ((prot & VM_PROT_WRITE) == 0)
 		newpde &= ~(PG_RW | PG_M);
 #ifdef PAE
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpde |= pg_nx;
 #endif
 	if (newpde != oldpde) {
 		/*
 		 * The PDE may have changed since it was read above; if the
 		 * compare-and-set fails, start over with the new value.
 		 */
 		if (!pde_cmpset(pde, oldpde, newpde))
 			goto retry;
 		if (oldpde & PG_G)
 			pmap_invalidate_page(pmap, sva);
 		else
 			anychanged = TRUE;
 	}
 	return (anychanged);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	A request that does not include read access is handed to
  *	pmap_remove().  A request that keeps write access (and, under
  *	PAE, execute access as well) returns immediately, because this
  *	routine only ever clears PG_RW/PG_M and, under PAE, sets pg_nx;
  *	it never grants additional access.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
 	pt_entry_t *pte;
 	int anychanged;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 #ifdef PAE
 	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
 		return;
 #else
 	if (prot & VM_PROT_WRITE)
 		return;
 #endif
 
 	/*
 	 * Set when a non-global mapping is changed; a single
 	 * pmap_invalidate_all() is then issued after the scan.
 	 */
 	anychanged = 0;
 
 	vm_page_lock_queues();
 	sched_pin();
 	PMAP_LOCK(pmap);
 	/* Process the range one page directory entry at a time. */
 	for (; sva < eva; sva = pdnxt) {
 		pt_entry_t obits, pbits;
 		unsigned pdirindex;
 
 		/*
 		 * "pdnxt" is the first address mapped by the next page
 		 * directory entry.  If that computation wraps around the
 		 * end of the address space, stop at "eva" instead.
 		 */
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
 		if (pdnxt < sva)
 			pdnxt = eva;
 
 		pdirindex = sva >> PDRSHIFT;
 		ptpaddr = pmap->pm_pdir[pdirindex];
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
 			/*
 			 * Are we protecting the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + NBPDR == pdnxt && eva >= pdnxt) {
 				/*
 				 * The TLB entry for a PG_G mapping is
 				 * invalidated by pmap_protect_pde().
 				 */
 				if (pmap_protect_pde(pmap,
 				    &pmap->pm_pdir[pdirindex], sva, prot))
 					anychanged = 1;
 				continue;
 			} else if (!pmap_demote_pde(pmap,
 			    &pmap->pm_pdir[pdirindex], sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
 			}
 		}
 
 		/*
 		 * Limit the scan to the end of the requested range when it
 		 * ends inside the current page table page.
 		 */
 		if (pdnxt > eva)
 			pdnxt = eva;
 
 		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 		    sva += PAGE_SIZE) {
 			vm_page_t m;
 
 retry:
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits in
 			 * size, PG_RW, PG_A, and PG_M are among the least
 			 * significant 32 bits.
 			 */
 			obits = pbits = *pte;
 			if ((pbits & PG_V) == 0)
 				continue;
 
 			if ((prot & VM_PROT_WRITE) == 0) {
 				/*
 				 * Record the modification in the vm_page
 				 * before PG_M is discarded.
 				 */
 				if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
 				    (PG_MANAGED | PG_M | PG_RW)) {
 					m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 					vm_page_dirty(m);
 				}
 				pbits &= ~(PG_RW | PG_M);
 			}
 #ifdef PAE
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				pbits |= pg_nx;
 #endif
 
 			if (pbits != obits) {
 				/*
 				 * Install the new value only if the PTE still
 				 * holds the value read above; otherwise
 				 * re-read it and try again.
 				 */
 #ifdef PAE
 				if (!atomic_cmpset_64(pte, obits, pbits))
 					goto retry;
 #else
 				if (!atomic_cmpset_int((u_int *)pte, obits,
 				    pbits))
 					goto retry;
 #endif
 				/*
 				 * A changed PG_G mapping is invalidated
 				 * individually, right away; other changes
 				 * are covered by the final
 				 * pmap_invalidate_all().
 				 */
 				if (obits & PG_G)
 					pmap_invalidate_page(pmap, sva);
 				else
 					anychanged = 1;
 			}
 		}
 	}
 	sched_unpin();
 	if (anychanged)
 		pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are
  * within a single page table page (PTP) to a single 2- or 4MB page mapping.
  * For promotion to occur, two conditions must be met: (1) the 4KB page
  * mappings must map aligned, contiguous physical memory and (2) the 4KB page
  * mappings must have identical characteristics.
  *
  * Managed (PG_MANAGED) mappings within the kernel address space are not
  * promoted.  The reason is that kernel PDEs are replicated in each pmap but
  * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
  * pmap.
  *
  * "pde" is the page directory entry that currently points at the PTP and
  * "va" is any virtual address within the region mapped by that PTP.  The
  * pmap lock must be held.  Every abandoned attempt is counted in
  * pmap_pde_p_failures and every success in pmap_pde_promotions.
  */
 static void
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
 	vm_offset_t oldpteva;
 	vm_page_t mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Examine the first PTE in the specified PTP.  Abort if this PTE is
 	 * either invalid, unused, or does not map the first 4KB physical page
 	 * within a 2- or 4MB page.
 	 */
 	firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
 setpde:
 	newpde = *firstpte;
 	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
 		pmap_pde_p_failures++;
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 		    " in pmap %p", va, pmap);
 		return;
 	}
 	/* Managed kernel mappings are never promoted (see above). */
 	if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
 		pmap_pde_p_failures++;
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 		    " in pmap %p", va, pmap);
 		return;
 	}
 	if ((newpde & (PG_M | PG_RW)) == PG_RW) {
 		/*
 		 * When PG_M is already clear, PG_RW can be cleared without
 		 * a TLB invalidation.
 		 */
 		if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde &
 		    ~PG_RW))  
 			goto setpde;
 		newpde &= ~PG_RW;
 	}
 
 	/* 
 	 * Examine each of the other PTEs in the specified PTP.  Abort if this
 	 * PTE maps an unexpected 4KB physical page or does not have identical
 	 * characteristics to the first PTE.
 	 *
 	 * The scan runs from the last PTE down to the second.  "pa" holds
 	 * the value expected in the current PTE: the physical frame together
 	 * with PG_A and PG_V, so a PTE that is invalid or not yet accessed
 	 * fails the comparison as well.
 	 */
 	pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
 	for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
 setpte:
 		oldpte = *pte;
 		if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
 			pmap_pde_p_failures++;
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 			    " in pmap %p", va, pmap);
 			return;
 		}
 		if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
 			/*
 			 * When PG_M is already clear, PG_RW can be cleared
 			 * without a TLB invalidation.
 			 */
 			if (!atomic_cmpset_int((u_int *)pte, oldpte,
 			    oldpte & ~PG_RW))
 				goto setpte;
 			oldpte &= ~PG_RW;
 			/* Virtual address of this PTE, for the trace only. */
 			oldpteva = (oldpte & PG_FRAME & PDRMASK) |
 			    (va & ~PDRMASK);
 			CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x"
 			    " in pmap %p", oldpteva, pmap);
 		}
 		if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
 			pmap_pde_p_failures++;
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 			    " in pmap %p", va, pmap);
 			return;
 		}
 		pa -= PAGE_SIZE;
 	}
 
 	/*
 	 * Save the page table page in its current state until the PDE
 	 * mapping the superpage is demoted by pmap_demote_pde() or
 	 * destroyed by pmap_remove_pde(). 
 	 */
 	mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
 	KASSERT(mpte >= vm_page_array &&
 	    mpte < &vm_page_array[vm_page_array_size],
 	    ("pmap_promote_pde: page table page is out of range"));
 	KASSERT(mpte->pindex == va >> PDRSHIFT,
 	    ("pmap_promote_pde: page table page's pindex is wrong"));
 	pmap_insert_pt_page(pmap, mpte);
 
 	/*
 	 * Promote the pv entries.
 	 */
 	if ((newpde & PG_MANAGED) != 0)
 		pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
 
 	/*
 	 * Propagate the PAT index to its proper position.  The first-PTE
 	 * check above guarantees that the frame bits below the superpage
 	 * boundary are clear in "newpde", so when PG_PTE_PAT is set the
 	 * exclusive-or clears it and sets PG_PDE_PAT.
 	 */
 	if ((newpde & PG_PTE_PAT) != 0)
 		newpde ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
 	 * Map the superpage.
 	 */
 	if (workaround_erratum383)
 		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
 	else if (pmap == kernel_pmap)
 		pmap_kenter_pde(va, PG_PS | newpde);
 	else
 		pde_store(pde, PG_PS | newpde);
 
 	pmap_pde_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
 	    " in pmap %p", va, pmap);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  *
  *	In terms of the parameters below: "m" is the page, "va" the
  *	virtual address (truncated to a page boundary here), "prot" the
  *	protection given to the mapping, and "wired" whether PG_W is set
  *	in the new PTE.  "access" is consulted only to decide whether the
  *	new PTE is created with PG_M already set.
  */
 void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
     vm_prot_t prot, boolean_t wired)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	pt_entry_t newpte, origpte;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte, om;
 	boolean_t invlva;
 
 	va = trunc_page(va);
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
 	    va));
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
 	    (m->oflags & VPO_BUSY) != 0,
 	    ("pmap_enter: page %p is not busy", m));
 
 	/* "mpte" stays NULL for addresses outside the user range. */
 	mpte = NULL;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	sched_pin();
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		mpte = pmap_allocpte(pmap, va, M_WAITOK);
 	}
 
 	pde = pmap_pde(pmap, va);
 	if ((*pde & PG_PS) != 0)
 		panic("pmap_enter: attempted pmap_enter on 4MB page");
 	pte = pmap_pte_quick(pmap, va);
 
 	/*
 	 * Page Directory table entry not valid, we need a new PT page
 	 */
 	if (pte == NULL) {
 		panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
 			(uintmax_t)pmap->pm_pdir[PTDPTDI], va);
 	}
 
 	pa = VM_PAGE_TO_PHYS(m);
 	om = NULL;
 	origpte = *pte;
 	opa = origpte & PG_FRAME;
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (origpte && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && ((origpte & PG_W) == 0))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && (origpte & PG_W))
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove extra pte reference
 		 */
 		if (mpte)
 			mpte->wire_count--;
 
 		/*
 		 * The old and new page are the same page; carry the
 		 * managed attribute over into the new PTE.
 		 */
 		if (origpte & PG_MANAGED) {
 			om = m;
 			pa |= PG_MANAGED;
 		}
 		goto validate;
 	} 
 
 	/*
 	 * "pv" receives the pv entry of the mapping being replaced, if
 	 * there is one, so that it can be reused for the new mapping.
 	 */
 	pv = NULL;
 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		if (origpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (origpte & PG_MANAGED) {
 			om = PHYS_TO_VM_PAGE(opa);
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 		}
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			KASSERT(mpte->wire_count > 0,
 			    ("pmap_enter: missing reference to page table page,"
 			     " va: 0x%x", va));
 		}
 	} else
 		pmap->pm_stats.resident_count++;
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
 		    ("pmap_enter: managed mapping within the clean submap"));
 		if (pv == NULL)
 			pv = get_pv_entry(pmap, FALSE);
 		pv->pv_va = va;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 		pa |= PG_MANAGED;
 	} else if (pv != NULL)
 		free_pv_entry(pmap, pv);
 
 	/*
 	 * Increment counters
 	 */
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
 	if ((prot & VM_PROT_WRITE) != 0) {
 		newpte |= PG_RW;
 		if ((newpte & PG_MANAGED) != 0)
 			vm_page_flag_set(m, PG_WRITEABLE);
 	}
 #ifdef PAE
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpte |= pg_nx;
 #endif
 	if (wired)
 		newpte |= PG_W;
 	if (va < VM_MAXUSER_ADDRESS)
 		newpte |= PG_U;
 	if (pmap == kernel_pmap)
 		newpte |= pgeflag;
 
 	/*
 	 * if the mapping or permission bits are different, we need
 	 * to update the pte.
 	 */
 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 		/*
 		 * The new PTE is created with PG_A set, and with PG_M set
 		 * as well when the access that led here was a write.
 		 */
 		newpte |= PG_A;
 		if ((access & VM_PROT_WRITE) != 0)
 			newpte |= PG_M;
 		if (origpte & PG_V) {
 			/*
 			 * "invlva" is set below when replacing the old
 			 * mapping requires a TLB invalidation for "va".
 			 */
 			invlva = FALSE;
 			origpte = pte_load_store(pte, newpte);
 			if (origpte & PG_A) {
 				if (origpte & PG_MANAGED)
 					vm_page_flag_set(om, PG_REFERENCED);
 				if (opa != VM_PAGE_TO_PHYS(m))
 					invlva = TRUE;
 #ifdef PAE
 				if ((origpte & PG_NX) == 0 &&
 				    (newpte & PG_NX) != 0)
 					invlva = TRUE;
 #endif
 			}
 			if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 				if ((origpte & PG_MANAGED) != 0)
 					vm_page_dirty(om);
 				if ((prot & VM_PROT_WRITE) == 0)
 					invlva = TRUE;
 			}
 			/*
 			 * If the old page has no mappings left on either of
 			 * its pv lists, it is no longer writeable.
 			 */
 			if ((origpte & PG_MANAGED) != 0 &&
 			    TAILQ_EMPTY(&om->md.pv_list) &&
 			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))
 				vm_page_flag_clear(om, PG_WRITEABLE);
 			if (invlva)
 				pmap_invalidate_page(pmap, va);
 		} else
 			pte_store(pte, newpte);
 	}
 
 	/*
 	 * If both the page table page and the reservation are fully
 	 * populated, then attempt promotion.
 	 */
 	if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
 	    pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
 		pmap_promote_pde(pmap, pde, va);
 
 	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Attempts to install a single 2- or 4MB page mapping for page "m" at
  * virtual address "va".  Returns TRUE on success and FALSE when the
  * attempt is abandoned, which happens when the page directory entry for
  * "va" is already in use or when the page is managed and
  * pmap_pv_insert_pde() does not provide a pv entry for it.  The caller
  * must hold both the page queues lock and the pmap lock.
  */
 static boolean_t
 pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	pd_entry_t *pdep, entry;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/* An existing page directory entry is never replaced. */
 	pdep = pmap_pde(pmap, va);
 	if (*pdep != 0)
 		goto fail;
 
 	entry = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
 	    PG_PS | PG_V;
 
 	/* A managed page additionally needs a pv entry for the superpage. */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m)))
 			goto fail;
 		entry |= PG_MANAGED;
 	}
 	if (va < VM_MAXUSER_ADDRESS)
 		entry |= PG_U;
 #ifdef PAE
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		entry |= pg_nx;
 #endif
 
 	/* Account for every 4KB page covered, then publish the mapping. */
 	pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 	pde_store(pdep, entry);
 
 	pmap_pde_mappings++;
 	CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
 	return (TRUE);
 
 fail:
 	CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 	    " in pmap %p", va, pmap);
 	return (FALSE);
 }
 
 /*
  * Maps a run of resident pages that belong to one object, starting with
  * the page m_start, which is mapped at the virtual address start.  Every
  * later page on the object's list is mapped at start plus the distance
  * (in pages) between its pindex and that of m_start.  The walk stops at
  * the end of the list or at the first page whose address would not be
  * below end, so only addresses for which a resident page exists are
  * mapped.  Where alignment, remaining length and reservation state
  * allow, a whole superpage is mapped with pmap_enter_pde(); otherwise
  * the page is mapped individually with pmap_enter_quick_locked().
  */
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	vm_offset_t va;
 	vm_page_t m, mpte;
 	vm_pindex_t diff, psize;
 
 	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
 	psize = atop(end - start);
 	mpte = NULL;
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	for (m = m_start; m != NULL; m = TAILQ_NEXT(m, listq)) {
 		diff = m->pindex - m_start->pindex;
 		if (diff >= psize)
 			break;
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
 		    pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
 		    pmap_enter_pde(pmap, va, m, prot)) {
 			/*
 			 * A superpage was mapped: step to its last 4KB
 			 * page so that the loop resumes after it.
 			 */
 			m = &m[NBPDR / PAGE_SIZE - 1];
 			continue;
 		}
 		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte);
 	}
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Locking wrapper around pmap_enter_quick_locked(): takes the page
  * queues lock and the pmap lock, enters the mapping of page "m" at "va",
  * and releases both locks.  The page table page returned by the helper
  * is discarded.
  *
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * but is *MUCH* faster than pmap_enter...
  */
 
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Enters a mapping of page "m" at "va" without write permission, provided
  * that nothing is mapped there yet.  The page queues lock and the pmap
  * lock must be held.
  *
  * "mpte" is a hint: the page table page returned by the previous call,
  * or NULL.  When it covers "va" it is reused without looking at the page
  * directory.
  *
  * Returns the page table page that holds the new mapping.  Returns NULL
  * when "va" is not a user address or when no mapping was entered: "va"
  * lies within a superpage mapping, a page table page could not be
  * allocated without waiting, a mapping already exists at "va", or no pv
  * entry could be obtained for a managed page.
  */
 static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte)
 {
 	pt_entry_t *pte;
 	vm_paddr_t pa;
 	vm_page_t free;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		unsigned ptepindex;
 		pd_entry_t ptepa;
 
 		/*
 		 * Calculate pagetable page index
 		 */
 		ptepindex = va >> PDRSHIFT;
 		if (mpte && (mpte->pindex == ptepindex)) {
 			/* The caller's hint covers "va"; reuse it. */
 			mpte->wire_count++;
 		} else {
 			/*
 			 * Get the page directory entry
 			 */
 			ptepa = pmap->pm_pdir[ptepindex];
 
 			/*
 			 * If the page table page is mapped, we just increment
 			 * the hold count, and activate it.
 			 */
 			if (ptepa) {
 				/* "va" is already mapped by a superpage. */
 				if (ptepa & PG_PS)
 					return (NULL);
 				mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
 				mpte->wire_count++;
 			} else {
 				mpte = _pmap_allocpte(pmap, ptepindex,
 				    M_NOWAIT);
 				if (mpte == NULL)
 					return (mpte);
 			}
 		}
 	} else {
 		mpte = NULL;
 	}
 
 	/*
 	 * This call to vtopte makes the assumption that we are
 	 * entering the page into the current pmap.  In order to support
 	 * quick entry into any pmap, one would likely use pmap_pte_quick.
 	 * But that isn't as quick as vtopte.
 	 */
 	pte = vtopte(va);
 	if (*pte) {
 		/*
 		 * A mapping already exists; give back the reference taken
 		 * on the page table page above and leave it untouched.
 		 */
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			mpte = NULL;
 		}
 		return (mpte);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m)) {
 		/*
 		 * No pv entry could be obtained.  Drop the page table page
 		 * reference; when pmap_unwire_pte_hold() reports success,
 		 * invalidate "va" and release the pages it collected on
 		 * "free".
 		 */
 		if (mpte != NULL) {
 			free = NULL;
 			if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
 				pmap_invalidate_page(pmap, va);
 				pmap_free_zero_pages(free);
 			}
 			
 			mpte = NULL;
 		}
 		return (mpte);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
 #ifdef PAE
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		pa |= pg_nx;
 #endif
 
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 		pte_store(pte, pa | PG_V | PG_U);
 	else
 		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 	return (mpte);
 }
 
 /*
  * Maps the physical address "pa" into slot "i" (counted in pages) of the
  * crashdumpmap window and flushes the TLB entry for that slot.  Only
  * meant for use while writing a panic dump.  The value returned is the
  * start of the window, not the address of slot "i".
  */
 void *
 pmap_kenter_temporary(vm_paddr_t pa, int i)
 {
 	vm_offset_t slot;
 
 	slot = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 
 	/* Install the mapping, then discard any stale translation. */
 	pmap_kenter(slot, pa);
 	invlpg(slot);
 
 	return ((void *)crashdumpmap);
 }
 
 /*
  * This code maps large physical mmap regions into the
  * processor address space.  Note that some shortcuts
  * are taken, but the code works.
  *
  * Only device (OBJT_DEVICE) and scatter/gather (OBJT_SG) objects are
  * accepted.  Nothing is mapped unless large pages are available
  * (pseflag), "addr" and "size" are both multiples of the 2/4MB page
  * size, and the object's pages starting at "pindex" are physically
  * contiguous, start on a 2/4MB boundary, and share one memory
  * attribute.  In every other case the routine returns without
  * creating any mapping.  The object lock must be held.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 	pd_entry_t *pde;
 	vm_paddr_t pa, ptepa;
 	vm_page_t p;
 	int pat_mode;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 	if (pseflag && 
 	    (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
 		/* Give up if the object's pages cannot all be populated. */
 		if (!vm_object_populate(object, pindex, pindex + atop(size)))
 			return;
 		p = vm_page_lookup(object, pindex);
 		KASSERT(p->valid == VM_PAGE_BITS_ALL,
 		    ("pmap_object_init_pt: invalid page %p", p));
 		/* Every page must share the first page's attribute. */
 		pat_mode = p->md.pat_mode;
 
 		/*
 		 * Abort the mapping if the first page is not physically
 		 * aligned to a 2/4MB page boundary.
 		 */
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
 
 		/*
 		 * Skip the first page.  Abort the mapping if the rest of
 		 * the pages are not physically contiguous or have differing
 		 * memory attributes.
 		 */
 		p = TAILQ_NEXT(p, listq);
 		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
 		    pa += PAGE_SIZE) {
 			KASSERT(p->valid == VM_PAGE_BITS_ALL,
 			    ("pmap_object_init_pt: invalid page %p", p));
 			if (pa != VM_PAGE_TO_PHYS(p) ||
 			    pat_mode != p->md.pat_mode)
 				return;
 			p = TAILQ_NEXT(p, listq);
 		}
 
 		/*
 		 * Map using 2/4MB pages.  Since "ptepa" is 2/4M aligned and
 		 * "size" is a multiple of 2/4M, adding the PAT setting to
 		 * "pa" will not affect the termination of this loop.
 		 */
 		PMAP_LOCK(pmap);
 		for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
 		    size; pa += NBPDR) {
 			pde = pmap_pde(pmap, addr);
 			if (*pde == 0) {
 				/*
 				 * The mapping is created user-accessible and
 				 * writable, with the accessed and modified
 				 * bits already set.
 				 */
 				pde_store(pde, pa | PG_PS | PG_M | PG_A |
 				    PG_U | PG_RW | PG_V);
 				pmap->pm_stats.resident_count += NBPDR /
 				    PAGE_SIZE;
 				pmap_pde_mappings++;
 			}
 			/* Else continue on if the PDE is already valid. */
 			addr += NBPDR;
 		}
 		PMAP_UNLOCK(pmap);
 	}
 }
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Change the wiring attribute for a map/virtual-address
  *			pair.
  *	In/out conditions:
  *			The mapping must already exist in the pmap.
  *
  *	Notes:
  *			When "va" lies within a 2/4MB page mapping whose
  *			wiring differs from "wired", the mapping is first
  *			demoted so that only the 4KB page at "va" changes.
  *			Demotion is done with the page queues lock held; a
  *			failed demotion is fatal.
  */
 void
 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	boolean_t are_queues_locked;
 
 	are_queues_locked = FALSE;
 retry:
 	PMAP_LOCK(pmap);
 	pde = pmap_pde(pmap, va);
 	if ((*pde & PG_PS) != 0) {
 		/* True when the requested wiring differs from the PDE's. */
 		if (!wired != ((*pde & PG_W) == 0)) {
 			/*
 			 * The page queues lock is only tried while the pmap
 			 * lock is held.  If the attempt fails, drop the pmap
 			 * lock, block on the page queues lock, and start
 			 * over.  The flag is set first, so the second time
 			 * around the lock is known to be held already.
 			 */
 			if (!are_queues_locked) {
 				are_queues_locked = TRUE;
 				if (!mtx_trylock(&vm_page_queue_mtx)) {
 					PMAP_UNLOCK(pmap);
 					vm_page_lock_queues();
 					goto retry;
 				}
 			}
 			if (!pmap_demote_pde(pmap, pde, va))
 				panic("pmap_change_wiring: demotion failed");
 		} else
 			goto out;
 	}
 	pte = pmap_pte(pmap, va);
 
 	/* Keep the pmap's wired page count in step with the change. */
 	if (wired && !pmap_pte_w(pte))
 		pmap->pm_stats.wired_count++;
 	else if (!wired && pmap_pte_w(pte))
 		pmap->pm_stats.wired_count--;
 
 	/*
 	 * Wiring is not a hardware characteristic so there is no need to
 	 * invalidate TLB.
 	 */
 	pmap_pte_set_w(pte, wired);
 	pmap_pte_release(pte);
 out:
 	if (are_queues_locked)
 		vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  *
  *	Nothing is copied unless the source and destination addresses
  *	are equal and the source pmap is the current one.  Only managed
  *	mappings are copied; the copies are made without the wired,
  *	modified, and accessed bits.  The first resource shortage (no
  *	page table page or no pv entry available without waiting) ends
  *	the copy.
  */
 
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
 	vm_page_t   free;
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t pdnxt;
 
 	if (dst_addr != src_addr)
 		return;
 
 	/* The source PTEs are read through vtopte() below. */
 	if (!pmap_is_current(src_pmap))
 		return;
 
 	vm_page_lock_queues();
 	/*
 	 * The two pmap locks are always taken in order of ascending pmap
 	 * address -- presumably so that concurrent callers cannot acquire
 	 * them in opposite orders.
 	 */
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	sched_pin();
 	/* Process the range one page directory entry at a time. */
 	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 		pt_entry_t *src_pte, *dst_pte;
 		vm_page_t dstmpte, srcmpte;
 		pd_entry_t srcptepaddr;
 		unsigned ptepindex;
 
 		KASSERT(addr < UPT_MIN_ADDRESS,
 		    ("pmap_copy: invalid to pmap_copy page tables"));
 
 		/*
 		 * "pdnxt" is the first address mapped by the next page
 		 * directory entry; if the computation wraps, stop at the
 		 * end of the range.
 		 */
 		pdnxt = (addr + NBPDR) & ~PDRMASK;
 		if (pdnxt < addr)
 			pdnxt = end_addr;
 		ptepindex = addr >> PDRSHIFT;
 
 		srcptepaddr = src_pmap->pm_pdir[ptepindex];
 		if (srcptepaddr == 0)
 			continue;
 			
 		if (srcptepaddr & PG_PS) {
 			/*
 			 * Copy a 2/4MB page mapping as a whole, without its
 			 * wired bit, but only into an empty destination
 			 * slot and, for a managed mapping, only if a pv
 			 * entry can be inserted for it.
 			 */
 			if (dst_pmap->pm_pdir[ptepindex] == 0 &&
 			    ((srcptepaddr & PG_MANAGED) == 0 ||
 			    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
 			    PG_PS_FRAME))) {
 				dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
 				    ~PG_W;
 				dst_pmap->pm_stats.resident_count +=
 				    NBPDR / PAGE_SIZE;
 			}
 			continue;
 		}
 
 		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
 		KASSERT(srcmpte->wire_count > 0,
 		    ("pmap_copy: source page table page is unused"));
 
 		if (pdnxt > end_addr)
 			pdnxt = end_addr;
 
 		src_pte = vtopte(addr);
 		while (addr < pdnxt) {
 			pt_entry_t ptetemp;
 			ptetemp = *src_pte;
 			/*
 			 * we only virtual copy managed pages
 			 */
 			if ((ptetemp & PG_MANAGED) != 0) {
 				dstmpte = pmap_allocpte(dst_pmap, addr,
 				    M_NOWAIT);
 				if (dstmpte == NULL)
 					goto out;
 				dst_pte = pmap_pte_quick(dst_pmap, addr);
 				if (*dst_pte == 0 &&
 				    pmap_try_insert_pv_entry(dst_pmap, addr,
 				    PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
 					/*
 					 * Clear the wired, modified, and
 					 * accessed (referenced) bits
 					 * during the copy.
 					 */
 					*dst_pte = ptetemp & ~(PG_W | PG_M |
 					    PG_A);
 					dst_pmap->pm_stats.resident_count++;
 	 			} else {
 					/*
 					 * Nothing was entered: release the
 					 * reference just taken on the
 					 * destination page table page and
 					 * end the copy.
 					 */
 					free = NULL;
 					if (pmap_unwire_pte_hold(dst_pmap,
 					    dstmpte, &free)) {
 						pmap_invalidate_page(dst_pmap,
 						    addr);
 						pmap_free_zero_pages(free);
 					}
 					goto out;
 				}
 				/*
 				 * NOTE(review): wire_count appears to count
 				 * the mappings held in a page table page, so
 				 * this ends the scan of the current page
 				 * table page once the destination holds as
 				 * many as the source -- confirm.
 				 */
 				if (dstmpte->wire_count >= srcmpte->wire_count)
 					break;
 			}
 			addr += PAGE_SIZE;
 			src_pte++;
 		}
 	}
 out:
 	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }	
 
 /*
  * Zeroes the PAGE_SIZE bytes starting at "page" with the routine chosen
  * for the running CPU.  In a kernel built with I686_CPU, a 686-class CPU
  * uses sse2_pagezero() when CPU_ENABLE_SSE is configured and the CPU
  * reports SSE2, and i686_pagezero() otherwise.  Every other case falls
  * back to bzero().
  */
 static __inline void
 pagezero(void *page)
 {
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686) {
 #if defined(CPU_ENABLE_SSE)
 		if (cpu_feature & CPUID_SSE2)
 			sse2_pagezero(page);
 		else
 #endif
 			i686_pagezero(page);
 	} else
 #endif
 		/* Also the "else" arm of the 686 test when it is compiled in. */
 		bzero(page, PAGE_SIZE);
 }
 
 /*
  *	pmap_zero_page clears an entire hardware page.  The page is
  *	mapped temporarily at the current CPU's CADDR2 window, zeroed
  *	with pagezero(), and unmapped again.  The per-CPU sysmaps lock
  *	is held throughout, and finding the window already in use is
  *	fatal.
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 	struct sysmaps *sm;
 
 	sm = &sysmaps_pcpu[PCPU_GET(cpuid)];
 	mtx_lock(&sm->lock);
 	if (*sm->CMAP2 != 0)
 		panic("pmap_zero_page: CMAP2 busy");
 	sched_pin();
 
 	/* Map the page writable, honoring its memory attribute. */
 	*sm->CMAP2 = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0) |
 	    PG_V | PG_RW | PG_A | PG_M;
 	invlcaddr(sm->CADDR2);
 	pagezero(sm->CADDR2);
 
 	/* Tear the temporary mapping down again. */
 	*sm->CMAP2 = 0;
 	sched_unpin();
 	mtx_unlock(&sm->lock);
 }
 
 /*
  *	pmap_zero_page_area clears "size" bytes starting "off" bytes into
  *	the given hardware page.  The page is mapped temporarily at the
  *	current CPU's CADDR2 window for the duration.  A request covering
  *	the whole page is served by pagezero(); any other request by
  *	bzero().
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	struct sysmaps *sm;
 
 	sm = &sysmaps_pcpu[PCPU_GET(cpuid)];
 	mtx_lock(&sm->lock);
 	if (*sm->CMAP2 != 0)
 		panic("pmap_zero_page_area: CMAP2 busy");
 	sched_pin();
 
 	/* Map the page writable, honoring its memory attribute. */
 	*sm->CMAP2 = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0) |
 	    PG_V | PG_RW | PG_A | PG_M;
 	invlcaddr(sm->CADDR2);
 
 	if (off != 0 || size != PAGE_SIZE)
 		bzero((char *)sm->CADDR2 + off, size);
 	else
 		pagezero(sm->CADDR2);
 
 	/* Tear the temporary mapping down again. */
 	*sm->CMAP2 = 0;
 	sched_unpin();
 	mtx_unlock(&sm->lock);
 }
 
 /*
  *	pmap_zero_page_idle clears an entire hardware page through the
  *	CMAP3/CADDR3 window.  No lock is taken here; the routine is
  *	intended to be called from the vm_pagezero process only and
  *	outside of Giant.  Finding the window already in use is fatal.
  */
 void
 pmap_zero_page_idle(vm_page_t m)
 {
 
 	if (*CMAP3 != 0)
 		panic("pmap_zero_page_idle: CMAP3 busy");
 	sched_pin();
 
 	/* Map the page writable, honoring its memory attribute. */
 	*CMAP3 = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0) |
 	    PG_V | PG_RW | PG_A | PG_M;
 	invlcaddr(CADDR3);
 	pagezero(CADDR3);
 
 	/* Tear the temporary mapping down again. */
 	*CMAP3 = 0;
 	sched_unpin();
 }
 
 /*
  *	pmap_copy_page copies the contents of one (machine independent)
  *	page to another.  The source is mapped at the current CPU's
  *	CADDR1 window without write permission, the destination at CADDR2
  *	with it, and the data is moved with bcopy.  The per-CPU sysmaps
  *	lock is held throughout, and finding either window already in use
  *	is fatal.
  */
 void
 pmap_copy_page(vm_page_t src, vm_page_t dst)
 {
 	struct sysmaps *sm;
 
 	sm = &sysmaps_pcpu[PCPU_GET(cpuid)];
 	mtx_lock(&sm->lock);
 	if (*sm->CMAP1 != 0)
 		panic("pmap_copy_page: CMAP1 busy");
 	if (*sm->CMAP2 != 0)
 		panic("pmap_copy_page: CMAP2 busy");
 	sched_pin();
 
 	/* Flush both windows before the new mappings are installed. */
 	invlpg((u_int)sm->CADDR1);
 	invlpg((u_int)sm->CADDR2);
 	*sm->CMAP1 = VM_PAGE_TO_PHYS(src) |
 	    pmap_cache_bits(src->md.pat_mode, 0) | PG_V | PG_A;
 	*sm->CMAP2 = VM_PAGE_TO_PHYS(dst) |
 	    pmap_cache_bits(dst->md.pat_mode, 0) | PG_V | PG_RW | PG_A | PG_M;
 
 	bcopy(sm->CADDR1, sm->CADDR2, PAGE_SIZE);
 
 	/* Tear both temporary mappings down again. */
 	*sm->CMAP1 = 0;
 	*sm->CMAP2 = 0;
 	sched_unpin();
 	mtx_unlock(&sm->lock);
 }
 
 /*
  * Reports whether the given pmap owns one of the first 16 pv entries
  * linked to the page.  The page's own pv list is searched first; the
  * search then continues on the pv list that pa_to_pvh() returns for the
  * page's physical address, within the same budget of 16 entries.  The
  * budget may be changed upwards or downwards in the future; it is only
  * necessary that TRUE be returned for a small subset of pmaps for proper
  * page aging.  The page must be managed.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	boolean_t found;
 	int examined;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	found = FALSE;
 	examined = 0;
 	vm_page_lock_queues();
 
 	/* The page's own pv list comes first. */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		if (PV_PMAP(pv) == pmap) {
 			found = TRUE;
 			goto done;
 		}
 		if (++examined >= 16)
 			goto done;
 	}
 
 	/* Not found yet and budget left: try the second pv list. */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 		if (PV_PMAP(pv) == pmap) {
 			found = TRUE;
 			break;
 		}
 		if (++examined >= 16)
 			break;
 	}
 done:
 	vm_page_unlock_queues();
 	return (found);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Counts the wired managed mappings of the given physical page.
  *	A fictitious page always yields zero.  Otherwise the total is
  *	taken, under the page queues lock, over the page's own pv list
  *	and the pv list that pa_to_pvh() returns for its physical
  *	address.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	int total;
 
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return (0);
 
 	vm_page_lock_queues();
 	total = pmap_pvh_wired_mappings(&m->md, 0);
 	total = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), total);
 	vm_page_unlock_queues();
 
 	return (total);
 }
 
 /*
  *	pmap_pvh_wired_mappings:
  *
  *	Return the updated number "count" of managed mappings that are wired.
  */
 static int
 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
 {
 	pv_entry_t entry;
 	pt_entry_t *ptep;
 	pmap_t owner;
 
 	/* The caller must already hold the page queues mutex. */
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	sched_pin();
 	TAILQ_FOREACH(entry, &pvh->pv_list, pv_list) {
 		owner = PV_PMAP(entry);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, entry->pv_va);
 		/* Each mapping whose PTE has PG_W set adds one. */
 		count += ((*ptep & PG_W) != 0) ? 1 : 0;
 		PMAP_UNLOCK(owner);
 	}
 	sched_unpin();
 	return (count);
 }
 
 /*
  * Returns TRUE if the given page is mapped individually or as part of
  * a 4mpage.  Otherwise, returns FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
 	boolean_t mapped;
 
 	mapped = FALSE;
 	/* Fictitious and unmanaged pages are always reported as unmapped. */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		vm_page_lock_queues();
 		if (!TAILQ_EMPTY(&m->md.pv_list))
 			mapped = TRUE;
 		else if (!TAILQ_EMPTY(
 		    &pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
 			mapped = TRUE;
 		vm_page_unlock_queues();
 	}
 	return (mapped);
 }
 
 /*
  * Remove all pages from specified address space
  * this aids process exit speeds.  Also, this code
  * is special cased for current process only, but
  * can have the more generic (and slightly slower)
  * mode enabled.  This is much faster than pmap_remove
  * in the case of running down an entire address space.
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pt_entry_t *pte, tpte;
 	vm_page_t free = NULL;	/* pages handed to pmap_free_zero_pages() at the end */
 	vm_page_t m, mpte, mt;
 	pv_entry_t pv;
 	struct md_page *pvh;
 	struct pv_chunk *pc, *npc;
 	int field, idx;
 	int32_t bit;
 	uint32_t inuse, bitmask;
 	int allfree;
 
 	/* Only the pmap that is current on this CPU is handled. */
 	if (pmap != PCPU_GET(curpmap)) {
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	sched_pin();
 	/*
 	 * Walk every pv chunk owned by the pmap.  The _SAFE iterator is
 	 * needed because a chunk that ends up entirely free is unlinked
 	 * and released at the bottom of the loop body.
 	 */
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		allfree = 1;
 		for (field = 0; field < _NPCM; field++) {
 			/*
 			 * A set bit in pc_map marks a free slot, so the
 			 * complement (limited to pc_freemask) selects the
 			 * pv entries currently in use.
 			 */
 			inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 			while (inuse != 0) {
 				/* Take the lowest in-use slot and retire its bit. */
 				bit = bsfl(inuse);
 				bitmask = 1UL << bit;
 				idx = field * 32 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				/*
 				 * Start from the PDE; if it is not a
 				 * superpage (PG_PS clear), switch to the PTE.
 				 */
 				pte = pmap_pde(pmap, pv->pv_va);
 				tpte = *pte;
 				if ((tpte & PG_PS) == 0) {
 					pte = vtopte(pv->pv_va);
 					tpte = *pte & ~PG_PTE_PAT;
 				}
 
 				/* A pv entry without a live entry is fatal. */
 				if (tpte == 0) {
 					printf(
 					    "TPTE at %p  IS ZERO @ VA %08x\n",
 					    pte, pv->pv_va);
 					panic("bad pte");
 				}
 
 /*
  * We cannot remove wired pages from a process' mapping at this time.
  * The chunk keeps this entry, so it cannot be released as a whole.
  */
 				if (tpte & PG_W) {
 					allfree = 0;
 					continue;
 				}
 
 				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 				KASSERT(m->phys_addr == (tpte & PG_FRAME),
 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 				    m, (uintmax_t)m->phys_addr,
 				    (uintmax_t)tpte));
 
 				KASSERT(m < &vm_page_array[vm_page_array_size],
 					("pmap_remove_pages: bad tpte %#jx",
 					(uintmax_t)tpte));
 
 				pte_clear(pte);
 
 				/*
 				 * Update the vm_page_t clean/reference bits.
 				 * A superpage dirties every 4KB page that it
 				 * spans.
 				 */
 				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 					if ((tpte & PG_PS) != 0) {
 						for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 							vm_page_dirty(mt);
 					} else
 						vm_page_dirty(m);
 				}
 
 				/* Mark free */
 				PV_STAT(pv_entry_frees++);
 				PV_STAT(pv_entry_spare++);
 				pv_entry_count--;
 				pc->pc_map[field] |= bitmask;
 				if ((tpte & PG_PS) != 0) {
 					/*
 					 * Superpage: unlink the pv entry from
 					 * the pvh list and clear PG_WRITEABLE
 					 * on each constituent page that has no
 					 * remaining pv entries of its own.
 					 */
 					pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 					pvh = pa_to_pvh(tpte & PG_PS_FRAME);
 					TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 							if (TAILQ_EMPTY(&mt->md.pv_list))
 								vm_page_flag_clear(mt, PG_WRITEABLE);
 					}
 					/*
 					 * If a page table page is recorded for
 					 * this address, queue it for release.
 					 */
 					mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
 					if (mpte != NULL) {
 						pmap_remove_pt_page(pmap, mpte);
 						pmap->pm_stats.resident_count--;
 						KASSERT(mpte->wire_count == NPTEPG,
 						    ("pmap_remove_pages: pte page wire count error"));
 						mpte->wire_count = 0;
 						pmap_add_delayed_free_list(mpte, &free, FALSE);
 						atomic_subtract_int(&cnt.v_wire_count, 1);
 					}
 				} else {
 					/*
 					 * 4KB mapping: unlink the pv entry from
 					 * the page's own list; PG_WRITEABLE is
 					 * cleared only when both of the page's
 					 * pv lists are empty.
 					 */
 					pmap->pm_stats.resident_count--;
 					TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 					if (TAILQ_EMPTY(&m->md.pv_list)) {
 						pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 						if (TAILQ_EMPTY(&pvh->pv_list))
 							vm_page_flag_clear(m, PG_WRITEABLE);
 					}
 					pmap_unuse_pt(pmap, pv->pv_va, &free);
 				}
 			}
 		}
 		/*
 		 * No wired entry was skipped in this chunk: unlink it,
 		 * unmap and free its backing page, and return its kernel
 		 * virtual address to pv_vafree.
 		 */
 		if (allfree) {
 			PV_STAT(pv_entry_spare -= _NPCPV);
 			PV_STAT(pc_chunk_count--);
 			PV_STAT(pc_chunk_frees++);
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 			pmap_qremove((vm_offset_t)pc, 1);
 			vm_page_unwire(m, 0);
 			vm_page_free(m);
 			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 		}
 	}
 	sched_unpin();
 	/* A single invalidation covers every entry cleared above. */
 	pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 	/* The queued pages are released only after the locks are dropped. */
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	boolean_t modified;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
 	 * With the object locked and the page not VPO_BUSY, nobody can
 	 * set PG_WRITEABLE behind our back.  A page without PG_WRITEABLE
 	 * cannot have PG_M set in any PTE, so the pv lists need only be
 	 * scanned when the page is busy or writeable.
 	 */
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	modified = FALSE;
 	if ((m->oflags & VPO_BUSY) != 0 ||
 	    (m->flags & PG_WRITEABLE) != 0) {
 		vm_page_lock_queues();
 		modified = pmap_is_modified_pvh(&m->md);
 		if (!modified)
 			modified = pmap_is_modified_pvh(
 			    pa_to_pvh(VM_PAGE_TO_PHYS(m)));
 		vm_page_unlock_queues();
 	}
 	return (modified);
 }
 
 /*
  * Returns TRUE if any of the given mappings were used to modify
  * physical memory.  Otherwise, returns FALSE.  Both page and 4mpage
  * mappings are supported.
  */
 static boolean_t
 pmap_is_modified_pvh(struct md_page *pvh)
 {
 	pv_entry_t entry;
 	pt_entry_t *ptep;
 	pmap_t owner;
 	boolean_t dirty;
 
 	/* The caller must already hold the page queues mutex. */
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	dirty = FALSE;
 	sched_pin();
 	TAILQ_FOREACH(entry, &pvh->pv_list, pv_list) {
 		owner = PV_PMAP(entry);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, entry->pv_va);
 		/* Only a mapping that is both writeable and modified counts. */
 		if ((*ptep & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			dirty = TRUE;
 		PMAP_UNLOCK(owner);
 		if (dirty)
 			break;
 	}
 	sched_unpin();
 	return (dirty);
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	boolean_t prefaultable;
 
 	PMAP_LOCK(pmap);
 	pde = pmap_pde(pmap, addr);
 	if (*pde == 0 || (*pde & PG_PS) != 0) {
 		/* No page directory entry, or the entry maps a superpage. */
 		prefaultable = FALSE;
 	} else {
 		/* Prefaultable only while the PTE slot is still empty. */
 		pte = vtopte(addr);
 		prefaultable = (*pte == 0);
 	}
 	PMAP_UNLOCK(pmap);
 	return (prefaultable);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Return whether or not the specified physical page was referenced
  *	in any physical maps.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 	boolean_t referenced;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_is_referenced: page %p is not managed", m));
 	vm_page_lock_queues();
 	/* Check the page's own pv list first, then the pa_to_pvh() list. */
 	referenced = pmap_is_referenced_pvh(&m->md);
 	if (!referenced)
 		referenced = pmap_is_referenced_pvh(
 		    pa_to_pvh(VM_PAGE_TO_PHYS(m)));
 	vm_page_unlock_queues();
 	return (referenced);
 }
 
 /*
  * Returns TRUE if any of the given mappings were referenced and FALSE
  * otherwise.  Both page and 4mpage mappings are supported.
  */
 static boolean_t
 pmap_is_referenced_pvh(struct md_page *pvh)
 {
 	pv_entry_t entry;
 	pt_entry_t *ptep;
 	pmap_t owner;
 	boolean_t referenced;
 
 	/* The caller must already hold the page queues mutex. */
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	referenced = FALSE;
 	sched_pin();
 	TAILQ_FOREACH(entry, &pvh->pv_list, pv_list) {
 		owner = PV_PMAP(entry);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, entry->pv_va);
 		/* Only a valid mapping with the accessed bit set counts. */
 		if ((*ptep & (PG_A | PG_V)) == (PG_A | PG_V))
 			referenced = TRUE;
 		PMAP_UNLOCK(owner);
 		if (referenced)
 			break;
 	}
 	sched_unpin();
 	return (referenced);
 }
 
 /*
  * Clear the write and modified bits in each of the given page's mappings.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pd_entry_t *pde;
 	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
 	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	if ((m->oflags & VPO_BUSY) == 0 &&
 	    (m->flags & PG_WRITEABLE) == 0)
 		return;
 	vm_page_lock_queues();
 	sched_pin();
 
 	/*
 	 * First pass: demote every writeable superpage mapping that covers
 	 * the page.  The _SAFE iterator is used because demotion may change
 	 * the list being walked.  The result of the demotion is
 	 * deliberately ignored.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, va);
 		if ((*pde & PG_RW) != 0)
 			(void)pmap_demote_pde(pmap, pde, va);
 		PMAP_UNLOCK(pmap);
 	}
 
 	/*
 	 * Second pass: strip PG_RW and PG_M from each 4KB mapping of the
 	 * page.
 	 */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found"
 		    " a 4mpage in page %p's pv list", m));
 		pte = pmap_pte_quick(pmap, pv->pv_va);
 retry:
 		oldpte = *pte;
 		if ((oldpte & PG_RW) != 0) {
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits
 			 * in size, PG_RW and PG_M are among the least
 			 * significant 32 bits.  The compare-and-set is
 			 * retried until it succeeds against a fresh
 			 * snapshot of the PTE.
 			 */
 			if (!atomic_cmpset_int((u_int *)pte, oldpte,
 			    oldpte & ~(PG_RW | PG_M)))
 				goto retry;
 			/* Carry a pending modification over to the page. */
 			if ((oldpte & PG_M) != 0)
 				vm_page_dirty(m);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
 	sched_unpin();
 	vm_page_unlock_queues();
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	XXX: The exact number of bits to check and clear is a matter that
  *	should be tested and standardized at some point in the future for
  *	optimal aging of shared pages.
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv, pvf, pvn;
 	pmap_t pmap;
 	pd_entry_t oldpde, *pde;
 	pt_entry_t *pte;
 	vm_offset_t va;
 	int rtval = 0;		/* number of references found so far */
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	vm_page_lock_queues();
 	sched_pin();
 	/*
 	 * First pass: superpage mappings whose PDE has PG_A set.  The
 	 * _SAFE iterator is used because demotion may change the list
 	 * being walked.
 	 */
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, va);
 		oldpde = *pde;
 		if ((oldpde & PG_A) != 0) {
 			if (pmap_demote_pde(pmap, pde, va)) {
 				/* Wired superpages are demoted but not counted. */
 				if ((oldpde & PG_W) == 0) {
 					/*
 					 * Remove the mapping to a single page
 					 * so that a subsequent access may
 					 * repromote.  Since the underlying
 					 * page table page is fully populated,
 					 * this removal never frees a page
 					 * table page.
 					 */
 					va += VM_PAGE_TO_PHYS(m) - (oldpde &
 					    PG_PS_FRAME);
 					pmap_remove_page(pmap, va, NULL);
 					rtval++;
 					/* Stop once five references were found. */
 					if (rtval > 4) {
 						PMAP_UNLOCK(pmap);
 						goto out;
 					}
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	/*
 	 * Second pass: 4KB mappings.  Each visited entry is moved to the
 	 * tail of the list; the walk ends when it returns to the entry it
 	 * started from (pvf), runs off the list, or hits the limit.
 	 */
 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pvf = pv;
 		do {
 			/* Fetch the successor before pv is relinked. */
 			pvn = TAILQ_NEXT(pv, pv_list);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 			pmap = PV_PMAP(pv);
 			PMAP_LOCK(pmap);
 			pde = pmap_pde(pmap, pv->pv_va);
 			KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
 			    " found a 4mpage in page %p's pv list", m));
 			pte = pmap_pte_quick(pmap, pv->pv_va);
 			if ((*pte & PG_A) != 0) {
 				atomic_clear_int((u_int *)pte, PG_A);
 				pmap_invalidate_page(pmap, pv->pv_va);
 				rtval++;
 				/* Limit reached: end the walk after this entry. */
 				if (rtval > 4)
 					pvn = NULL;
 			}
 			PMAP_UNLOCK(pmap);
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
 out:
 	sched_unpin();
 	vm_page_unlock_queues();
 	return (rtval);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pd_entry_t oldpde, *pde;
 	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	KASSERT((m->oflags & VPO_BUSY) == 0,
 	    ("pmap_clear_modify: page %p is busy", m));
 
 	/*
 	 * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
 	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->flags & PG_WRITEABLE) == 0)
 		return;
 	vm_page_lock_queues();
 	sched_pin();
 	/*
 	 * First pass: writeable superpage mappings.  Each is demoted; for
 	 * a non-wired one, the 4KB mapping of this page is then write
 	 * protected.  The _SAFE iterator is used because demotion may
 	 * change the list being walked.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, va);
 		oldpde = *pde;
 		if ((oldpde & PG_RW) != 0) {
 			if (pmap_demote_pde(pmap, pde, va)) {
 				if ((oldpde & PG_W) == 0) {
 					/*
 					 * Write protect the mapping to a
 					 * single page so that a subsequent
 					 * write access may repromote.
 					 */
 					/* Offset of this page within the superpage. */
 					va += VM_PAGE_TO_PHYS(m) - (oldpde &
 					    PG_PS_FRAME);
 					pte = pmap_pte_quick(pmap, va);
 					oldpte = *pte;
 					if ((oldpte & PG_V) != 0) {
 						/*
 						 * Regardless of whether a pte is 32 or 64 bits
 						 * in size, PG_RW and PG_M are among the least
 						 * significant 32 bits.
 						 */
 						while (!atomic_cmpset_int((u_int *)pte,
 						    oldpte,
 						    oldpte & ~(PG_M | PG_RW)))
 							oldpte = *pte;
 						vm_page_dirty(m);
 						pmap_invalidate_page(pmap, va);
 					}
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	/*
 	 * Second pass: 4KB mappings.  PG_M is cleared on every mapping
 	 * that is both writeable and modified; PG_RW is left in place.
 	 */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
 		    " a 4mpage in page %p's pv list", m));
 		pte = pmap_pte_quick(pmap, pv->pv_va);
 		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits
 			 * in size, PG_M is among the least significant
 			 * 32 bits.
 			 */
 			atomic_clear_int((u_int *)pte, PG_M);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	sched_unpin();
 	vm_page_unlock_queues();
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the reference bit on the specified physical page.
  */
 void
 pmap_clear_reference(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pd_entry_t oldpde, *pde;
 	pt_entry_t *pte;
 	vm_offset_t va;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_clear_reference: page %p is not managed", m));
 	vm_page_lock_queues();
 	sched_pin();
 	/*
 	 * First pass: superpage mappings whose PDE has PG_A set.  The
 	 * _SAFE iterator is used because demotion may change the list
 	 * being walked.
 	 */
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, va);
 		oldpde = *pde;
 		if ((oldpde & PG_A) != 0) {
 			if (pmap_demote_pde(pmap, pde, va)) {
 				/*
 				 * Remove the mapping to a single page so
 				 * that a subsequent access may repromote.
 				 * Since the underlying page table page is
 				 * fully populated, this removal never frees
 				 * a page table page.
 				 */
 				/* Offset of this page within the superpage. */
 				va += VM_PAGE_TO_PHYS(m) - (oldpde &
 				    PG_PS_FRAME);
 				pmap_remove_page(pmap, va, NULL);
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	/* Second pass: clear PG_A on every 4KB mapping that has it set. */
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pde = pmap_pde(pmap, pv->pv_va);
 		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
 		    " a 4mpage in page %p's pv list", m));
 		pte = pmap_pte_quick(pmap, pv->pv_va);
 		if ((*pte & PG_A) != 0) {
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits
 			 * in size, PG_A is among the least significant
 			 * 32 bits.
 			 */
 			atomic_clear_int((u_int *)pte, PG_A);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	sched_unpin();
 	vm_page_unlock_queues();
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /* Adjust the cache mode for a 4KB page mapped via a PTE. */
 static __inline void
 pmap_pte_attr(pt_entry_t *pte, int cache_bits)
 {
 	u_int current, wanted;
 
 	/*
 	 * Every cache mode bit lives in the low 32 bits of the PTE, so
 	 * only that half is updated.  Loop until either the entry already
 	 * holds the wanted value or the compare-and-set succeeds.
 	 */
 	for (;;) {
 		current = *(u_int *)pte;
 		wanted = (current & ~PG_PTE_CACHE) | cache_bits;
 		if (wanted == current ||
 		    atomic_cmpset_int((u_int *)pte, current, wanted))
 			break;
 	}
 }
 
 /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
 static __inline void
 pmap_pde_attr(pd_entry_t *pde, int cache_bits)
 {
 	u_int current, wanted;
 
 	/*
 	 * Every cache mode bit lives in the low 32 bits of the PDE, so
 	 * only that half is updated.  Loop until either the entry already
 	 * holds the wanted value or the compare-and-set succeeds.
 	 */
 	for (;;) {
 		current = *(u_int *)pde;
 		wanted = (current & ~PG_PDE_CACHE) | cache_bits;
 		if (wanted == current ||
 		    atomic_cmpset_int((u_int *)pde, current, wanted))
 			break;
 	}
 }
 
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
  * routine is intended to be used for mapping device memory,
  * NOT real memory.
  */
 void *
 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 {
 	vm_offset_t offset, va;
 	vm_size_t done;
 
 	/* Widen the request to whole pages, remembering the sub-page offset. */
 	offset = pa & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 	pa &= PG_FRAME;
 
 	/*
 	 * A range lying entirely below KERNLOAD is addressed at
 	 * KERNBASE + pa; anything else gets fresh kernel virtual space.
 	 */
 	if (pa < KERNLOAD && pa + size <= KERNLOAD)
 		va = KERNBASE + pa;
 	else
 		va = kmem_alloc_nofault(kernel_map, size);
 	if (va == 0)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	/* Enter one mapping per page with the requested memory mode. */
 	done = 0;
 	while (done < size) {
 		pmap_kenter_attr(va + done, pa + done, mode);
 		done += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, va, va + done);
 	pmap_invalidate_cache_range(va, va + size);
 	return ((void *)(va + offset));
 }
 
 void *
 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 {
 	void *mapping;
 
 	/* Same as pmap_mapdev_attr() with the uncacheable memory mode. */
 	mapping = pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE);
 	return (mapping);
 }
 
 void *
 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
 	void *mapping;
 
 	/* Same as pmap_mapdev_attr() with the write-back memory mode. */
 	mapping = pmap_mapdev_attr(pa, size, PAT_WRITE_BACK);
 	return (mapping);
 }
 
 void
 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t base, tmpva;
 
 	/* Nothing to undo for a range within KERNBASE .. KERNBASE + KERNLOAD. */
 	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 		return;
 
 	/* Widen the range to whole pages. */
 	base = trunc_page(va);
 	size = roundup((va & PAGE_MASK) + size, PAGE_SIZE);
 
 	/* Remove each page's mapping, then flush and release the range. */
 	tmpva = base;
 	while (tmpva < (base + size)) {
 		pmap_kremove(tmpva);
 		tmpva += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, va, tmpva);
 	kmem_free(kernel_map, base, size);
 }
 
 /*
  * Sets the memory attribute for the specified page.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 	struct sysmaps *sysmaps;
 	vm_offset_t sva, eva;
 
 	/* Record the new attribute; fictitious pages need nothing more. */
 	m->md.pat_mode = ma;
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 
 	/*
 	 * If "m" is a normal page, flush it from the cache.
 	 * See pmap_invalidate_cache_range().
 	 *
 	 * First, try to find an existing mapping of the page by sf
 	 * buffer. sf_buf_invalidate_cache() modifies mapping and
 	 * flushes the cache.
 	 */
 	if (sf_buf_invalidate_cache(m))
 		return;
 
 	/*
 	 * If page is not mapped by sf buffer, but CPU does not
 	 * support self snoop, map the page transient and do
 	 * invalidation. In the worst case, whole cache is flushed by
 	 * pmap_invalidate_cache_range().
 	 */
 	/* True only when CPUID_CLFSH is set and CPUID_SS is clear. */
 	if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
 		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 		mtx_lock(&sysmaps->lock);
 		if (*sysmaps->CMAP2)
 			panic("pmap_page_set_memattr: CMAP2 busy");
 		sched_pin();
 		/* Map the page at CADDR2 using its new cache mode. */
 		*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) |
 		    PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0);
 		invlcaddr(sysmaps->CADDR2);
 		sva = (vm_offset_t)sysmaps->CADDR2;
 		eva = sva + PAGE_SIZE;
 	} else
 		sva = eva = 0; /* gcc */
 	/* sva == eva == 0 here means no temporary mapping was created. */
 	pmap_invalidate_cache_range(sva, eva);
 	/* Tear the temporary mapping down again, if one was set up. */
 	if (sva != 0) {
 		*sysmaps->CMAP2 = 0;
 		sched_unpin();
 		mtx_unlock(&sysmaps->lock);
 	}
 }
 
 /*
  * Changes the specified virtual address range's memory type to that given by
  * the parameter "mode".  The specified virtual address range must be
  * completely contained within the kernel map.
  *
  * Returns zero if the change completed successfully, and either EINVAL or
  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
  * of the virtual address range was not mapped, and ENOMEM is returned if
  * there was insufficient memory available to complete the change.
  */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	int cache_bits_pte, cache_bits_pde;
 	boolean_t changed;	/* set once any entry is actually rewritten */
 
 	/* Widen the range to whole pages. */
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 
 	/*
 	 * Only supported on kernel virtual addresses above the recursive map.
 	 */
 	if (base < VM_MIN_KERNEL_ADDRESS)
 		return (EINVAL);
 
 	/* The cache bits are computed separately for PDEs and PTEs. */
 	cache_bits_pde = pmap_cache_bits(mode, 1);
 	cache_bits_pte = pmap_cache_bits(mode, 0);
 	changed = FALSE;
 
 	/*
 	 * Pages that aren't mapped aren't supported.  Also break down
 	 * 2/4MB pages into 4KB pages if required.
 	 */
 	PMAP_LOCK(kernel_pmap);
 	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
 		if (*pde == 0) {
 			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
 		}
 		if (*pde & PG_PS) {
 			/*
 			 * If the current 2/4MB page already has
 			 * the required memory type, then we need not
 			 * demote this page.  Just increment tmpva to
 			 * the next 2/4MB page frame.
 			 */
 			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
 				tmpva = trunc_4mpage(tmpva) + NBPDR;
 				continue;
 			}
 
 			/*
 			 * If the current offset aligns with a 2/4MB
 			 * page frame and there is at least 2/4MB left
 			 * within the range, then we need not break
 			 * down this page into 4KB pages.
 			 */
 			if ((tmpva & PDRMASK) == 0 &&
 			    tmpva + PDRMASK < base + size) {
 				tmpva += NBPDR;
 				continue;
 			}
 			/* Partial coverage: demote, or fail with ENOMEM. */
 			if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
 				PMAP_UNLOCK(kernel_pmap);
 				return (ENOMEM);
 			}
 		}
 		pte = vtopte(tmpva);
 		if (*pte == 0) {
 			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
 		}
 		tmpva += PAGE_SIZE;
 	}
 	PMAP_UNLOCK(kernel_pmap);
 
 	/*
 	 * Ok, all the pages exist, so run through them updating their
 	 * cache mode if required.
 	 */
 	/* NOTE(review): this pass runs after the kernel pmap lock is dropped. */
 	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
 		if (*pde & PG_PS) {
 			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pde, cache_bits_pde);
 				changed = TRUE;
 			}
 			tmpva = trunc_4mpage(tmpva) + NBPDR;
 		} else {
 			pte = vtopte(tmpva);
 			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
 				pmap_pte_attr(pte, cache_bits_pte);
 				changed = TRUE;
 			}
 			tmpva += PAGE_SIZE;
 		}
 	}
 
 	/*
 	 * Flush CPU caches to make sure any data isn't cached that
 	 * shouldn't be, etc.
 	 */
 	if (changed) {
 		pmap_invalidate_range(kernel_pmap, base, tmpva);
 		pmap_invalidate_cache_range(base, tmpva);
 	}
 	return (0);
 }
 
 /*
  * perform the pmap work for mincore
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 {
 	pd_entry_t *pdep;
 	pt_entry_t *ptep, pte;
 	vm_paddr_t pa;
 	int val;
 
 	PMAP_LOCK(pmap);
 retry:
 	/* Snapshot the entry that maps "addr" along with its physical address. */
 	pdep = pmap_pde(pmap, addr);
 	if (*pdep != 0) {
 		if (*pdep & PG_PS) {
 			/* Superpage: the PDE itself serves as the entry. */
 			pte = *pdep;
 			/* Compute the physical address of the 4KB page. */
 			pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
 			    PG_FRAME;
 			val = MINCORE_SUPER;
 		} else {
 			/* Copy the PTE, then release the lookup right away. */
 			ptep = pmap_pte(pmap, addr);
 			pte = *ptep;
 			pmap_pte_release(ptep);
 			pa = pte & PG_FRAME;
 			val = 0;
 		}
 	} else {
 		/* No page directory entry: report nothing. */
 		pte = 0;
 		pa = 0;
 		val = 0;
 	}
 	/* Translate the entry's valid/modified/accessed bits into MINCORE_* flags. */
 	if ((pte & PG_V) != 0) {
 		val |= MINCORE_INCORE;
 		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if ((pte & PG_A) != 0)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	}
 	/*
 	 * For a valid managed mapping that is not already reported as
 	 * both modified and referenced, lock the physical page.
 	 * Otherwise release any lock recorded in *locked_pa.
 	 */
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
 	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 		/* A nonzero result restarts the lookup from the top. */
 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 			goto retry;
 	} else
 		PA_UNLOCK_COND(*locked_pa);
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t	pmap, oldpmap;
 	u_int32_t  cr3;
 
 	/* The whole switch is done inside a critical section. */
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 	/* Move this CPU's bit from the old pmap's active mask to the new one. */
 #if defined(SMP)
 	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 #else
 	oldpmap->pm_active &= ~1;
 	pmap->pm_active |= 1;
 #endif
 	/* %cr3 takes the physical address of the top-level paging structure. */
 #ifdef PAE
 	cr3 = vtophys(pmap->pm_pdpt);
 #else
 	cr3 = vtophys(pmap->pm_pdir);
 #endif
 	/*
 	 * pmap_activate is for the current thread on the current cpu
 	 */
 	/* Save the value in the thread's PCB, load it, then publish curpmap. */
 	td->td_pcb->pcb_cr3 = cr3;
 	load_cr3(cr3);
 	PCPU_SET(curpmap, pmap);
 	critical_exit();
 }
 
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
 	/* Intentionally empty: this implementation does no work here. */
 }
 
 /*
  *	Increase the starting virtual address of the given mapping if a
  *	different alignment might result in more superpage mappings.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t addr_offset, superpage_offset;
 
 	/* A mapping smaller than one superpage cannot benefit. */
 	if (size < NBPDR)
 		return;
 
 	/* Account for the object's page color, when it has one. */
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	superpage_offset = offset & PDRMASK;
 	addr_offset = *addr & PDRMASK;
 
 	/* Already aligned the same way as the object offset. */
 	if (addr_offset == superpage_offset)
 		return;
 
 	/* Too small to contain a full superpage after realignment. */
 	if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR)
 		return;
 
 	/* Advance to the next address with the wanted superpage offset. */
 	if (addr_offset >= superpage_offset)
 		*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
 	else
 		*addr = (*addr & ~PDRMASK) + superpage_offset;
 }
 
 
 #if defined(PMAP_DEBUG)
 /*
  * Debugging aid: print the valid user-space mappings of the process
  * whose pid is given, two per output line, and return how many were
  * printed.  Returns 0 when no process matches.
  *
  * NOTE(review): pmap_mincore() pairs pmap_pte() with pmap_pte_release();
  * this routine does not.  Confirm whether a release is required here.
  */
 int
 pmap_pid_dump(int pid)
 {
 	pmap_t pmap;
 	struct proc *p;
 	int npte = 0;
 	int index;
 
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_pid != pid)
 			continue;
 
 		if (p->p_vmspace) {
 			int i,j;
 			index = 0;
 			pmap = vmspace_pmap(p->p_vmspace);
 			for (i = 0; i < NPDEPTD; i++) {
 				pd_entry_t *pde;
 				pt_entry_t *pte;
 				/* Shift in the unsigned type to avoid int overflow. */
 				vm_offset_t base = (vm_offset_t)i << PDRSHIFT;
 				
 				pde = &pmap->pm_pdir[i];
 				if (pde && pmap_pde_v(pde)) {
 					for (j = 0; j < NPTEPG; j++) {
 						vm_offset_t va = base + (j << PAGE_SHIFT);
 						/* Stop at the first kernel address. */
 						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 							if (index) {
 								index = 0;
 								printf("\n");
 							}
 							sx_sunlock(&allproc_lock);
 							return (npte);
 						}
 						pte = pmap_pte(pmap, va);
 						if (pte && pmap_pte_v(pte)) {
 							pt_entry_t pa;
 							vm_page_t m;
 							pa = *pte;
 							m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
 							/* pt_entry_t may be wider than int: print via uintmax_t. */
 							printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
 								va, (uintmax_t)pa, m->hold_count, m->wire_count, m->flags);
 							npte++;
 							index++;
 							if (index >= 2) {
 								index = 0;
 								printf("\n");
 							} else {
 								printf(" ");
 							}
 						}
 					}
 				}
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return (npte);
 }
 #endif
 
 #if defined(DEBUG)
 
 static void	pads(pmap_t pm);
 void		pmap_pvdump(vm_paddr_t pa);
 
 /*
  * Print the address space of a pmap as "va:pte" pairs, one for every
  * valid PTE at or below UPT_MAX_ADDRESS.  The kernel pmap is skipped.
  *
  * NOTE(review): pmap_mincore() pairs pmap_pte() with pmap_pte_release();
  * this routine does not.  Confirm whether a release is required here.
  */
 static void
 pads(pmap_t pm)
 {
 	int i, j;
 	vm_offset_t va;
 	pt_entry_t *ptep;
 
 	if (pm == kernel_pmap)
 		return;
 	for (i = 0; i < NPDEPTD; i++)
 		if (pm->pm_pdir[i])
 			for (j = 0; j < NPTEPG; j++) {
 				/* Shift in the unsigned type to avoid int overflow. */
 				va = ((vm_offset_t)i << PDRSHIFT) +
 				    (j << PAGE_SHIFT);
 				if (va > UPT_MAX_ADDRESS)
 					continue;
 				ptep = pmap_pte(pm, va);
 				/* pt_entry_t may be wider than int: print via uintmax_t. */
 				if (pmap_pte_v(ptep))
 					printf("%x:%jx ", va, (uintmax_t)*ptep);
 			}
 
 }
 
 /*
  * Print each 4KB mapping on the pv list of the page at physical address
  * "pa", followed by a dump of the owning pmap's address space.
  */
 void
 pmap_pvdump(vm_paddr_t pa)
 {
 	pv_entry_t pv;
 	pmap_t pmap;
 	vm_page_t m;
 
 	/* vm_paddr_t may be wider than int: print via uintmax_t. */
 	printf("pa %jx", (uintmax_t)pa);
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
 		pads(pmap);
 	}
 	printf(" ");
 }
 #endif
Index: projects/binutils-2.17/sys/i386/include/specialreg.h
===================================================================
--- projects/binutils-2.17/sys/i386/include/specialreg.h	(revision 215829)
+++ projects/binutils-2.17/sys/i386/include/specialreg.h	(revision 215830)
@@ -1,600 +1,609 @@
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)specialreg.h	7.1 (Berkeley) 5/9/91
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_SPECIALREG_H_
 #define	_MACHINE_SPECIALREG_H_
 
 /*
  * Bits in 386 special registers:
  */
 #define	CR0_PE	0x00000001	/* Protected mode Enable */
 #define	CR0_MP	0x00000002	/* "Math" (fpu) Present */
 #define	CR0_EM	0x00000004	/* EMulate FPU instructions. (trap ESC only) */
 #define	CR0_TS	0x00000008	/* Task Switched (if MP, trap ESC and WAIT) */
 #define	CR0_PG	0x80000000	/* PaGing enable */
 
 /*
  * Bits in 486 special registers:
  */
 #define	CR0_NE	0x00000020	/* Numeric Error enable (EX16 vs IRQ13) */
 #define	CR0_WP	0x00010000	/* Write Protect (honor page protect in
 							   all modes) */
 #define	CR0_AM	0x00040000	/* Alignment Mask (set to enable AC flag) */
 #define	CR0_NW  0x20000000	/* Not Write-through */
 #define	CR0_CD  0x40000000	/* Cache Disable */
 
 /*
  * Bits in PPro special registers
  */
 #define	CR4_VME	0x00000001	/* Virtual 8086 mode extensions */
 #define	CR4_PVI	0x00000002	/* Protected-mode virtual interrupts */
 #define	CR4_TSD	0x00000004	/* Time stamp disable */
 #define	CR4_DE	0x00000008	/* Debugging extensions */
 #define	CR4_PSE	0x00000010	/* Page size extensions */
 #define	CR4_PAE	0x00000020	/* Physical address extension */
 #define	CR4_MCE	0x00000040	/* Machine check enable */
 #define	CR4_PGE	0x00000080	/* Page global enable */
 #define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
 #define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
 #define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
 
 /*
  * Bits in AMD64 special registers.  EFER is 64 bits wide.
  */
 #define	EFER_NXE 0x000000800	/* PTE No-Execute bit enable (R/W) */
 
 /*
  * CPUID instruction features register
  */
 #define	CPUID_FPU	0x00000001
 #define	CPUID_VME	0x00000002
 #define	CPUID_DE	0x00000004
 #define	CPUID_PSE	0x00000008
 #define	CPUID_TSC	0x00000010
 #define	CPUID_MSR	0x00000020
 #define	CPUID_PAE	0x00000040
 #define	CPUID_MCE	0x00000080
 #define	CPUID_CX8	0x00000100
 #define	CPUID_APIC	0x00000200
 #define	CPUID_B10	0x00000400
 #define	CPUID_SEP	0x00000800
 #define	CPUID_MTRR	0x00001000
 #define	CPUID_PGE	0x00002000
 #define	CPUID_MCA	0x00004000
 #define	CPUID_CMOV	0x00008000
 #define	CPUID_PAT	0x00010000
 #define	CPUID_PSE36	0x00020000
 #define	CPUID_PSN	0x00040000
 #define	CPUID_CLFSH	0x00080000
 #define	CPUID_B20	0x00100000
 #define	CPUID_DS	0x00200000
 #define	CPUID_ACPI	0x00400000
 #define	CPUID_MMX	0x00800000
 #define	CPUID_FXSR	0x01000000
 #define	CPUID_SSE	0x02000000
 #define	CPUID_XMM	0x02000000
 #define	CPUID_SSE2	0x04000000
 #define	CPUID_SS	0x08000000
 #define	CPUID_HTT	0x10000000
 #define	CPUID_TM	0x20000000
 #define	CPUID_IA64	0x40000000
 #define	CPUID_PBE	0x80000000
 
 #define	CPUID2_SSE3	0x00000001
 #define	CPUID2_PCLMULQDQ 0x00000002
 #define	CPUID2_DTES64	0x00000004
 #define	CPUID2_MON	0x00000008
 #define	CPUID2_DS_CPL	0x00000010
 #define	CPUID2_VMX	0x00000020
 #define	CPUID2_SMX	0x00000040
 #define	CPUID2_EST	0x00000080
 #define	CPUID2_TM2	0x00000100
 #define	CPUID2_SSSE3	0x00000200
 #define	CPUID2_CNXTID	0x00000400
 #define	CPUID2_CX16	0x00002000
 #define	CPUID2_XTPR	0x00004000
 #define	CPUID2_PDCM	0x00008000
 #define	CPUID2_PCID	0x00020000
 #define	CPUID2_DCA	0x00040000
 #define	CPUID2_SSE41	0x00080000
 #define	CPUID2_SSE42	0x00100000
 #define	CPUID2_X2APIC	0x00200000
 #define	CPUID2_MOVBE	0x00400000
 #define	CPUID2_POPCNT	0x00800000
 #define	CPUID2_AESNI	0x02000000
 
 /*
+ * Important bits in the Thermal and Power Management flags
+ * CPUID.6 EAX and ECX.
+ */
+#define	CPUTPM1_SENSOR	0x00000001
+#define	CPUTPM1_TURBO	0x00000002
+#define	CPUTPM1_ARAT	0x00000004
+#define	CPUTPM2_EFFREQ	0x00000001
+
+/*
  * Important bits in the AMD extended cpuid flags
  */
 #define	AMDID_SYSCALL	0x00000800
 #define	AMDID_MP	0x00080000
 #define	AMDID_NX	0x00100000
 #define	AMDID_EXT_MMX	0x00400000
 #define	AMDID_FFXSR	0x01000000
 #define	AMDID_PAGE1GB	0x04000000
 #define	AMDID_RDTSCP	0x08000000
 #define	AMDID_LM	0x20000000
 #define	AMDID_EXT_3DNOW	0x40000000
 #define	AMDID_3DNOW	0x80000000
 
 #define	AMDID2_LAHF	0x00000001
 #define	AMDID2_CMP	0x00000002
 #define	AMDID2_SVM	0x00000004
 #define	AMDID2_EXT_APIC	0x00000008
 #define	AMDID2_CR8	0x00000010
 #define	AMDID2_ABM	0x00000020
 #define	AMDID2_SSE4A	0x00000040
 #define	AMDID2_MAS	0x00000080
 #define	AMDID2_PREFETCH	0x00000100
 #define	AMDID2_OSVW	0x00000200
 #define	AMDID2_IBS	0x00000400
 #define	AMDID2_SSE5	0x00000800
 #define	AMDID2_SKINIT	0x00001000
 #define	AMDID2_WDT	0x00002000
 
 /*
  * CPUID instruction 1 eax info
  */
 #define	CPUID_STEPPING		0x0000000f
 #define	CPUID_MODEL		0x000000f0
 #define	CPUID_FAMILY		0x00000f00
 #define	CPUID_EXT_MODEL		0x000f0000
 #define	CPUID_EXT_FAMILY	0x0ff00000
 #define	CPUID_TO_MODEL(id) \
     ((((id) & CPUID_MODEL) >> 4) | \
     ((((id) & CPUID_FAMILY) >= 0x600) ? \
     (((id) & CPUID_EXT_MODEL) >> 12) : 0))
 #define	CPUID_TO_FAMILY(id) \
     ((((id) & CPUID_FAMILY) >> 8) + \
     ((((id) & CPUID_FAMILY) == 0xf00) ? \
     (((id) & CPUID_EXT_FAMILY) >> 20) : 0))
 
 /*
  * CPUID instruction 1 ebx info
  */
 #define	CPUID_BRAND_INDEX	0x000000ff
 #define	CPUID_CLFUSH_SIZE	0x0000ff00
 #define	CPUID_HTT_CORES		0x00ff0000
 #define	CPUID_LOCAL_APIC_ID	0xff000000
 
 /* 
  * CPUID instruction 0xb ebx info.
  */
 #define	CPUID_TYPE_INVAL	0
 #define	CPUID_TYPE_SMT		1
 #define	CPUID_TYPE_CORE		2
 
 /*
  * AMD extended function 8000_0007h edx info
  */
 #define	AMDPM_TS		0x00000001
 #define	AMDPM_FID		0x00000002
 #define	AMDPM_VID		0x00000004
 #define	AMDPM_TTP		0x00000008
 #define	AMDPM_TM		0x00000010
 #define	AMDPM_STC		0x00000020
 #define	AMDPM_100MHZ_STEPS	0x00000040
 #define	AMDPM_HW_PSTATE		0x00000080
 #define	AMDPM_TSC_INVARIANT	0x00000100
 #define	AMDPM_CPB		0x00000200
 
 /*
  * AMD extended function 8000_0008h ecx info
  */
 #define	AMDID_CMP_CORES		0x000000ff
 
 /*
  * CPUID manufacturers identifiers
  */
 #define	AMD_VENDOR_ID		"AuthenticAMD"
 #define	CENTAUR_VENDOR_ID	"CentaurHauls"
 #define	CYRIX_VENDOR_ID		"CyrixInstead"
 #define	INTEL_VENDOR_ID		"GenuineIntel"
 #define	NEXGEN_VENDOR_ID	"NexGenDriven"
 #define	NSC_VENDOR_ID		"Geode by NSC"
 #define	RISE_VENDOR_ID		"RiseRiseRise"
 #define	SIS_VENDOR_ID		"SiS SiS SiS "
 #define	TRANSMETA_VENDOR_ID	"GenuineTMx86"
 #define	UMC_VENDOR_ID		"UMC UMC UMC "
 
 /*
  * Model-specific registers for the i386 family
  */
 #define	MSR_P5_MC_ADDR		0x000
 #define	MSR_P5_MC_TYPE		0x001
 #define	MSR_TSC			0x010
 #define	MSR_P5_CESR		0x011
 #define	MSR_P5_CTR0		0x012
 #define	MSR_P5_CTR1		0x013
 #define	MSR_IA32_PLATFORM_ID	0x017
 #define	MSR_APICBASE		0x01b
 #define	MSR_EBL_CR_POWERON	0x02a
 #define	MSR_TEST_CTL		0x033
 #define	MSR_BIOS_UPDT_TRIG	0x079
 #define	MSR_BBL_CR_D0		0x088
 #define	MSR_BBL_CR_D1		0x089
 #define	MSR_BBL_CR_D2		0x08a
 #define	MSR_BIOS_SIGN		0x08b
 #define	MSR_PERFCTR0		0x0c1
 #define	MSR_PERFCTR1		0x0c2
 #define	MSR_MPERF		0x0e7
 #define	MSR_APERF		0x0e8
 #define	MSR_IA32_EXT_CONFIG	0x0ee	/* Undocumented. Core Solo/Duo only */
 #define	MSR_MTRRcap		0x0fe
 #define	MSR_BBL_CR_ADDR		0x116
 #define	MSR_BBL_CR_DECC		0x118
 #define	MSR_BBL_CR_CTL		0x119
 #define	MSR_BBL_CR_TRIG		0x11a
 #define	MSR_BBL_CR_BUSY		0x11b
 #define	MSR_BBL_CR_CTL3		0x11e
 #define	MSR_SYSENTER_CS_MSR	0x174
 #define	MSR_SYSENTER_ESP_MSR	0x175
 #define	MSR_SYSENTER_EIP_MSR	0x176
 #define	MSR_MCG_CAP		0x179
 #define	MSR_MCG_STATUS		0x17a
 #define	MSR_MCG_CTL		0x17b
 #define	MSR_EVNTSEL0		0x186
 #define	MSR_EVNTSEL1		0x187
 #define	MSR_THERM_CONTROL	0x19a
 #define	MSR_THERM_INTERRUPT	0x19b
 #define	MSR_THERM_STATUS	0x19c
 #define	MSR_IA32_MISC_ENABLE	0x1a0
 #define	MSR_IA32_TEMPERATURE_TARGET	0x1a2
 #define	MSR_DEBUGCTLMSR		0x1d9
 #define	MSR_LASTBRANCHFROMIP	0x1db
 #define	MSR_LASTBRANCHTOIP	0x1dc
 #define	MSR_LASTINTFROMIP	0x1dd
 #define	MSR_LASTINTTOIP		0x1de
 #define	MSR_ROB_CR_BKUPTMPDR6	0x1e0
 #define	MSR_MTRRVarBase		0x200
 #define	MSR_MTRR64kBase		0x250
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
 #define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
 #define	MSR_MC0_ADDR		0x402
 #define	MSR_MC0_MISC		0x403
 #define	MSR_MC1_CTL		0x404
 #define	MSR_MC1_STATUS		0x405
 #define	MSR_MC1_ADDR		0x406
 #define	MSR_MC1_MISC		0x407
 #define	MSR_MC2_CTL		0x408
 #define	MSR_MC2_STATUS		0x409
 #define	MSR_MC2_ADDR		0x40a
 #define	MSR_MC2_MISC		0x40b
 #define	MSR_MC3_CTL		0x40c
 #define	MSR_MC3_STATUS		0x40d
 #define	MSR_MC3_ADDR		0x40e
 #define	MSR_MC3_MISC		0x40f
 #define	MSR_MC4_CTL		0x410
 #define	MSR_MC4_STATUS		0x411
 #define	MSR_MC4_ADDR		0x412
 #define	MSR_MC4_MISC		0x413
 
 /*
  * Constants related to MSR's.
  */
 #define	APICBASE_RESERVED	0x000006ff
 #define	APICBASE_BSP		0x00000100
 #define	APICBASE_ENABLED	0x00000800
 #define	APICBASE_ADDRESS	0xfffff000
 
 /*
  * PAT modes.
  */
 #define	PAT_UNCACHEABLE		0x00
 #define	PAT_WRITE_COMBINING	0x01
 #define	PAT_WRITE_THROUGH	0x04
 #define	PAT_WRITE_PROTECTED	0x05
 #define	PAT_WRITE_BACK		0x06
 #define	PAT_UNCACHED		0x07
 #define	PAT_VALUE(i, m)		((long long)(m) << (8 * (i)))
 #define	PAT_MASK(i)		PAT_VALUE(i, 0xff)
 
 /*
  * Constants related to MTRRs
  */
 #define	MTRR_UNCACHEABLE	0x00
 #define	MTRR_WRITE_COMBINING	0x01
 #define	MTRR_WRITE_THROUGH	0x04
 #define	MTRR_WRITE_PROTECTED	0x05
 #define	MTRR_WRITE_BACK		0x06
 #define	MTRR_N64K		8	/* numbers of fixed-size entries */
 #define	MTRR_N16K		16
 #define	MTRR_N4K		64
 #define	MTRR_CAP_WC		0x0000000000000400
 #define	MTRR_CAP_FIXED		0x0000000000000100
 #define	MTRR_CAP_VCNT		0x00000000000000ff
 #define	MTRR_DEF_ENABLE		0x0000000000000800
 #define	MTRR_DEF_FIXED_ENABLE	0x0000000000000400
 #define	MTRR_DEF_TYPE		0x00000000000000ff
 #define	MTRR_PHYSBASE_PHYSBASE	0x000ffffffffff000
 #define	MTRR_PHYSBASE_TYPE	0x00000000000000ff
 #define	MTRR_PHYSMASK_PHYSMASK	0x000ffffffffff000
 #define	MTRR_PHYSMASK_VALID	0x0000000000000800
 
 /*
  * Cyrix configuration registers, accessible as IO ports.
  */
 #define	CCR0			0xc0	/* Configuration control register 0 */
 #define	CCR0_NC0		0x01	/* First 64K of each 1M memory region is
 								   non-cacheable */
 #define	CCR0_NC1		0x02	/* 640K-1M region is non-cacheable */
 #define	CCR0_A20M		0x04	/* Enables A20M# input pin */
 #define	CCR0_KEN		0x08	/* Enables KEN# input pin */
 #define	CCR0_FLUSH		0x10	/* Enables FLUSH# input pin */
 #define	CCR0_BARB		0x20	/* Flushes internal cache when entering hold
 								   state */
 #define	CCR0_CO			0x40	/* Cache org: 1=direct mapped, 0=2x set
 								   assoc */
 #define	CCR0_SUSPEND	0x80	/* Enables SUSP# and SUSPA# pins */
 
 #define	CCR1			0xc1	/* Configuration control register 1 */
 #define	CCR1_RPL		0x01	/* Enables RPLSET and RPLVAL# pins */
 #define	CCR1_SMI		0x02	/* Enables SMM pins */
 #define	CCR1_SMAC		0x04	/* System management memory access */
 #define	CCR1_MMAC		0x08	/* Main memory access */
 #define	CCR1_NO_LOCK	0x10	/* Negate LOCK# */
 #define	CCR1_SM3		0x80	/* SMM address space address region 3 */
 
 #define	CCR2			0xc2
 #define	CCR2_WB			0x02	/* Enables WB cache interface pins */
 #define	CCR2_SADS		0x02	/* Slow ADS */
 #define	CCR2_LOCK_NW	0x04	/* LOCK NW Bit */
 #define	CCR2_SUSP_HLT	0x08	/* Suspend on HALT */
 #define	CCR2_WT1		0x10	/* WT region 1 */
 #define	CCR2_WPR1		0x10	/* Write-protect region 1 */
 #define	CCR2_BARB		0x20	/* Flushes write-back cache when entering
 								   hold state. */
 #define	CCR2_BWRT		0x40	/* Enables burst write cycles */
 #define	CCR2_USE_SUSP	0x80	/* Enables suspend pins */
 
 #define	CCR3			0xc3
 #define	CCR3_SMILOCK	0x01	/* SMM register lock */
 #define	CCR3_NMI		0x02	/* Enables NMI during SMM */
 #define	CCR3_LINBRST	0x04	/* Linear address burst cycles */
 #define	CCR3_SMMMODE	0x08	/* SMM Mode */
 #define	CCR3_MAPEN0		0x10	/* Enables Map0 */
 #define	CCR3_MAPEN1		0x20	/* Enables Map1 */
 #define	CCR3_MAPEN2		0x40	/* Enables Map2 */
 #define	CCR3_MAPEN3		0x80	/* Enables Map3 */
 
 #define	CCR4			0xe8
 #define	CCR4_IOMASK		0x07
 #define	CCR4_MEM		0x08	/* Enables memory bypassing */
 #define	CCR4_DTE		0x10	/* Enables directory table entry cache */
 #define	CCR4_FASTFPE	0x20	/* Fast FPU exception */
 #define	CCR4_CPUID		0x80	/* Enables CPUID instruction */
 
 #define	CCR5			0xe9
 #define	CCR5_WT_ALLOC	0x01	/* Write-through allocate */
 #define	CCR5_SLOP		0x02	/* LOOP instruction slowed down */
 #define	CCR5_LBR1		0x10	/* Local bus region 1 */
 #define	CCR5_ARREN		0x20	/* Enables ARR region */
 
 #define	CCR6			0xea
 
 #define	CCR7			0xeb
 
 /* Performance Control Register (5x86 only). */
 #define	PCR0			0x20
 #define	PCR0_RSTK		0x01	/* Enables return stack */
 #define	PCR0_BTB		0x02	/* Enables branch target buffer */
 #define	PCR0_LOOP		0x04	/* Enables loop */
 #define	PCR0_AIS		0x08	/* Enables all instructions stalled to
 								   serialize pipe. */
 #define	PCR0_MLR		0x10	/* Enables reordering of misaligned loads */
 #define	PCR0_BTBRT		0x40	/* Enables BTB test register. */
 #define	PCR0_LSSER		0x80	/* Disable reorder */
 
 /* Device Identification Registers */
 #define	DIR0			0xfe
 #define	DIR1			0xff
 
 /*
  * Machine Check register constants.
  */
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
 #define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
 #define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
 #define	MCG_CTL_ENABLE		0xffffffffffffffff
 #define	MCG_CTL_DISABLE		0x0000000000000000
 #define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
 #define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffff
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
 #define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_PCC		0x0200000000000000
 #define	MC_STATUS_ADDRV		0x0400000000000000
 #define	MC_STATUS_MISCV		0x0800000000000000
 #define	MC_STATUS_EN		0x1000000000000000
 #define	MC_STATUS_UC		0x2000000000000000
 #define	MC_STATUS_OVER		0x4000000000000000
 #define	MC_STATUS_VAL		0x8000000000000000
 #define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
 #define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
 #define	MC_CTL2_THRESHOLD	0x0000000000007fff
 #define	MC_CTL2_CMCI_EN		0x0000000040000000
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.
  * These registers must be written as three separate bytes.
  *
  * NCRx+0: A31-A24 of starting address
  * NCRx+1: A23-A16 of starting address
  * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
  *
  * The non-cacheable region's starting address must be aligned to the
  * size indicated by the NCR_SIZE_xx field.
  */
 #define	NCR1	0xc4
 #define	NCR2	0xc7
 #define	NCR3	0xca
 #define	NCR4	0xcd
 
 #define	NCR_SIZE_0K	0
 #define	NCR_SIZE_4K	1
 #define	NCR_SIZE_8K	2
 #define	NCR_SIZE_16K	3
 #define	NCR_SIZE_32K	4
 #define	NCR_SIZE_64K	5
 #define	NCR_SIZE_128K	6
 #define	NCR_SIZE_256K	7
 #define	NCR_SIZE_512K	8
 #define	NCR_SIZE_1M	9
 #define	NCR_SIZE_2M	10
 #define	NCR_SIZE_4M	11
 #define	NCR_SIZE_8M	12
 #define	NCR_SIZE_16M	13
 #define	NCR_SIZE_32M	14
 #define	NCR_SIZE_4G	15
 
 /*
  * The address region registers are used to specify the location and
  * size for the eight address regions.
  *
  * ARRx + 0: A31-A24 of start address
  * ARRx + 1: A23-A16 of start address
  * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
  */
 #define	ARR0	0xc4
 #define	ARR1	0xc7
 #define	ARR2	0xca
 #define	ARR3	0xcd
 #define	ARR4	0xd0
 #define	ARR5	0xd3
 #define	ARR6	0xd6
 #define	ARR7	0xd9
 
 #define	ARR_SIZE_0K		0
 #define	ARR_SIZE_4K		1
 #define	ARR_SIZE_8K		2
 #define	ARR_SIZE_16K	3
 #define	ARR_SIZE_32K	4
 #define	ARR_SIZE_64K	5
 #define	ARR_SIZE_128K	6
 #define	ARR_SIZE_256K	7
 #define	ARR_SIZE_512K	8
 #define	ARR_SIZE_1M		9
 #define	ARR_SIZE_2M		10
 #define	ARR_SIZE_4M		11
 #define	ARR_SIZE_8M		12
 #define	ARR_SIZE_16M	13
 #define	ARR_SIZE_32M	14
 #define	ARR_SIZE_4G		15
 
 /*
  * The region control registers specify the attributes associated with
  * the ARRx address regions.
  */
 #define	RCR0	0xdc
 #define	RCR1	0xdd
 #define	RCR2	0xde
 #define	RCR3	0xdf
 #define	RCR4	0xe0
 #define	RCR5	0xe1
 #define	RCR6	0xe2
 #define	RCR7	0xe3
 
 #define	RCR_RCD	0x01	/* Disables caching for ARRx (x = 0-6). */
 #define	RCR_RCE	0x01	/* Enables caching for ARR7. */
 #define	RCR_WWO	0x02	/* Weak write ordering. */
 #define	RCR_WL	0x04	/* Weak locking. */
 #define	RCR_WG	0x08	/* Write gathering. */
 #define	RCR_WT	0x10	/* Write-through. */
 #define	RCR_NLB	0x20	/* LBA# pin is not asserted. */
 
 /* AMD Write Allocate Top-Of-Memory and Control Register */
 #define	AMD_WT_ALLOC_TME	0x40000	/* top-of-memory enable */
 #define	AMD_WT_ALLOC_PRE	0x20000	/* programmable range enable */
 #define	AMD_WT_ALLOC_FRE	0x10000	/* fixed (A0000-FFFFF) range enable */
 
 /* AMD64 MSR's */
 #define	MSR_EFER		0xc0000080	/* extended features */
 #define	MSR_HWCR		0xc0010015
 #define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
 #define	MSR_MC0_CTL_MASK	0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG		1	/* cpu has RNG */
 
 /* VIA ACE crypto featureset: for via_feature_xcrypt */
 #define	VIA_HAS_AES		1	/* cpu has AES */
 #define	VIA_HAS_SHA		2	/* cpu has SHA1 & SHA256 */
 #define	VIA_HAS_MM		4	/* cpu has RSA instructions */
 #define	VIA_HAS_AESCTR		8	/* cpu has AES-CTR instructions */
 
 /* Centaur Extended Feature flags */
 #define	VIA_CPUID_HAS_RNG	0x000004
 #define	VIA_CPUID_DO_RNG	0x000008
 #define	VIA_CPUID_HAS_ACE	0x000040
 #define	VIA_CPUID_DO_ACE	0x000080
 #define	VIA_CPUID_HAS_ACE2	0x000100
 #define	VIA_CPUID_DO_ACE2	0x000200
 #define	VIA_CPUID_HAS_PHE	0x000400
 #define	VIA_CPUID_DO_PHE	0x000800
 #define	VIA_CPUID_HAS_PMM	0x001000
 #define	VIA_CPUID_DO_PMM	0x002000
 
 /* VIA ACE xcrypt-* instruction context control options */
 #define	VIA_CRYPT_CWLO_ROUND_M		0x0000000f
 #define	VIA_CRYPT_CWLO_ALG_M		0x00000070
 #define	VIA_CRYPT_CWLO_ALG_AES		0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_M		0x00000080
 #define	VIA_CRYPT_CWLO_KEYGEN_HW	0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_SW	0x00000080
 #define	VIA_CRYPT_CWLO_NORMAL		0x00000000
 #define	VIA_CRYPT_CWLO_INTERMEDIATE	0x00000100
 #define	VIA_CRYPT_CWLO_ENCRYPT		0x00000000
 #define	VIA_CRYPT_CWLO_DECRYPT		0x00000200
 #define	VIA_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
 #define	VIA_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
 #define	VIA_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
 
 #endif /* !_MACHINE_SPECIALREG_H_ */
Index: projects/binutils-2.17/sys/i386/include/xen/hypercall.h
===================================================================
--- projects/binutils-2.17/sys/i386/include/xen/hypercall.h	(revision 215829)
+++ projects/binutils-2.17/sys/i386/include/xen/hypercall.h	(revision 215830)
@@ -1,405 +1,406 @@
 /******************************************************************************
  * hypercall.h
  * 
  * Linux-specific hypervisor handling.
  * 
  * Copyright (c) 2002-2004, K A Fraser
  * 
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this source file (the "Software"), to deal in the Software without
  * restriction, including without limitation the rights to use, copy, modify,
  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  * and to permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  * 
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  * 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
 
 #ifndef __HYPERCALL_H__
 #define __HYPERCALL_H__
 
 #include <sys/systm.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 
 #define __STR(x) #x
 #define STR(x) __STR(x)
 #define	ENOXENSYS	38
 #define CONFIG_XEN_COMPAT	0x030002
 
 
 #if defined(XEN)
 #define HYPERCALL_STR(name)                                     \
         "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
 #else
 #define HYPERCALL_STR(name)                                     \
         "mov hypercall_stubs,%%eax; "                           \
         "add $("STR(__HYPERVISOR_##name)" * 32),%%eax; "        \
         "call *%%eax"
 #endif
 
 #define _hypercall0(type, name)                 \
 ({                                              \
         long __res;                             \
         __asm__ volatile (                          \
                 HYPERCALL_STR(name)             \
                 : "=a" (__res)                  \
                 :                               \
                 : "memory" );                   \
         (type)__res;                            \
 })
 
 #define _hypercall1(type, name, a1)                             \
 ({                                                              \
         long __res, __ign1;                                     \
         __asm__ volatile (                                          \
                 HYPERCALL_STR(name)                             \
                 : "=a" (__res), "=b" (__ign1)                   \
                 : "1" ((long)(a1))                              \
                 : "memory" );                                   \
         (type)__res;                                            \
 })
 
 #define _hypercall2(type, name, a1, a2)                         \
 ({                                                              \
         long __res, __ign1, __ign2;                             \
         __asm__ volatile (                                          \
                 HYPERCALL_STR(name)                             \
                 : "=a" (__res), "=b" (__ign1), "=c" (__ign2)    \
                 : "1" ((long)(a1)), "2" ((long)(a2))            \
                 : "memory" );                                   \
         (type)__res;                                            \
 })
 
 #define _hypercall3(type, name, a1, a2, a3)                     \
 ({                                                              \
         long __res, __ign1, __ign2, __ign3;                     \
         __asm__ volatile (                                          \
                 HYPERCALL_STR(name)                             \
                 : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
                 "=d" (__ign3)                                   \
                 : "1" ((long)(a1)), "2" ((long)(a2)),           \
                 "3" ((long)(a3))                                \
                 : "memory" );                                   \
         (type)__res;                                            \
 })
 
 #define _hypercall4(type, name, a1, a2, a3, a4)                 \
 ({                                                              \
         long __res, __ign1, __ign2, __ign3, __ign4;             \
         __asm__ volatile (                                          \
                 HYPERCALL_STR(name)                             \
                 : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
                 "=d" (__ign3), "=S" (__ign4)                    \
                 : "1" ((long)(a1)), "2" ((long)(a2)),           \
                 "3" ((long)(a3)), "4" ((long)(a4))              \
                 : "memory" );                                   \
         (type)__res;                                            \
 })
 
 #define _hypercall5(type, name, a1, a2, a3, a4, a5)             \
 ({                                                              \
         long __res, __ign1, __ign2, __ign3, __ign4, __ign5;     \
         __asm__ volatile (                                          \
                 HYPERCALL_STR(name)                             \
                 : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
                 "=d" (__ign3), "=S" (__ign4), "=D" (__ign5)     \
                 : "1" ((long)(a1)), "2" ((long)(a2)),           \
                 "3" ((long)(a3)), "4" ((long)(a4)),             \
                 "5" ((long)(a5))                                \
                 : "memory" );                                   \
         (type)__res;                                            \
 })								
 
 static inline int
 HYPERVISOR_set_trap_table(
 	trap_info_t *table)
 {
 	return _hypercall1(int, set_trap_table, table);
 }
 
 static inline int
 HYPERVISOR_mmu_update(
 	mmu_update_t *req, int count, int *success_count, domid_t domid)
 {
 	return _hypercall4(int, mmu_update, req, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_mmuext_op(
 	mmuext_op_t *op, int count, int *success_count, domid_t domid)
 {
 	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
 }
 
 static inline int
 HYPERVISOR_set_gdt(
 	unsigned long *frame_list, int entries)
 {
 	return _hypercall2(int, set_gdt, frame_list, entries);
 }
 
 static inline int
 HYPERVISOR_stack_switch(
 	unsigned long ss, unsigned long esp)
 {
 	return _hypercall2(int, stack_switch, ss, esp);
 }
 
 static inline int
 HYPERVISOR_set_callbacks(
 	unsigned long event_selector, unsigned long event_address,
 	unsigned long failsafe_selector, unsigned long failsafe_address)
 {
 	return _hypercall4(int, set_callbacks,
 			   event_selector, event_address,
 			   failsafe_selector, failsafe_address);
 }
 
 static inline int
 HYPERVISOR_fpu_taskswitch(
 	int set)
 {
 	return _hypercall1(int, fpu_taskswitch, set);
 }
 
 static inline int 
 HYPERVISOR_sched_op_compat(
 	int cmd, unsigned long arg)
 {
 	return _hypercall2(int, sched_op_compat, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_sched_op(
 	int cmd, void *arg)
 {
 	return _hypercall2(int, sched_op, cmd, arg);
 }
 
 static inline long
 HYPERVISOR_set_timer_op(
 	uint64_t timeout)
 {
 	unsigned long timeout_hi = (unsigned long)(timeout>>32);
 	unsigned long timeout_lo = (unsigned long)timeout;
 	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
 }
 #if 0
 static inline int
 HYPERVISOR_platform_op(
         struct xen_platform_op *platform_op)
 {
         platform_op->interface_version = XENPF_INTERFACE_VERSION;
         return _hypercall1(int, platform_op, platform_op);
 }
 #endif
 static inline int
 HYPERVISOR_set_debugreg(
 	int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
 }
 
 static inline unsigned long
 HYPERVISOR_get_debugreg(
 	int reg)
 {
 	return _hypercall1(unsigned long, get_debugreg, reg);
 }
 
 static inline int
 HYPERVISOR_update_descriptor(
 	uint64_t ma, uint64_t desc)
 {
 	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
 }
 
 static inline int
 HYPERVISOR_memory_op(
 	unsigned int cmd, void *arg)
 {
 	return _hypercall2(int, memory_op, cmd, arg);
 }
 
+int HYPERVISOR_multicall(multicall_entry_t *, int);
 static inline int
-HYPERVISOR_multicall(
+_HYPERVISOR_multicall(
 	void *call_list, int nr_calls)
 {
 	return _hypercall2(int, multicall, call_list, nr_calls);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping(
 	unsigned long va, uint64_t new_val, unsigned long flags)
 {
 	uint32_t hi, lo;
 
 	lo = (uint32_t)(new_val & 0xffffffff);
 	hi = (uint32_t)(new_val >> 32);
 	
 	return _hypercall4(int, update_va_mapping, va,
 			   lo, hi, flags);
 }
 
 static inline int
 HYPERVISOR_event_channel_op(
 	int cmd, void *arg)
 {
 	int rc = _hypercall2(int, event_channel_op, cmd, arg);
 
 #if CONFIG_XEN_COMPAT <= 0x030002
 	if (__predict_false(rc == -ENOXENSYS)) {
 		struct evtchn_op op;
 		op.cmd = cmd;
 		memcpy(&op.u, arg, sizeof(op.u));
 		rc = _hypercall1(int, event_channel_op_compat, &op);
 		memcpy(arg, &op.u, sizeof(op.u));
 	}
 #endif
 	return (rc);
 }
 
 static inline int
 HYPERVISOR_xen_version(
 	int cmd, void *arg)
 {
 	return _hypercall2(int, xen_version, cmd, arg);
 }
 
 static inline int
 HYPERVISOR_console_io(
 	int cmd, int count, char *str)
 {
 	return _hypercall3(int, console_io, cmd, count, str);
 }
 
 static inline int
 HYPERVISOR_physdev_op(
 	int cmd, void *arg)
 {
 	int rc = _hypercall2(int, physdev_op, cmd, arg);
 #if CONFIG_XEN_COMPAT <= 0x030002
 	if (__predict_false(rc == -ENOXENSYS)) {
 		struct physdev_op op;
 		op.cmd = cmd;
 		memcpy(&op.u, arg, sizeof(op.u));
 		rc = _hypercall1(int, physdev_op_compat, &op);
 		memcpy(arg, &op.u, sizeof(op.u));
 	}
 #endif
 	return (rc);
 }
 
 static inline int
 HYPERVISOR_grant_table_op(
 	unsigned int cmd, void *uop, unsigned int count)
 {
 	return _hypercall3(int, grant_table_op, cmd, uop, count);
 }
 
 static inline int
 HYPERVISOR_update_va_mapping_otherdomain(
 	unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid)
 {
 	uint32_t hi, lo;
 	
 	lo = (uint32_t)(new_val & 0xffffffff);
 	hi = (uint32_t)(new_val >> 32);
 	
 	return _hypercall5(int, update_va_mapping_otherdomain, va,
 			   lo, hi, flags, domid);
 }
 
 static inline int
 HYPERVISOR_vm_assist(
 	unsigned int cmd, unsigned int type)
 {
 	return _hypercall2(int, vm_assist, cmd, type);
 }
 
 static inline int
 HYPERVISOR_vcpu_op(
 	int cmd, int vcpuid, void *extra_args)
 {
 	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
 }
 
 static inline int
 HYPERVISOR_suspend(
 	unsigned long srec)
 {
 	struct sched_shutdown sched_shutdown = {
 		.reason = SHUTDOWN_suspend
 	};
 	int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
 			   &sched_shutdown, srec);
 #if CONFIG_XEN_COMPAT <= 0x030002
 	if (rc == -ENOXENSYS)
 		rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
 				 SHUTDOWN_suspend, srec);
 #endif	
 	return (rc);
 }
 
 #if CONFIG_XEN_COMPAT <= 0x030002
 static inline int
 HYPERVISOR_nmi_op(
         unsigned long op, void *arg)
 {
         return _hypercall2(int, nmi_op, op, arg);
 }
 #endif
 
 static inline int
 HYPERVISOR_callback_op(
         int cmd, void *arg)
 {
         return _hypercall2(int, callback_op, cmd, arg);
 }
 
 #ifndef CONFIG_XEN
 static inline unsigned long
 HYPERVISOR_hvm_op(
     int op, void *arg)
 {
     return _hypercall2(unsigned long, hvm_op, op, arg);
 }
 #endif
 
 static inline int
 HYPERVISOR_xenoprof_op(
         int op, void *arg)
 {
         return _hypercall2(int, xenoprof_op, op, arg);
 }
 
 static inline int
 HYPERVISOR_kexec_op(
         unsigned long op, void *args)
 {
         return _hypercall2(int, kexec_op, op, args);
 }
 #endif /* __HYPERCALL_H__ */
 
 /*
  * Local variables:
  *  c-file-style: "linux"
  *  indent-tabs-mode: t
  *  c-indent-level: 8
  *  c-basic-offset: 8
  *  tab-width: 8
  * End:
  */
Index: projects/binutils-2.17/sys/i386/isa/npx.c
===================================================================
--- projects/binutils-2.17/sys/i386/isa/npx.c	(revision 215829)
+++ projects/binutils-2.17/sys/i386/isa/npx.c	(revision 215830)
@@ -1,1104 +1,1104 @@
 /*-
  * Copyright (c) 1990 William Jolitz.
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)npx.c	7.2 (Berkeley) 5/12/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 #include "opt_isa.h"
 #include "opt_npx.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #ifdef NPX_DEBUG
 #include <sys/syslog.h>
 #endif
 #include <sys/signalvar.h>
 
 #include <machine/asmacros.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/resource.h>
 #include <machine/specialreg.h>
 #include <machine/segments.h>
 #include <machine/ucontext.h>
 
 #include <machine/intr_machdep.h>
 #ifdef XEN
 #include <machine/xen/xen-os.h>
 #include <xen/hypervisor.h>
 #endif
 
 #ifdef DEV_ISA
 #include <isa/isavar.h>
 #endif
 
 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
 #define CPU_ENABLE_SSE
 #endif
 
 /*
  * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
  */
 
 /*
  * Wrappers for individual x87/SSE instructions.  With GNU-style inline
  * assembly (and not under lint) they are macros expanding to __asm
  * statements; otherwise they are only declared here as functions.
  */
 #if defined(__GNUCLIKE_ASM) && !defined(lint)
 
 #define	fldcw(cw)		__asm __volatile("fldcw %0" : : "m" (cw))
 #define	fnclex()		__asm __volatile("fnclex")
 #define	fninit()		__asm __volatile("fninit")
 #define	fnsave(addr)		__asm __volatile("fnsave %0" : "=m" (*(addr)))
 #define	fnstcw(addr)		__asm __volatile("fnstcw %0" : "=m" (*(addr)))
 #define	fnstsw(addr)		__asm __volatile("fnstsw %0" : "=am" (*(addr)))
 #define	fp_divide_by_0()	__asm __volatile( \
 				    "fldz; fld1; fdiv %st,%st(1); fnop")
 #define	frstor(addr)		__asm __volatile("frstor %0" : : "m" (*(addr)))
 #ifdef CPU_ENABLE_SSE
 #define	fxrstor(addr)		__asm __volatile("fxrstor %0" : : "m" (*(addr)))
 #define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
 #endif
 /*
  * start_emulating()/stop_emulating(): under XEN these call
  * HYPERVISOR_fpu_taskswitch(1) and HYPERVISOR_fpu_taskswitch(0);
  * otherwise start_emulating() ORs CR0_TS into the machine status word
  * (smsw/lmsw) and stop_emulating() executes clts.
  */
 #ifdef XEN
 #define	start_emulating()	(HYPERVISOR_fpu_taskswitch(1))
 #define	stop_emulating()	(HYPERVISOR_fpu_taskswitch(0))
 #else
 #define	start_emulating()	__asm __volatile( \
 				    "smsw %%ax; orb %0,%%al; lmsw %%ax" \
 				    : : "n" (CR0_TS) : "ax")
 #define	stop_emulating()	__asm __volatile("clts")
 #endif
 #else	/* !(__GNUCLIKE_ASM && !lint) */
 
 /* No inline assembly available: the same operations as plain functions. */
 void	fldcw(u_short cw);
 void	fnclex(void);
 void	fninit(void);
 void	fnsave(caddr_t addr);
 void	fnstcw(caddr_t addr);
 void	fnstsw(caddr_t addr);
 void	fp_divide_by_0(void);
 void	frstor(caddr_t addr);
 #ifdef CPU_ENABLE_SSE
 void	fxsave(caddr_t addr);
 void	fxrstor(caddr_t addr);
 #endif
 void	start_emulating(void);
 void	stop_emulating(void);
 
 #endif	/* __GNUCLIKE_ASM && !lint */
 
 /*
  * Accessors for the FPU control word (CW) and status word (SW) stored in
  * a thread's saved FPU state (td_pcb->pcb_save), plus a setter for the
  * control word inside a given savefpu area.  With CPU_ENABLE_SSE the
  * layout is chosen at run time from cpu_fxsr (sv_xmm when set, sv_87
  * otherwise); without it only the sv_87 layout is used.
  *
  * Both variants parenthesize their arguments, so an argument expression
  * such as "td + 1" expands correctly, and SET_FPU_CW is a single
  * do { } while (0) statement in both variants.
  */
 #ifdef CPU_ENABLE_SSE
 #define GET_FPU_CW(thread) \
 	(cpu_fxsr ? \
 		(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
 		(thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
 #define GET_FPU_SW(thread) \
 	(cpu_fxsr ? \
 		(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
 		(thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
 #define SET_FPU_CW(savefpu, value) do { \
 	if (cpu_fxsr) \
 		(savefpu)->sv_xmm.sv_env.en_cw = (value); \
 	else \
 		(savefpu)->sv_87.sv_env.en_cw = (value); \
 } while (0)
 #else /* CPU_ENABLE_SSE */
 #define GET_FPU_CW(thread) \
 	((thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
 #define GET_FPU_SW(thread) \
 	((thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
 #define SET_FPU_CW(savefpu, value) do { \
 	(savefpu)->sv_87.sv_env.en_cw = (value); \
 } while (0)
 #endif /* CPU_ENABLE_SSE */
 
 typedef u_char bool_t;
 
 #ifdef CPU_ENABLE_SSE
 static	void	fpu_clean_state(void);
 #endif
 
 static	void	fpusave(union savefpu *);
 static	void	fpurstor(union savefpu *);
 static	int	npx_attach(device_t dev);
 static	void	npx_identify(driver_t *driver, device_t parent);
 static	int	npx_probe(device_t dev);
 
 /* Nonzero once npx_probe() has found a usable FPU. */
 int	hw_float;
 
 /* Export hw_float read-only (CTLFLAG_RD) through sysctl. */
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     &hw_float, 0, "Floating point instructions executed in hardware");
 
 /* Incremented by probetrap; examined by npx_probe(). */
 static	volatile u_int		npx_traps_while_probing;
 /* FPU state captured in npx_attach() and loaded by npxdna() on first use. */
 static	union savefpu		npx_initialstate;
 
 /*
  * Trap stub that npx_probe() installs temporarily in the IDT_MF slot.
  * It increments npx_traps_while_probing (through an ss segment override),
  * executes fnclex and returns with iret.
  */
 alias_for_inthand_t probetrap;
 __asm("								\n\
 	.text							\n\
 	.p2align 2,0x90						\n\
 	.type	" __XSTRING(CNAME(probetrap)) ",@function	\n\
 " __XSTRING(CNAME(probetrap)) ":				\n\
 	ss							\n\
 	incl	" __XSTRING(CNAME(npx_traps_while_probing)) "	\n\
 	fnclex							\n\
 	iret							\n\
 ");
 
 /*
  * Identify routine.  Create a connection point on our parent for probing.
  *
  * Adds an "npx" child device to parent via BUS_ADD_CHILD() and panics if
  * the child cannot be created.  The driver argument is not used.
  */
 static void
 npx_identify(driver_t *driver, device_t parent)
 {
 	device_t child;
 
 	child = BUS_ADD_CHILD(parent, 0, "npx", 0);
 	if (child == NULL)
 		panic("npx_identify");
 }
 
 /*
  * Probe routine.  Set flags to tell npxattach() what to do.  Set up an
  * interrupt handler if npx needs to use interrupts.
  */
 /*
  * Returns 0 when a usable FPU was found (hw_float is then 1) and ENXIO
  * otherwise.  When CPUID reports an FPU the routine returns before
  * touching the IDT; on every later path the temporary probetrap gate
  * installed in idt[IDT_MF] is removed again at the cleanup label.
  */
 static int
 npx_probe(device_t dev)
 {
 	struct gate_descriptor save_idt_npxtrap;
 	u_short control, status;
 
 	device_set_desc(dev, "math processor");
 
 	/*
 	 * Modern CPUs all have an FPU that uses the INT16 interface
 	 * and provide a simple way to verify that, so handle the
 	 * common case right away.
 	 */
 	if (cpu_feature & CPUID_FPU) {
 		hw_float = 1;
 		device_quiet(dev);
 		return (0);
 	}
 
 	/* Route exception 16 to probetrap for the duration of the probe. */
 	save_idt_npxtrap = idt[IDT_MF];
 	setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	/*
 	 * Don't trap while we're probing.
 	 */
 	stop_emulating();
 
 	/*
 	 * Finish resetting the coprocessor, if any.  If there is an error
 	 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
 	 * it OK.  Bogus halts have never been observed, but we enabled
 	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
 	 */
 	fninit();
 
 	/*
 	 * Don't use fwait here because it might hang.
 	 * Don't use fnop here because it usually hangs if there is no FPU.
 	 */
 	DELAY(1000);		/* wait for any IRQ13 */
 #ifdef DIAGNOSTIC
 	if (npx_traps_while_probing != 0)
 		printf("fninit caused %u bogus npx trap(s)\n",
 		       npx_traps_while_probing);
 #endif
 	/*
 	 * Check for a status of mostly zero.
 	 */
 	status = 0x5a5a;
 	fnstsw(&status);
 	if ((status & 0xb8ff) == 0) {
 		/*
 		 * Good, now check for a proper control word.
 		 */
 		control = 0x5a5a;
 		fnstcw(&control);
 		if ((control & 0x1f3f) == 0x033f) {
 			/*
 			 * We have an npx, now divide by 0 to see if exception
 			 * 16 works.
 			 */
 			control &= ~(1 << 2);	/* enable divide by 0 trap */
 			fldcw(control);
 #ifdef FPU_ERROR_BROKEN
 			/*
 			 * FPU error signal doesn't work on some CPU
 			 * accelerator board.
 			 *
 			 * Leave through the cleanup label (which returns 0
 			 * because hw_float is set) so the saved IDT_MF gate
 			 * is restored instead of leaving probetrap installed.
 			 */
 			hw_float = 1;
 			goto cleanup;
 #endif
 			npx_traps_while_probing = 0;
 			fp_divide_by_0();
 			if (npx_traps_while_probing != 0) {
 				/*
 				 * Good, exception 16 works.
 				 */
 				hw_float = 1;
 				goto cleanup;
 			}
 			device_printf(dev,
 	"FPU does not use exception 16 for error reporting\n");
 			goto cleanup;
 		}
 	}
 
 	/*
 	 * Probe failed.  Floating point simply won't work.
 	 * Notify user and disable FPU/MMX/SSE instruction execution.
 	 */
 	device_printf(dev, "WARNING: no FPU!\n");
 	__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
 	    "n" (CR0_EM | CR0_MP) : "ax");
 
 cleanup:
 	idt[IDT_MF] = save_idt_npxtrap;
 	return (hw_float ? 0 : ENXIO);
 }
 
 /*
  * Attach routine - announce which it is, and wire into system
  */
 /*
  * Runs npxinit(), then captures the resulting FPU state into
  * npx_initialstate inside a critical section and zeroes the register
  * portions of that copy.  Always returns 0.
  */
 static int
 npx_attach(device_t dev)
 {
 
 	npxinit();
 	critical_enter();
 	/* Snapshot the FPU state between stop_emulating()/start_emulating(). */
 	stop_emulating();
 	fpusave(&npx_initialstate);
 	start_emulating();
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		/* Take the saved MXCSR mask if nonzero, else fall back to 0xFFBF. */
 		if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask)
 			cpu_mxcsr_mask = 
 			    npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask;
 		else
 			cpu_mxcsr_mask = 0xFFBF;
 		bzero(npx_initialstate.sv_xmm.sv_fp,
 		    sizeof(npx_initialstate.sv_xmm.sv_fp));
 		bzero(npx_initialstate.sv_xmm.sv_xmm,
 		    sizeof(npx_initialstate.sv_xmm.sv_xmm));
 		/* XXX might need even more zeroing. */
 	} else
 #endif
 		bzero(npx_initialstate.sv_87.sv_ac,
 		    sizeof(npx_initialstate.sv_87.sv_ac));
 	critical_exit();
 
 	return (0);
 }
 
 /*
  * Initialize floating point unit.
  */
 /*
  * Does nothing when hw_float is zero.  Otherwise, with interrupts
  * disabled, it discards the current FPU contents by saving them into a
  * scratch area, loads __INITIAL_NPXCW__ as the control word and calls
  * start_emulating() before restoring the interrupt state.
  */
 void
 npxinit(void)
 {
 	static union savefpu dummy;	/* scratch target for npxsave() */
 	register_t saveintr;
 	u_short control;
 
 	if (!hw_float)
 		return;
 	/*
 	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
 	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
 	 * the fpu and sets fpcurthread = NULL as important side effects.
 	 *
 	 * It is too early for critical_enter() to work on AP.
 	 */
 	saveintr = intr_disable();
 	npxsave(&dummy);
 	stop_emulating();
 #ifdef CPU_ENABLE_SSE
 	/* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
 	if (cpu_fxsr)
 		fninit();
 #endif
 	control = __INITIAL_NPXCW__;
 	fldcw(control);
 	start_emulating();
 	intr_restore(saveintr);
 }
 
 /*
  * Free coprocessor (if we have it).
  *
  * If the current thread owns the FPU on this CPU, its state is saved into
  * the current PCB's save area with npxsave(), which also clears
  * fpcurthread.  With NPX_DEBUG and a hardware FPU, exceptions recorded in
  * td's saved state are additionally logged as described below.
  */
 void
 npxexit(struct thread *td)
 {
 
 	critical_enter();
 	if (curthread == PCPU_GET(fpcurthread))
 		npxsave(PCPU_GET(curpcb)->pcb_save);
 	critical_exit();
 #ifdef NPX_DEBUG
 	if (hw_float) {
 		u_int	masked_exceptions;
 
 		masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
 		/*
 		 * Log exceptions that would have trapped with the old
 		 * control word (overflow, divide by 0, and invalid operand).
 		 */
 		if (masked_exceptions & 0x0d)
 			log(LOG_ERR,
 	"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
 			    td->td_proc->p_pid, td->td_proc->p_comm,
 			    masked_exceptions);
 	}
 #endif
 }
 
 /*
  * Report the format of the saved FPU state: _MC_FPFMT_NODEV when there is
  * no hardware FPU, _MC_FPFMT_XMM when cpu_fxsr is set (CPU_ENABLE_SSE
  * builds only), and _MC_FPFMT_387 otherwise.
  */
 int
 npxformat(void)
 {
 
 	if (!hw_float)
 		return (_MC_FPFMT_NODEV);
 #ifdef	CPU_ENABLE_SSE
 	if (cpu_fxsr)
 		return (_MC_FPFMT_XMM);
 #endif
 	return (_MC_FPFMT_387);
 }
 
 /* 
  * The following mechanism is used to ensure that the FPE_... value
  * that is passed as a trapcode to the signal handler of the user
  * process does not have more than one bit set.
  * 
  * Multiple bits may be set if the user process modifies the control
  * word while a status word bit is already set.  While this is a sign
  * of bad coding, we have no choice but to narrow them down to one
  * bit, since we must not send a trapcode that is not exactly one of
  * the FPE_ macros.
  *
  * The mechanism has a static table with 128 entries.  Each combination
  * of the 7 FPU status word exception bits directly translates to a
  * position in this table, where a single FPE_... value is stored.
  * This FPE_... value stored there is considered the "most important"
  * of the exception bits and will be sent as the signal code.  The
  * precedence of the bits is based upon Intel Document "Numerical
  * Applications", Chapter "Special Computational Situations".
  *
  * The macro to choose one of these values does these steps: 1) Throw
  * away status word bits that cannot be masked.  2) Throw away the bits
  * currently masked in the control word, assuming the user isn't
  * interested in them anymore.  3) Reinsert status word bit 7 (stack
  * fault) if it is set, which cannot be masked but must be preserved.
  * 4) Use the remaining bits to point into the trapcode table.
  *
  * The 6 maskable bits in order of their preference, as stated in the
  * above referenced Intel manual:
  * 1  Invalid operation (FP_X_INV)
  * 1a   Stack underflow
  * 1b   Stack overflow
  * 1c   Operand of unsupported format
  * 1d   SNaN operand.
  * 2  QNaN operand (not an exception, irrelevant here)
  * 3  Any other invalid-operation not mentioned above or zero divide
  *      (FP_X_INV, FP_X_DZ)
  * 4  Denormal operand (FP_X_DNML)
  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
  * 6  Inexact result (FP_X_IMP) 
  */
 /*
  * Trap-code lookup table used by npxtrap().  The index is a 7-bit mask:
  * bits 0-5 are the exception bits INV, DNML, DZ, OFL, UFL and IMP, and
  * bit 6 is STK; each entry's comment gives its index in hex and the bits
  * that index represents.  Entry 0 (no bits set) is 0.
  */
 static char fpetable[128] = {
 	0,
 	FPE_FLTINV,	/*  1 - INV */
 	FPE_FLTUND,	/*  2 - DNML */
 	FPE_FLTINV,	/*  3 - INV | DNML */
 	FPE_FLTDIV,	/*  4 - DZ */
 	FPE_FLTINV,	/*  5 - INV | DZ */
 	FPE_FLTDIV,	/*  6 - DNML | DZ */
 	FPE_FLTINV,	/*  7 - INV | DNML | DZ */
 	FPE_FLTOVF,	/*  8 - OFL */
 	FPE_FLTINV,	/*  9 - INV | OFL */
 	FPE_FLTUND,	/*  A - DNML | OFL */
 	FPE_FLTINV,	/*  B - INV | DNML | OFL */
 	FPE_FLTDIV,	/*  C - DZ | OFL */
 	FPE_FLTINV,	/*  D - INV | DZ | OFL */
 	FPE_FLTDIV,	/*  E - DNML | DZ | OFL */
 	FPE_FLTINV,	/*  F - INV | DNML | DZ | OFL */
 	FPE_FLTUND,	/* 10 - UFL */
 	FPE_FLTINV,	/* 11 - INV | UFL */
 	FPE_FLTUND,	/* 12 - DNML | UFL */
 	FPE_FLTINV,	/* 13 - INV | DNML | UFL */
 	FPE_FLTDIV,	/* 14 - DZ | UFL */
 	FPE_FLTINV,	/* 15 - INV | DZ | UFL */
 	FPE_FLTDIV,	/* 16 - DNML | DZ | UFL */
 	FPE_FLTINV,	/* 17 - INV | DNML | DZ | UFL */
 	FPE_FLTOVF,	/* 18 - OFL | UFL */
 	FPE_FLTINV,	/* 19 - INV | OFL | UFL */
 	FPE_FLTUND,	/* 1A - DNML | OFL | UFL */
 	FPE_FLTINV,	/* 1B - INV | DNML | OFL | UFL */
 	FPE_FLTDIV,	/* 1C - DZ | OFL | UFL */
 	FPE_FLTINV,	/* 1D - INV | DZ | OFL | UFL */
 	FPE_FLTDIV,	/* 1E - DNML | DZ | OFL | UFL */
 	FPE_FLTINV,	/* 1F - INV | DNML | DZ | OFL | UFL */
 	FPE_FLTRES,	/* 20 - IMP */
 	FPE_FLTINV,	/* 21 - INV | IMP */
 	FPE_FLTUND,	/* 22 - DNML | IMP */
 	FPE_FLTINV,	/* 23 - INV | DNML | IMP */
 	FPE_FLTDIV,	/* 24 - DZ | IMP */
 	FPE_FLTINV,	/* 25 - INV | DZ | IMP */
 	FPE_FLTDIV,	/* 26 - DNML | DZ | IMP */
 	FPE_FLTINV,	/* 27 - INV | DNML | DZ | IMP */
 	FPE_FLTOVF,	/* 28 - OFL | IMP */
 	FPE_FLTINV,	/* 29 - INV | OFL | IMP */
 	FPE_FLTUND,	/* 2A - DNML | OFL | IMP */
 	FPE_FLTINV,	/* 2B - INV | DNML | OFL | IMP */
 	FPE_FLTDIV,	/* 2C - DZ | OFL | IMP */
 	FPE_FLTINV,	/* 2D - INV | DZ | OFL | IMP */
 	FPE_FLTDIV,	/* 2E - DNML | DZ | OFL | IMP */
 	FPE_FLTINV,	/* 2F - INV | DNML | DZ | OFL | IMP */
 	FPE_FLTUND,	/* 30 - UFL | IMP */
 	FPE_FLTINV,	/* 31 - INV | UFL | IMP */
 	FPE_FLTUND,	/* 32 - DNML | UFL | IMP */
 	FPE_FLTINV,	/* 33 - INV | DNML | UFL | IMP */
 	FPE_FLTDIV,	/* 34 - DZ | UFL | IMP */
 	FPE_FLTINV,	/* 35 - INV | DZ | UFL | IMP */
 	FPE_FLTDIV,	/* 36 - DNML | DZ | UFL | IMP */
 	FPE_FLTINV,	/* 37 - INV | DNML | DZ | UFL | IMP */
 	FPE_FLTOVF,	/* 38 - OFL | UFL | IMP */
 	FPE_FLTINV,	/* 39 - INV | OFL | UFL | IMP */
 	FPE_FLTUND,	/* 3A - DNML | OFL | UFL | IMP */
 	FPE_FLTINV,	/* 3B - INV | DNML | OFL | UFL | IMP */
 	FPE_FLTDIV,	/* 3C - DZ | OFL | UFL | IMP */
 	FPE_FLTINV,	/* 3D - INV | DZ | OFL | UFL | IMP */
 	FPE_FLTDIV,	/* 3E - DNML | DZ | OFL | UFL | IMP */
 	FPE_FLTINV,	/* 3F - INV | DNML | DZ | OFL | UFL | IMP */
 	FPE_FLTSUB,	/* 40 - STK */
 	FPE_FLTSUB,	/* 41 - INV | STK */
 	FPE_FLTUND,	/* 42 - DNML | STK */
 	FPE_FLTSUB,	/* 43 - INV | DNML | STK */
 	FPE_FLTDIV,	/* 44 - DZ | STK */
 	FPE_FLTSUB,	/* 45 - INV | DZ | STK */
 	FPE_FLTDIV,	/* 46 - DNML | DZ | STK */
 	FPE_FLTSUB,	/* 47 - INV | DNML | DZ | STK */
 	FPE_FLTOVF,	/* 48 - OFL | STK */
 	FPE_FLTSUB,	/* 49 - INV | OFL | STK */
 	FPE_FLTUND,	/* 4A - DNML | OFL | STK */
 	FPE_FLTSUB,	/* 4B - INV | DNML | OFL | STK */
 	FPE_FLTDIV,	/* 4C - DZ | OFL | STK */
 	FPE_FLTSUB,	/* 4D - INV | DZ | OFL | STK */
 	FPE_FLTDIV,	/* 4E - DNML | DZ | OFL | STK */
 	FPE_FLTSUB,	/* 4F - INV | DNML | DZ | OFL | STK */
 	FPE_FLTUND,	/* 50 - UFL | STK */
 	FPE_FLTSUB,	/* 51 - INV | UFL | STK */
 	FPE_FLTUND,	/* 52 - DNML | UFL | STK */
 	FPE_FLTSUB,	/* 53 - INV | DNML | UFL | STK */
 	FPE_FLTDIV,	/* 54 - DZ | UFL | STK */
 	FPE_FLTSUB,	/* 55 - INV | DZ | UFL | STK */
 	FPE_FLTDIV,	/* 56 - DNML | DZ | UFL | STK */
 	FPE_FLTSUB,	/* 57 - INV | DNML | DZ | UFL | STK */
 	FPE_FLTOVF,	/* 58 - OFL | UFL | STK */
 	FPE_FLTSUB,	/* 59 - INV | OFL | UFL | STK */
 	FPE_FLTUND,	/* 5A - DNML | OFL | UFL | STK */
 	FPE_FLTSUB,	/* 5B - INV | DNML | OFL | UFL | STK */
 	FPE_FLTDIV,	/* 5C - DZ | OFL | UFL | STK */
 	FPE_FLTSUB,	/* 5D - INV | DZ | OFL | UFL | STK */
 	FPE_FLTDIV,	/* 5E - DNML | DZ | OFL | UFL | STK */
 	FPE_FLTSUB,	/* 5F - INV | DNML | DZ | OFL | UFL | STK */
 	FPE_FLTRES,	/* 60 - IMP | STK */
 	FPE_FLTSUB,	/* 61 - INV | IMP | STK */
 	FPE_FLTUND,	/* 62 - DNML | IMP | STK */
 	FPE_FLTSUB,	/* 63 - INV | DNML | IMP | STK */
 	FPE_FLTDIV,	/* 64 - DZ | IMP | STK */
 	FPE_FLTSUB,	/* 65 - INV | DZ | IMP | STK */
 	FPE_FLTDIV,	/* 66 - DNML | DZ | IMP | STK */
 	FPE_FLTSUB,	/* 67 - INV | DNML | DZ | IMP | STK */
 	FPE_FLTOVF,	/* 68 - OFL | IMP | STK */
 	FPE_FLTSUB,	/* 69 - INV | OFL | IMP | STK */
 	FPE_FLTUND,	/* 6A - DNML | OFL | IMP | STK */
 	FPE_FLTSUB,	/* 6B - INV | DNML | OFL | IMP | STK */
 	FPE_FLTDIV,	/* 6C - DZ | OFL | IMP | STK */
 	FPE_FLTSUB,	/* 6D - INV | DZ | OFL | IMP | STK */
 	FPE_FLTDIV,	/* 6E - DNML | DZ | OFL | IMP | STK */
 	FPE_FLTSUB,	/* 6F - INV | DNML | DZ | OFL | IMP | STK */
 	FPE_FLTUND,	/* 70 - UFL | IMP | STK */
 	FPE_FLTSUB,	/* 71 - INV | UFL | IMP | STK */
 	FPE_FLTUND,	/* 72 - DNML | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 73 - INV | DNML | UFL | IMP | STK */
 	FPE_FLTDIV,	/* 74 - DZ | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 75 - INV | DZ | UFL | IMP | STK */
 	FPE_FLTDIV,	/* 76 - DNML | DZ | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 77 - INV | DNML | DZ | UFL | IMP | STK */
 	FPE_FLTOVF,	/* 78 - OFL | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 79 - INV | OFL | UFL | IMP | STK */
 	FPE_FLTUND,	/* 7A - DNML | OFL | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 7B - INV | DNML | OFL | UFL | IMP | STK */
 	FPE_FLTDIV,	/* 7C - DZ | OFL | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 7D - INV | DZ | OFL | UFL | IMP | STK */
 	FPE_FLTDIV,	/* 7E - DNML | DZ | OFL | UFL | IMP | STK */
 	FPE_FLTSUB,	/* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
 };
 
 /*
  * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
  *
  * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
  * depend on longjmp() restoring a usable state.  Restoring the state
  * or examining it might fail if we didn't clear exceptions.
  *
  * The error code chosen will be one of the FPE_... macros. It will be
  * sent as the second argument to old BSD-style signal handlers and as
  * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
  *
  * XXX the FP state is not preserved across signal handlers.  So signal
  * handlers cannot afford to do FP unless they preserve the state or
  * longjmp() out.  Both preserving the state and longjmp()ing may be
  * destroyed by IRQ13 bugs.  Clearing FP exceptions is not an acceptable
  * solution for signals other than SIGFPE.
  */
 /*
  * Returns the fpetable[] entry selected by the status-word exception bits
  * that are unmasked in the control word, plus bit 6 (0x40) of the status
  * word.  Panics when there is no hardware FPU.
  */
 int
 npxtrap(void)
 {
 	u_short control, status;
 
 	if (!hw_float) {
 		printf("npxtrap: fpcurthread = %p, curthread = %p, hw_float = %d\n",
 		       PCPU_GET(fpcurthread), curthread, hw_float);
 		panic("npxtrap from nowhere");
 	}
 	critical_enter();
 
 	/*
 	 * Interrupt handling (for another interrupt) may have pushed the
 	 * state to memory.  Fetch the relevant parts of the state from
 	 * wherever they are.
 	 */
 	if (PCPU_GET(fpcurthread) != curthread) {
 		control = GET_FPU_CW(curthread);
 		status = GET_FPU_SW(curthread);
 	} else {
 		fnstcw(&control);
 		fnstsw(&status);
 	}
 
 	/* Only clear exceptions in the hardware when this thread owns it. */
 	if (PCPU_GET(fpcurthread) == curthread)
 		fnclex();
 	critical_exit();
 	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
 }
 
 /*
  * Implement device not available (DNA) exception
  *
  * It would be better to switch FP context here (if curthread != fpcurthread)
  * and not necessarily for every context switch, but it is too hard to
  * access foreign pcb's.
  */
 
 /* Number of times npxdna() was entered while curthread already owned the FPU. */
 static int err_count = 0;
 
 /*
  * Returns 0 when there is no hardware FPU and 1 otherwise.  On the normal
  * path the current thread becomes fpcurthread and its FPU state is loaded:
  * either the initial state (first use) or the state saved in the PCB.
  */
 int
 npxdna(void)
 {
 	struct pcb *pcb;
 
 	if (!hw_float)
 		return (0);
 	critical_enter();
 	if (PCPU_GET(fpcurthread) == curthread) {
 		/* Already the owner: report it, stop emulating and return. */
 		printf("npxdna: fpcurthread == curthread %d times\n",
 		    ++err_count);
 		stop_emulating();
 		critical_exit();
 		return (1);
 	}
 	if (PCPU_GET(fpcurthread) != NULL) {
 		/* Another thread still owns the FPU on this CPU: fatal. */
 		printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
 		       PCPU_GET(fpcurthread),
 		       PCPU_GET(fpcurthread)->td_proc->p_pid,
 		       curthread, curthread->td_proc->p_pid);
 		panic("npxdna");
 	}
 	stop_emulating();
 	/*
 	 * Record new context early in case frstor causes an IRQ13.
 	 */
 	PCPU_SET(fpcurthread, curthread);
 	pcb = PCPU_GET(curpcb);
 
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr)
 		fpu_clean_state();
 #endif
 
 	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
 		/*
 		 * This is the first time this thread has used the FPU or
 		 * the PCB doesn't contain a clean FPU state.  Explicitly
 		 * load an initial state.
 		 */
 		fpurstor(&npx_initialstate);
 		/* Apply a per-PCB initial control word if it is not the default. */
 		if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
 			fldcw(pcb->pcb_initial_npxcw);
 		pcb->pcb_flags |= PCB_NPXINITDONE;
 		if (PCB_USER_FPU(pcb))
 			pcb->pcb_flags |= PCB_NPXUSERINITDONE;
 	} else {
 		/*
 		 * The following fpurstor() may cause an IRQ13 when the
 		 * state being restored has a pending error.  The error will
 		 * appear to have been triggered by the current (npx) user
 		 * instruction even when that instruction is a no-wait
 		 * instruction that should not trigger an error (e.g.,
 		 * fnclex).  On at least one 486 system all of the no-wait
 		 * instructions are broken the same as frstor, so our
 		 * treatment does not amplify the breakage.  On at least
 		 * one 386/Cyrix 387 system, fnclex works correctly while
 		 * frstor and fnsave are broken, so our treatment breaks
 		 * fnclex if it is the first FPU instruction after a context
 		 * switch.
 		 */
 		fpurstor(pcb->pcb_save);
 	}
 	critical_exit();
 
 	return (1);
 }
 
 /*
  * Wrapper for fnsave instruction, partly to handle hardware bugs.  When npx
  * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
  * no-wait npx instructions.  See the Intel application note AP-578 for
  * details.  This doesn't cause any additional complications here.  IRQ13's
  * are inherently asynchronous unless the CPU is frozen to deliver them --
  * one that started in userland may be delivered many instructions later,
  * after the process has entered the kernel.  It may even be delivered after
  * the fnsave here completes.  A spurious IRQ13 for the fnsave is handled in
  * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
  * it is normally ignored at first because we set fpcurthread to NULL; it is
  * normally retriggered in npxdna() after return to user mode.
  *
  * npxsave() must be called with interrupts disabled, so that it clears
  * fpcurthread atomically with saving the state.  We require callers to do the
  * disabling, since most callers need to disable interrupts anyway to call
  * npxsave() atomically with checking fpcurthread.
  *
  * A previous version of npxsave() went to great lengths to execute fnsave
  * with interrupts enabled in case executing it froze the CPU.  This case
  * can't happen, at least for Intel CPU/NPX's.  Spurious IRQ13's don't imply
  * spurious freezes.
  */
 /*
  * Save the FPU state into *addr with fpusave(), bracketed by
  * stop_emulating()/start_emulating(), then clear fpcurthread.
  */
 void
 npxsave(union savefpu *addr)
 {
 
 	stop_emulating();
 	fpusave(addr);
 
 	start_emulating();
 	PCPU_SET(fpcurthread, NULL);
 }
 
 /*
  * Give up ownership of the FPU without saving its contents: clears
  * fpcurthread, clears PCB_NPXINITDONE in the owning thread's PCB and calls
  * start_emulating().  The caller must be the FPU owner and must be inside
  * a critical section (both are asserted).
  */
 void
 npxdrop(void)
 {
 	struct thread *td;
 
 	/*
 	 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
 	 * ones don't cause a panic on the next frstor.
 	 */
 #ifdef CPU_ENABLE_SSE
 	if (!cpu_fxsr)
 #endif
 		fnclex();
 
 	td = PCPU_GET(fpcurthread);
 	KASSERT(td == curthread, ("npxdrop: fpcurthread != curthread"));
 	CRITICAL_ASSERT(td);
 	PCPU_SET(fpcurthread, NULL);
 	td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
 	start_emulating();
 }
 
 /*
  * Get the state of the FPU without dropping ownership (if possible).
  * It returns the FPU ownership status.
  */
 /*
  * Copy td's FPU state into *addr and report where it came from:
  * _MC_FPOWNED_NONE (no FPU, or state never initialized - the initial
  * state is returned), _MC_FPOWNED_FPU (read from the hardware) or
  * _MC_FPOWNED_PCB (copied from the PCB save area).
  */
 int
 npxgetregs(struct thread *td, union savefpu *addr)
 {
 	struct pcb *tdpcb;
 
 	if (!hw_float)
 		return (_MC_FPOWNED_NONE);
 
 	tdpcb = td->td_pcb;
 	if ((tdpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
 		/* Never used: hand back the initial state and control word. */
 		bcopy(&npx_initialstate, addr, sizeof(npx_initialstate));
 		SET_FPU_CW(addr, tdpcb->pcb_initial_npxcw);
 		return (_MC_FPOWNED_NONE);
 	}
 
 	critical_enter();
 	if (td != PCPU_GET(fpcurthread)) {
 		/* State lives in the PCB; a plain copy is enough. */
 		critical_exit();
 		bcopy(tdpcb->pcb_save, addr, sizeof(*addr));
 		return (_MC_FPOWNED_PCB);
 	}
 
 	fpusave(addr);
 #ifdef CPU_ENABLE_SSE
 	if (!cpu_fxsr)
 #endif
 		/*
 		 * fnsave initializes the FPU and destroys whatever
 		 * context it contains.  Make sure the FPU owner
 		 * starts with a clean state next time.
 		 */
 		npxdrop();
 	critical_exit();
 	return (_MC_FPOWNED_FPU);
 }
 
 /*
  * Like npxgetregs(), but for the user FPU state: initialization is judged
  * by PCB_NPXUSERINITDONE, the hardware is read only when td owns the FPU
  * and PCB_USER_FPU(pcb) holds, and the fallback copy comes from
  * pcb_user_save.
  */
 int
 npxgetuserregs(struct thread *td, union savefpu *addr)
 {
 	struct pcb *tdpcb;
 
 	if (!hw_float)
 		return (_MC_FPOWNED_NONE);
 
 	tdpcb = td->td_pcb;
 	if ((tdpcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) {
 		/* Never used: hand back the initial state and control word. */
 		bcopy(&npx_initialstate, addr, sizeof(npx_initialstate));
 		SET_FPU_CW(addr, tdpcb->pcb_initial_npxcw);
 		return (_MC_FPOWNED_NONE);
 	}
 
 	critical_enter();
 	if (td != PCPU_GET(fpcurthread) || !PCB_USER_FPU(tdpcb)) {
 		/* User state lives in the PCB; a plain copy is enough. */
 		critical_exit();
 		bcopy(&tdpcb->pcb_user_save, addr, sizeof(*addr));
 		return (_MC_FPOWNED_PCB);
 	}
 
 	fpusave(addr);
 #ifdef CPU_ENABLE_SSE
 	if (!cpu_fxsr)
 #endif
 		/*
 		 * fnsave initializes the FPU and destroys whatever
 		 * context it contains.  Make sure the FPU owner
 		 * starts with a clean state next time.
 		 */
 		npxdrop();
 	critical_exit();
 	return (_MC_FPOWNED_FPU);
 }
 
 /*
  * Set the state of the FPU.
  */
 /*
  * Install *addr as td's FPU state.  Does nothing without a hardware FPU.
  * The state is loaded into the hardware when td owns the FPU, otherwise
  * copied into the PCB save area; the PCB is then marked initialized.
  */
 void
 npxsetregs(struct thread *td, union savefpu *addr)
 {
 	struct pcb *pcb;
 
 	if (!hw_float)
 		return;
 
 	pcb = td->td_pcb;
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread)) {
 #ifdef CPU_ENABLE_SSE
 		if (!cpu_fxsr)
 #endif
 			fnclex();	/* As in npxdrop(). */
 		fpurstor(addr);
 		critical_exit();
 	} else {
 		critical_exit();
 		bcopy(addr, pcb->pcb_save, sizeof(*addr));
 	}
 	/* Mark the user state initialized too when PCB_USER_FPU(pcb) holds. */
 	if (PCB_USER_FPU(pcb))
 		pcb->pcb_flags |= PCB_NPXUSERINITDONE;
 	pcb->pcb_flags |= PCB_NPXINITDONE;
 }
 
 /*
  * Install *addr as td's user FPU state.  Does nothing without a hardware
  * FPU.  The state is loaded into the hardware when td owns the FPU and
  * PCB_USER_FPU(pcb) holds; otherwise it is copied into pcb_user_save.
  */
 void
 npxsetuserregs(struct thread *td, union savefpu *addr)
 {
 	struct pcb *pcb;
 
 	if (!hw_float)
 		return;
 
 	pcb = td->td_pcb;
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
 #ifdef CPU_ENABLE_SSE
 		if (!cpu_fxsr)
 #endif
 			fnclex();	/* As in npxdrop(). */
 		fpurstor(addr);
 		critical_exit();
 		pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
 	} else {
 		critical_exit();
 		bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
 		/* PCB_NPXINITDONE is only set here when PCB_USER_FPU(pcb) holds. */
 		if (PCB_USER_FPU(pcb))
 			pcb->pcb_flags |= PCB_NPXINITDONE;
 		pcb->pcb_flags |= PCB_NPXUSERINITDONE;
 	}
 }
 
 /*
  * Store the FPU state into *addr: with fxsave when cpu_fxsr is set
  * (CPU_ENABLE_SSE builds only), with fnsave otherwise.
  */
 static void
 fpusave(union savefpu *addr)
 {
 
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr)
 		fxsave(addr);
 	else
 #endif
 		fnsave(addr);
 }
 
 #ifdef CPU_ENABLE_SSE
 /*
  * On AuthenticAMD processors, the fxrstor instruction does not restore
  * the x87's stored last instruction pointer, last data pointer, and last
  * opcode values, except in the rare case in which the exception summary
  * (ES) bit in the x87 status word is set to 1.
  *
  * In order to avoid leaking this information across processes, we clean
  * these values by performing a dummy load before executing fxrstor().
  */
 static void
 fpu_clean_state(void)
 {
 	/* Operand for the dummy load below; only its address matters here. */
 	static float dummy_variable = 0.0;
 	u_short status;
 
 	/*
 	 * Clear the ES bit in the x87 status word if it is currently
 	 * set, in order to avoid causing a fault in the upcoming load.
 	 */
 	fnstsw(&status);
 	if (status & 0x80)
 		fnclex();
 
 	/*
 	 * Load the dummy variable into the x87 stack.  This mangles
 	 * the x87 stack, but we don't care since we're about to call
 	 * fxrstor() anyway.
 	 *
 	 * NOTE(review): ffree %st(7) presumably marks that register empty
 	 * so the following load cannot overflow the register stack - confirm.
 	 */
-	__asm __volatile("ffree %%st(7); fld %0" : : "m" (dummy_variable));
+	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
 }
 #endif /* CPU_ENABLE_SSE */
 
 /*
  * Load the FPU state from *addr: with fxrstor when cpu_fxsr is set
  * (CPU_ENABLE_SSE builds only), with frstor otherwise.
  */
 static void
 fpurstor(union savefpu *addr)
 {
 
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr)
 		fxrstor(addr);
 	else
 #endif
 		frstor(addr);
 }
 
 /*
  * Method table for the npx driver: identify/probe/attach map to the
  * routines in this file, the remaining entries to the bus_generic_*
  * implementations.  The table ends with a zeroed entry.
  */
 static device_method_t npx_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	npx_identify),
 	DEVMETHOD(device_probe,		npx_probe),
 	DEVMETHOD(device_attach,	npx_attach),
 	DEVMETHOD(device_detach,	bus_generic_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	bus_generic_resume),
 	
 	{ 0, 0 }
 };
 
 /* Driver description: name "npx", the method table above, and no softc. */
 static driver_t npx_driver = {
 	"npx",
 	npx_methods,
 	1,			/* no softc */
 };
 
 static devclass_t npx_devclass;
 
 /*
  * We prefer to attach to the root nexus so that the usual case (exception 16)
  * doesn't describe the processor as being `on isa'.
  */
 DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
 
 #ifdef DEV_ISA
 /*
  * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
  */
 static struct isa_pnp_id npxisa_ids[] = {
 	{ 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
 	{ 0 }
 };
 
 /*
  * Match the device against npxisa_ids through the parent's ISA PNP probe.
  * The probe result is returned unchanged; the device is made quiet when
  * that result is <= 0.
  */
 static int
 npxisa_probe(device_t dev)
 {
 	int result;
 
 	result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids);
 	if (result <= 0)
 		device_quiet(dev);
 	return (result);
 }
 
 /* Attach routine for the npxisa driver: performs no work and returns 0. */
 static int
 npxisa_attach(device_t dev)
 {
 	return (0);
 }
 
 /*
  * Method table for the npxisa driver: probe/attach map to the routines
  * above, the remaining entries to the bus_generic_* implementations.
  * The table ends with a zeroed entry.
  */
 static device_method_t npxisa_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		npxisa_probe),
 	DEVMETHOD(device_attach,	npxisa_attach),
 	DEVMETHOD(device_detach,	bus_generic_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	bus_generic_resume),
 	
 	{ 0, 0 }
 };
 
 /* Driver description: name "npxisa", its method table, and no softc. */
 static driver_t npxisa_driver = {
 	"npxisa",
 	npxisa_methods,
 	1,			/* no softc */
 };
 
 static devclass_t npxisa_devclass;
 
 DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
 #ifndef PC98
 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
 #endif
 #endif /* DEV_ISA */
 
 /*
  * Redirect td's FPU save area to ctx->hwstate.  The previous save area is
  * remembered in ctx->prev and the previous PCB_NPXINITDONE setting in
  * ctx->flags so that fpu_kern_leave() can restore them.  The flags
  * argument is not used.  Always returns 0.
  */
 int
 fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
 	    ("mangled pcb_save"));
 	ctx->flags = 0;
 	if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
 		ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
 	/* npxexit() saves the live state first if curthread owns the FPU. */
 	npxexit(td);
 	ctx->prev = pcb->pcb_save;
 	pcb->pcb_save = &ctx->hwstate;
 	pcb->pcb_flags |= PCB_KERNNPX;
 	pcb->pcb_flags &= ~PCB_NPXINITDONE;
 	return (0);
 }
 
 /*
  * Undo fpu_kern_enter(): drop the FPU if curthread owns it, restore
  * pcb_save from ctx->prev and recompute PCB_NPXINITDONE.  When the
  * restored area is pcb_user_save the flag follows PCB_NPXUSERINITDONE and
  * PCB_KERNNPX is cleared; otherwise the flag follows the value recorded
  * in ctx->flags.  Always returns 0.
  */
 int
 fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	critical_enter();
 	if (curthread == PCPU_GET(fpcurthread))
 		npxdrop();
 	critical_exit();
 	pcb->pcb_save = ctx->prev;
 	if (pcb->pcb_save == &pcb->pcb_user_save) {
 		if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
 			pcb->pcb_flags |= PCB_NPXINITDONE;
 		else
 			pcb->pcb_flags &= ~PCB_NPXINITDONE;
 		pcb->pcb_flags &= ~PCB_KERNNPX;
 	} else {
 		if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
 			pcb->pcb_flags |= PCB_NPXINITDONE;
 		else
 			pcb->pcb_flags &= ~PCB_NPXINITDONE;
 		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
 	}
 	return (0);
 }
 
 /*
  * Set PCB_KERNNPX in the current PCB.  Asserts that the caller is a
  * kernel thread (TDP_KTHREAD), that pcb_save still points at
  * pcb_user_save, and that PCB_USER_FPU(pcb) holds.  The flags argument is
  * not used.  Always returns 0.
  */
 int
 fpu_kern_thread(u_int flags)
 {
 	struct pcb *pcb;
 
 	pcb = PCPU_GET(curpcb);
 	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
 	    ("Only kthread may use fpu_kern_thread"));
 	KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save"));
 	KASSERT(PCB_USER_FPU(pcb), ("recursive call"));
 
 	pcb->pcb_flags |= PCB_KERNNPX;
 	return (0);
 }
 
 /*
  * Returns 1 when the current thread is a kernel thread (TDP_KTHREAD) whose
  * PCB has PCB_KERNNPX set, and 0 otherwise.  The flags argument is not
  * used.
  */
 int
 is_fpu_kern_thread(u_int flags)
 {
 
 	return ((curthread->td_pflags & TDP_KTHREAD) != 0 &&
 	    (PCPU_GET(curpcb)->pcb_flags & PCB_KERNNPX) != 0);
 }
Index: projects/binutils-2.17/sys/i386/pci/pci_bus.c
===================================================================
--- projects/binutils-2.17/sys/i386/pci/pci_bus.c	(revision 215829)
+++ projects/binutils-2.17/sys/i386/pci/pci_bus.c	(revision 215830)
@@ -1,683 +1,688 @@
 /*-
  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcib_private.h>
 #include <isa/isavar.h>
 #ifdef CPU_ELAN
 #include <machine/md_var.h>
 #endif
 #include <machine/legacyvar.h>
 #include <machine/pci_cfgreg.h>
 #include <machine/resource.h>
 
 #include "pcib_if.h"
 
 static int	pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
     int pin);
 
 /*
  * pcib_maxslots method: always reports 31, independent of dev.
  */
 int
 legacy_pcib_maxslots(device_t dev)
 {
 
 	return (31);
 }
 
 /*
  * Read a configuration space register.
  *
  * The bus/slot/func/reg/bytes arguments are handed unchanged to
  * pci_cfgregread(); dev is not used.
  */
 u_int32_t
 legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
     u_int reg, int bytes)
 {
 	u_int32_t value;
 
 	value = pci_cfgregread(bus, slot, func, reg, bytes);
 	return (value);
 }
 
 /*
  * Write a configuration space register.
  *
  * The arguments are handed unchanged to pci_cfgregwrite(); dev is not
  * used.
  */
 void
 legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
     u_int reg, u_int32_t data, int bytes)
 {
 
 	pci_cfgregwrite(bus, slot, func, reg, data, bytes);
 }
 
 /*
  * Pass MSI requests up to the nexus: the allocation is re-issued on the
  * parent of this bridge's parent.
  */
 static int
 legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
     int *irqs)
 {
 	device_t grandparent;
 
 	grandparent = device_get_parent(device_get_parent(pcib));
 	return (PCIB_ALLOC_MSI(grandparent, dev, count, maxcount, irqs));
 }
 
 /*
  * Forward an MSI-X allocation to the parent of this bridge's parent.
  */
 static int
 legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
 {
 	device_t grandparent;
 
 	grandparent = device_get_parent(device_get_parent(pcib));
 	return (PCIB_ALLOC_MSIX(grandparent, dev, irq));
 }
 
 /*
  * Forward an MSI mapping request (irq -> addr/data) to the parent of this
  * bridge's parent.
  */
 static int
 legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
     uint32_t *data)
 {
 	device_t grandparent;
 
 	grandparent = device_get_parent(device_get_parent(pcib));
 	return (PCIB_MAP_MSI(grandparent, dev, irq, addr, data));
 }
 
 /*
  * Map a combined device/vendor ID (and, for unknown IDs, the class and
  * subclass) to a host bridge description.
  *
  * Returns a description string when the device is treated as a host
  * bridge and NULL otherwise.  *busnum is first set to 0 and is then
  * overridden for chipsets whose bus number comes from the bus argument,
  * from a chipset-specific config register, or from the values cached in
  * pxb[] for the 450NX.
  *
  * IDs not listed fall back to the generic "Host to PCI bridge" string
  * when class/subclass are PCIC_BRIDGE/PCIS_BRIDGE_HOST.
  */
 static const char *
 legacy_pcib_is_host_bridge(int bus, int slot, int func,
 			  uint32_t id, uint8_t class, uint8_t subclass,
 			  uint8_t *busnum)
 {
 	const char *s = NULL;
 	/*
 	 * Filled in when the 450NX MIOC (0x84ca8086) is seen and consumed by
 	 * the later PXB (0x84cb8086) cases; static so it survives between
 	 * calls.
 	 */
 	static uint8_t pxb[4];	/* hack for 450nx */
 
 	*busnum = 0;
 
 	switch (id) {
 	case 0x12258086:
 		s = "Intel 824?? host to PCI bridge";
 		/* XXX This is a guess */
 		/* *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x41, 1); */
 		*busnum = bus;
 		break;
 	case 0x71208086:
 		s = "Intel 82810 (i810 GMCH) Host To Hub bridge";
 		break;
 	case 0x71228086:
 		s = "Intel 82810-DC100 (i810-DC100 GMCH) Host To Hub bridge";
 		break;
 	case 0x71248086:
 		s = "Intel 82810E (i810E GMCH) Host To Hub bridge";
 		break;
 	case 0x11308086:
 		s = "Intel 82815 (i815 GMCH) Host To Hub bridge";
 		break;
 	case 0x71808086:
 		s = "Intel 82443LX (440 LX) host to PCI bridge";
 		break;
 	case 0x71908086:
 		s = "Intel 82443BX (440 BX) host to PCI bridge";
 		break;
 	case 0x71928086:
 		s = "Intel 82443BX host to PCI bridge (AGP disabled)";
 		break;
 	case 0x71948086:
 		s = "Intel 82443MX host to PCI bridge";
 		break;
 	case 0x71a08086:
 		s = "Intel 82443GX host to PCI bridge";
 		break;
 	case 0x71a18086:
 		s = "Intel 82443GX host to AGP bridge";
 		break;
 	case 0x71a28086:
 		s = "Intel 82443GX host to PCI bridge (AGP disabled)";
 		break;
 	case 0x84c48086:
 		s = "Intel 82454KX/GX (Orion) host to PCI bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x4a, 1);
 		break;
 	case 0x84ca8086:
 		/*
 		 * For the 450nx chipset, there is a whole bundle of
 		 * things pretending to be host bridges. The MIOC will
 		 * be seen first and isn't really a pci bridge (the
 		 * actual busses are attached to the PXB's). We need to
 		 * read the registers of the MIOC to figure out the
 		 * bus numbers for the PXB channels.
 		 *
 		 * Since the MIOC doesn't have a pci bus attached, we
 		 * pretend it wasn't there.
 		 */
 		pxb[0] = legacy_pcib_read_config(0, bus, slot, func,
 						0xd0, 1); /* BUSNO[0] */
 		pxb[1] = legacy_pcib_read_config(0, bus, slot, func,
 						0xd1, 1) + 1;	/* SUBA[0]+1 */
 		pxb[2] = legacy_pcib_read_config(0, bus, slot, func,
 						0xd3, 1); /* BUSNO[1] */
 		pxb[3] = legacy_pcib_read_config(0, bus, slot, func,
 						0xd4, 1) + 1;	/* SUBA[1]+1 */
 		return NULL;
 	case 0x84cb8086:
 		/* Slots other than 0x12-0x15 leave s NULL, so NULL is returned. */
 		switch (slot) {
 		case 0x12:
 			s = "Intel 82454NX PXB#0, Bus#A";
 			*busnum = pxb[0];
 			break;
 		case 0x13:
 			s = "Intel 82454NX PXB#0, Bus#B";
 			*busnum = pxb[1];
 			break;
 		case 0x14:
 			s = "Intel 82454NX PXB#1, Bus#A";
 			*busnum = pxb[2];
 			break;
 		case 0x15:
 			s = "Intel 82454NX PXB#1, Bus#B";
 			*busnum = pxb[3];
 			break;
 		}
 		break;
+	case 0x1A308086:
+		s = "Intel 82845 Host to PCI bridge";
+		break;
 
 		/* AMD -- vendor 0x1022 */
 	case 0x30001022:
 		s = "AMD Elan SC520 host to PCI bridge";
 #ifdef CPU_ELAN
 		init_AMD_Elan_sc520();
 #else
 		printf(
 "*** WARNING: missing CPU_ELAN -- timekeeping may be wrong\n");
 #endif
 		break;
 	case 0x70061022:
 		s = "AMD-751 host to PCI bridge";
 		break;
 	case 0x700e1022:
 		s = "AMD-761 host to PCI bridge";
 		break;
 
 		/* SiS -- vendor 0x1039 */
 	case 0x04961039:
 		s = "SiS 85c496";
 		break;
 	case 0x04061039:
 		s = "SiS 85c501";
 		break;
 	case 0x06011039:
 		s = "SiS 85c601";
 		break;
 	case 0x55911039:
 		s = "SiS 5591 host to PCI bridge";
 		break;
 	case 0x00011039:
 		s = "SiS 5591 host to AGP bridge";
 		break;
 
 		/* VLSI -- vendor 0x1004 */
 	case 0x00051004:
 		s = "VLSI 82C592 Host to PCI bridge";
 		break;
 
 		/* XXX Here is MVP3, I got the datasheet but NO M/B to test it  */
 		/* totally. Please let me know if anything wrong.            -F */
 		/* XXX need info on the MVP3 -- any takers? */
 	case 0x05981106:
 		s = "VIA 82C598MVP (Apollo MVP3) host bridge";
 		break;
 
 		/* AcerLabs -- vendor 0x10b9 */
 		/* Funny : The datasheet told me vendor id is "10b8",sub-vendor */
 		/* id is '10b9" but the register always shows "10b9". -Foxfair  */
 	case 0x154110b9:
 		s = "AcerLabs M1541 (Aladdin-V) PCI host bridge";
 		break;
 
 		/* OPTi -- vendor 0x1045 */
 	case 0xc7011045:
 		s = "OPTi 82C700 host to PCI bridge";
 		break;
 	case 0xc8221045:
 		s = "OPTi 82C822 host to PCI Bridge";
 		break;
 
 		/* ServerWorks -- vendor 0x1166 */
 		/* All ServerWorks cases below read the bus number from reg 0x44. */
 	case 0x00051166:
 		s = "ServerWorks NB6536 2.0HE host to PCI bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
 		break;
 
 	case 0x00061166:
 		/* FALLTHROUGH */
 	case 0x00081166:
 		/* FALLTHROUGH */
 	case 0x02011166:
 		/* FALLTHROUGH */
 	case 0x010f1014: /* IBM re-badged ServerWorks chipset */
 		s = "ServerWorks host to PCI bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
 		break;
 
 	case 0x00091166:
 		s = "ServerWorks NB6635 3.0LE host to PCI bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
 		break;
 
 	case 0x00101166:
 		s = "ServerWorks CIOB30 host to PCI bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
 		break;
 
 	case 0x00111166:
 		/* FALLTHROUGH */
 	case 0x03021014: /* IBM re-badged ServerWorks chipset */
 		s = "ServerWorks CMIC-HE host to PCI-X bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
 		break;
 
 		/* XXX unknown chipset, but working */
 	case 0x00171166:
 		/* FALLTHROUGH */
 	case 0x01011166:
+	case 0x01101166:
+	case 0x02251166:
 		s = "ServerWorks host to PCI bridge(unknown chipset)";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
 		break;
 
 		/* Compaq/HP -- vendor 0x0e11 */
 	case 0x60100e11:
 		s = "Compaq/HP Model 6010 HotPlug PCI Bridge";
 		*busnum = legacy_pcib_read_config(0, bus, slot, func, 0xc8, 1);
 		break;
 
 		/* Integrated Micro Solutions -- vendor 0x10e0 */
 	case 0x884910e0:
 		s = "Integrated Micro Solutions VL Bridge";
 		break;
 
 	default:
 		/* Unknown ID: accept it only if it advertises the host bridge class. */
 		if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
 			s = "Host to PCI bridge";
 		break;
 	}
 
 	return s;
 }
 
 /*
  * Scan the first pci bus for host-pci bridges and add pcib instances
  * to the nexus for each bridge.
  *
  * Nothing is done when pci_cfgregopen() returns 0 or when a "pci" device
  * with unit 0 already exists.  Bus 1 is scanned as well when an
  * 0x12258086 bridge was added during the bus 0 pass.  If no bridge is
  * added at all, a pcib0 child for bus 0 is added anyway.
  */
 static void
 legacy_pcib_identify(driver_t *driver, device_t parent)
 {
 	int bus, slot, func;
 	u_int8_t  hdrtype;
 	int found = 0;
 	int pcifunchigh;
 	int found824xx = 0;
 	int found_orion = 0;
 	device_t child;
 	devclass_t pci_devclass;
 
 	if (pci_cfgregopen() == 0)
 		return;
 	/*
 	 * Check to see if we haven't already had a PCI bus added
 	 * via some other means.  If we have, bail since otherwise
 	 * we're going to end up duplicating it.
 	 */
 	if ((pci_devclass = devclass_find("pci")) &&
 		devclass_get_device(pci_devclass, 0))
 		return;
 
 
 	bus = 0;
  retry:
 	for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
 		func = 0;
 		hdrtype = legacy_pcib_read_config(0, bus, slot, func,
 						 PCIR_HDRTYPE, 1);
 		/*
 		 * When enumerating bus devices, the standard says that
 		 * one should check the header type and ignore the slots whose
 		 * header types that the software doesn't know about.  We use
 		 * this to filter out devices.
 		 */
 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
 			continue;
 		/*
 		 * Scan all functions of a multi-function device.  Once an
 		 * Orion bridge (0x84c48086) has been added, a header type of
 		 * 0xff is no longer treated as multi-function.
 		 */
 		if ((hdrtype & PCIM_MFDEV) &&
 		    (!found_orion || hdrtype != 0xff))
 			pcifunchigh = PCI_FUNCMAX;
 		else
 			pcifunchigh = 0;
 		for (func = 0; func <= pcifunchigh; func++) {
 			/*
 			 * Read the IDs and class from the device.
 			 */
 			u_int32_t id;
 			u_int8_t class, subclass, busnum;
 			const char *s;
 			device_t *devs;
 			int ndevs, i;
 
 			id = legacy_pcib_read_config(0, bus, slot, func,
 						    PCIR_DEVVENDOR, 4);
 			/* An all-ones ID is skipped (presumably nothing is present). */
 			if (id == -1)
 				continue;
 			class = legacy_pcib_read_config(0, bus, slot, func,
 						       PCIR_CLASS, 1);
 			subclass = legacy_pcib_read_config(0, bus, slot, func,
 							  PCIR_SUBCLASS, 1);
 
 			s = legacy_pcib_is_host_bridge(bus, slot, func,
 						      id, class, subclass,
 						      &busnum);
 			if (s == NULL)
 				continue;
 
 			/*
 			 * Check to see if the physical bus has already
 			 * been seen.  Eg: hybrid 32 and 64 bit host
 			 * bridges to the same logical bus.
 			 */
 			if (device_get_children(parent, &devs, &ndevs) == 0) {
 				/* s is reset to NULL as the "duplicate" marker. */
 				for (i = 0; s != NULL && i < ndevs; i++) {
 					if (strcmp(device_get_name(devs[i]),
 					    "pcib") != 0)
 						continue;
 					if (legacy_get_pcibus(devs[i]) == busnum)
 						s = NULL;
 				}
 				free(devs, M_TEMP);
 			}
 
 			if (s == NULL)
 				continue;
 			/*
 			 * Add at priority 100 to make sure we
 			 * go after any motherboard resources
 			 */
 			child = BUS_ADD_CHILD(parent, 100,
 					      "pcib", busnum);
 			device_set_desc(child, s);
 			legacy_set_pcibus(child, busnum);
 
 			found = 1;
 			if (id == 0x12258086)
 				found824xx = 1;
 			if (id == 0x84c48086)
 				found_orion = 1;
 		}
 	}
 	/* One extra pass over bus 1 when an 0x12258086 bridge was found. */
 	if (found824xx && bus == 0) {
 		bus++;
 		goto retry;
 	}
 
 	/*
 	 * Make sure we add at least one bridge since some old
 	 * hardware doesn't actually have a host-pci bridge device.
 	 * Note that pci_cfgregopen() thinks we have PCI devices..
 	 */
 	if (!found) {
 		if (bootverbose)
 			printf(
 	"legacy_pcib_identify: no bridge found, adding pcib0 anyway\n");
 		child = BUS_ADD_CHILD(parent, 100, "pcib", 0);
 		legacy_set_pcibus(child, 0);
 	}
 }
 
 /*
  * Probe method: ENXIO when pci_cfgregopen() reports 0, otherwise the
  * probe value -100.
  */
 static int
 legacy_pcib_probe(device_t dev)
 {
 
 	return (pci_cfgregopen() == 0 ? ENXIO : -100);
 }
 
 /*
  * Attach method: add the "pci" child bus for this bridge.
  *
  * Before that, if pci_pir_probe() succeeds for the bridge's bus, a "pir"
  * child is added to the bridge's parent and probed/attached, as a PCI
  * BIOS interrupt routing table is our method of routing interrupts when
  * one exists.  Returns the result of bus_generic_attach().
  */
 static int
 legacy_pcib_attach(device_t dev)
 {
 	device_t pir_dev;
 	int busno;
 
 	busno = pcib_get_bus(dev);
 	if (pci_pir_probe(busno, 0)) {
 		pir_dev = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
 		if (pir_dev != NULL)
 			device_probe_and_attach(pir_dev);
 	}
 
 	device_add_child(dev, "pci", busno);
 	return (bus_generic_attach(dev));
 }
 
 /*
  * Read an instance variable of the bridge.
  *
  * PCIB_IVAR_DOMAIN always yields 0 and PCIB_IVAR_BUS yields the value of
  * legacy_get_pcibus(dev); both return 0.  Any other selector returns
  * ENOENT and leaves *result untouched.
  */
 int
 legacy_pcib_read_ivar(device_t dev, device_t child, int which,
     uintptr_t *result)
 {
 
 	if (which == PCIB_IVAR_DOMAIN) {
 		*result = 0;
 		return (0);
 	}
 	if (which == PCIB_IVAR_BUS) {
 		*result = legacy_get_pcibus(dev);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Write an instance variable of the bridge.
  *
  * PCIB_IVAR_DOMAIN cannot be written (EINVAL).  PCIB_IVAR_BUS stores
  * value via legacy_set_pcibus() and returns 0.  Any other selector
  * returns ENOENT.
  */
 int
 legacy_pcib_write_ivar(device_t dev, device_t child, int which,
     uintptr_t value)
 {
 
 	if (which == PCIB_IVAR_DOMAIN)
 		return (EINVAL);
 	if (which == PCIB_IVAR_BUS) {
 		legacy_set_pcibus(dev, value);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * hw.pci.host_mem_start: start address substituted by
  * legacy_pcib_alloc_resource() for wildcard memory requests.  Defaults
  * to 0x80000000; exported as a sysctl flagged CTLFLAG_RDTUN and fetched
  * from the tunable of the same name.
  */
 SYSCTL_DECL(_hw_pci);
 
 static unsigned long legacy_host_mem_start = 0x80000000;
 TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
 SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
     &legacy_host_mem_start, 0x80000000,
     "Limit the host bridge memory to being above this address.  Must be\n\
 set at boot via a tunable.");
 
 /*
  * Allocate a resource for a child, substituting a default start address
  * for wildcard requests (start == 0 and end == ~0):
  *
  *  - Memory requests start at legacy_host_mem_start.  With no memory
  *    preference given, this picks the upper window most BIOSes use for
  *    their memory; other bridges before us typically get in the way to
  *    assert their own preferences.  Hardcoding like this sucks and a
  *    more MD/MI way needs to be found.  It is typically only used on
  *    older laptops that don't have pci busses behind a pci bridge, so
  *    assuming > 32MB is likely OK.  Because this can cause problems for
  *    other chipsets, the value is tunable via hw.pci.host_mem_start.
  *
  *  - I/O port requests start at 0x1000.
  *
  * The (possibly adjusted) request is passed to
  * bus_generic_alloc_resource() and its result is returned.
  */
 struct resource *
 legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
     u_long start, u_long end, u_long count, u_int flags)
 {
 
 	if (start == 0UL && end == ~0UL && type == SYS_RES_MEMORY)
 		start = legacy_host_mem_start;
 	if (start == 0UL && end == ~0UL && type == SYS_RES_IOPORT)
 		start = 0x1000;
 
 	return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
 	    count, flags));
 }
 
 /*
  * Method table for the legacy host-PCI bridge driver.  Methods without a
  * legacy_pcib_* implementation in this file use the bus_generic_* or
  * pcib_* routines; interrupt routing goes through
  * pcibios_pcib_route_interrupt().  The table ends with a { 0, 0 } entry.
  */
 static device_method_t legacy_pcib_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	legacy_pcib_identify),
 	DEVMETHOD(device_probe,		legacy_pcib_probe),
 	DEVMETHOD(device_attach,	legacy_pcib_attach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	bus_generic_resume),
 
 	/* Bus interface */
 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
 	DEVMETHOD(bus_read_ivar,	legacy_pcib_read_ivar),
 	DEVMETHOD(bus_write_ivar,	legacy_pcib_write_ivar),
 	DEVMETHOD(bus_alloc_resource,	legacy_pcib_alloc_resource),
 	DEVMETHOD(bus_release_resource,	bus_generic_release_resource),
 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
 	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
 	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
 
 	/* pcib interface */
 	DEVMETHOD(pcib_maxslots,	legacy_pcib_maxslots),
 	DEVMETHOD(pcib_read_config,	legacy_pcib_read_config),
 	DEVMETHOD(pcib_write_config,	legacy_pcib_write_config),
 	DEVMETHOD(pcib_route_interrupt,	pcibios_pcib_route_interrupt),
 	DEVMETHOD(pcib_alloc_msi,	legacy_pcib_alloc_msi),
 	DEVMETHOD(pcib_release_msi,	pcib_release_msi),
 	DEVMETHOD(pcib_alloc_msix,	legacy_pcib_alloc_msix),
 	DEVMETHOD(pcib_release_msix,	pcib_release_msix),
 	DEVMETHOD(pcib_map_msi,		legacy_pcib_map_msi),
 
 	{ 0, 0 }
 };
 
 /*
  * Register the "pcib" driver class built from legacy_pcib_methods on the
  * "legacy" bus, using hostb_devclass as its devclass.
  * NOTE(review): the trailing 1 in DEFINE_CLASS_0 is presumably the softc
  * size -- confirm against the macro definition.
  */
 static devclass_t hostb_devclass;
 
 DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
 DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
 
 
 /*
  * Install placeholder to claim the resources owned by the
  * PCI bus interface.  This could be used to extract the
  * config space registers in the extreme case where the PnP
  * ID is available and the PCI BIOS isn't, but for now we just
  * eat the PnP ID and do nothing else.
  *
  * XXX we should silence this probe, as it will generally confuse
  * people.
  *
  * The table below is what pcibus_pnp_probe() hands to ISA_PNP_PROBE();
  * it is terminated by an all-zero entry.
  */
 static struct isa_pnp_id pcibus_pnp_ids[] = {
 	{ 0x030ad041 /* PNP0A03 */, "PCI Bus" },
 	{ 0x080ad041 /* PNP0A08 */, "PCIe Bus" },
 	{ 0 }
 };
 
 /*
  * Probe method for the PnP placeholder: match dev against pcibus_pnp_ids
  * via the parent's ISA_PNP_PROBE().  When the result is <= 0 the device
  * is marked quiet.  The ISA_PNP_PROBE() result is returned unchanged.
  */
 static int
 pcibus_pnp_probe(device_t dev)
 {
 	int error;
 
 	error = ISA_PNP_PROBE(device_get_parent(dev), dev, pcibus_pnp_ids);
 	if (error <= 0)
 		device_quiet(dev);
 	return (error);
 }
 
 /*
  * Attach method for the PnP placeholder: nothing to do, always succeeds.
  */
 static int
 pcibus_pnp_attach(device_t dev)
 {
 
 	return (0);
 }
 
 /*
  * Method table and registration for the PnP placeholder driver.  Only
  * probe and attach are implemented here; detach, shutdown, suspend and
  * resume use the bus_generic_* routines.  The driver is registered as
  * "pcibus_pnp" on the "isa" bus.
  */
 static device_method_t pcibus_pnp_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		pcibus_pnp_probe),
 	DEVMETHOD(device_attach,	pcibus_pnp_attach),
 	DEVMETHOD(device_detach,	bus_generic_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	bus_generic_resume),
 	{ 0, 0 }
 };
 
 static devclass_t pcibus_pnp_devclass;
 
 DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
 DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
 
 
 /*
  * Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
  * that appear in the PCIBIOS Interrupt Routing Table to use the routing
  * table for interrupt routing when possible.
  */
 static int	pcibios_pcib_probe(device_t bus);
 
 /*
  * Only probe and interrupt routing are overridden here; the class is
  * defined with DEFINE_CLASS_1 on top of pcib_driver.
  * NOTE(review): the remaining methods are presumably inherited from
  * pcib_driver -- confirm against DEFINE_CLASS_1.
  */
 static device_method_t pcibios_pcib_pci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		pcibios_pcib_probe),
 
 	/* pcib interface */
 	DEVMETHOD(pcib_route_interrupt,	pcibios_pcib_route_interrupt),
 
 	{0, 0}
 };
 
 static devclass_t pcib_devclass;
 
 /* Registered as "pcibios_pcib" on the "pci" bus with a struct pcib_softc. */
 DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
     sizeof(struct pcib_softc), pcib_driver);
 DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
 
 /*
  * Probe a PCI-PCI bridge for use with the PCIBIOS routing table.
  *
  * The device must have class PCIC_BRIDGE and subclass PCIS_BRIDGE_PCI,
  * a non-zero PCIR_SECBUS_1 register, and pci_pir_probe() must accept
  * that bus number.  On a match the description is set and -2000 is
  * returned; otherwise ENXIO.
  */
 static int
 pcibios_pcib_probe(device_t dev)
 {
 	int secbus;
 
 	if (!(pci_get_class(dev) == PCIC_BRIDGE &&
 	    pci_get_subclass(dev) == PCIS_BRIDGE_PCI))
 		return (ENXIO);
 
 	secbus = pci_read_config(dev, PCIR_SECBUS_1, 1);
 	if (secbus == 0 || !pci_pir_probe(secbus, 1))
 		return (ENXIO);
 
 	device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
 	return (-2000);
 }
 
 /*
  * Route an interrupt pin of dev by handing its bus, slot and function
  * numbers to pci_pir_route_interrupt().  The pcib argument is not used.
  */
 static int
 pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
 {
 	int busno, slotno, funcno;
 
 	busno = pci_get_bus(dev);
 	slotno = pci_get_slot(dev);
 	funcno = pci_get_function(dev);
 	return (pci_pir_route_interrupt(busno, slotno, funcno, pin));
 }
Index: projects/binutils-2.17/sys/i386/xen/pmap.c
===================================================================
--- projects/binutils-2.17/sys/i386/xen/pmap.c	(revision 215829)
+++ projects/binutils-2.17/sys/i386/xen/pmap.c	(revision 215830)
@@ -1,4317 +1,4329 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_cpu.h"
 #include "opt_pmap.h"
 #include "opt_msgbuf.h"
 #include "opt_smp.h"
 #include "opt_xbox.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sf_buf.h>
 #include <sys/sx.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 
 #ifdef XBOX
 #include <machine/xbox.h>
 #endif
 
 #include <xen/interface/xen.h>
 #include <xen/hypervisor.h>
 #include <machine/xen/hypercall.h>
 #include <machine/xen/xenvar.h>
 #include <machine/xen/xenfunc.h>
 
 /* CPU_ENABLE_SSE is defined unless disabled, and only for I686_CPU kernels. */
 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
 #define CPU_ENABLE_SSE
 #endif
 
 /* Default for shpgperproc when the kernel config does not supply one. */
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 /*
  * DIAGNOSTIC is forced on in this file, so the !defined(DIAGNOSTIC)
  * branch below is never taken and PMAP_INLINE expands to nothing.
  */
 #define DIAGNOSTIC
 
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
 #else
 #define PMAP_INLINE	extern inline
 #endif
 #else
 #define PMAP_INLINE
 #endif
 
 /*
  * PV_STATS is forced on as well, so PV_STAT(x) executes x; without it
  * the statement would be compiled out.
  */
 #define PV_STATS
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 /*
  * pa_index() shifts a physical address right by PDRSHIFT; pa_to_pvh()
  * uses that value to index pv_table.
  */
 #define	pa_index(pa)	((pa) >> PDRSHIFT)
 #define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
 
 /*
  * Get PDEs and PTEs for user/kernel address space
  *
  * Every macro parameter is parenthesized in the expansion so that an
  * expression argument (e.g. "pdir + 1" or "pte + i") binds as a unit
  * instead of being split by the subscript or the cast.
  */
 #define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
 #define pdir_pde(m, v) ((m)[(vm_offset_t)(v) >> PDRSHIFT])
 
 /*
  * Bit tests on an entry.  The entry is read through an int pointer, as
  * before.
  */
 #define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
 #define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
 #define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
 #define pmap_pte_u(pte)		((*(int *)(pte) & PG_A) != 0)
 #define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)
 
 /*
  * Clear the PG_PROT bits and OR in v.  NOTE: pte is evaluated twice, so
  * do not pass an expression with side effects.
  */
 #define pmap_pte_set_prot(pte, v) ((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
 
 /* Storage for the kernel pmap, which is statically allocated. */
 struct pmap kernel_pmap_store;
 /* List type for pmaps, the list head itself, and a mutex for it. */
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps;
 static struct mtx allpmaps_lock;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 int pgeflag = 0;		/* PG_G or-in */
 int pseflag = 0;		/* PG_PS or-in */
 
 /* NOTE(review): nkpt is presumably the kernel page table page count -- confirm. */
 int nkpt;
 vm_offset_t kernel_vm_end;
 extern u_int32_t KERNend;
 
 #ifdef PAE
 pt_entry_t pg_nx;
 #endif
 
 static int pat_works;			/* Is page attribute table sane? */
 
 /*
  * Data for the pv entry allocation mechanism
  */
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table;	/* indexed through pa_to_pvh() */
 static int shpgperproc = PMAP_SHPGPERPROC;
 
 struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
 int pv_maxchunks;			/* How many chunks we have KVA for */
 vm_offset_t pv_vafree;			/* freelist stored in the PTE */
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  *
  * struct sysmaps bundles two PTE pointers (CMAP1/CMAP2) with two
  * addresses (CADDR1/CADDR2) and a mutex; sysmaps_pcpu[] holds one such
  * set per possible CPU (MAXCPU entries).
  * NOTE(review): CADDRn is presumably the VA mapped by CMAPn -- confirm
  * where they are initialized.
  */
 struct sysmaps {
 	struct	mtx lock;
 	pt_entry_t *CMAP1;
 	pt_entry_t *CMAP2;
 	caddr_t	CADDR1;
 	caddr_t	CADDR2;
 };
 static struct sysmaps sysmaps_pcpu[MAXCPU];
 static pt_entry_t *CMAP3;
 caddr_t ptvmmap = 0;
 static caddr_t CADDR3;
 struct msgbuf *msgbufp = 0;
 
 /*
  * Crashdump maps.
  */
 static caddr_t crashdumpmap;
 
 /*
  * PMAP1/PMAP2 and PADDR1/PADDR2 plus the debug counters describing how
  * often pmap_pte_quick changed (or did not change) PMAP1.  The counters
  * are exported read-only under the debug sysctl tree; PMAP1cpu and
  * PMAP1changedcpu exist only on SMP kernels.
  */
 static pt_entry_t *PMAP1 = 0, *PMAP2;
 static pt_entry_t *PADDR1 = 0, *PADDR2;
 #ifdef SMP
 static int PMAP1cpu;
 static int PMAP1changedcpu;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
 	   &PMAP1changedcpu, 0,
 	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
 #endif
 static int PMAP1changed;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
 	   &PMAP1changed, 0,
 	   "Number of times pmap_pte_quick changed PMAP1");
 static int PMAP1unchanged;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
 	   &PMAP1unchanged, 0,
 	   "Number of times pmap_pte_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
 /* vm.pmap sysctl node and the read-only values exported beneath it. */
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 static int pg_ps_enabled;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
     "Are large page mappings enabled?");
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
 	"Max number of PV entries");
 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
 	"Page share factor per proc");
 SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
     "2/4MB page mapping counters");
 
 static u_long pmap_pde_mappings;
 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_pde_mappings, 0, "2/4MB page mappings");
 
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 
 static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
     vm_page_t *free);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
     vm_page_t *free);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
 					vm_offset_t va);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 
 static __inline void pagezero(void *page);
 
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
 /*
  * If you get an error here, then you set KVA_PAGES wrong! See the
  * description of KVA_PAGES in sys/i386/include/pmap.h. It must be a
  * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
  */
 CTASSERT(KERNBASE % (1 << 24) == 0);
 
 
 
 /*
  * Issue xen_queue_pt_update() for the page directory entry of pmap at
  * index ptepindex.  The target is the vtomach() translation of the
  * entry's address; type selects the value that is written:
  *
  *	SH_PD_SET_VA		val passed through xpmap_ptom()
  *	SH_PD_SET_VA_MA		val as given
  *	SH_PD_SET_VA_CLEAR	0
  *
  * Any other type does nothing.
  */
 void
 pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
 {
 	vm_paddr_t pde_ma;
 
 	pde_ma = vtomach(&pmap->pm_pdir[ptepindex]);
 
 	switch (type) {
 	case SH_PD_SET_VA:
 		xen_queue_pt_update(pde_ma, xpmap_ptom(val));
 		break;
 	case SH_PD_SET_VA_MA:
 		xen_queue_pt_update(pde_ma, val);
 		break;
 	case SH_PD_SET_VA_CLEAR:
 		xen_queue_pt_update(pde_ma, 0);
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Move the kernel virtual free pointer to the next
  * 4MB.  This is used to help improve performance
  * by using a large (4MB) page for much of the kernel
  * (.text, .data, .bss)
  */
 static vm_offset_t
 pmap_kmem_choose(vm_offset_t addr)
 {
 
 #ifndef DISABLE_PSE
 	/* With PSE, round the address up to the next PDRMASK boundary. */
 	if ((cpu_feature & CPUID_PSE) != 0)
 		return ((addr + PDRMASK) & ~PDRMASK);
 #endif
 	/* Otherwise hand the address back untouched. */
 	return (addr);
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *
  *	On the i386 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  */
 void
 pmap_bootstrap(vm_paddr_t firstaddr)
 {
 	vm_offset_t va;
 	pt_entry_t *pte, *unused;
 	struct sysmaps *sysmaps;
 	int i;
 
 	/*
 	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
 	 * large. It should instead be correctly calculated in locore.s and
 	 * not based on 'first' (which is a physical address, not a virtual
 	 * address, for the start of unused physical memory). The kernel
 	 * page tables are NOT double mapped and thus should not be included
 	 * in this calculation.
 	 */
 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
 	virtual_avail = pmap_kmem_choose(virtual_avail);
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
 #ifdef PAE
 	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
 #endif
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	LIST_INIT(&allpmaps);
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 	/* Fall back to the compile-time NKPT if nkpt was not set earlier. */
 	if (nkpt == 0)
 		nkpt = NKPT;
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 *
 	 * SYSMAP(c, p, v, n) stores the current 'va' (cast to type c) in v
 	 * and the current 'pte' pointer in p, then advances both by n pages.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
 
 	va = virtual_avail;
 	pte = vtopte(va);
 
 	/*
 	 * CMAP1/CMAP2 are used for zeroing and copying pages.
 	 * CMAP3 is used for the idle process page zeroing.
 	 */
 	for (i = 0; i < MAXCPU; i++) {
 		sysmaps = &sysmaps_pcpu[i];
 		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
 		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
 		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
 		/* Start with both per-CPU windows unmapped. */
 		PT_SET_MA(sysmaps->CADDR1, 0);
 		PT_SET_MA(sysmaps->CADDR2, 0);
 	}
 	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
 	PT_SET_MA(CADDR3, 0);
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
 
 	/*
 	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
 	 */
 	SYSMAP(caddr_t, unused, ptvmmap, 1)
 
 	/*
 	 * msgbufp is used to map the system message buffer.
 	 */
 	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
 
 	/*
 	 * ptemap is used for pmap_pte_quick
 	 */
 	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
 	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
 
 	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
 
 	/* Everything below 'va' is now reserved; publish the new start. */
 	virtual_avail = va;
 
 	/*
 	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
 	 * physical memory region that is used by the ACPI wakeup code.  This
 	 * mapping must not have PG_G set. 
 	 */
 #ifndef XEN
 	/*
 	 * leave here deliberately to show that this is not supported
 	 */
 #ifdef XBOX
 	/* FIXME: This is gross, but needed for the XBOX. Since we are in such
 	 * an early stage, we cannot yet neatly map video memory ... :-(
 	 * Better fixes are very welcome! */
 	if (!arch_i386_is_xbox)
 #endif
 	for (i = 1; i < NKPT; i++)
 		PTD[i] = 0;
 
 	/* Initialize the PAT MSR if present. */
 	pmap_init_pat();
 
 	/* Turn on PG_G on kernel page(s) */
 	pmap_set_pg();
 #endif
 }
 
 /*
  * Setup the PAT MSR.
  */
 void
 pmap_init_pat(void)
 {
 	uint64_t msr;
 	int upper_half_usable;
 
 	/* Nothing to program when the CPU does not implement PAT. */
 	if ((cpu_feature & CPUID_PAT) == 0)
 		return;
 
 	/*
 	 * Non-Intel CPUs, and Intel family 6 from model 0xe onwards, are
 	 * treated as having a fully usable PAT.
 	 */
 	upper_half_usable = (cpu_vendor_id != CPU_VENDOR_INTEL ||
 	    (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe));
 
 	msr = rdmsr(MSR_PAT);
 	if (upper_half_usable) {
 		/*
 		 * Indices 0-3 keep their defaults (WB, WT, UC-, UC) and so
 		 * do 6 and 7 (UC-, UC); entries 4 and 5 become WP and WC.
 		 */
 		msr &= ~(PAT_MASK(4) | PAT_MASK(5));
 		msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
 		    PAT_VALUE(5, PAT_WRITE_COMBINING);
 		pat_works = 1;
 	} else {
 		/*
 		 * Intel errata make the upper four PAT entries unsafe here,
 		 * so only entry 2 is changed: it becomes WC instead of UC-.
 		 *
 		 *   Intel Pentium III Processor Specification Update
 		 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
 		 * or Mode C Paging)
 		 *
 		 *   Intel Pentium IV  Processor Specification Update
 		 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
 		 */
 		msr &= ~PAT_MASK(2);
 		msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
 		pat_works = 0;
 	}
 	wrmsr(MSR_PAT, msr);
 }
 
 /*
  * Initialize a vm_page's machine-dependent fields.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 
 	/* New pages default to write-back caching and have no pv entries. */
 	m->md.pat_mode = PAT_WRITE_BACK;
 	TAILQ_INIT(&m->md.pv_list);
 }
 
 /*
  * ABuse the pte nodes for unmapped kva to thread a kva freelist through.
  * Requirements:
  *  - Must deal with pages in order to ensure that none of the PG_* bits
  *    are ever set, PG_V in particular.
  *  - Assumes we can write to ptes without pte_store() atomic ops, even
  *    on PAE systems.  This should be ok.
  *  - Assumes nothing will ever test these addresses for 0 to indicate
  *    no mapping instead of correctly checking PG_V.
  *  - Assumes a vm_offset_t will fit in a pte (true for i386).
  * Because PG_V is never set, there can be no mappings to invalidate.
  */
 /* Number of free kva entries currently held on the ptelist stack. */
 static int ptelist_count = 0;
 
 /*
  * Pop one virtual address off the stack whose base address is stored
  * in '*head'.  Returns 0 (after logging) when the stack is empty.
  */
 static vm_offset_t
 pmap_ptelist_alloc(vm_offset_t *head)
 {
 	vm_offset_t *stack;
 
 	if (ptelist_count == 0) {
 		printf("out of memory!!!!!!\n");
 		return (0);	/* Out of memory */
 	}
 	stack = (vm_offset_t *)*head;
 	return (stack[--ptelist_count]);
 }
 
 /*
  * Push 'va' onto the stack whose base address is stored in '*head'.
  */
 static void
 pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
 {
 	vm_offset_t *stack;
 
 	stack = (vm_offset_t *)*head;
 	stack[ptelist_count] = va;
 	ptelist_count++;
 }
 
 /*
  * Set up the kva free list for the 'npages' pages starting at 'base'.
  *
  * The first 'nstackpages' pages of the range are backed with wired
  * memory and used as the stack itself (one vm_offset_t slot per page
  * of the range); the remaining pages are pushed onto that stack as
  * free entries.  '*head' is set to the base of the stack.
  */
 static void
 pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
 {
 	int i, nstackpages;
 	vm_offset_t va;
 	vm_page_t m;
 	
 	/* Pages needed to hold one vm_offset_t per page, rounded up. */
 	nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/ (PAGE_SIZE/sizeof(vm_offset_t));
 	for (i = 0; i < nstackpages; i++) {
 		va = (vm_offset_t)base + i * PAGE_SIZE;
 		m = vm_page_alloc(NULL, i,
 		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 		    VM_ALLOC_ZERO);
 		/*
 		 * vm_page_alloc() may fail; passing a NULL page on to
 		 * pmap_qenter() would dereference it, so stop with a
 		 * meaningful message instead.
 		 */
 		if (m == NULL)
 			panic("pmap_ptelist_init: no memory for ptelist stack");
 		pmap_qenter(va, &m, 1);
 	}
 
 	*head = (vm_offset_t)base;
 	/* Everything past the stack pages starts out free. */
 	for (i = npages - 1; i >= nstackpages; i--) {
 		va = (vm_offset_t)base + i * PAGE_SIZE;
 		pmap_ptelist_free(head, va);
 	}
 }
 
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  */
 void
 pmap_init(void)
 {
 	vm_page_t mpte;
 	vm_size_t s;
 	int i, pv_npg;
 
 	/*
 	 * Initialize the vm page array entries for the kernel pmap's
 	 * page table pages.
 	 */ 
 	for (i = 0; i < nkpt; i++) {
 		/* PTD holds machine addresses; xpmap_mtop() converts them. */
 		mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
 		KASSERT(mpte >= vm_page_array &&
 		    mpte < &vm_page_array[vm_page_array_size],
 		    ("pmap_init: page table page is out of range"));
 		mpte->pindex = i + KPTDI;
 		mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
 	}
 
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 	/* An explicit vm.pmap.pv_entries tunable overrides the estimate. */
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	/* High water mark is 90% of the maximum. */
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
 
 	/*
 	 * Calculate the size of the pv head table for superpages.
 	 * The empty loop walks phys_avail[] two entries at a time until it
 	 * finds a zero second element; phys_avail[(i - 2) + 1] is then the
 	 * second element of the last non-terminating pair.
 	 */
 	for (i = 0; phys_avail[i + 1]; i += 2);
 	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
 
 	/*
 	 * Allocate memory for the pv head table for superpages.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	/* NOTE(review): the kmem_alloc() result is used without a check. */
 	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 
 	/* Reserve one page of kva per pv chunk and seed the kva free list. */
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
 	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
 	    PAGE_SIZE * pv_maxchunks);
 	if (pv_chunkbase == NULL)
 		panic("pmap_init: not enough kvm for pv chunks");
 	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
 }
 
 
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
 
 /*
  * Determine the appropriate bits to set in a PTE or PDE for a specified
  * caching mode.
  */
 int
 pmap_cache_bits(int mode, boolean_t is_pde)
 {
 	int pat_flag, pat_index;
 
 	/* The PAT bit lives in a different position in PTEs and PDEs. */
 	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
 
 	/* Without PAT, fold the extended modes into plain uncacheable. */
 	if ((cpu_feature & CPUID_PAT) == 0 &&
 	    (mode == PAT_UNCACHED || mode == PAT_WRITE_COMBINING ||
 	    mode == PAT_WRITE_PROTECTED))
 		mode = PAT_UNCACHEABLE;
 
 	/*
 	 * Translate the caching mode to a PAT index.  Three modes use a
 	 * different index depending on whether pat_works is set.
 	 */
 	switch (mode) {
 	case PAT_WRITE_BACK:
 		pat_index = 0;
 		break;
 	case PAT_WRITE_THROUGH:
 		pat_index = 1;
 		break;
 	case PAT_UNCACHEABLE:
 		pat_index = 3;
 		break;
 	case PAT_UNCACHED:
 		pat_index = pat_works ? 2 : 3;
 		break;
 	case PAT_WRITE_COMBINING:
 		pat_index = pat_works ? 5 : 2;
 		break;
 	case PAT_WRITE_PROTECTED:
 		pat_index = pat_works ? 4 : 3;
 		break;
 	default:
 		panic("Unknown caching mode %d\n", mode);
 	}
 
 	/* Spread the 3-bit index over the PAT, PCD and PWT bits. */
 	return (((pat_index & 0x4) ? pat_flag : 0) |
 	    ((pat_index & 0x2) ? PG_NC_PCD : 0) |
 	    ((pat_index & 0x1) ? PG_NC_PWT : 0));
 }
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
  * N.B.: Before calling any of the following TLB invalidation functions,
  * the calling processor must ensure that all stores updating a non-
  * kernel page table are globally performed.  Otherwise, another
  * processor could cache an old, pre-update entry without being
  * invalidated.  This can happen one of two ways: (1) The pmap becomes
  * active on another processor after its pm_active field is checked by
  * one of the following functions but before a store updating the page
  * table is globally performed. (2) The pmap becomes active on another
  * processor before its pm_active field is checked but due to
  * speculative loads one of the following functions still reads the
  * pmap as inactive on the other processor.
  * 
  * The kernel page table is exempt because its pm_active field is
  * immutable.  The kernel page table is always active on every
  * processor.
  */
 /*
  * SMP: invalidate the TLB entry for 'va' on every CPU that may be using
  * 'pmap'.  The thread is pinned while the CPU masks are examined.
  */
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	cpumask_t cpumask, other_cpus;
 
 	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
 	    pmap, va);
 	
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		/* Active everywhere: flush locally and on all other CPUs. */
 		invlpg(va);
 		smp_invlpg(va);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
 		/* Flush locally only if this CPU has the pmap active. */
 		if (pmap->pm_active & cpumask)
 			invlpg(va);
 		/* Shoot down only the other CPUs that have it active. */
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
 	}
 	sched_unpin();
 	PT_UPDATES_FLUSH();
 }
 
 /*
  * SMP: invalidate the TLB entries for every page in [sva, eva) on all
  * CPUs that may be using 'pmap'.
  */
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	cpumask_t cpumask, other_cpus;
 	vm_offset_t addr;
 
 	/*
 	 * The trace record names this function and labels the addresses
 	 * in the order they are passed, matching the non-SMP variant.
 	 */
 	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
 	    pmap, sva, eva);
 
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		/* Active everywhere: flush locally and on all other CPUs. */
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 		smp_invlpg_range(sva, eva);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
 		/* Flush locally only if this CPU has the pmap active. */
 		if (pmap->pm_active & cpumask)
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
 		/* Shoot down only the other CPUs that have it active. */
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
 			    sva, eva);
 	}
 	sched_unpin();
 	PT_UPDATES_FLUSH();
 }
 
 /*
  * SMP: invalidate the whole TLB on all CPUs that may be using 'pmap'.
  */
 void
 pmap_invalidate_all(pmap_t pmap)
 {
 	cpumask_t cpumask, other_cpus;
 
 	/* The trace record names this function, as the non-SMP variant does. */
 	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
 
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		/* Active everywhere: flush locally and on all other CPUs. */
 		invltlb();
 		smp_invltlb();
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
 		/* Flush locally only if this CPU has the pmap active. */
 		if (pmap->pm_active & cpumask)
 			invltlb();
 		/* Shoot down only the other CPUs that have it active. */
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invltlb(pmap->pm_active & other_cpus);
 	}
 	sched_unpin();
 }
 
 /*
  * SMP: write back and invalidate the cache on this CPU (wbinvd) and
  * then call smp_cache_flush(), with the thread pinned throughout.
  */
 void
 pmap_invalidate_cache(void)
 {
 
 	sched_pin();
 	wbinvd();
 	smp_cache_flush();
 	sched_unpin();
 }
 #else /* !SMP */
 /*
  * Normal, non-SMP, 486+ invalidation functions.
  * We inline these within pmap.c for speed.
  */
 /*
  * UP: invalidate the TLB entry for 'va' when 'pmap' is the kernel pmap
  * or is marked active, then flush pending page table updates.
  */
 PMAP_INLINE void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
 	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
 	    pmap, va);
 
 	if (pmap == kernel_pmap || pmap->pm_active != 0) {
 		invlpg(va);
 	}
 	PT_UPDATES_FLUSH();
 }
 
 /*
  * UP: invalidate the TLB entries for every page in [sva, eva) when
  * 'pmap' is the kernel pmap or is marked active.
  */
 PMAP_INLINE void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t page;
 
 	/* Only ranges longer than a single page are traced. */
 	if (eva - sva > PAGE_SIZE)
 		CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
 		    pmap, sva, eva);
 
 	if (pmap == kernel_pmap || pmap->pm_active != 0) {
 		page = sva;
 		while (page < eva) {
 			invlpg(page);
 			page += PAGE_SIZE;
 		}
 	}
 	PT_UPDATES_FLUSH();
 }
 
 /*
  * UP: invalidate the whole TLB when 'pmap' is the kernel pmap or is
  * marked active.
  */
 PMAP_INLINE void
 pmap_invalidate_all(pmap_t pmap)
 {
 
 	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
 
 	if (pmap == kernel_pmap || pmap->pm_active != 0) {
 		invltlb();
 	}
 }
 
 /*
  * UP: write back and invalidate the cache with a single wbinvd.
  */
 PMAP_INLINE void
 pmap_invalidate_cache(void)
 {
 
 	wbinvd();
 }
 #endif /* !SMP */
 
 /*
  * Flush the cache lines covering [sva, eva).  Both addresses must be
  * page-aligned.  The cheapest method the CPU advertises is used.
  */
 void
 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t line;
 
 	KASSERT((sva & PAGE_MASK) == 0,
 	    ("pmap_invalidate_cache_range: sva not page-aligned"));
 	KASSERT((eva & PAGE_MASK) == 0,
 	    ("pmap_invalidate_cache_range: eva not page-aligned"));
 
 	/* If "Self Snoop" is supported, do nothing. */
 	if ((cpu_feature & CPUID_SS) != 0)
 		return;
 
 	if ((cpu_feature & CPUID_CLFSH) == 0) {
 		/*
 		 * No targeted cache flush methods are supported by CPU,
 		 * globally invalidate cache as a last resort.
 		 */
 		pmap_invalidate_cache();
 		return;
 	}
 
 	/*
 	 * Per-cache-line flush.  The mfence instructions ensure that
 	 * previous stores are included in the write-back.  The processor
 	 * propagates the flush to other processors in the cache
 	 * coherence domain.
 	 */
 	mfence();
 	for (line = sva; line < eva; line += cpu_clflush_line_size)
 		clflush(line);
 	mfence();
 }
 
 /*
  * Are we current address space or kernel?  N.B. We return FALSE when
  * a pmap's page table is in use because a kernel thread is borrowing
  * it.  The borrowed page table can change spontaneously, making any
  * dependence on its continued use subject to a race condition.
  */
 static __inline int
 pmap_is_current(pmap_t pmap)
 {
 
 	/* The kernel pmap always counts as current. */
 	if (pmap == kernel_pmap)
 		return (1);
 
 	/* Otherwise it must belong to the current thread's process... */
 	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))
 		return (0);
 
 	/* ...and its recursive slot must match the one currently loaded. */
 	return ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) ==
 	    (PTDpde[0] & PG_FRAME));
 }
 
 /*
  * If the given pmap is not the current or kernel pmap, the returned pte must
  * be released by passing it to pmap_pte_release().
  */
 /*
  * Return a pointer to the page table entry for 'va' in 'pmap'.
  *
  * Returns the PDE itself when it has PG_PS set, and 0 when the PDE is
  * empty.  For a pmap that is neither current nor the kernel pmap the
  * page table page is mapped through the PMAP2/PADDR2 window and this
  * function returns with PMAP2mutex still held; pmap_pte_release()
  * drops it.
  */
 pt_entry_t *
 pmap_pte(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t newpf;
 	pd_entry_t *pde;
 
 	pde = pmap_pde(pmap, va);
 	if (*pde & PG_PS)
 		return (pde);
 	if (*pde != 0) {
 		/* are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (vtopte(va));
 		/* Held until the caller passes the result to pmap_pte_release(). */
 		mtx_lock(&PMAP2mutex);
 		newpf = *pde & PG_FRAME;
 		/* Re-point the window only if it maps a different frame. */
 		if ((*PMAP2 & PG_FRAME) != newpf) {
 			vm_page_lock_queues();
 			PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
 			vm_page_unlock_queues();
 			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
 			    pmap, va, (*PMAP2 & 0xffffffff));
 		}
 		
 		/* Index of va's PTE within the page table page. */
 		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
 	}
 	return (0);
 }
 
 /*
  * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
  * being NULL.
  */
 static __inline void
 pmap_pte_release(pt_entry_t *pte)
 {
 
 	/*
 	 * Only pointers into the PADDR2 window need releasing; anything
 	 * else (including NULL) is left alone.
 	 */
 	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) != PADDR2)
 		return;
 
 	CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
 	    *PMAP2);
 	/* Tear down the window, then drop the lock taken in pmap_pte(). */
 	PT_SET_VA(PMAP2, 0, TRUE);
 	mtx_unlock(&PMAP2mutex);
 }
 
 /*
  * Invalidate the TLB entry for the address 'caddr' and flush pending
  * page table updates.
  */
 static __inline void
 invlcaddr(void *caddr)
 {
 
 	invlpg((u_int)caddr);
 	PT_UPDATES_FLUSH();
 }
 
 /*
  * Super fast pmap_pte routine best used when scanning
  * the pv lists.  This eliminates many coarse-grained
  * invltlb calls.  Note that many of the pv list
  * scans are across different pmaps.  It is very wasteful
  * to do an entire invltlb for checking a single mapping.
  *
  * If the given pmap is not the current pmap, vm_page_queue_mtx
  * must be held and curthread pinned to a CPU.
  */
 static pt_entry_t *
 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t newpf;
 	pd_entry_t *pde;
 
 	pde = pmap_pde(pmap, va);
 	/* A PG_PS directory entry is returned as-is. */
 	if (*pde & PG_PS)
 		return (pde);
 	if (*pde != 0) {
 		/* are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (vtopte(va));
 		/* The PMAP1 window is only safe under these two conditions. */
 		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 		newpf = *pde & PG_FRAME;
 		if ((*PMAP1 & PG_FRAME) != newpf) {
 			/* Window maps a different page table page: re-point it. */
 			PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
 			CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
 			    pmap, va, (u_long)*PMAP1);
 			
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		/*
 		 * Same frame, but the window was last set up on another CPU:
 		 * invalidate this CPU's TLB entry for it.
 		 */
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			PMAP1cpu = PCPU_GET(cpuid);
 			invlcaddr(PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;
 		/* Index of va's PTE within the page table page. */
 		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
 	}
 	return (0);
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
  *		with the given map/virtual_address pair.
  */
 vm_paddr_t 
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	vm_paddr_t pa;
 	pt_entry_t *ptep;
 	pd_entry_t pde;
 	pt_entry_t frame;
 
 	pa = 0;
 	PMAP_LOCK(pmap);
 	pde = pmap->pm_pdir[va >> PDRSHIFT];
 	if ((pde & PG_PS) != 0) {
 		/* PG_PS entry: translate the frame and add the large offset. */
 		pa = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
 	} else if (pde != 0) {
 		/* Ordinary page: consult the PTE; an empty PTE yields frame 0. */
 		ptep = pmap_pte(pmap, va);
 		frame = *ptep ? xpmap_mtop(*ptep) : 0;
 		pa = (frame & PG_FRAME) | (va & PAGE_MASK);
 		pmap_pte_release(ptep);
 	}
 	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
 /*
  *	Routine:	pmap_extract_ma
  *	Function:
  *		Like pmap_extract, but returns machine address
  */
 vm_paddr_t 
 pmap_extract_ma(pmap_t pmap, vm_offset_t va)
 {
 	vm_paddr_t ma;
 	pt_entry_t *ptep;
 	pd_entry_t pde;
 
 	ma = 0;
 	PMAP_LOCK(pmap);
 	pde = pmap->pm_pdir[va >> PDRSHIFT];
 	if ((pde & PG_PS) != 0) {
 		/* PG_PS entry: the PDE already carries the machine frame. */
 		ma = (pde & ~PDRMASK) | (va & PDRMASK);
 	} else if (pde != 0) {
 		/* Ordinary page: the PTE is used without translation. */
 		ptep = pmap_pte(pmap, va);
 		ma = (*ptep & PG_FRAME) | (va & PAGE_MASK);
 		pmap_pte_release(ptep);
 	}
 	PMAP_UNLOCK(pmap);
 	return (ma);
 }
 
 /*
  *	Routine:	pmap_extract_and_hold
  *	Function:
  *		Atomically extract and hold the physical page
  *		with the given pmap and virtual address pair
  *		if that mapping permits the given protection.
  */
 /*
  * Look up the page mapped at 'va' in 'pmap' and, if the mapping allows
  * the access described by 'prot', hold it and return it.  Returns NULL
  * when there is no suitable mapping.
  *
  * vm_page_pa_tryrelock() may ask for the lookup to be redone; in that
  * case the whole lookup restarts at 'retry'.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pd_entry_t pde;
 	pt_entry_t pte;
 	vm_page_t m;
 	vm_paddr_t pa;
 
 	pa = 0;
 	m = NULL;
 	PMAP_LOCK(pmap);
 retry:
 	pde = PT_GET(pmap_pde(pmap, va));
 	if (pde != 0) {
 		if (pde & PG_PS) {
 			/* Writable, or only read access was requested. */
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
 				if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
 				       (va & PDRMASK), &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 				    (va & PDRMASK));
 				vm_page_hold(m);
 			}
 		} else {
 			/* pmap_pte_quick() requires the thread to be pinned. */
 			sched_pin();
 			pte = PT_GET(pmap_pte_quick(pmap, va));
 			/* Clear the PMAP1 window once the PTE has been read. */
 			if (*PMAP1)
 				PT_SET_MA(PADDR1, 0);
 			if ((pte & PG_V) &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
 				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) {
 					/*
 					 * Drop the pin before restarting:
 					 * the retry pins again, so leaving
 					 * it in place would leak one pin
 					 * level per retry.
 					 */
 					sched_unpin();
 					goto retry;
 				}
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 				vm_page_hold(m);
 			}
 			sched_unpin();
 		}
 	}
 	PA_UNLOCK_COND(pa);
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 /*
  * Add a wired page to the kva.
  * Note: not SMP coherent.
  */
 /*
  * Map 'pa' at kernel virtual address 'va', read/write and valid.  The
  * address is passed through xpmap_ptom() before being written.
  */
 void 
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
 	PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
 }
 
 /*
  * Like pmap_kenter(), but 'ma' is stored into the PTE as given, with
  * no xpmap_ptom() translation.
  */
 void 
 pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
 {
 
 	pte_store_ma(vtopte(va), ma | PG_RW | PG_V | pgeflag);
 }
 
 
 /*
  * Map 'pa' at kernel virtual address 'va' with the cache attribute
  * bits that pmap_cache_bits() returns for 'mode'.
  *
  * NOTE(review): unlike pmap_kenter(), 'pa' is not passed through
  * xpmap_ptom() here - confirm callers supply a machine address.
  */
 static __inline void 
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
 	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
 }
 
 /*
  * Remove a page from the kernel pagetables.
  * Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 	pt_entry_t *kpte = vtopte(va);
 
 	/* Clear the kernel PTE for va. */
 	PT_CLEAR_VA(kpte, FALSE);
 }
 
 /*
  *	Used to map a range of physical addresses into kernel
  *	virtual address space.
  *
  *	The value passed in '*virt' is a suggested virtual address for
  *	the mapping. Architectures which can support a direct-mapped
  *	physical to virtual region can return the appropriate address
  *	within that region, leaving '*virt' unchanged. Other
  *	architectures should map the pages starting at '*virt' and
  *	update '*virt' with the first usable address after the mapped
  *	region.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	vm_offset_t first_va, next_va;
 
 	first_va = *virt;
 	next_va = first_va;
 	CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
 	    next_va, start, end, prot);
 
 	/* Enter one page at a time until the physical range is covered. */
 	for (; start < end; start += PAGE_SIZE, next_va += PAGE_SIZE)
 		pmap_kenter(next_va, start);
 
 	pmap_invalidate_range(kernel_pmap, first_va, next_va);
 
 	/* Report the first unused address back to the caller. */
 	*virt = next_va;
 	return (first_va);
 }
 
 
 /*
  * Add a list of wired pages to the kva
  * this routine is only used for temporary
  * kernel mappings that do not need to have
  * page modification or references recorded.
  * Note that old mappings are simply written
  * over.  The page *must* be wired.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 /*
  * Map 'count' pages from the array 'ma' at consecutive kernel virtual
  * addresses starting at 'sva'.  The updates are issued as
  * __HYPERVISOR_update_va_mapping operations, batched through
  * HYPERVISOR_multicall() in groups of at most 16.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	pt_entry_t *endpte, *pte;
 	vm_paddr_t pa;
 	vm_offset_t va = sva;
 	int mclcount = 0;
 	multicall_entry_t mcl[16];
 	multicall_entry_t *mclp = mcl;
 	int error;
 
 	CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
 	pte = vtopte(sva);
 	endpte = pte + count;
 	while (pte < endpte) {
 		/* New PTE value: machine address plus the attribute bits. */
 		pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;
 
 		mclp->op = __HYPERVISOR_update_va_mapping;
 		mclp->args[0] = va;
 		/* The 64-bit value is passed as two 32-bit halves. */
 		mclp->args[1] = (uint32_t)(pa & 0xffffffff);
 		mclp->args[2] = (uint32_t)(pa >> 32);
+#if 0
 		mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;
+#else
+		/*
+		 * Somehow we seem to be ending up with pages which are in
+		 * the TLB in spite of not having PG_V set, resulting in
+		 * pages newly loaded into the bufcache not showing up
+		 * immediately (i.e., accessing them provides the old data).
+		 * As a workaround, always perform a TLB flush, even if the
+		 * old page didn't have PG_V.
+		 */
+		mclp->args[3] = UVMF_INVLPG|UVMF_ALL;
+#endif
 	
 		va += PAGE_SIZE;
 		pte++;
 		ma++;
 		mclp++;
 		mclcount++;
 		/* Batch is full: submit it and start refilling from the top. */
 		if (mclcount == 16) {
 			error = HYPERVISOR_multicall(mcl, mclcount);
 			mclp = mcl;
 			mclcount = 0;
 			KASSERT(error == 0, ("bad multicall %d", error));
 		}		
 	}
 	/* Submit whatever is left in a partially filled batch. */
 	if (mclcount) {
 		error = HYPERVISOR_multicall(mcl, mclcount);
 		KASSERT(error == 0, ("bad multicall %d", error));
 	}
 	
 #ifdef INVARIANTS
 	/* Verify that every PTE in the range is now non-zero. */
 	for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++)
 		KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE));
 #endif	
 }
 
 
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	vm_offset_t cur;
 
 	CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
 
 	vm_page_lock_queues();
 	critical_enter();
 	/* Remove 'count' consecutive kernel mappings starting at sva. */
 	for (cur = sva; count > 0; count--, cur += PAGE_SIZE)
 		pmap_kremove(cur);
 	pmap_invalidate_range(kernel_pmap, sva, cur);
 	critical_exit();
 	vm_page_unlock_queues();
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 /*
  * Free every page on the list headed by 'free' (linked through the
  * pages' 'right' field) with vm_page_free_zero().
  */
 static __inline void
 pmap_free_zero_pages(vm_page_t free)
 {
 	vm_page_t next;
 
 	for (; free != NULL; free = next) {
 		/* Fetch the link before the page is given back. */
 		next = free->right;
 		vm_page_free_zero(free);
 	}
 }
 
 /*
  * This routine unholds page table pages, and if the hold count
  * drops to zero, then it decrements the wire count.
  */
 static __inline int
 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 {
 
 	/* Drop one reference; nothing more to do while others remain. */
 	if (--m->wire_count != 0)
 		return (0);
 
 	/* Last reference gone: unmap and queue the page table page. */
 	return (_pmap_unwire_pte_hold(pmap, m, free));
 }
 
 /*
  * Tear down the page table page 'm' of 'pmap' once its wire count has
  * reached zero: unpin it, clear its page directory slot, zero it and
  * push it onto the '*free' list.  Always returns 1.
  */
 static int 
 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 {
 	vm_offset_t pteva;
 
 	/* Push out queued page table updates before unpinning. */
 	PT_UPDATES_FLUSH();
 	/*
 	 * unmap the page table page
 	 */
 	xen_pt_unpin(pmap->pm_pdir[m->pindex]);
 	/*
 	 * page *might* contain residual mapping :-/  
 	 */
 	PD_CLEAR_VA(pmap, m->pindex, TRUE);
 	pmap_zero_page(m);
 	--pmap->pm_stats.resident_count;
 
 	/*
 	 * This is a release store so that the ordinary store unmapping
 	 * the page table page is globally performed before TLB shoot-
 	 * down is begun.
 	 */
 	atomic_subtract_rel_int(&cnt.v_wire_count, 1);
 
 	/*
 	 * Invalidate the TLB entry for the address derived from the
 	 * page's pindex so that the invalidated mapping takes effect
 	 * immediately.
 	 */
 	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
 	pmap_invalidate_page(pmap, pteva);
 
 	/* 
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
 	 */
 	m->right = *free;
 	*free = m;
 
 	return 1;
 }
 
 /*
  * After removing a page table entry, this routine is used to
  * conditionally free the page, and manage the hold/wire counts.
  */
 static int
 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 {
 	pd_entry_t pde;
 
 	/* Addresses at or above VM_MAXUSER_ADDRESS are not counted here. */
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (0);
 
 	/* Find the page table page behind va and drop one reference. */
 	pde = PT_GET(pmap_pde(pmap, va));
 	return (pmap_unwire_pte_hold(pmap, PHYS_TO_VM_PAGE(pde & PG_FRAME),
 	    free));
 }
 
 /*
  * Initialize 'pmap' to use the IdlePTD page directory (and IdlePDPT
  * under PAE), make it the current CPU's pmap and add it to the
  * allpmaps list.
  */
 void
 pmap_pinit0(pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
 	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 #ifdef PAE
 	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 #endif
 	pmap->pm_active = 0;
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	/* allpmaps is protected by a spin mutex. */
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  */
 /*
  * Returns 1 on success, or 0 if kernel virtual address space for the
  * page directory could not be allocated.
  */
 int
 pmap_pinit(pmap_t pmap)
 {
 	/* One page more than NPGPTD: the last slot is ptdpg[NPGPTD]. */
 	vm_page_t m, ptdpg[NPGPTD + 1];
 	int npgptd = NPGPTD + 1;
 	static int color;
 	int i;
 
 	PMAP_LOCK_INIT(pmap);
 
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	if (pmap->pm_pdir == NULL) {
 		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 		    NBPTD);
 		if (pmap->pm_pdir == NULL) {
 			PMAP_LOCK_DESTROY(pmap);
 			return (0);
 		}
 #ifdef PAE
 		/* NOTE(review): this allocation's result is not checked. */
 		pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
 #endif
 	}
 
 	/*
 	 * allocate the page directory page(s)
 	 */
 	for (i = 0; i < npgptd;) {
 		m = vm_page_alloc(NULL, color++,
 		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 		    VM_ALLOC_ZERO);
 		/* On failure wait for memory and try the same slot again. */
 		if (m == NULL)
 			VM_WAIT;
 		else {
 			ptdpg[i++] = m;
 		}
 	}
 	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 	/* Zero any directory page that did not come back pre-zeroed. */
 	for (i = 0; i < NPGPTD; i++) {
 		if ((ptdpg[i]->flags & PG_ZERO) == 0)
 			pagezero(&pmap->pm_pdir[i*NPTEPG]);
 	}
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 	/* Wire in kernel global address entries. */
 
 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 #ifdef PAE
 	/* The extra page becomes the PDPT; fill in one entry per PD page. */
 	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
 	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
 		bzero(pmap->pm_pdpt, PAGE_SIZE);
 	for (i = 0; i < NPGPTD; i++) {
 		vm_paddr_t ma;
 		
 		ma = VM_PAGE_TO_MACH(ptdpg[i]);
 		pmap->pm_pdpt[i] = ma | PG_V;
 
 	}
 #endif	
 	/*
 	 * Rewrite the kernel mapping of each directory page with the
 	 * PG_M, PG_A, PG_U and PG_RW bits removed.
 	 */
 	for (i = 0; i < NPGPTD; i++) {
 		pt_entry_t *pd;
 		vm_paddr_t ma;
 		
 		ma = VM_PAGE_TO_MACH(ptdpg[i]);
 		pd = pmap->pm_pdir + (i * NPDEPG);
 		PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
 #if 0		
 		xen_pgd_pin(ma);
 #endif		
 	}
 	
 #ifdef PAE	
 	/* Likewise remove PG_RW from the kernel mapping of the PDPT page. */
 	PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
 #endif
 	vm_page_lock_queues();
 	xen_flush_queue();
 	/*
 	 * NOTE(review): ptdpg[NPGPTD] is pinned here in every build, but
 	 * only mapped as the PDPT under PAE - confirm intent for non-PAE.
 	 */
 	xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
 	/* Install the entries starting at PTDPTDI that point at the PD pages. */
 	for (i = 0; i < NPGPTD; i++) {
 		vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
 		PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
 	}
 	xen_flush_queue();
 	vm_page_unlock_queues();
 	pmap->pm_active = 0;
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
 	return (1);
 }
 
 /*
  * this routine is called if the page table page is not
  * mapped correctly.
  */
 /*
  * Allocate, pin and install a new page table page in slot 'ptepindex'
  * of 'pmap'.  'flags' must be exactly one of M_NOWAIT or M_WAITOK.
  *
  * Returns the new page, or NULL if no page could be allocated.  With
  * M_WAITOK the pmap lock and the page queues lock are dropped while
  * waiting for memory, so the caller must re-examine the slot.
  */
 static vm_page_t
 _pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
 {
 	vm_paddr_t ptema;
 	vm_page_t m;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 		if (flags & M_WAITOK) {
 			/* Release both locks while waiting, then retake them. */
 			PMAP_UNLOCK(pmap);
 			vm_page_unlock_queues();
 			VM_WAIT;
 			vm_page_lock_queues();
 			PMAP_LOCK(pmap);
 		}
 
 		/*
 		 * Indicate the need to retry.  While waiting, the page table
 		 * page may have been allocated.
 		 */
 		return (NULL);
 	}
 	/* VM_ALLOC_ZERO is only a preference; zero the page if needed. */
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
 	pmap->pm_stats.resident_count++;
 
 	/* Pin the page as a page table before installing it in the slot. */
 	ptema = VM_PAGE_TO_MACH(m);
 	xen_pt_pin(ptema);
 	PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
 		(ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
 	
 	KASSERT(pmap->pm_pdir[ptepindex],
 	    ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
 	return (m);
 }
 
 /*
  * Return the page table page that maps 'va' in 'pmap', allocating one if
  * the page directory entry is not valid.
  *
  * If the page table page already exists its wire count is bumped by one
  * (one reference per mapping the caller is about to add).  Otherwise a
  * new page is obtained through _pmap_allocpte().
  *
  * 'flags' must contain exactly one of M_NOWAIT or M_WAITOK.  With
  * M_WAITOK the function retries until it succeeds; with M_NOWAIT it
  * returns NULL when no page could be allocated.
  */
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
 	unsigned ptepindex;
 	pd_entry_t ptema;
 	vm_page_t m;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 
 	/*
 	 * Calculate pagetable page index
 	 */
 	ptepindex = va >> PDRSHIFT;
 retry:
 	/*
 	 * Get the page directory entry
 	 */
 	ptema = pmap->pm_pdir[ptepindex];
 
 	/*
 	 * This supports switching from a 4MB page to a
 	 * normal 4K page.
 	 */
 	if (ptema & PG_PS) {
 		/*
 		 * XXX 
 		 */
 		pmap->pm_pdir[ptepindex] = 0;
 		ptema = 0;
 		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 		pmap_invalidate_all(kernel_pmap);
 	}
 
 	/*
 	 * If the page table page is mapped, we just increment the
 	 * hold count, and activate it.
 	 */
 	if (ptema & PG_V) {
 		/* The entry holds a machine address; convert before lookup. */
 		m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
 		m->wire_count++;
 	} else {
 		/*
 		 * Here if the pte page isn't mapped, or if it has
 		 * been deallocated. 
 		 */
 		CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
 		    pmap, va, flags);
 		m = _pmap_allocpte(pmap, ptepindex, flags);
 		if (m == NULL) {
 			if (flags & M_WAITOK)
 				goto retry;
 			/*
 			 * M_NOWAIT allocation failed.  Nothing was installed
 			 * in the page directory, so the "did not get mapped"
 			 * assertion below must not be evaluated; just report
 			 * the failure to the caller.
 			 */
 			return (NULL);
 		}
 
 		KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
 	}
 	return (m);
 }
 
 
 /***************************************************
 * Pmap allocation/deallocation routines.
  ***************************************************/
 
 #ifdef SMP
 /*
  * Deal with a SMP shootdown of other users of the pmap that we are
  * trying to dispose of.  This can be a bit hairy.
  *
  * The three variables below are the mailbox shared between
  * pmap_lazyfix() (the requester) and pmap_lazyfix_action() /
  * pmap_lazyfix_self() (the responders).  pmap_lazyfix() fills them in
  * while holding smp_ipi_mtx.
  */
 static cpumask_t *lazymask;	/* -> pm_active of the pmap being released */
 static u_int lazyptd;		/* vtophys() of that pmap's top-level table */
 static volatile u_int lazywait;	/* 0 while an IPI is outstanding, 1 when done */
 
 /* Not static; NOTE(review): presumably invoked from the IPI_LAZYPMAP handler. */
 void pmap_lazyfix_action(void);
 
 void
 pmap_lazyfix_action(void)
 {
 	cpumask_t mymask = PCPU_GET(cpumask);
 
 #ifdef COUNT_IPIS
 	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
 #endif
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 	atomic_clear_int(lazymask, mymask);
 	atomic_store_rel_int(&lazywait, 1);
 }
 
 /*
  * Same job as pmap_lazyfix_action() for the case where the CPU that
  * still holds the dying pmap is the one running pmap_lazyfix(): no IPI
  * and no 'lazywait' handshake are needed.
  */
 static void
 pmap_lazyfix_self(cpumask_t mymask)
 {
 
 	if (lazyptd == rcr3()) {
 		/* Switch back to the current pcb's page tables. */
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 	}
 	atomic_clear_int(lazymask, mymask);
 }
 
 
 /*
  * SMP variant.  Loop until no CPU is recorded in pmap->pm_active.
  * Each pass picks one CPU out of the mask, publishes the pmap's
  * top-level table address and active-mask pointer in the lazy*
  * variables, and either fixes the local CPU directly or sends
  * IPI_LAZYPMAP to the remote CPU and spins until it acknowledges.
  * The spin is bounded; on timeout a message is printed and the outer
  * loop tries again.
  */
 static void
 pmap_lazyfix(pmap_t pmap)
 {
 	cpumask_t mymask, mask;
 	u_int spins;
 
 	while ((mask = pmap->pm_active) != 0) {
 		/* Spin budget for this pass; also the timeout indicator. */
 		spins = 50000000;
 		mask = mask & -mask;	/* Find least significant set bit */
 		/* smp_ipi_mtx serializes use of the lazy* mailbox and the IPI. */
 		mtx_lock_spin(&smp_ipi_mtx);
 #ifdef PAE
 		lazyptd = vtophys(pmap->pm_pdpt);
 #else
 		lazyptd = vtophys(pmap->pm_pdir);
 #endif
 		mymask = PCPU_GET(cpumask);
 		if (mask == mymask) {
 			/* The stale user is this CPU: fix it up in place. */
 			lazymask = &pmap->pm_active;
 			pmap_lazyfix_self(mymask);
 		} else {
 			/*
 			 * Publish the mailbox with release stores before
 			 * raising the IPI.  The pointer is stored through a
 			 * u_int cast, which relies on pointers being
 			 * u_int-sized on this platform.
 			 */
 			atomic_store_rel_int((u_int *)&lazymask,
 			    (u_int)&pmap->pm_active);
 			atomic_store_rel_int(&lazywait, 0);
 			ipi_selected(mask, IPI_LAZYPMAP);
 			/* Wait for pmap_lazyfix_action() to set lazywait. */
 			while (lazywait == 0) {
 				ia32_pause();
 				if (--spins == 0)
 					break;
 			}
 		}
 		mtx_unlock_spin(&smp_ipi_mtx);
 		/* spins only reaches 0 when the remote CPU never answered. */
 		if (spins == 0)
 			printf("pmap_lazyfix: spun for 50000000\n");
 	}
 }
 
 #else	/* SMP */
 
 /*
  * Uniprocessor variant.  Cleanup here is simple, and this path is
  * rarely exercised at all: teardown is deferred until the parent calls
  * wait(2), by which time some other userland process has normally run
  * and replaced the page tables.  If %cr3 nevertheless still points at
  * the dying pmap's page directory, switch to the current pcb's tables
  * and clear this CPU from the pmap's active mask.
  */
 static void
 pmap_lazyfix(pmap_t pmap)
 {
 	u_int dying_cr3;
 
 	dying_cr3 = vtophys(pmap->pm_pdir);
 	if (rcr3() != dying_cr3)
 		return;
 
 	load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 	pmap->pm_active &= ~(PCPU_GET(cpumask));
 }
 #endif	/* SMP */
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * Sequence: flush queued page table updates, get every CPU off the
  * pmap (pmap_lazyfix), unlink it from the global pmap list, then
  * unmap, unpin and free the page directory pages (and, under PAE, the
  * page directory pointer table page), and finally destroy the pmap
  * lock.
  */
 void
 pmap_release(pmap_t pmap)
 {
 	vm_page_t m, ptdpg[2*NPGPTD+1];
 	vm_paddr_t ma;
 	int i;
 	/* Number of ptdpg[] slots filled in below: PAE adds the pdpt page. */
 #ifdef PAE	
 	int npgptd = NPGPTD + 1;
 #else
 	int npgptd = NPGPTD;
 #endif
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
 	PT_UPDATES_FLUSH();
 
 	pmap_lazyfix(pmap);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Look up the vm_page behind each page directory page while the
 	 * KVA mapping still exists; pmap_qremove() below tears it down.
 	 */
 	for (i = 0; i < NPGPTD; i++)
 		ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
 	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 #ifdef PAE
 	ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
 #endif	
 
 	for (i = 0; i < npgptd; i++) {
 		m = ptdpg[i];
 		ma = VM_PAGE_TO_MACH(m);
 		/* unpinning L1 and L2 treated the same */
 		/*
 		 * NOTE(review): with the #else branch active only slot
 		 * NPGPTD is unpinned, and that slot is only visited when
 		 * npgptd == NPGPTD + 1 (PAE).  In a non-PAE build no unpin
 		 * is issued by this loop -- confirm that is intended.
 		 */
 #if 0
                 xen_pgd_unpin(ma);
 #else
 		if (i == NPGPTD)
 	                xen_pgd_unpin(ma);
 #endif
 #ifdef PAE
 		/* Cross-check the directory page against its pdpt entry. */
 		if (i < NPGPTD)
 			KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 			    ("pmap_release: got wrong ptd page"));
 #endif
 		/* Drop the wiring, adjust the global count, free the page. */
 		m->wire_count--;
 		atomic_subtract_int(&cnt.v_wire_count, 1);
 		vm_page_free(m);
 	}
 #ifdef PAE
 	/* The pdpt page's KVA mapping is removed after the page is freed. */
 	pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
 #endif
 	PMAP_LOCK_DESTROY(pmap);
 }
 
 /*
  * Sysctl handler for vm.kvm_size: reports the span between KERNBASE and
  * VM_MAX_KERNEL_ADDRESS as a read-only long.
  */
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long span;
 
 	span = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 	return (sysctl_handle_long(oidp, &span, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_size, "IU", "Size of KVM");
 
 /*
  * Sysctl handler for vm.kvm_free: reports the distance from the current
  * kernel_vm_end up to VM_MAX_KERNEL_ADDRESS as a read-only long.
  */
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long remaining;
 
 	remaining = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 	return (sysctl_handle_long(oidp, &remaining, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "IU", "Amount of KVM free");
 
 /*
  * Grow the number of kernel page table pages, if needed, so that kernel
  * virtual addresses up to 'addr' are backed by page tables.
  *
  * 'addr' is rounded up to a page-table-page boundary (PAGE_SIZE *
  * NPTEPG bytes) and clamped to kernel_map->max_offset.  kernel_vm_end
  * is advanced one page table page at a time; for each missing page
  * directory entry a new wired page is allocated, zeroed, and installed
  * in kernel_pmap and in every pmap on the allpmaps list.
  *
  * The caller must hold kernel_map->system_mtx.  Panics if a page table
  * page cannot be allocated.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	struct pmap *pmap;
 	vm_paddr_t ptppaddr;
 	vm_page_t nkpg;
 	pd_entry_t newpdir;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	/*
 	 * First call: find the current end of the mapped kernel address
 	 * space by walking the page directory upward from KERNBASE until
 	 * an empty entry is found, counting page table pages in nkpt.
 	 */
 	if (kernel_vm_end == 0) {
 		kernel_vm_end = KERNBASE;
 		nkpt = 0;
 		while (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 			nkpt++;
 			/* "- 1" keeps the test correct if the sum wrapped to 0. */
 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 				kernel_vm_end = kernel_map->max_offset;
 				break;
 			}
 		}
 	}
 	addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
 	while (kernel_vm_end < addr) {
 		/* Slot already populated: just step over it. */
 		if (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 				kernel_vm_end = kernel_map->max_offset;
 				break;
 			}
 			continue;
 		}
 
 		/*
 		 * This index is bogus, but out of the way
 		 */
 		nkpg = vm_page_alloc(NULL, nkpt,
 		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 		if (!nkpg)
 			panic("pmap_growkernel: no memory to grow kernel");
 
 		nkpt++;
 
 		pmap_zero_page(nkpg);
 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 		/*
 		 * Install the new entry in the kernel pmap first, then in
 		 * every other pmap on the global list (walked under
 		 * allpmaps_lock).
 		 */
 		vm_page_lock_queues();
 		PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
 		mtx_lock_spin(&allpmaps_lock);
 		LIST_FOREACH(pmap, &allpmaps, pm_list)
 			PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
 
 		mtx_unlock_spin(&allpmaps_lock);
 		vm_page_unlock_queues();
 
 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
 			break;
 		}
 	}
 }
 
 
 /***************************************************
  * page management routines.
  ***************************************************/
 
 /*
  * Compile-time layout checks for pv chunks: a chunk must occupy exactly
  * one page (pv_to_chunk() masks with PAGE_MASK), and the bitmap must
  * have 11 words to match the 11-entry pc_freemask[] table.
  */
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 11);
 
 /*
  * Map a pv entry back to the pv chunk that contains it by clearing the
  * page-offset bits of its address.
  */
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 	uintptr_t chunk_base;
 
 	chunk_base = (uintptr_t)pv & ~(uintptr_t)PAGE_MASK;
 	return ((struct pv_chunk *)chunk_base);
 }
 
 /* Owning pmap of a pv entry, looked up through its containing chunk. */
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 /*
  * Bitmap words of a completely free chunk: bitmap words 0 through 9 have
  * all 32 bits set, bitmap word 10 has only its low 16 bits set.
  */
 #define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
 #define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
 
 static uint32_t pc_freemask[11] = {
 	PC_FREE0_9,	/* 0 */
 	PC_FREE0_9,	/* 1 */
 	PC_FREE0_9,	/* 2 */
 	PC_FREE0_9,	/* 3 */
 	PC_FREE0_9,	/* 4 */
 	PC_FREE0_9,	/* 5 */
 	PC_FREE0_9,	/* 6 */
 	PC_FREE0_9,	/* 7 */
 	PC_FREE0_9,	/* 8 */
 	PC_FREE0_9,	/* 9 */
 	PC_FREE10	/* 10 */
 };
 
 /* Read-only export of the global pv entry count as vm.pmap.pv_entry_count. */
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 
 /*
  * Optional statistics, compiled in only with PV_STATS.  The counters are
  * updated through PV_STAT() in the pv entry routines and exported
  * read-only under vm.pmap.
  */
 #ifdef PV_STATS
 /* Chunk-level counters. */
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 	"Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 	"Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 	"Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 	"Number of times tried to get a chunk page but failed.");
 
 /* Entry-level counters. */
 static long pv_entry_frees, pv_entry_allocs;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 	"Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 	"Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 	"Current number of spare pv entries");
 
 /* How often get_pv_entry() had to reclaim from each page queue. */
 static int pmap_collect_inactive, pmap_collect_active;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
 	"Current number times pmap_collect called on inactive queue");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
 	"Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
  * another pv entry chunk.  This is normally called to
  * unmap inactive pages, and if necessary, active pages.
  *
  * Walks every page on the queue 'vpq' and destroys each of its
  * mappings, returning the associated pv entries to their chunks.
  * 'locked_pmap' is the pmap whose lock the caller already holds.
  * Pages that are held or busy are skipped.
  */
 static void
 pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 {
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	pv_entry_t next_pv, pv;
 	vm_offset_t va;
 	vm_page_t m, free;
 
 	/* Stay on this CPU while pmap_pte_quick() is in use. */
 	sched_pin();
 	TAILQ_FOREACH(m, &vpq->pl, pageq) {
 		if (m->hold_count || m->busy)
 			continue;
 		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 			va = pv->pv_va;
 			pmap = PV_PMAP(pv);
 			/* Avoid deadlock and lock recursion. */
 			/*
 			 * A pmap at a higher address than locked_pmap is
 			 * locked unconditionally; locked_pmap itself is
 			 * already held; any other pmap is only try-locked
 			 * and its mapping is skipped on failure.
 			 */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
 			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
 				continue;
 			pmap->pm_stats.resident_count--;
 			pte = pmap_pte_quick(pmap, va);
 			tpte = pte_load_clear(pte);
 			KASSERT((tpte & PG_W) == 0,
 			    ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
 			/* Carry referenced/modified state over to the vm_page. */
 			if (tpte & PG_A)
 				vm_page_flag_set(m, PG_REFERENCED);
 			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 				vm_page_dirty(m);
 			free = NULL;
 			pmap_unuse_pt(pmap, va, &free);
 			pmap_invalidate_page(pmap, va);
 			pmap_free_zero_pages(free);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			free_pv_entry(pmap, pv);
 			if (pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 		}
 		/* No mappings left: the page can no longer be written via a pmap. */
 		if (TAILQ_EMPTY(&m->md.pv_list))
 			vm_page_flag_clear(m, PG_WRITEABLE);
 	}
 	sched_unpin();
 }
 
 
 /*
  * Return a pv entry to its chunk's free bitmap.  The chunk is moved to
  * the front of the pmap's chunk list so the next allocation finds the
  * free slot quickly.  When every slot in the chunk has become free, the
  * chunk is taken off the list, its page is unmapped, unwired and freed,
  * and its KVA is handed back to the pv_vafree list.
  *
  * Requires the page queues lock and the pmap lock.
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *chunk;
 	vm_page_t chunk_page;
 	int slot, word;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_frees++);
 	PV_STAT(pv_entry_spare++);
 	pv_entry_count--;
 
 	/* Mark the entry's slot as free in the chunk bitmap. */
 	chunk = pv_to_chunk(pv);
 	slot = pv - &chunk->pc_pventry[0];
 	chunk->pc_map[slot / 32] |= 1ul << (slot % 32);
 
 	/* move to head of list */
 	TAILQ_REMOVE(&pmap->pm_pvchunk, chunk, pc_list);
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, chunk, pc_list);
 
 	/* Done unless every bitmap word now reads "all free". */
 	for (word = 0; word < _NPCM; word++) {
 		if (chunk->pc_map[word] != pc_freemask[word])
 			return;
 	}
 
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
 
 	/* entire chunk is free, return it */
 	TAILQ_REMOVE(&pmap->pm_pvchunk, chunk, pc_list);
 	chunk_page = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)chunk));
 	pmap_qremove((vm_offset_t)chunk, 1);
 	vm_page_unwire(chunk_page, 0);
 	vm_page_free(chunk_page);
 	pmap_ptelist_free(&pv_vafree, (vm_offset_t)chunk);
 }
 
 /*
  * get a new pv_entry, allocating a block from the system
  * when needed.
  *
  * The first chunk on the pmap's chunk list is searched for a free
  * slot.  If it has none (or there is no chunk), a new chunk page is
  * allocated and mapped at a KVA taken from pv_vafree.  When that
  * fails and 'try' is non-zero, NULL is returned.  Otherwise mappings
  * are reclaimed with pmap_collect(), first from the inactive queue and
  * then from the active queue, retrying after each; if both rounds
  * fail the function panics.
  *
  * Requires the pmap lock and the page queues lock.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, int try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
 	static vm_pindex_t colour;
 	struct vpgqueues *pq;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PV_STAT(pv_entry_allocs++);
 	/* Counted up front; undone below if a 'try' allocation fails. */
 	pv_entry_count++;
 	/* Rate-limited warning (printinterval is 60 seconds). */
 	if (pv_entry_count > pv_entry_high_water)
 		if (ratecheck(&lastprint, &printinterval))
 			printf("Approaching the limit on PV entries, consider "
 			    "increasing either the vm.pmap.shpgperproc or the "
 			    "vm.pmap.pv_entry_max tunable.\n");
 	/* pq records which queue, if any, has already been reclaimed from. */
 	pq = NULL;
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		/* Find the first bitmap word with a free (set) bit. */
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = bsfl(pc->pc_map[field]);
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 32 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			for (field = 0; field < _NPCM; field++)
 				if (pc->pc_map[field] != 0) {
 					PV_STAT(pv_entry_spare--);
 					return (pv);	/* not full, return */
 				}
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 			PV_STAT(pv_entry_spare--);
 			return (pv);
 		}
 	}
 	/*
 	 * Access to the ptelist "pv_vafree" is synchronized by the page
 	 * queues lock.  If "pv_vafree" is currently non-empty, it will
 	 * remain non-empty until pmap_ptelist_alloc() completes.
 	 */
 	/*
 	 * Once the active queue has been reclaimed from, the page is
 	 * requested with VM_ALLOC_SYSTEM instead of VM_ALLOC_NORMAL.
 	 */
 	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq ==
 	    &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		/*
 		 * Reclaim pv entries: At first, destroy mappings to
 		 * inactive pages.  After that, if a pv chunk entry
 		 * is still needed, destroy mappings to active pages.
 		 */
 		if (pq == NULL) {
 			PV_STAT(pmap_collect_inactive++);
 			pq = &vm_page_queues[PQ_INACTIVE];
 		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
 			PV_STAT(pmap_collect_active++);
 			pq = &vm_page_queues[PQ_ACTIVE];
 		} else
 			panic("get_pv_entry: increase vm.pmap.shpgperproc");
 		pmap_collect(pmap, pq);
 		goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
 	colour++;
 	/* Map the new page at a free chunk KVA and initialise the chunk. */
 	pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
 	pmap_qenter((vm_offset_t)pc, &m, 1);
 	if ((m->flags & PG_ZERO) == 0)
 		pagezero(pc);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
 	for (field = 1; field < _NPCM; field++)
 		pc->pc_map[field] = pc_freemask[field];
 	/* Entry 0 is the one handed to the caller. */
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
 	return (pv);
 }
 
 /*
  * Find the pv entry for (pmap, va) on the given pv list, unlink it and
  * return it.  Returns NULL when no such entry is on the list.  The pv
  * entry itself is not freed.  Requires the page queues lock.
  */
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t cur;
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	for (cur = TAILQ_FIRST(&pvh->pv_list); cur != NULL;
 	    cur = TAILQ_NEXT(cur, pv_list)) {
 		if (PV_PMAP(cur) != pmap || cur->pv_va != va)
 			continue;
 		TAILQ_REMOVE(&pvh->pv_list, cur, pv_list);
 		return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Unlink the pv entry for (pmap, va) from the given pv list and release
  * it back to its chunk.  The entry is expected to exist.
  */
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t entry = pmap_pvh_remove(pvh, pmap, va);
 
 	KASSERT(entry != NULL, ("pmap_pvh_free: pv not found"));
 	free_pv_entry(pmap, entry);
 }
 
 /*
  * Drop the pv entry that records the mapping of page 'm' at 'va' in
  * 'pmap'.  If that was the page's last mapping, clear PG_WRITEABLE on
  * the page.  Requires the page queues lock.
  */
 static void
 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 {
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	pmap_pvh_free(&m->md, pmap, va);
 	if (!TAILQ_EMPTY(&m->md.pv_list))
 		return;
 	vm_page_flag_clear(m, PG_WRITEABLE);
 }
 
 /*
  * Try to record a mapping of page 'm' at 'va' without reclaiming
  * anything.  Returns TRUE if a pv entry was obtained and appended to the
  * page's pv list; returns FALSE if the pv entry count has reached the
  * high water mark or no entry could be allocated.
  *
  * Requires the pmap lock and the page queues lock.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 
 	if (pv_entry_count >= pv_entry_high_water)
 		return (FALSE);
 
 	entry = get_pv_entry(pmap, TRUE);
 	if (entry == NULL)
 		return (FALSE);
 
 	entry->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
 	return (TRUE);
 }
 
 /*
  * pmap_remove_pte: do the things to unmap a page in a process
  *
  * Clears the page table entry at 'ptq' (which maps 'va' in 'pmap'),
  * updates the wired/resident statistics, transfers the modified and
  * referenced bits to the vm_page, releases the pv entry, and drops the
  * reference on the page table page.  Pages that become free as a result
  * are chained onto '*free'.
  *
  * Returns the result of pmap_unuse_pt(); pmap_remove() stops scanning
  * the current page table page when this is non-zero.
  *
  * Requires the page queues lock and the pmap lock.
  */
 static int
 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
 {
 	pt_entry_t oldpte;
 	vm_page_t m;
 
 	CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
 	    pmap, (u_long)*ptq, va);
 	
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldpte = *ptq;
 	PT_SET_VA_MA(ptq, 0, TRUE);
 	if (oldpte & PG_W)
 		pmap->pm_stats.wired_count -= 1;
 	/*
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
 	if (oldpte & PG_G)
 		pmap_invalidate_page(kernel_pmap, va);
 	pmap->pm_stats.resident_count -= 1;
 	/*
 	 * XXX This is not strictly correct, but somewhere along the line
 	 * we are losing the managed bit on some pages. It is unclear to me
 	 * why, but I think the most likely explanation is that xen's writable
 	 * page table implementation doesn't respect the unused bits.
 	 *
 	 * Hence a valid user-space mapping is treated as managed even when
 	 * PG_MANAGED is missing, and the anomaly is logged.
 	 */
 	if ((oldpte & PG_MANAGED) ||
 	    ((oldpte & PG_V) && (va < VM_MAXUSER_ADDRESS))) {
 		/* The PTE holds a machine address; convert before lookup. */
 		m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
 
 		/*
 		 * pt_entry_t is 32 or 64 bits wide depending on PAE, so
 		 * widen it explicitly to match the %jx conversion.
 		 */
 		if (!(oldpte & PG_MANAGED))
 			printf("va=0x%x is unmanaged :-( pte=0x%jx\n", va,
 			    (uintmax_t)oldpte);
 
 		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		if (oldpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 		pmap_remove_entry(pmap, m, va);
 	}
 
 	return (pmap_unuse_pt(pmap, va, free));
 }
 
 /*
  * Remove the mapping of a single page at 'va' from 'pmap', if one
  * exists.  Pages freed as a side effect are chained onto '*free'.
  *
  * The caller must hold the page queues lock and the pmap lock and must
  * have pinned the current thread.
  */
 static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 {
 	pt_entry_t *ptep;
 
 	CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
 	    pmap, va);
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	ptep = pmap_pte_quick(pmap, va);
 	if (ptep != NULL && (*ptep & PG_V) != 0) {
 		pmap_remove_pte(pmap, ptep, va, free);
 		pmap_invalidate_page(pmap, va);
 		/* Tear down the temporary PMAP1 window if it is in use. */
 		if (*PMAP1)
 			PT_SET_MA(PADDR1, 0);
 	}
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	The range [sva, eva) is walked one page directory entry at a
  *	time.  Large (PG_PS) mappings are cleared as a unit; otherwise
  *	every valid PTE in the range is removed through
  *	pmap_remove_pte().  Page table pages that become free are
  *	collected on a local list and released after the locks are
  *	dropped.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
 	pt_entry_t *pte;
 	vm_page_t free = NULL;
 	int anyvalid;
 	
 	CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
 	    pmap, sva, eva);
 	
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	/* Set when a non-global mapping was removed; triggers the final flush. */
 	anyvalid = 0;
 
 	vm_page_lock_queues();
 	sched_pin();
 	PMAP_LOCK(pmap);
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if ((sva + PAGE_SIZE == eva) && 
 	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 		/*
 		 * pmap_remove_page() invalidates the page itself, so
 		 * anyvalid stays 0 and the code at "out" skips the
 		 * full invalidation.
 		 */
 		pmap_remove_page(pmap, sva, &free);
 		goto out;
 	}
 
 	for (; sva < eva; sva = pdnxt) {
 		unsigned pdirindex;
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
 		/* Nothing left mapped anywhere in this pmap: stop early. */
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		pdirindex = sva >> PDRSHIFT;
 		ptpaddr = pmap->pm_pdir[pdirindex];
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
 			PD_CLEAR_VA(pmap, pdirindex, TRUE);
 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 			anyvalid = 1;
 			continue;
 		}
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (pdnxt > eva)
 			pdnxt = eva;
 
 		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 		    sva += PAGE_SIZE) {
 			if ((*pte & PG_V) == 0)
 				continue;
 
 			/*
 			 * The TLB entry for a PG_G mapping is invalidated
 			 * by pmap_remove_pte().
 			 */
 			if ((*pte & PG_G) == 0)
 				anyvalid = 1;
 			/*
 			 * A non-zero return ends the scan of this page
 			 * table page; the outer loop resumes at pdnxt.
 			 */
 			if (pmap_remove_pte(pmap, pte, sva, &free))
 				break;
 		}
 	}
 	/* Push out queued page table updates and drop the PMAP1 window. */
 	PT_UPDATES_FLUSH();
 	if (*PMAP1)
 		PT_SET_VA_MA(PMAP1, 0, TRUE);
 out:
 	if (anyvalid)
 		pmap_invalidate_all(pmap);
 	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 	/* Free the collected pages only after all locks are released. */
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  *
  *		The page must not be fictitious.  The page's pv list
  *		is consumed from the head until it is empty; each
  *		owning pmap is locked only while its own mapping is
  *		being torn down.
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	vm_page_t free;
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_remove_all: page %p is fictitious", m));
 	free = NULL;
 	vm_page_lock_queues();
 	sched_pin();
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pmap->pm_stats.resident_count--;
 		pte = pmap_pte_quick(pmap, pv->pv_va);
 
 		/* Snapshot the old entry, then clear it. */
 		tpte = *pte;
 		PT_SET_VA_MA(pte, 0, TRUE);
 		if (tpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, &free);
 		pmap_invalidate_page(pmap, pv->pv_va);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	/* No mappings remain, so the page is no longer writeable via a pmap. */
 	vm_page_flag_clear(m, PG_WRITEABLE);
 	PT_UPDATES_FLUSH();
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
 	sched_unpin();
 	vm_page_unlock_queues();
 	/* Release page table pages freed along the way. */
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	Protection can only be reduced here.  If read permission is
  *	absent from 'prot' the range is removed outright.  If nothing
  *	would be taken away (write is still allowed and, under PAE,
  *	execute as well) the function returns without touching the
  *	page tables.  Otherwise write permission and/or execute
  *	permission is stripped from every valid mapping in [sva, eva).
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
 	pt_entry_t *pte;
 	int anychanged;
 
 	CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
 	    pmap, sva, eva, prot);
 	
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	/* Nothing to revoke: leave the mappings alone. */
 #ifdef PAE
 	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
 		return;
 #else
 	if (prot & VM_PROT_WRITE)
 		return;
 #endif
 
 	/* Set when a non-global entry changed; triggers the final flush. */
 	anychanged = 0;
 
 	vm_page_lock_queues();
 	sched_pin();
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = pdnxt) {
 		pt_entry_t obits, pbits;
 		unsigned pdirindex;
 
 		/* Start of the next page-directory-sized region. */
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
 
 		pdirindex = sva >> PDRSHIFT;
 		ptpaddr = pmap->pm_pdir[pdirindex];
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
 			/* The large-page entry is modified in place. */
 			if ((prot & VM_PROT_WRITE) == 0)
 				pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 #ifdef PAE
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				pmap->pm_pdir[pdirindex] |= pg_nx;
 #endif
 			anychanged = 1;
 			continue;
 		}
 
 		/* Do not scan past the end of the requested range. */
 		if (pdnxt > eva)
 			pdnxt = eva;
 
 		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 		    sva += PAGE_SIZE) {
 			vm_page_t m;
 
 retry:
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits in
 			 * size, PG_RW, PG_A, and PG_M are among the least
 			 * significant 32 bits.
 			 */
 			obits = pbits = *pte;
 			if ((pbits & PG_V) == 0)
 				continue;
 
 			if ((prot & VM_PROT_WRITE) == 0) {
 				/*
 				 * A managed, writable, modified mapping:
 				 * record the dirty state on the vm_page
 				 * before PG_M is cleared.
 				 */
 				if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
 				    (PG_MANAGED | PG_M | PG_RW)) {
 					m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
 					    PG_FRAME);
 					vm_page_dirty(m);
 				}
 				pbits &= ~(PG_RW | PG_M);
 			}
 #ifdef PAE
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				pbits |= pg_nx;
 #endif
 
 			if (pbits != obits) {
 				/*
 				 * Write the new entry, then read it back.
 				 * If the stored value differs from what
 				 * was written, start over for this pte.
 				 * NOTE(review): presumably this guards
 				 * against the entry changing underneath
 				 * us -- confirm.
 				 */
 				obits = *pte;
 				PT_SET_VA_MA(pte, pbits, TRUE);
 				if (*pte != pbits)
 					goto retry;
 				/* Global mappings are flushed individually. */
 				if (obits & PG_G)
 					pmap_invalidate_page(pmap, sva);
 				else
 					anychanged = 1;
 			}
 		}
 	}
 	PT_UPDATES_FLUSH();
 	if (*PMAP1)
 		PT_SET_VA_MA(PMAP1, 0, TRUE);
 	if (anychanged)
 		pmap_invalidate_all(pmap);
 	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  *
  *	Three cases are distinguished after reading the existing
  *	PTE: (1) the same physical page is already mapped at 'va',
  *	so only wiring/protection may change; (2) a different page
  *	is mapped, whose pv entry is recycled or freed; (3) nothing
  *	is mapped.  All three converge at "validate", where the new
  *	PTE is built and written if it differs from the old one.
  */
 void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
     vm_prot_t prot, boolean_t wired)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	pt_entry_t newpte, origpte;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte, om;
 	boolean_t invlva;
 
 	CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d",
 	    pmap, va, access, VM_PAGE_TO_MACH(m), prot, wired);
 	va = trunc_page(va);
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
 	    va));
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
 	    (m->oflags & VPO_BUSY) != 0,
 	    ("pmap_enter: page %p is not busy", m));
 
 	mpte = NULL;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	sched_pin();
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		mpte = pmap_allocpte(pmap, va, M_WAITOK);
 	}
 
 	pde = pmap_pde(pmap, va);
 	if ((*pde & PG_PS) != 0)
 		panic("pmap_enter: attempted pmap_enter on 4MB page");
 	pte = pmap_pte_quick(pmap, va);
 
 	/*
 	 * Page Directory table entry not valid, we need a new PT page
 	 */
 	if (pte == NULL) {
 		panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
 			(uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
 	}
 
 	pa = VM_PAGE_TO_PHYS(m);
 	om = NULL;
 	opa = origpte = 0;
 
 #if 0
 	KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
 		pte, *pte));
 #endif
 	/*
 	 * The PTE in memory holds a machine address; convert it so that
 	 * it can be compared against 'pa'.
 	 */
 	origpte = *pte;
 	if (origpte)
 		origpte = xpmap_mtop(origpte);
 	opa = origpte & PG_FRAME;
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (origpte && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && ((origpte & PG_W) == 0))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && (origpte & PG_W))
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove extra pte reference
 		 */
 		if (mpte)
 			mpte->wire_count--;
 
 		if (origpte & PG_MANAGED) {
 			om = m;
 			pa |= PG_MANAGED;
 		}
 		goto validate;
 	} 
 
 	pv = NULL;
 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		if (origpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (origpte & PG_MANAGED) {
 			/* Detach the old page's pv entry; it may be reused below. */
 			om = PHYS_TO_VM_PAGE(opa);
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 		} else if (va < VM_MAXUSER_ADDRESS) 
 			printf("va=0x%x is unmanaged :-( \n", va);
 			
 		/*
 		 * The slot was already occupied, so the reference taken
 		 * by pmap_allocpte() above is surplus.
 		 */
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			KASSERT(mpte->wire_count > 0,
 			    ("pmap_enter: missing reference to page table page,"
 			     " va: 0x%x", va));
 		}
 	} else
 		pmap->pm_stats.resident_count++;
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
 		    ("pmap_enter: managed mapping within the clean submap"));
 		if (pv == NULL)
 			pv = get_pv_entry(pmap, FALSE);
 		pv->pv_va = va;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 		pa |= PG_MANAGED;
 	} else if (pv != NULL)
 		free_pv_entry(pmap, pv);
 
 	/*
 	 * Increment counters
 	 */
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	newpte = (pt_entry_t)(pa | PG_V);
 	if ((prot & VM_PROT_WRITE) != 0) {
 		newpte |= PG_RW;
 		if ((newpte & PG_MANAGED) != 0)
 			vm_page_flag_set(m, PG_WRITEABLE);
 	}
 #ifdef PAE
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		newpte |= pg_nx;
 #endif
 	if (wired)
 		newpte |= PG_W;
 	if (va < VM_MAXUSER_ADDRESS)
 		newpte |= PG_U;
 	if (pmap == kernel_pmap)
 		newpte |= pgeflag;
 
 	critical_enter();
 	/*
 	 * if the mapping or permission bits are different, we need
 	 * to update the pte.
 	 */
 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 		if (origpte) {
 			/*
 			 * Replacing a live entry.  Re-read the raw PTE,
 			 * install the new one, and decide from the old
 			 * accessed/modified bits whether the TLB entry
 			 * for 'va' has to be invalidated.
 			 */
 			invlva = FALSE;
 			origpte = *pte;
 			PT_SET_VA(pte, newpte | PG_A, FALSE);
 			if (origpte & PG_A) {
 				if (origpte & PG_MANAGED)
 					vm_page_flag_set(om, PG_REFERENCED);
 				if (opa != VM_PAGE_TO_PHYS(m))
 					invlva = TRUE;
 #ifdef PAE
 				if ((origpte & PG_NX) == 0 &&
 				    (newpte & PG_NX) != 0)
 					invlva = TRUE;
 #endif
 			}
 			if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 				if ((origpte & PG_MANAGED) != 0)
 					vm_page_dirty(om);
 				if ((prot & VM_PROT_WRITE) == 0)
 					invlva = TRUE;
 			}
 			if ((origpte & PG_MANAGED) != 0 &&
 			    TAILQ_EMPTY(&om->md.pv_list))
 				vm_page_flag_clear(om, PG_WRITEABLE);
 			if (invlva)
 				pmap_invalidate_page(pmap, va);
 		} else{
 			/* Previously empty slot: no TLB entry can exist. */
 			PT_SET_VA(pte, newpte | PG_A, FALSE);
 		}
 		
 	}
 	/* The PT_SET_VA() calls above were not synchronous; flush them now. */
 	PT_UPDATES_FLUSH();
 	critical_exit();
 	if (*PMAP1)
 		PT_SET_VA_MA(PMAP1, 0, TRUE);
 	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Map a run of resident pages that all belong to one object.  The run
  * starts at m_start, which is mapped at virtual address 'start'; every
  * later page is mapped at 'start' plus its offset from m_start within
  * the object.  Pages are taken in list order until one would land at or
  * beyond 'end'.  Virtual pages for which the object has no resident
  * page are simply left unmapped.
  *
  * The page table updates are queued as hypervisor multicalls and
  * submitted in batches of at most 16.
  */
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	multicall_entry_t batch[16];
 	multicall_entry_t *next;
 	vm_page_t pg, ptpg;
 	vm_pindex_t npages, off;
 	int nqueued, rc;
 
 	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
 	npages = atop(end - start);
 
 	next = batch;
 	nqueued = 0;
 	ptpg = NULL;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	for (pg = m_start;
 	    pg != NULL && (off = pg->pindex - m_start->pindex) < npages;
 	    pg = TAILQ_NEXT(pg, listq)) {
 		ptpg = pmap_enter_quick_locked(&next, &nqueued, pmap,
 		    start + ptoa(off), pg, prot, ptpg);
 		if (nqueued != 16)
 			continue;
 
 		/* Batch is full: submit it and start a new one. */
 		rc = HYPERVISOR_multicall(batch, nqueued);
 		KASSERT(rc == 0, ("bad multicall %d", rc));
 		next = batch;
 		nqueued = 0;
 	}
 	/* Submit whatever is left in the final, partial batch. */
 	if (nqueued != 0) {
 		rc = HYPERVISOR_multicall(batch, nqueued);
 		KASSERT(rc == 0, ("bad multicall %d", rc));
 	}
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * but is *MUCH* faster than pmap_enter...
  */
 
 /*
  * Enter a single mapping of page m at va using the quick path.
  * Takes the page queues lock and the pmap lock, lets
  * pmap_enter_quick_locked() queue at most one multicall entry, and
  * submits it if one was queued.
  */
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	multicall_entry_t mcl, *mclp;
 	int count = 0;
 	int error;
 
 	mclp = &mcl;
 
 	CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
 	    pmap, va, m, prot);
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL);
 	if (count) {
 		/*
 		 * Check the hypercall result the same way the batched
 		 * callers do instead of silently dropping it.
 		 */
 		error = HYPERVISOR_multicall(&mcl, count);
 		KASSERT(error == 0, ("bad multicall %d", error));
 	}
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
 #ifdef notyet
 /*
  * Enter up to "count" quick mappings described by the parallel arrays
  * addrs/pages/prots, skipping addresses that
  * pmap_is_prefaultable_locked() rejects.  Multicall entries are
  * submitted in batches of 16.
  */
 void
 pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count)
 {
 	int i, error, index = 0;
 	multicall_entry_t mcl[16];
 	multicall_entry_t *mclp = mcl;
 
 	/*
 	 * pmap_enter_quick_locked() asserts that both the page queues
 	 * lock and the pmap lock are held, so take both here, in the
 	 * same order as pmap_enter_quick() and pmap_enter_object().
 	 */
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	for (i = 0; i < count; i++, addrs++, pages++, prots++) {
 		if (!pmap_is_prefaultable_locked(pmap, *addrs))
 			continue;
 
 		(void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL);
 		if (index == 16) {
 			/* Batch full: submit and restart at the first slot. */
 			error = HYPERVISOR_multicall(mcl, index);
 			mclp = mcl;
 			index = 0;
 			KASSERT(error == 0, ("bad multicall %d", error));
 		}
 	}
 	/* Submit any partially filled batch. */
 	if (index) {
 		error = HYPERVISOR_multicall(mcl, index);
 		KASSERT(error == 0, ("bad multicall %d", error));
 	}
 
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 #endif
 
 /*
  * Queue a read-only user mapping of page m at va as one
  * __HYPERVISOR_update_va_mapping multicall entry.
  *
  * mclpp - in/out cursor into the caller's multicall array; advanced by
  *         one entry when a mapping is queued.
  * count - in/out number of queued entries; incremented likewise.
  * mpte  - page table page used by the previous call, or NULL.
  *
  * Returns the page table page that now backs va, or NULL when va is
  * not below VM_MAXUSER_ADDRESS, when va is already mapped, or when a
  * page table page or pv entry could not be obtained.  Nothing is
  * queued in the latter three cases.
  *
  * The caller must hold the page queues lock and the pmap lock, and
  * must submit the queued entries with HYPERVISOR_multicall().
  */
 static vm_page_t
 pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte)
 {
 	pt_entry_t *pte;
 	vm_paddr_t pa;
 	vm_page_t free;
 	multicall_entry_t *mcl = *mclpp;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		unsigned ptepindex;
 		pd_entry_t ptema;
 
 		/*
 		 * Calculate pagetable page index
 		 */
 		ptepindex = va >> PDRSHIFT;
 		if (mpte && (mpte->pindex == ptepindex)) {
 			/* Same page table page as the previous call. */
 			mpte->wire_count++;
 		} else {
 			/*
 			 * Get the page directory entry
 			 */
 			ptema = pmap->pm_pdir[ptepindex];
 
 			/*
 			 * If the page table page is mapped, we just increment
 			 * the hold count, and activate it.
 			 */
 			if (ptema & PG_V) {
 				if (ptema & PG_PS)
 					panic("pmap_enter_quick: unexpected mapping into 4MB page");
 				mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
 				mpte->wire_count++;
 			} else {
 				mpte = _pmap_allocpte(pmap, ptepindex,
 				    M_NOWAIT);
 				if (mpte == NULL)
 					return (mpte);
 			}
 		}
 	} else {
 		mpte = NULL;
 	}
 
 	/*
 	 * This call to vtopte makes the assumption that we are
 	 * entering the page into the current pmap.  In order to support
 	 * quick entry into any pmap, one would likely use pmap_pte_quick.
 	 * But that isn't as quick as vtopte.
 	 */
 	KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
 	pte = vtopte(va);
 	if (*pte & PG_V) {
 		/* Already mapped: drop the reference taken above. */
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			mpte = NULL;
 		}
 		return (mpte);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m)) {
 		if (mpte != NULL) {
 			free = NULL;
 			if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
 				pmap_invalidate_page(pmap, va);
 				pmap_free_zero_pages(free);
 			}
 
 			mpte = NULL;
 		}
 		return (mpte);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	pa = VM_PAGE_TO_PHYS(m);
 #ifdef PAE
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		pa |= pg_nx;
 #endif
 
 #if 0
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 		pte_store(pte, pa | PG_V | PG_U);
 	else
 		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 #else
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 		pa = 	xpmap_ptom(pa | PG_V | PG_U);
 	else
 		pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);
 
 	/*
 	 * The new PTE value is passed as two 32-bit halves.  Widen to
 	 * 64 bits before shifting so that the high half is well defined
 	 * (zero) even when vm_paddr_t is only 32 bits wide; shifting a
 	 * 32-bit value by 32 is undefined.
 	 */
 	mcl->op = __HYPERVISOR_update_va_mapping;
 	mcl->args[0] = va;
 	mcl->args[1] = (uint32_t)(pa & 0xffffffff);
 	mcl->args[2] = (uint32_t)((uint64_t)pa >> 32);
 	mcl->args[3] = 0;
 	*mclpp = mcl + 1;
 	*count = *count + 1;
 #endif	
 	return mpte;
 }
 
 /*
  * Make a temporary mapping for a physical address.  This is only intended
  * to be used for panic dumps.
  */
 void *
 pmap_kenter_temporary(vm_paddr_t pa, int i)
 {
 	vm_paddr_t frame;
 	vm_offset_t slot;
 
 	/* Translate to a machine address and keep only the frame bits. */
 	frame = xpmap_ptom(pa);
 	frame &= ~PAGE_MASK;
 
 	/* The i-th page of the crash dump window receives the mapping. */
 	slot = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 	PT_SET_MA(slot, frame | PG_V | pgeflag);
 	invlpg(slot);
 
 	/* The base of the window is returned, not the i-th slot. */
 	return ((void *)crashdumpmap);
 }
 
 /*
  * This code maps large physical mmap regions into the
  * processor address space.  Note that some shortcuts
  * are taken, but the code works.
  */
 /*
  * Map a device or scatter/gather object with 2/4MB page directory
  * entries when possible.  Nothing is mapped unless pseflag is set,
  * addr and size are both multiples of NBPDR, the object can be
  * populated, the first page is physically aligned to NBPDR, and all
  * pages are physically contiguous with the same pat_mode.
  * The caller must hold the object lock.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
 		    vm_object_t object, vm_pindex_t pindex,
 		    vm_size_t size)
 {
 	pd_entry_t *pde;
 	vm_paddr_t pa, ptepa;
 	vm_page_t p;
 	int pat_mode;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 	if (pseflag && 
 	    (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
 		if (!vm_object_populate(object, pindex, pindex + atop(size)))
 			return;
 		p = vm_page_lookup(object, pindex);
 		KASSERT(p->valid == VM_PAGE_BITS_ALL,
 		    ("pmap_object_init_pt: invalid page %p", p));
 		/* Every later page must match the first page's PAT mode. */
 		pat_mode = p->md.pat_mode;
 		/*
 		 * Abort the mapping if the first page is not physically
 		 * aligned to a 2/4MB page boundary.
 		 */
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
 		/*
 		 * Skip the first page.  Abort the mapping if the rest of
 		 * the pages are not physically contiguous or have differing
 		 * memory attributes.
 		 */
 		p = TAILQ_NEXT(p, listq);
 		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
 		    pa += PAGE_SIZE) {
 			KASSERT(p->valid == VM_PAGE_BITS_ALL,
 			    ("pmap_object_init_pt: invalid page %p", p));
 			if (pa != VM_PAGE_TO_PHYS(p) ||
 			    pat_mode != p->md.pat_mode)
 				return;
 			p = TAILQ_NEXT(p, listq);
 		}
 		/* Map using 2/4MB pages. */
 		PMAP_LOCK(pmap);
 		/* pa carries the cache bits in addition to the frame address. */
 		for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
 		    size; pa += NBPDR) {
 			pde = pmap_pde(pmap, addr);
 			if (*pde == 0) {
 				pde_store(pde, pa | PG_PS | PG_M | PG_A |
 				    PG_U | PG_RW | PG_V);
 				/* One superpage accounts for NBPDR / PAGE_SIZE pages. */
 				pmap->pm_stats.resident_count += NBPDR /
 				    PAGE_SIZE;
 				pmap_pde_mappings++;
 			}
 			/* Else continue on if the PDE is already valid. */
 			addr += NBPDR;
 		}
 		PMAP_UNLOCK(pmap);
 	}
 }
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Change the wiring attribute for a map/virtual-address
  *			pair.
  *	In/out conditions:
  *			The mapping must already exist in the pmap.
  */
 void
 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 {
 	pt_entry_t *ptep;
 
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	ptep = pmap_pte(pmap, va);
 
 	/*
 	 * Touch the entry and the wired counter only when the requested
 	 * state differs from the current one.
 	 */
 	if (wired) {
 		if (!pmap_pte_w(ptep)) {
 			PT_SET_VA_MA((ptep), *(ptep) | PG_W, TRUE);
 			pmap->pm_stats.wired_count++;
 		}
 	} else if (pmap_pte_w(ptep)) {
 		PT_SET_VA_MA((ptep), *(ptep) & ~PG_W, TRUE);
 		pmap->pm_stats.wired_count--;
 	}
 
 	/*
 	 * PG_W is a software-only attribute, so no TLB invalidation is
 	 * required after changing it.
 	 */
 	pmap_pte_release(ptep);
 	PMAP_UNLOCK(pmap);
 	vm_page_unlock_queues();
 }
 
 
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  */
 
 /*
  * Copy managed mappings in [src_addr, src_addr + len) from src_pmap
  * into dst_pmap.  Nothing is done unless dst_addr equals src_addr and
  * src_pmap is the current pmap.  Page table page allocation uses
  * M_NOWAIT, so the copy may stop early.
  */
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 	  vm_offset_t src_addr)
 {
 	vm_page_t   free;
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t pdnxt;
 
 	if (dst_addr != src_addr)
 		return;
 
 	if (!pmap_is_current(src_pmap)) {
 		CTR2(KTR_PMAP,
 		    "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
 		    (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));
 		
 		return;
 	}
 	CTR5(KTR_PMAP, "pmap_copy:  dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
 	    dst_pmap, src_pmap, dst_addr, len, src_addr);
 	
 	vm_page_lock_queues();
 	/* Lock the two pmaps in address order. */
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	sched_pin();
 	/* Outer loop: one page directory entry (NBPDR of address space) per pass. */
 	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 		pt_entry_t *src_pte, *dst_pte;
 		vm_page_t dstmpte, srcmpte;
 		pd_entry_t srcptepaddr;
 		unsigned ptepindex;
 
 		KASSERT(addr < UPT_MIN_ADDRESS,
 		    ("pmap_copy: invalid to pmap_copy page tables"));
 
 		pdnxt = (addr + NBPDR) & ~PDRMASK;
 		ptepindex = addr >> PDRSHIFT;
 
 		srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
 		if (srcptepaddr == 0)
 			continue;
 			
 		/* Superpage: copy the PDE itself (minus PG_W) if the slot is empty. */
 		if (srcptepaddr & PG_PS) {
 			if (dst_pmap->pm_pdir[ptepindex] == 0) {
 				PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
 				dst_pmap->pm_stats.resident_count +=
 				    NBPDR / PAGE_SIZE;
 			}
 			continue;
 		}
 
 		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
 		KASSERT(srcmpte->wire_count > 0,
 		    ("pmap_copy: source page table page is unused"));
 
 		if (pdnxt > end_addr)
 			pdnxt = end_addr;
 
 		src_pte = vtopte(addr);
 		/* Inner loop: one 4K page table entry per pass. */
 		while (addr < pdnxt) {
 			pt_entry_t ptetemp;
 			ptetemp = *src_pte;
 			/*
 			 * we only virtual copy managed pages
 			 */
 			if ((ptetemp & PG_MANAGED) != 0) {
 				dstmpte = pmap_allocpte(dst_pmap, addr,
 				    M_NOWAIT);
 				if (dstmpte == NULL)
 					break;
 				dst_pte = pmap_pte_quick(dst_pmap, addr);
 				if (*dst_pte == 0 &&
 				    pmap_try_insert_pv_entry(dst_pmap, addr,
 				    PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
 					/*
 					 * Clear the wired, modified, and
 					 * accessed (referenced) bits
 					 * during the copy.
 					 */
 					KASSERT(ptetemp != 0, ("src_pte not set"));
 					PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
 					KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
 					    ("no pmap copy expected: 0x%jx saw: 0x%jx",
 						ptetemp &  ~(PG_W | PG_M | PG_A), *dst_pte));
 					dst_pmap->pm_stats.resident_count++;
 	 			} else {
 					/*
 					 * Slot already in use or no pv entry
 					 * available: drop the reference that
 					 * pmap_allocpte() took.
 					 */
 					free = NULL;
 					if (pmap_unwire_pte_hold(dst_pmap,
 					    dstmpte, &free)) {
 						pmap_invalidate_page(dst_pmap,
 						    addr);
 						pmap_free_zero_pages(free);
 					}
 				}
 				/*
 				 * Stop scanning this page table once the
 				 * destination page table page has at least as
 				 * many references as the source one.
 				 */
 				if (dstmpte->wire_count >= srcmpte->wire_count)
 					break;
 			}
 			addr += PAGE_SIZE;
 			src_pte++;
 		}
 	}
 	PT_UPDATES_FLUSH();
 	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }	
 
 /*
  * Zero one page at the given kernel virtual address, choosing the
  * SSE2 or i686 routine when the kernel is configured for them and the
  * CPU qualifies, and plain bzero() otherwise.
  */
 static __inline void
 pagezero(void *page)
 {
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686) {
 #if defined(CPU_ENABLE_SSE)
 		if (cpu_feature & CPUID_SSE2) {
 			sse2_pagezero(page);
 			return;
 		}
 #endif
 		i686_pagezero(page);
 		return;
 	}
 #endif
 	bzero(page, PAGE_SIZE);
 }
 
 /*
  *	pmap_zero_page zeros the specified hardware page by mapping 
  *	the page into KVM and using bzero to clear its contents.
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 	struct sysmaps *sm = &sysmaps_pcpu[PCPU_GET(cpuid)];
 
 	mtx_lock(&sm->lock);
 	/* The per-CPU CMAP2 slot must be free before it is borrowed. */
 	if (*sm->CMAP2 != 0)
 		panic("pmap_zero_page: CMAP2 busy");
 	sched_pin();
 	/* Map the page writable at CADDR2, zero it, then unmap it. */
 	PT_SET_MA(sm->CADDR2,
 	    VM_PAGE_TO_MACH(m) | PG_V | PG_RW | PG_A | PG_M);
 	pagezero(sm->CADDR2);
 	PT_SET_MA(sm->CADDR2, 0);
 	sched_unpin();
 	mtx_unlock(&sm->lock);
 }
 
 /*
  *	pmap_zero_page_area zeros the specified hardware page by mapping 
  *	the page into KVM and using bzero to clear its contents.
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	struct sysmaps *sysmaps;
 
 	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 	mtx_lock(&sysmaps->lock);
 	/* Name this function in the panic so the failing caller is clear. */
 	if (*sysmaps->CMAP2)
 		panic("pmap_zero_page_area: CMAP2 busy");
 	sched_pin();
 	/* Temporarily map the page writable at this CPU's CADDR2. */
 	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
 
 	/* A whole-page request can use the optimized page zeroing routine. */
 	if (off == 0 && size == PAGE_SIZE) 
 		pagezero(sysmaps->CADDR2);
 	else
 		bzero((char *)sysmaps->CADDR2 + off, size);
 	PT_SET_MA(sysmaps->CADDR2, 0);
 	sched_unpin();
 	mtx_unlock(&sysmaps->lock);
 }
 
 /*
  *	pmap_zero_page_idle zeros the specified hardware page by mapping 
  *	the page into KVM and using bzero to clear its contents.  This
  *	is intended to be called from the vm_pagezero process only and
  *	outside of Giant.
  */
 void
 pmap_zero_page_idle(vm_page_t m)
 {
 
 	/* Name this function in the panic so the failing caller is clear. */
 	if (*CMAP3)
 		panic("pmap_zero_page_idle: CMAP3 busy");
 	sched_pin();
 	/* Map the page writable at CADDR3, zero it, then drop the mapping. */
 	PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
 	pagezero(CADDR3);
 	PT_SET_MA(CADDR3, 0);
 	sched_unpin();
 }
 
 /*
  *	pmap_copy_page copies the specified (machine independent)
  *	page by mapping the page into virtual memory and using
  *	bcopy to copy the page, one machine dependent page at a
  *	time.
  */
 void
 pmap_copy_page(vm_page_t src, vm_page_t dst)
 {
 	struct sysmaps *sm = &sysmaps_pcpu[PCPU_GET(cpuid)];
 
 	mtx_lock(&sm->lock);
 	/* Both per-CPU copy slots must be free before they are borrowed. */
 	if (*sm->CMAP1 != 0)
 		panic("pmap_copy_page: CMAP1 busy");
 	if (*sm->CMAP2 != 0)
 		panic("pmap_copy_page: CMAP2 busy");
 	sched_pin();
 	/* Source is mapped without PG_RW at CADDR1, destination writable at CADDR2. */
 	PT_SET_MA(sm->CADDR1, VM_PAGE_TO_MACH(src) | PG_V | PG_A);
 	PT_SET_MA(sm->CADDR2,
 	    VM_PAGE_TO_MACH(dst) | PG_V | PG_RW | PG_A | PG_M);
 	bcopy(sm->CADDR1, sm->CADDR2, PAGE_SIZE);
 	/* Tear both temporary mappings down again. */
 	PT_SET_MA(sm->CADDR1, 0);
 	PT_SET_MA(sm->CADDR2, 0);
 	sched_unpin();
 	mtx_unlock(&sm->lock);
 }
 
 /*
  * Returns true if the pmap's pv is one of the first
  * 16 pvs linked to from this page.  This count may
  * be changed upwards or downwards in the future; it
  * is only necessary that true be returned for a small
  * subset of pmaps for proper page aging.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	pv_entry_t pv;
 	boolean_t found = FALSE;
 	int examined = 0;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	vm_page_lock_queues();
 	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL;
 	    pv = TAILQ_NEXT(pv, pv_list)) {
 		if (PV_PMAP(pv) == pmap) {
 			found = TRUE;
 			break;
 		}
 		/* Give up after inspecting 16 entries without a match. */
 		if (++examined >= 16)
 			break;
 	}
 	vm_page_unlock_queues();
 	return (found);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Return the number of managed mappings to the given physical page
  *	that are wired.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	pv_entry_t pv;
 	pt_entry_t *ptep;
 	pmap_t owner;
 	int nwired = 0;
 
 	/* Fictitious pages are reported as having no wired mappings. */
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return (nwired);
 
 	vm_page_lock_queues();
 	sched_pin();
 	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL;
 	    pv = TAILQ_NEXT(pv, pv_list)) {
 		owner = PV_PMAP(pv);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, pv->pv_va);
 		/* Count the mapping when its PTE carries PG_W. */
 		if ((*ptep & PG_W) != 0)
 			nwired++;
 		PMAP_UNLOCK(owner);
 	}
 	sched_unpin();
 	vm_page_unlock_queues();
 	return (nwired);
 }
 
 /*
  * Returns TRUE if the given page is mapped individually or as part of
  * a 4mpage.  Otherwise, returns FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
 	boolean_t mapped;
 
 	/* Fictitious and unmanaged pages are always reported as unmapped. */
 	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
 		return (FALSE);
 
 	vm_page_lock_queues();
 	/* Mapped unless both the page's and the pa_to_pvh() pv lists are empty. */
 	mapped = !(TAILQ_EMPTY(&m->md.pv_list) &&
 	    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
 	vm_page_unlock_queues();
 	return (mapped);
 }
 
 /*
  * Remove all pages from specified address space
  * this aids process exit speeds.  Also, this code
  * is special cased for current process only, but
  * can have the more generic (and slightly slower)
  * mode enabled.  This is much faster than pmap_remove
  * in the case of running down an entire address space.
  */
 /*
  * Tear down every non-wired managed mapping recorded in the pmap's pv
  * chunks.  Only the pmap of the current process is accepted; any other
  * pmap yields a warning and no work.  Page table pages released along
  * the way are collected on "free" and released after the locks are
  * dropped.
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pt_entry_t *pte, tpte;
 	vm_page_t m, free = NULL;
 	pv_entry_t pv;
 	struct pv_chunk *pc, *npc;
 	int field, idx;
 	int32_t bit;
 	uint32_t inuse, bitmask;
 	int allfree;
 
 	CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
 	
 	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
 	vm_page_lock_queues();
 	KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
 	PMAP_LOCK(pmap);
 	sched_pin();
 	/* The _SAFE iterator is needed because fully freed chunks are unlinked below. */
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		allfree = 1;
 		for (field = 0; field < _NPCM; field++) {
 			/* A clear bit in pc_map marks an allocated pv entry. */
 			inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 			while (inuse != 0) {
 				/* Pick the lowest allocated entry of this word. */
 				bit = bsfl(inuse);
 				bitmask = 1UL << bit;
 				idx = field * 32 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				pte = vtopte(pv->pv_va);
 				/* Work on the physical form of the machine PTE. */
 				tpte = *pte ? xpmap_mtop(*pte) : 0;
 
 				if (tpte == 0) {
 					printf(
 					    "TPTE at %p  IS ZERO @ VA %08x\n",
 					    pte, pv->pv_va);
 					panic("bad pte");
 				}
 
 /*
  * We cannot remove wired pages from a process' mapping at this time
  */
 				if (tpte & PG_W) {
 					/* The chunk keeps at least this entry. */
 					allfree = 0;
 					continue;
 				}
 
 				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 				KASSERT(m->phys_addr == (tpte & PG_FRAME),
 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 				    m, (uintmax_t)m->phys_addr,
 				    (uintmax_t)tpte));
 
 				KASSERT(m < &vm_page_array[vm_page_array_size],
 					("pmap_remove_pages: bad tpte %#jx",
 					(uintmax_t)tpte));
 
 
 				PT_CLEAR_VA(pte, FALSE);
 				
 				/*
 				 * Update the vm_page_t clean/reference bits.
 				 */
 				if (tpte & PG_M)
 					vm_page_dirty(m);
 
 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 				if (TAILQ_EMPTY(&m->md.pv_list))
 					vm_page_flag_clear(m, PG_WRITEABLE);
 
 				pmap_unuse_pt(pmap, pv->pv_va, &free);
 
 				/* Mark free */
 				PV_STAT(pv_entry_frees++);
 				PV_STAT(pv_entry_spare++);
 				pv_entry_count--;
 				pc->pc_map[field] |= bitmask;
 				pmap->pm_stats.resident_count--;			
 			}
 		}
 		PT_UPDATES_FLUSH();
 		if (allfree) {
 			/* No entry survived: unlink the chunk and free its page. */
 			PV_STAT(pv_entry_spare -= _NPCPV);
 			PV_STAT(pc_chunk_count--);
 			PV_STAT(pc_chunk_frees++);
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 			pmap_qremove((vm_offset_t)pc, 1);
 			vm_page_unwire(m, 0);
 			vm_page_free(m);
 			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 		}
 	}
 	PT_UPDATES_FLUSH();
 	/* Clear the PADDR1 mapping if PMAP1 is still populated. */
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
 
 	sched_unpin();
 	pmap_invalidate_all(pmap);
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 	pmap_free_zero_pages(free);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	pv_entry_t pv;
 	pt_entry_t *ptep;
 	pmap_t owner;
 	boolean_t dirty = FALSE;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
 	 * With the object locked and the page not VPO_BUSY, PG_WRITEABLE
 	 * cannot become set concurrently.  A clear PG_WRITEABLE therefore
 	 * means that no PTE can have PG_M set, and the scan is skipped.
 	 */
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	if ((m->oflags & VPO_BUSY) == 0 &&
 	    (m->flags & PG_WRITEABLE) == 0)
 		return (dirty);
 
 	vm_page_lock_queues();
 	sched_pin();
 	/* Scan the mappings and stop at the first PTE with PG_M set. */
 	pv = TAILQ_FIRST(&m->md.pv_list);
 	while (pv != NULL) {
 		owner = PV_PMAP(pv);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, pv->pv_va);
 		dirty = (*ptep & PG_M) != 0;
 		PMAP_UNLOCK(owner);
 		if (dirty)
 			break;
 		pv = TAILQ_NEXT(pv, pv_list);
 	}
 	/* Clear the PADDR1 mapping if PMAP1 is still populated. */
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
 	sched_unpin();
 	vm_page_unlock_queues();
 	return (dirty);
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.
  */
 /*
  * Lock-held variant of pmap_is_prefaultable().  As written it always
  * returns FALSE: the unconditional return below makes the page table
  * probe that follows unreachable.
  */
 static boolean_t
 pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
 {
 	pt_entry_t *pte;
 	boolean_t rv = FALSE;
 
 	/* NOTE(review): looks like prefault is deliberately disabled here - confirm. */
 	return (rv);
 	
 	/* Unreachable: would report TRUE for an empty PTE under a valid PDE. */
 	if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
 		pte = vtopte(addr);
 		rv = (*pte == 0);
 	}
 	return (rv);
 }
 
 /*
  * Locking wrapper: take the pmap lock, ask
  * pmap_is_prefaultable_locked(), and hand back its answer.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	boolean_t eligible;
 
 	PMAP_LOCK(pmap);
 	eligible = pmap_is_prefaultable_locked(pmap, addr);
 	PMAP_UNLOCK(pmap);
 
 	return (eligible);
 }
 
 /*
  * Return TRUE if any mapping of the managed page m has both PG_A and
  * PG_V set in its page table entry, FALSE otherwise.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 	pv_entry_t pv;
 	pt_entry_t *ptep;
 	pmap_t owner;
 	boolean_t referenced = FALSE;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_is_referenced: page %p is not managed", m));
 
 	vm_page_lock_queues();
 	sched_pin();
 	/* Scan the mappings and stop at the first valid, accessed PTE. */
 	pv = TAILQ_FIRST(&m->md.pv_list);
 	while (pv != NULL) {
 		owner = PV_PMAP(pv);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, pv->pv_va);
 		referenced = (*ptep & (PG_A | PG_V)) == (PG_A | PG_V);
 		PMAP_UNLOCK(owner);
 		if (referenced)
 			break;
 		pv = TAILQ_NEXT(pv, pv_list);
 	}
 	/* Clear the PADDR1 mapping if PMAP1 is still populated. */
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
 	sched_unpin();
 	vm_page_unlock_queues();
 	return (referenced);
 }
 
 /*
  * For each page of the len-byte range starting at va, store the PTE
  * with PG_RW and PG_M removed and then apply PMAP_MARK_PRIV() to the
  * resulting entry.
  */
 void
 pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
 {
 	pt_entry_t *ptep;
 	int left;
 
 	for (left = round_page(len) >> PAGE_SHIFT; left > 0;
 	    left--, va += PAGE_SIZE) {
 		ptep = pmap_pte(pmap, (vm_offset_t)va);
 		pte_store(ptep, xpmap_mtop(*ptep & ~(PG_RW|PG_M)));
 		/* The entry is re-read after the store, as before. */
 		PMAP_MARK_PRIV(xpmap_mtop(*ptep));
 		pmap_pte_release(ptep);
 	}
 }
 
 /*
  * For each page of the len-byte range starting at va, apply
  * PMAP_MARK_UNPRIV() to the current entry and then store the PTE with
  * PG_RW and PG_M added.
  */
 void
 pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
 {
 	pt_entry_t *ptep;
 	int left;
 
 	for (left = round_page(len) >> PAGE_SHIFT; left > 0;
 	    left--, va += PAGE_SIZE) {
 		ptep = pmap_pte(pmap, (vm_offset_t)va);
 		/* Unmark first, then make the entry writable. */
 		PMAP_MARK_UNPRIV(xpmap_mtop(*ptep));
 		pte_store(ptep, xpmap_mtop(*ptep) | (PG_RW|PG_M));
 		pmap_pte_release(ptep);
 	}
 }
 
 /*
  * Clear the write and modified bits in each of the given page's mappings.
  */
 /*
  * Remove PG_RW and PG_M from every mapping of the managed page m,
  * transferring a set PG_M to the page with vm_page_dirty(), and then
  * clear the page's PG_WRITEABLE flag.  The caller must hold the object
  * lock.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt_entry_t oldpte, *pte;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
 	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
 	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
 	 * is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	if ((m->oflags & VPO_BUSY) == 0 &&
 	    (m->flags & PG_WRITEABLE) == 0)
 		return;
 	vm_page_lock_queues();
 	sched_pin();
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte_quick(pmap, pv->pv_va);
 retry:
 		/* Snapshot the entry; taken again on every retry. */
 		oldpte = *pte;
 		if ((oldpte & PG_RW) != 0) {
 			vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);
 			
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits
 			 * in size, PG_RW and PG_M are among the least
 			 * significant 32 bits.
 			 */
 			PT_SET_VA_MA(pte, newpte, TRUE);
 			/* Start over if the entry does not read back as written. */
 			if (*pte != newpte)
 				goto retry;
 			
 			if ((oldpte & PG_M) != 0)
 				vm_page_dirty(m);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
 	PT_UPDATES_FLUSH();
 	/* Clear the PADDR1 mapping if PMAP1 is still populated. */
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
 	sched_unpin();
 	vm_page_unlock_queues();
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	XXX: The exact number of bits to check and clear is a matter that
  *	should be tested and standardized at some point in the future for
  *	optimal aging of shared pages.
  */
 /*
  * Count and clear PG_A in the mappings of the managed page m.  Each pv
  * entry visited is moved to the tail of the page's pv list.  The scan
  * ends after one full pass over the list or as soon as the count
  * exceeds 4.
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	pv_entry_t pv, pvf, pvn;
 	pmap_t pmap;
 	pt_entry_t *pte;
 	int rtval = 0;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
 	vm_page_lock_queues();
 	sched_pin();
 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		/* Remember the first entry so the rotation can detect a full pass. */
 		pvf = pv;
 		do {
 			/* Fetch the successor before pv is moved to the tail. */
 			pvn = TAILQ_NEXT(pv, pv_list);
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 			pmap = PV_PMAP(pv);
 			PMAP_LOCK(pmap);
 			pte = pmap_pte_quick(pmap, pv->pv_va);
 			if ((*pte & PG_A) != 0) {
 				PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
 				pmap_invalidate_page(pmap, pv->pv_va);
 				rtval++;
 				/* Enough references found: end the loop. */
 				if (rtval > 4)
 					pvn = NULL;
 			}
 			PMAP_UNLOCK(pmap);
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
 	PT_UPDATES_FLUSH();
 	/* Clear the PADDR1 mapping if PMAP1 is still populated. */
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
 
 	sched_unpin();
 	vm_page_unlock_queues();
 	return (rtval);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	pv_entry_t pv;
 	pt_entry_t *ptep;
 	pmap_t owner;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 	KASSERT((m->oflags & VPO_BUSY) == 0,
 	    ("pmap_clear_modify: page %p is busy", m));
 
 	/*
 	 * A page without PG_WRITEABLE has no PTE with PG_M set.  Since
 	 * the object is locked and the page is not VPO_BUSY, the flag
 	 * cannot be set concurrently, so there is nothing to do.
 	 */
 	if ((m->flags & PG_WRITEABLE) == 0)
 		return;
 
 	vm_page_lock_queues();
 	sched_pin();
 	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL;
 	    pv = TAILQ_NEXT(pv, pv_list)) {
 		owner = PV_PMAP(pv);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, pv->pv_va);
 		if ((*ptep & PG_M) != 0) {
 			/*
 			 * PG_M sits in the low 32 bits of the entry
 			 * whether PTEs are 32 or 64 bits wide.
 			 */
 			PT_SET_VA_MA(ptep, *ptep & ~PG_M, FALSE);
 			pmap_invalidate_page(owner, pv->pv_va);
 		}
 		PMAP_UNLOCK(owner);
 	}
 	sched_unpin();
 	vm_page_unlock_queues();
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the reference bit on the specified physical page.
  */
 void
 pmap_clear_reference(vm_page_t m)
 {
 	pv_entry_t pv;
 	pt_entry_t *ptep;
 	pmap_t owner;
 
 	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 	    ("pmap_clear_reference: page %p is not managed", m));
 
 	vm_page_lock_queues();
 	sched_pin();
 	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL;
 	    pv = TAILQ_NEXT(pv, pv_list)) {
 		owner = PV_PMAP(pv);
 		PMAP_LOCK(owner);
 		ptep = pmap_pte_quick(owner, pv->pv_va);
 		if ((*ptep & PG_A) != 0) {
 			/*
 			 * PG_A sits in the low 32 bits of the entry
 			 * whether PTEs are 32 or 64 bits wide.
 			 */
 			PT_SET_VA_MA(ptep, *ptep & ~PG_A, FALSE);
 			pmap_invalidate_page(owner, pv->pv_va);
 		}
 		PMAP_UNLOCK(owner);
 	}
 	sched_unpin();
 	vm_page_unlock_queues();
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
  * routine is intended to be used for mapping device memory,
  * NOT real memory.
  */
 void *
 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 {
 	vm_offset_t va, offset;
 	vm_size_t done;
 
 	/* Split the request into a page-aligned frame and an in-page offset. */
 	offset = pa & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 	pa &= PG_FRAME;
 
 	/*
 	 * A range lying entirely below KERNLOAD is addressed at
 	 * KERNBASE + pa; anything else gets fresh kernel virtual space.
 	 */
 	if (pa >= KERNLOAD || pa + size > KERNLOAD)
 		va = kmem_alloc_nofault(kernel_map, size);
 	else
 		va = KERNBASE + pa;
 	if (va == 0)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	/* Enter one mapping per page with the requested cache mode. */
 	done = 0;
 	while (done < size) {
 		pmap_kenter_attr(va + done, pa + done, mode);
 		done += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, va, va + done);
 	pmap_invalidate_cache_range(va, va + size);
 	return ((void *)(va + offset));
 }
 
 /*
  * Map size bytes of physical memory at pa through pmap_mapdev_attr()
  * with the PAT_UNCACHEABLE mode.
  */
 void *
 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 {
 	void *kva;
 
 	kva = pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE);
 	return (kva);
 }
 
 /*
  * Map size bytes of physical memory at pa through pmap_mapdev_attr()
  * with the PAT_WRITE_BACK mode.
  */
 void *
 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
 	void *kva;
 
 	kva = pmap_mapdev_attr(pa, size, PAT_WRITE_BACK);
 	return (kva);
 }
 
 /*
  * Undo a pmap_mapdev_attr() mapping of size bytes at va.  A range
  * lying within [KERNBASE, KERNBASE + KERNLOAD] is left untouched.
  */
 void
 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t first, in_page, cursor, limit;
 
 	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 		return;
 
 	/* Widen the request to whole pages. */
 	first = trunc_page(va);
 	in_page = va & PAGE_MASK;
 	size = roundup(in_page + size, PAGE_SIZE);
 	limit = first + size;
 
 	critical_enter();
 	cursor = first;
 	while (cursor < limit) {
 		pmap_kremove(cursor);
 		cursor += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, va, cursor);
 	critical_exit();
 	kmem_free(kernel_map, first, size);
 }
 
 /*
  * Sets the memory attribute for the specified page.
  */
 /*
  * Record ma as the page's pat_mode and, unless the page is fictitious,
  * flush the page from the cache: through an existing sf buffer mapping
  * when there is one, otherwise through pmap_invalidate_cache_range(),
  * using a transient CADDR2 mapping when the CPU has CLFLUSH but no
  * self snoop.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 	struct sysmaps *sysmaps;
 	vm_offset_t sva, eva;
 
 	m->md.pat_mode = ma;
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 
 	/*
 	 * If "m" is a normal page, flush it from the cache.
 	 * See pmap_invalidate_cache_range().
 	 *
 	 * First, try to find an existing mapping of the page by sf
 	 * buffer. sf_buf_invalidate_cache() modifies mapping and
 	 * flushes the cache.
 	 */    
 	if (sf_buf_invalidate_cache(m))
 		return;
 
 	/*
 	 * If page is not mapped by sf buffer, but CPU does not
 	 * support self snoop, map the page transient and do
 	 * invalidation. In the worst case, whole cache is flushed by
 	 * pmap_invalidate_cache_range().
 	 */
 	if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
 		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 		mtx_lock(&sysmaps->lock);
 		if (*sysmaps->CMAP2)
 			panic("pmap_page_set_memattr: CMAP2 busy");
 		sched_pin();
 		/* Map the page at CADDR2 with the new cache attributes. */
 		PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
 		    VM_PAGE_TO_MACH(m) | PG_A | PG_M |
 		    pmap_cache_bits(m->md.pat_mode, 0));
 		invlcaddr(sysmaps->CADDR2);
 		sva = (vm_offset_t)sysmaps->CADDR2;
 		eva = sva + PAGE_SIZE;
 	} else
 		sva = eva = 0; /* gcc */
 	pmap_invalidate_cache_range(sva, eva);
 	/* A non-zero sva means the transient mapping was set up above. */
 	if (sva != 0) {
 		PT_SET_MA(sysmaps->CADDR2, 0);
 		sched_unpin();
 		mtx_unlock(&sysmaps->lock);
 	}
 }
 
 /*
  * Change the cache mode of the kernel virtual range [va, va + size),
  * widened to whole pages, to "mode".
  *
  * Returns EINVAL, without changing anything, when the range starts at
  * or below VM_MAXUSER_ADDRESS or when any page in it is covered by a
  * 4MB mapping or is not mapped; returns 0 otherwise.  The TLB and the
  * CPU caches are flushed for the range only if at least one entry
  * actually changed.
  */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
 	pt_entry_t *pte;
 	u_int opte, npte;
 	pd_entry_t *pde;
 	boolean_t changed;
 
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 
 	/* Only supported on kernel virtual addresses. */
 	if (base <= VM_MAXUSER_ADDRESS)
 		return (EINVAL);
 
 	/* 4MB pages and pages that aren't mapped aren't supported. */
 	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
 		pde = pmap_pde(kernel_pmap, tmpva);
 		if (*pde & PG_PS)
 			return (EINVAL);
 		if ((*pde & PG_V) == 0)
 			return (EINVAL);
 		/*
 		 * Look up the PTE of the page being validated (tmpva),
 		 * not that of the first page (va), so that every page
 		 * in the range is actually checked.
 		 */
 		pte = vtopte(tmpva);
 		if ((*pte & PG_V) == 0)
 			return (EINVAL);
 	}
 
 	changed = FALSE;
 
 	/*
 	 * Ok, all the pages exist and are 4k, so run through them updating
 	 * their cache mode.
 	 */
 	for (tmpva = base; size > 0; ) {
 		pte = vtopte(tmpva);
 
 		/*
 		 * The cache mode bits are all in the low 32-bits of the
 		 * PTE, so we can just spin on updating the low 32-bits.
 		 */
 		do {
 			opte = *(u_int *)pte;
 			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
 			npte |= pmap_cache_bits(mode, 0);
 			PT_SET_VA_MA(pte, npte, TRUE);
 		} while (npte != opte && (*pte != npte));
 		if (npte != opte)
 			changed = TRUE;
 		tmpva += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 
 	/*
 	 * Flush CPU caches to make sure any data isn't cached that shouldn't
 	 * be, etc.
 	 */
 	if (changed) {
 		pmap_invalidate_range(kernel_pmap, base, tmpva);
 		pmap_invalidate_cache_range(base, tmpva);
 	}
 	return (0);
 }
 
 /*
  * perform the pmap work for mincore
  */
 /*
  * Compute the MINCORE_* flags for the mapping of addr in pmap.
  * Returns 0 when there is no valid mapping.  For a valid managed
  * mapping whose modified/referenced state is not fully known from the
  * PTE, the page's physical address is locked through locked_pa;
  * otherwise any lock recorded in *locked_pa is dropped.
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 {
 	pt_entry_t *ptep, pte;
 	vm_paddr_t pa;
 	int val;
 	
 	PMAP_LOCK(pmap);
 retry:
 	ptep = pmap_pte(pmap, addr);
 	/* A missing page table is treated like an empty entry. */
 	pte = (ptep != NULL) ? PT_GET(ptep) : 0;
 	pmap_pte_release(ptep);
 	val = 0;
 	if ((pte & PG_V) != 0) {
 		val |= MINCORE_INCORE;
 		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if ((pte & PG_A) != 0)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	}
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
 	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
 		pa = pte & PG_FRAME;
 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 		/* A non-zero result restarts the lookup from the top. */
 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 			goto retry;
 	} else
 		PA_UNLOCK_COND(*locked_pa);
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 /*
  * Make the pmap of the process owning thread "td" the active pmap on the
  * current CPU.
  *
  * Inside a critical section this CPU is cleared from the previously
  * active pmap's pm_active mask and set in the new one, the physical
  * address of the new top-level table (pm_pdpt under PAE, pm_pdir
  * otherwise) is stored in the thread's PCB and loaded with load_cr3(),
  * and the per-CPU curpmap pointer is updated.
  */
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t	pmap, oldpmap;
 	u_int32_t  cr3;
 
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 #if defined(SMP)
 	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 #else
 	/* Uniprocessor: bit 0 stands for the only CPU. */
 	oldpmap->pm_active &= ~1;
 	pmap->pm_active |= 1;
 #endif
 #ifdef PAE
 	cr3 = vtophys(pmap->pm_pdpt);
 #else
 	cr3 = vtophys(pmap->pm_pdir);
 #endif
 	/*
 	 * pmap_activate is for the current thread on the current cpu
 	 */
 	td->td_pcb->pcb_cr3 = cr3;
 	/*
 	 * Flush queued page-table updates before switching page tables
 	 * (presumably so none are pending when the base pointer changes).
 	 */
 	PT_UPDATES_FLUSH();
 	load_cr3(cr3);
 	PCPU_SET(curpmap, pmap);
 	critical_exit();
 }
 
 /*
  * No-op in this pmap implementation: the arguments are ignored and
  * nothing is done.
  */
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
 }
 
 /*
  *	Move the starting virtual address "*addr" of a mapping forward when
  *	a different alignment could allow more superpage mappings.
  *
  *	The target is the address whose offset within a superpage (NBPDR
  *	bytes) equals that of the object offset, adjusted by the object's
  *	page color when the object is colored.  Nothing is changed when the
  *	mapping is smaller than one superpage, when it could not contain a
  *	whole superpage after alignment, or when "*addr" already has the
  *	wanted offset.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t addr_offset, sp_base, sp_offset;
 
 	if (size < NBPDR)
 		return;
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	sp_offset = offset & PDRMASK;
 
 	/* Too small to hold a full superpage once aligned. */
 	if (size - ((NBPDR - sp_offset) & PDRMASK) < NBPDR)
 		return;
 
 	/* Already at the wanted offset within its superpage. */
 	addr_offset = *addr & PDRMASK;
 	if (addr_offset == sp_offset)
 		return;
 
 	/*
 	 * Stay in the current superpage if the wanted offset still lies
 	 * ahead, otherwise advance to the next superpage boundary.
 	 */
 	if (addr_offset < sp_offset)
 		sp_base = *addr & ~PDRMASK;
 	else
 		sp_base = (*addr + PDRMASK) & ~PDRMASK;
 	*addr = sp_base + sp_offset;
 }
 
 /*
  * Clear the recursive page-table mapping of every pmap on the allpmaps
  * list.
  *
  * The recursive map occupies the four page directory slots starting at
  * index PTDPTDI; they are removed so that Xen doesn't get confused when
  * it restores the page tables.  The suspend code is assumed to have
  * stopped the other vcpus (if any).
  */
 void
 pmap_suspend()
 {
 	mmu_update_t mu[4];
 	vm_paddr_t pd_ma;
 	pmap_t pmap;
 	int i, pd_page, pd_slot;
 
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		for (i = 0; i < 4; i++) {
 			/*
 			 * Locate the page directory (L2) page holding this
 			 * slot of the recursive map, and the slot's index
 			 * within that page.
 			 */
 			pd_page = (PTDPTDI + i) / NPDEPG;
 			pd_slot = (PTDPTDI + i) % NPDEPG;
 			pd_ma = pmap->pm_pdpt[pd_page] & PG_FRAME;
 
 			/* Request that the slot be zeroed. */
 			mu[i].val = 0;
 			mu[i].ptr = pd_ma + pd_slot * sizeof(pd_entry_t);
 		}
 		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
 	}
 }
 
 /*
  * Re-create, for every pmap on the allpmaps list, the recursive
  * page-table mapping that pmap_suspend() removed: slot PTDPTDI + i is
  * pointed at the frame named by pm_pdpt[i], marked valid.
  */
 void
 pmap_resume()
 {
 	mmu_update_t mu[4];
 	vm_paddr_t pd_ma;
 	pmap_t pmap;
 	int i, pd_page, pd_slot;
 
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		for (i = 0; i < 4; i++) {
 			/*
 			 * Locate the page directory (L2) page holding this
 			 * slot of the recursive map, and the slot's index
 			 * within that page.
 			 */
 			pd_page = (PTDPTDI + i) / NPDEPG;
 			pd_slot = (PTDPTDI + i) % NPDEPG;
 			pd_ma = pmap->pm_pdpt[pd_page] & PG_FRAME;
 
 			mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
 			mu[i].ptr = pd_ma + pd_slot * sizeof(pd_entry_t);
 		}
 		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
 	}
 }
 
 #if defined(PMAP_DEBUG)
 /*
  * Debug helper: print the valid user-space mappings of the process with
  * the given pid, two per output line, and return how many were printed.
  *
  * The process list is walked under a shared hold of allproc_lock.  For a
  * matching process that has a vmspace, every valid page directory entry
  * is scanned; the function returns as soon as it reaches a virtual
  * address at or above VM_MIN_KERNEL_ADDRESS.
  */
 int
 pmap_pid_dump(int pid)
 {
 	pmap_t pmap;
 	struct proc *p;
 	int npte = 0;
 	int index;
 
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_pid != pid)
 			continue;
 
 		if (p->p_vmspace) {
 			int i,j;
 			index = 0;
 			pmap = vmspace_pmap(p->p_vmspace);
 			for (i = 0; i < NPDEPTD; i++) {
 				pd_entry_t *pde;
 				pt_entry_t *pte;
 				vm_offset_t base = i << PDRSHIFT;
 				
 				pde = &pmap->pm_pdir[i];
 				if (pde && pmap_pde_v(pde)) {
 					for (j = 0; j < NPTEPG; j++) {
 						vm_offset_t va = base + (j << PAGE_SHIFT);
 						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 							/* Terminate a half-filled output line. */
 							if (index) {
 								index = 0;
 								printf("\n");
 							}
 							sx_sunlock(&allproc_lock);
 							return npte;
 						}
 						pte = pmap_pte(pmap, va);
 						if (pte && pmap_pte_v(pte)) {
 							pt_entry_t pa;
 							vm_page_t m;
 							pa = PT_GET(pte);
 							m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
 							/*
 							 * pt_entry_t may be wider than int, so
 							 * print it through uintmax_t.
 							 */
 							printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
 								va, (uintmax_t)pa, m->hold_count, m->wire_count, m->flags);
 							npte++;
 							index++;
 							if (index >= 2) {
 								index = 0;
 								printf("\n");
 							} else {
 								printf(" ");
 							}
 						}
 					}
 				}
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return npte;
 }
 #endif
 
 #if defined(DEBUG)
 
 static void	pads(pmap_t pm);
 void		pmap_pvdump(vm_paddr_t pa);
 
 /*
  * Print the address space of a pmap: one "va:pte" pair for every valid
  * PTE found under a non-zero page directory entry.  Returns immediately
  * when given the kernel pmap.
  */
 static void
 pads(pmap_t pm)
 {
 	int i, j;
 	vm_paddr_t va;
 	pt_entry_t *ptep;
 
 	if (pm == kernel_pmap)
 		return;
 	for (i = 0; i < NPDEPTD; i++)
 		if (pm->pm_pdir[i])
 			for (j = 0; j < NPTEPG; j++) {
 				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 				/* Cannot trigger: the kernel pmap returned above. */
 				if (pm == kernel_pmap && va < KERNBASE)
 					continue;
 				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 					continue;
 				ptep = pmap_pte(pm, va);
 				/*
 				 * NOTE(review): ptep is used without a NULL
 				 * check - confirm pmap_pte() cannot return
 				 * NULL for these addresses.
 				 */
 				if (pmap_pte_v(ptep))
 					printf("%x:%x ", va, *ptep);
 			};
 
 }
 
 /*
  * Debug helper: print physical address "pa", then for every pv entry on
  * the pv list of the corresponding vm_page print the owning pmap and
  * virtual address and dump that pmap's address space with pads().
  */
 void
 pmap_pvdump(vm_paddr_t pa)
 {
 	pv_entry_t pv;
 	pmap_t pmap;
 	vm_page_t m;
 
 	printf("pa %x", pa);
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		pmap = PV_PMAP(pv);
 		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
 		pads(pmap);
 	}
 	printf(" ");
 }
 #endif
Index: projects/binutils-2.17/sys/i386/xen/xen_machdep.c
===================================================================
--- projects/binutils-2.17/sys/i386/xen/xen_machdep.c	(revision 215829)
+++ projects/binutils-2.17/sys/i386/xen/xen_machdep.c	(revision 215830)
@@ -1,1229 +1,1249 @@
 /*
  *
  * Copyright (c) 2004 Christian Limpach.
  * Copyright (c) 2004-2006,2008 Kip Macy
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christian Limpach.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mount.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/reboot.h>
 #include <sys/sysproto.h>
 
 #include <machine/xen/xen-os.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <machine/segments.h>
 #include <machine/pcb.h>
 #include <machine/stdarg.h>
 #include <machine/vmparam.h>
 #include <machine/cpu.h>
 #include <machine/intr_machdep.h>
 #include <machine/md_var.h>
 #include <machine/asmacros.h>
 
 
 
 #include <xen/hypervisor.h>
 #include <machine/xen/xenvar.h>
 #include <machine/xen/xenfunc.h>
 #include <machine/xen/xenpmap.h>
 #include <machine/xen/xenfunc.h>
 #include <xen/interface/memory.h>
 #include <machine/xen/features.h>
 #ifdef SMP
 #include <machine/privatespace.h>
 #endif
 
 
 #include <vm/vm_page.h>
 
 
 #define	IDTVEC(name)	__CONCAT(X,name)
 
 extern inthand_t
 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 
 
 /* Debug flag word; initvalues() resets it to 0. */
 int xendebug_flags; 
 /* Start info structure handed to initvalues(), which stores it here. */
 start_info_t *xen_start_info;
 /* Address of the shared info page; assigned and mapped by initvalues(). */
 shared_info_t *HYPERVISOR_shared_info;
 xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
 /* Set by initvalues() to the start info's mfn_list. */
 xen_pfn_t *xen_phys_machine;
 /* Both frame lists are allocated and filled by init_frame_list_list(). */
 xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
 xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
 int preemptable, init_first;
 extern unsigned int avail_space;
 
 void ni_cli(void);
 void ni_sti(void);
 
 
 /*
  * Run __cli() with %edx and %eax saved on the stack beforehand and
  * restored afterwards.  Emits a KTR trace record first.
  */
 void
 ni_cli(void)
 {
 	CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
 	__asm__("pushl %edx;"
 		"pushl %eax;"
 		);
 	__cli();
 	/* Pop in the reverse order of the pushes above. */
 	__asm__("popl %eax;"
 		"popl %edx;"
 		);
 }
 
 
 /*
  * Run __sti() with %edx, %esi and %eax saved on the stack beforehand and
  * restored afterwards.
  */
 void
 ni_sti(void)
 {
 	__asm__("pushl %edx;"
 		"pushl %esi;"
 		"pushl %eax;"
 		);
 	__sti();
 	/* Pop in the reverse order of the pushes above. */
 	__asm__("popl %eax;"
 		"popl %esi;"
 		"popl %edx;"
 		);
 }
 
 /*
  * Rewrite cmd_line in place so that it suits the static env vars: leading
  * spaces are skipped and every ',' is replaced by a NUL byte.
  *
  * Returns a pointer to the first non-space character of cmd_line.
  */
 char *
 xen_setbootenv(char *cmd_line)
 {
 	char *cursor;
 
 	/* Skip leading spaces */
 	while (*cmd_line == ' ')
 		cmd_line++;
 
 	printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
 
 	/* strsep() overwrites each ',' it finds with a NUL byte. */
 	cursor = cmd_line;
 	while (strsep(&cursor, ",") != NULL)
 		continue;
 	return (cmd_line);
 }
 
 static struct 
 {
 	const char	*ev;
 	int		mask;
 } howto_names[] = {
 	{"boot_askname",	RB_ASKNAME},
 	{"boot_single",	RB_SINGLE},
 	{"boot_nosync",	RB_NOSYNC},
 	{"boot_halt",	RB_ASKNAME},
 	{"boot_serial",	RB_SERIAL},
 	{"boot_cdrom",	RB_CDROM},
 	{"boot_gdb",	RB_GDB},
 	{"boot_gdb_pause",	RB_RESERVED1},
 	{"boot_verbose",	RB_VERBOSE},
 	{"boot_multicons",	RB_MULTIPLE},
 	{NULL,	0}
 };
 
 /*
  * Build the boot "howto" flag word from the kernel environment: the mask
  * of every howto_names[] entry whose variable is set is OR-ed into the
  * result.  The envp argument is not used.
  */
 int 
 xen_boothowto(char *envp)
 {
 	int howto = 0;
 	int idx;
 
 	/* get equivalents from the environment */
 	for (idx = 0; howto_names[idx].ev != NULL; idx++) {
 		if (getenv(howto_names[idx].ev) == NULL)
 			continue;
 		howto |= howto_names[idx].mask;
 	}
 	return (howto);
 }
 
 #define PRINTK_BUFSIZE 1024
 /*
  * Format a message printf-style into a static buffer and hand it to the
  * Xen console with HYPERVISOR_console_write().
  *
  * Output that does not fit in the buffer is truncated.  The buffer is
  * static and shared by all callers.
  */
 void
 printk(const char *fmt, ...)
 {
         __va_list ap;
         int retval;
         static char buf[PRINTK_BUFSIZE];
 
         va_start(ap, fmt);
         retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
         va_end(ap);
         /*
          * vsnprintf() reports the length the full output would have had,
          * which can exceed what was stored.  Clamp it so that the
          * terminator and the length passed on stay inside buf.
          */
         if (retval < 0)
                 retval = 0;
         else if (retval > PRINTK_BUFSIZE - 2)
                 retval = PRINTK_BUFSIZE - 2;
         buf[retval] = 0;
         (void)HYPERVISOR_console_write(buf, retval);
 }
 
 
 /* Number of entries in an MMU update queue. */
 #define XPQUEUE_SIZE 128
 
 /* Source location (file and line) recorded for a queued update. */
 struct mmu_log {
 	char *file;
 	int line;
 };
 
 #ifdef SMP
 /* per-cpu queues and indices */
 #ifdef INVARIANTS
 static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE];
 #endif
 
 static int xpq_idx[MAX_VIRT_CPUS];  
 static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
 
 /*
  * Accessors selecting the slot indexed by a local variable "vcpu", which
  * SET_VCPU() declares and initializes from smp_processor_id().
  */
 #define	XPQ_QUEUE_LOG xpq_queue_log[vcpu]
 #define	XPQ_QUEUE xpq_queue[vcpu]
 #define	XPQ_IDX xpq_idx[vcpu]
 #define	SET_VCPU() int vcpu = smp_processor_id()
 #else
 	
 /* Non-SMP: a single queue, log and index; SET_VCPU() expands to nothing. */
 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
 static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
 static int xpq_idx = 0;
 
 #define	XPQ_QUEUE_LOG xpq_queue_log
 #define	XPQ_QUEUE xpq_queue
 #define	XPQ_IDX xpq_idx
 #define	SET_VCPU()
 #endif /* !SMP */
 
 /* Atomically add one to the queue index; the expansion already ends in ';'. */
 #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
 
 #if 0
 /*
  * Print the val/ptr pair of every pending entry of the MMU update queue.
  * Nothing is printed when the queue holds at most one entry.
  *
  * NOTE(review): compiled out by the surrounding "#if 0"; there is no
  * SET_VCPU() here, which the SMP form of XPQ_IDX appears to require.
  */
 static void
 xen_dump_queue(void)
 {
 	int _xpq_idx = XPQ_IDX;
 	int i;
 
 	if (_xpq_idx <= 1)
 		return;
 
 	printk("xen_dump_queue(): %u entries\n", _xpq_idx);
 	for (i = 0; i < _xpq_idx; i++) {
 		printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
 	}
 }
 #endif
 
 
 /*
  * Submit all pending entries of the MMU update queue to Xen with a
  * single HYPERVISOR_mmu_update() call and reset the queue index to 0.
  *
  * Once "gdtset" is true, the index reset and the hypercall run inside a
  * critical section.  A negative hypercall result prints the submitted
  * entries and panics.
  */
 static __inline void
 _xen_flush_queue(void)
 {
 	SET_VCPU();
 	/* Entry count is sampled before the critical section is entered. */
 	int _xpq_idx = XPQ_IDX;
 	int error, i;
 	/* window of vulnerability here? */
 
 	if (__predict_true(gdtset))
 		critical_enter();
 	XPQ_IDX = 0;
 	/* Make sure index is cleared first to avoid double updates. */
 	error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
 				      _xpq_idx, NULL, DOMID_SELF);
     
 #if 0
 	if (__predict_true(gdtset))
 	for (i = _xpq_idx; i > 0;) {
 		if (i >= 3) {
 			CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
 			    "ptr: %lx val: %lx ptr: %lx",
 			    (XPQ_QUEUE[i-1].val & 0xffffffff),
 			    (XPQ_QUEUE[i-1].ptr & 0xffffffff),
 			    (XPQ_QUEUE[i-2].val & 0xffffffff),
 			    (XPQ_QUEUE[i-2].ptr & 0xffffffff),
 			    (XPQ_QUEUE[i-3].val & 0xffffffff),
 			    (XPQ_QUEUE[i-3].ptr & 0xffffffff));
 			    i -= 3;
 		} else if (i == 2) {
 			CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
 			    (XPQ_QUEUE[i-1].val & 0xffffffff),
 			    (XPQ_QUEUE[i-1].ptr & 0xffffffff),
 			    (XPQ_QUEUE[i-2].val & 0xffffffff),
 			    (XPQ_QUEUE[i-2].ptr & 0xffffffff));
 			i = 0;
 		} else {
 			CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", 
 			    (XPQ_QUEUE[i-1].val & 0xffffffff),
 			    (XPQ_QUEUE[i-1].ptr & 0xffffffff));
 			i = 0;
 		}
 	}
 #endif	
 	if (__predict_true(gdtset))
 		critical_exit();
 	if (__predict_false(error < 0)) {
 		/* Dump what was submitted before giving up. */
 		for (i = 0; i < _xpq_idx; i++)
 			printf("val: %llx ptr: %llx\n",
 			    XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
 		panic("Failed to execute MMU updates: %d", error);
 	}
 
 }
 
 /*
  * Flush the MMU update queue if it holds any pending entries; do nothing
  * when it is empty.
  */
 void
 xen_flush_queue(void)
 {
 	SET_VCPU();
 
 	if (XPQ_IDX == 0)
 		return;
 	_xen_flush_queue();
 }
 
 /*
  * Advance the MMU update queue index past the entry just filled in and
  * flush the queue once the index reaches XPQUEUE_SIZE.
  */
 static __inline void
 xen_increment_idx(void)
 {
 	SET_VCPU();
 
 	if (__predict_false(++XPQ_IDX == XPQUEUE_SIZE))
 		xen_flush_queue();
 }
 
 /*
  * Under INVARIANTS, assert that the MMU update queue has no pending
  * entries.  Without INVARIANTS the body is empty.
  */
 void
 xen_check_queue(void)
 {
 #ifdef INVARIANTS
 	SET_VCPU();
 	
 	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
 #endif
 }
 
 /*
  * Issue an MMUEXT_INVLPG_ALL request for the page containing virtual
  * address "va".  Panics (via PANIC_IF) if the hypercall fails.
  */
 void
 xen_invlpg(vm_offset_t va)
 {
 	struct mmuext_op op;
 
 	/* The request carries the page-aligned address. */
 	op.arg1.linear_addr = va & ~PAGE_MASK;
 	op.cmd = MMUEXT_INVLPG_ALL;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Issue an MMUEXT_NEW_BASEPTR request for the machine frame that
  * xpmap_ptom() yields for "val".  Panics (via PANIC_IF) if the hypercall
  * fails.
  *
  * Under INVARIANTS, first asserts that the MMU update queue is empty.
  */
 void
 xen_load_cr3(u_int val)
 {
 	struct mmuext_op op;
 #ifdef INVARIANTS
 	SET_VCPU();
 	
 	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
 #endif
 	op.cmd = MMUEXT_NEW_BASEPTR;
 	op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 #ifdef KTR
 /*
  * Return the 32-bit word stored at 4(%ebp).
  *
  * NOTE(review): presumably the caller's return address, used to tag KTR
  * trace records - this depends on frame pointers being in use; confirm.
  */
 static __inline u_int
 rebp(void)
 {
 	u_int	data;
 
 	__asm __volatile("movl 4(%%ebp),%0" : "=r" (data));	
 	return (data);
 }
 #endif
 
 /*
  * Return the value of _read_eflags(), with PSL_I cleared whenever the
  * current vcpu's evtchn_upcall_mask in the shared info page is non-zero.
  */
 u_int
 read_eflags(void)
 {
 	u_int flags;
 
 	flags = _read_eflags();
 	if (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].
 	    evtchn_upcall_mask != 0)
 		flags &= ~PSL_I;
 	return (flags);
 }
 
 /*
  * Install "eflags": __restore_flags() is called with 1 when PSL_I is
  * clear in eflags and 0 when it is set, then the value is passed to
  * _write_eflags().
  */
 void
 write_eflags(u_int eflags)
 {
 	u_int intr;
 
 	CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
 	/* intr is the inverse of the PSL_I bit. */
 	intr = ((eflags & PSL_I) == 0);
 	__restore_flags(intr);
 	_write_eflags(eflags);
 }
 
 /* Emit a KTR trace record, then call __cli(). */
 void
 xen_cli(void)
 {
 	CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
 	__cli();
 }
 
 /* Emit a KTR trace record, then call __sti(). */
 void
 xen_sti(void)
 {
 	CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
 	__sti();
 }
 
 u_int
 xen_rcr2(void)
 {
 
 	return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
 }
 
 /*
  * Queue an MMU_MACHPHYS_UPDATE request associating machine frame "mfn"
  * with pseudo-physical frame "pfn".
  *
  * "file" and "line" identify the caller and are stored in the queue log
  * under INVARIANTS.  Once "gdtset" is true the queue is modified inside
  * a critical section.  The queue is flushed when this entry fills it.
  */
 void
 _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
 {
 	SET_VCPU();
 	
 	if (__predict_true(gdtset))
 		critical_enter();
 	XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
 	XPQ_QUEUE[XPQ_IDX].val = pfn;
 #ifdef INVARIANTS
 	XPQ_QUEUE_LOG[XPQ_IDX].file = file;
 	XPQ_QUEUE_LOG[XPQ_IDX].line = line;	
 #endif		
 	xen_increment_idx();
 	if (__predict_true(gdtset))
 		critical_exit();
 }
 
 /*
  * Queue an MMU_NORMAL_PT_UPDATE request that writes "val" to the page
  * table entry at "ptr", which must be 8-byte aligned.
  *
  * "file" and "line" identify the caller and are stored in the queue log
  * under INVARIANTS.  Once "gdtset" is true, the caller must own
  * vm_page_queue_mtx and the queue is modified inside a critical
  * section.  The queue is flushed when this entry fills it.
  */
 void
 _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
 {
 	SET_VCPU();
 
 	if (__predict_true(gdtset))	
 		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 
 	/* The low three bits of ptr must be clear; the request type is OR-ed in. */
 	KASSERT((ptr & 7) == 0, ("misaligned update"));
 	
 	if (__predict_true(gdtset))
 		critical_enter();
 	
 	XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
 	XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
 #ifdef INVARIANTS
 	XPQ_QUEUE_LOG[XPQ_IDX].file = file;
 	XPQ_QUEUE_LOG[XPQ_IDX].line = line;	
 #endif	
 	xen_increment_idx();
 	if (__predict_true(gdtset))
 		critical_exit();
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_PIN_L3_TABLE request
  * for the machine frame containing "ma".  Panics (via PANIC_IF) if the
  * hypercall fails.
  */
 void 
 xen_pgdpt_pin(vm_paddr_t ma)
 {
 	struct mmuext_op op;
 
 	xen_flush_queue();
 	op.arg1.mfn = ma >> PAGE_SHIFT;
 	op.cmd = MMUEXT_PIN_L3_TABLE;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_PIN_L2_TABLE request
  * for the machine frame containing "ma".  Panics (via PANIC_IF) if the
  * hypercall fails.
  */
 void 
 xen_pgd_pin(vm_paddr_t ma)
 {
 	struct mmuext_op op;
 
 	xen_flush_queue();
 	op.arg1.mfn = ma >> PAGE_SHIFT;
 	op.cmd = MMUEXT_PIN_L2_TABLE;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_UNPIN_TABLE request
  * for the machine frame containing "ma".  Panics (via PANIC_IF) if the
  * hypercall fails.
  */
 void 
 xen_pgd_unpin(vm_paddr_t ma)
 {
 	struct mmuext_op op;
 
 	xen_flush_queue();
 	op.arg1.mfn = ma >> PAGE_SHIFT;
 	op.cmd = MMUEXT_UNPIN_TABLE;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_PIN_L1_TABLE request
  * for the machine frame containing "ma".  Panics (via PANIC_IF) if the
  * hypercall fails.
  */
 void 
 xen_pt_pin(vm_paddr_t ma)
 {
 	struct mmuext_op op;
 	op.cmd = MMUEXT_PIN_L1_TABLE;
 	op.arg1.mfn = ma >> PAGE_SHIFT;
-	printk("xen_pt_pin(): mfn=%x\n", op.arg1.mfn);
 	xen_flush_queue();
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_UNPIN_TABLE request
  * for the machine frame containing "ma".  Panics (via PANIC_IF) if the
  * hypercall fails.
  */
 void 
 xen_pt_unpin(vm_paddr_t ma)
 {
 	struct mmuext_op op;
 
 	xen_flush_queue();
 	op.arg1.mfn = ma >> PAGE_SHIFT;
 	op.cmd = MMUEXT_UNPIN_TABLE;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_SET_LDT request with
  * "ptr" as the linear address and "len" as the number of entries.
  * Panics (via PANIC_IF) if the hypercall fails.
  */
 void 
 xen_set_ldt(vm_paddr_t ptr, unsigned long len)
 {
 	struct mmuext_op op;
 
 	xen_flush_queue();
 	op.arg2.nr_ents = len;
 	op.arg1.linear_addr = ptr;
 	op.cmd = MMUEXT_SET_LDT;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Flush pending MMU updates, then issue an MMUEXT_TLB_FLUSH_LOCAL
  * request.  Panics (via PANIC_IF) if the hypercall fails.
  */
 void
 xen_tlb_flush(void)
 {
 	struct mmuext_op op;
 
 	xen_flush_queue();
 	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 /*
  * Write the 64-bit descriptor "*entry" into the descriptor slot at
  * virtual address "table" through HYPERVISOR_update_descriptor().
  *
  * The target address handed to the hypercall is built from the frame
  * bits of the PTE mapping "table" plus the offset of "table" within its
  * page.  Panics if the hypercall returns non-zero.
  */
 void
 xen_update_descriptor(union descriptor *table, union descriptor *entry)
 {
 	pt_entry_t *ptep;
 	vm_paddr_t frame, pa;
 
 	ptep = vtopte((vm_offset_t)table);
 	frame = *ptep & PG_FRAME;
 	pa = frame | ((vm_offset_t)table & PAGE_MASK);
 	if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry) != 0)
 		panic("HYPERVISOR_update_descriptor failed\n");
 }
 
 
 #if 0
 /*
  * Bitmap is indexed by page number. If bit is set, the page is part of a
  * xen_create_contiguous_region() area of memory.
  */
 unsigned long *contiguous_bitmap;
 
 /*
  * Set the bits for pages [first_page, first_page + nr_pages) in
  * contiguous_bitmap.
  */
 static void 
 contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
 {
 	unsigned long start_off, end_off, curr_idx, end_idx;
 
 	/* Word index and bit offset of the first page and of the end. */
 	curr_idx  = first_page / BITS_PER_LONG;
 	start_off = first_page & (BITS_PER_LONG-1);
 	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
 	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
 
 	if (curr_idx == end_idx) {
 		/* Range lies within one word: bits start_off .. end_off-1. */
 		contiguous_bitmap[curr_idx] |=
 			((1UL<<end_off)-1) & -(1UL<<start_off);
 	} else {
 		/* Partial first word, full middle words, partial last word. */
 		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
 		while ( ++curr_idx < end_idx )
 			contiguous_bitmap[curr_idx] = ~0UL;
 		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
 	}
 }
 
 /*
  * Clear the bits for pages [first_page, first_page + nr_pages) in
  * contiguous_bitmap.
  */
 static void 
 contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
 {
 	unsigned long start_off, end_off, curr_idx, end_idx;
 
 	/* Word index and bit offset of the first page and of the end. */
 	curr_idx  = first_page / BITS_PER_LONG;
 	start_off = first_page & (BITS_PER_LONG-1);
 	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
 	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
 
 	if (curr_idx == end_idx) {
 		/* Range lies within one word: keep bits outside of it. */
 		contiguous_bitmap[curr_idx] &=
 			-(1UL<<end_off) | ((1UL<<start_off)-1);
 	} else {
 		/* Partial first word, full middle words, partial last word. */
 		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
 		while ( ++curr_idx != end_idx )
 			contiguous_bitmap[curr_idx] = 0;
 		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
 	}
 }
 #endif
 
 /*
  * Ensure multi-page extents are contiguous in machine memory.
  *
  * The machine frames backing "pages[0 .. npages-1]" are returned to Xen
  * one at a time and replaced by a single extent of 2^order frames, which
  * is then entered into the phys-to-machine table and queued as
  * machphys updates.  npages must be a power of two (enforced with
  * PANIC_IF).
  *
  * Returns 0 on success.  If the contiguous extent cannot be obtained,
  * each page is given an individually allocated frame instead and ENOMEM
  * is returned.
  */
 int 
 xen_create_contiguous_region(vm_page_t pages, int npages)
 {
 	unsigned long  mfn, i, flags;
 	int order;
 	struct xen_memory_reservation reservation = {
 		.nr_extents   = 1,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 	/* Every memory_op below passes or receives its frame number in "mfn". */
 	set_xen_guest_handle(reservation.extent_start, &mfn);
 	
 	balloon_lock(flags);
 
 	/* can currently only handle power of two allocation */
 	PANIC_IF(ffs(npages) != fls(npages));
 
 	/* 0. determine order */
 	order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
 	
 	/* 1. give away machine pages. */
 	for (i = 0; i < (1 << order); i++) {
 		int pfn;
 		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
 		mfn = PFNTOMFN(pfn);
 		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
 		PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
 	}
 
 
 	/* 2. Get a new contiguous memory extent. */
 	reservation.extent_order = order;
 	/* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not 
 	 * running with a broxen driver XXXEN
 	 */
 	reservation.address_bits = 31; 
 	if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
 		goto fail;
 
 	/* 3. Map the new extent in place of old pages. */
 	for (i = 0; i < (1 << order); i++) {
 		int pfn;
 		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
 		xen_machphys_update(mfn+i, pfn);
 		PFNTOMFN(pfn) = mfn+i;
 	}
 
 	xen_tlb_flush();
 
 #if 0
 	contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
 #endif
 
 	balloon_unlock(flags);
 
 	return 0;
 
  fail:
 	/* Fall back to single-frame extents with no address restriction. */
 	reservation.extent_order = 0;
 	reservation.address_bits = 0;
 
 	for (i = 0; i < (1 << order); i++) {
 		int pfn;
 		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
 		PANIC_IF(HYPERVISOR_memory_op(
 			XENMEM_increase_reservation, &reservation) != 1);
 		xen_machphys_update(mfn, pfn);
 		PFNTOMFN(pfn) = mfn;
 	}
 
 	xen_tlb_flush();
 
 	balloon_unlock(flags);
 
 	return ENOMEM;
 }
 
 /*
  * Undo xen_create_contiguous_region() for the npages pages mapped at
  * kernel virtual address "addr": each page is unmapped and its frame
  * given back to Xen, then every page receives a newly allocated single
  * frame that is mapped at the same address with PG_KERNEL.  npages must
  * be a power of two (enforced with PANIC_IF).
  */
 void 
 xen_destroy_contiguous_region(void *addr, int npages)
 {
 	unsigned long  mfn, i, flags, order, pfn0;
 	struct xen_memory_reservation reservation = {
 		.nr_extents   = 1,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 	/* Every memory_op below passes or receives its frame number in "mfn". */
 	set_xen_guest_handle(reservation.extent_start, &mfn);
 	
 	pfn0 = vtophys(addr) >> PAGE_SHIFT;
 #if 0
 	scrub_pages(vstart, 1 << order);
 #endif
 	/* can currently only handle power of two allocation */
 	PANIC_IF(ffs(npages) != fls(npages));
 
 	/* 0. determine order */
 	order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
 
 	balloon_lock(flags);
 
 #if 0
 	contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
 #endif
 
 	/* 1. Zap current PTEs, giving away the underlying pages. */
 	/*
 	 * NOTE(review): "mfn" is not assigned anywhere in this loop before
 	 * XENMEM_decrease_reservation reads it through the guest handle,
 	 * and "pfn" is derived from vtomach() yet used to index PFNTOMFN()
 	 * - confirm both are intended.
 	 */
 	for (i = 0; i < (1 << order); i++) {
 		int pfn;
 		uint64_t new_val = 0;
 		pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
 
 		PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
 		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
 		PANIC_IF(HYPERVISOR_memory_op(
 			XENMEM_decrease_reservation, &reservation) != 1);
 	}
 
 	/* 2. Map new pages in place of old pages. */
 	for (i = 0; i < (1 << order); i++) {
 		int pfn;
 		uint64_t new_val;
 		pfn = pfn0 + i;
 		PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
 		
 		new_val = mfn << PAGE_SHIFT;
 		PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), 
 						      new_val, PG_KERNEL));
 		xen_machphys_update(mfn, pfn);
 		PFNTOMFN(pfn) = mfn;
 	}
 
 	xen_tlb_flush();
 
 	balloon_unlock(flags);
 }
 
 extern  vm_offset_t	proc0kstack;
 extern int vm86paddr, vm86phystk;
 char *bootmem_start, *bootmem_current, *bootmem_end;
 
 pteinfo_t *pteinfo_list;
 void initvalues(start_info_t *startinfo);
 
 struct xenstore_domain_interface;
 extern struct xenstore_domain_interface *xen_store;
 
 char *console_page;
 
 /*
  * Hand out "size" bytes from the boot memory region by advancing
  * bootmem_current; returns the previous value of bootmem_current.
  * Panics (via PANIC_IF) if the request would run past bootmem_end.
  */
 void *
 bootmem_alloc(unsigned int size) 
 {
 	char *retptr = bootmem_current;
 
 	PANIC_IF(retptr + size > bootmem_end);
 	bootmem_current = retptr + size;
 	return (retptr);
 }
 
 /*
  * Give back the most recent boot memory allocation by moving
  * bootmem_current back by "size" bytes.
  *
  * Panics (via PANIC_IF) unless "ptr" is exactly "size" bytes below
  * bootmem_current and the result does not fall below bootmem_start, so
  * blocks can only be released in reverse order of allocation.
  */
 void 
 bootmem_free(void *ptr, unsigned int size) 
 {
 	char *tptr = ptr;
 
 	PANIC_IF(tptr != bootmem_current - size ||
 	    bootmem_current - size < bootmem_start);
 	bootmem_current -= size;
 }
 
 #if 0
 /*
  * Translate machine address "mpa" through machine_to_phys_mapping: the
  * frame number is looked up in the table and the bits outside PG_FRAME
  * are carried over unchanged.  (Compiled out by the surrounding "#if 0".)
  */
 static vm_paddr_t
 xpmap_mtop2(vm_paddr_t mpa)
 {
         return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
             ) | (mpa & ~PG_FRAME);
 }
 
 /*
  * Return the entry for "va" from the table at xen_start_info->pt_base,
  * indexed by va >> 22.  (Compiled out by the surrounding "#if 0".)
  */
 static pd_entry_t 
 xpmap_get_bootpde(vm_paddr_t va)
 {
 
         return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
 }
 
 /*
  * Return the boot page directory entry for "va" with its frame replaced
  * by xpmap_mtop2() of the frame plus KERNBASE.  For an entry without
  * PG_V only the non-frame bits are returned.  (Compiled out by the
  * surrounding "#if 0".)
  */
 static pd_entry_t
 xpmap_get_vbootpde(vm_paddr_t va)
 {
         pd_entry_t pde;
 
         pde = xpmap_get_bootpde(va);
         if ((pde & PG_V) == 0)
                 return (pde & ~PG_FRAME);
         return (pde & ~PG_FRAME) |
                 (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
 }
 
 static pt_entry_t 8*
 xpmap_get_bootptep(vm_paddr_t va)
 {
         pd_entry_t pde;
 
         pde = xpmap_get_vbootpde(va);
         if ((pde & PG_V) == 0)
                 return (void *)-1;
 #define PT_MASK         0x003ff000      /* page table address bits */
         return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
 }
 
 /*
  * Return the boot page table entry for "va" by dereferencing the pointer
  * from xpmap_get_bootptep().  (Compiled out by the surrounding "#if 0".)
  *
  * NOTE(review): the (void *)-1 failure value of xpmap_get_bootptep() is
  * not checked before the dereference.
  */
 static pt_entry_t
 xpmap_get_bootpte(vm_paddr_t va)
 {
 
         return xpmap_get_bootptep(va)[0];
 }
 #endif
 
 
 #ifdef ADD_ISA_HOLE
 /*
  * Rearrange the "phys_machine" table of nr_pages entries one page-sized
  * chunk at a time, queue a machphys update for every entry, and finally
  * fill the first page of the table with INVALID_P2M_ENTRY bytes.
  *
  * A temporary page is taken from the bootmem allocator for the swaps
  * and released again afterwards.
  */
 static void
 shift_phys_machine(unsigned long *phys_machine, int nr_pages)
 {
 
         unsigned long *tmp_page, *current_page, *next_page;
 	int i;
 
 	tmp_page = bootmem_alloc(PAGE_SIZE);
 	/* Start at the last page-sized chunk of the table and the one below it. */
 	current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));  
 	next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));  
 	bcopy(phys_machine, tmp_page, PAGE_SIZE);
 
 	/* Each iteration swaps current_page with the chunk below it. */
 	while (current_page > phys_machine) { 
 	        /*  save next page */
 	        bcopy(next_page, tmp_page, PAGE_SIZE);
 	        /* shift down page */
 		bcopy(current_page, next_page, PAGE_SIZE);
 	        /*  finish swap */
 	        bcopy(tmp_page, current_page, PAGE_SIZE);
 	  
 		current_page -= (PAGE_SIZE/sizeof(unsigned long));
 		next_page -= (PAGE_SIZE/sizeof(unsigned long));
 	}
 	bootmem_free(tmp_page, PAGE_SIZE);	
 	
 	/* Queue a machphys update for every table entry at its new index. */
 	for (i = 0; i < nr_pages; i++) {
 	        xen_machphys_update(phys_machine[i], i);
 	}
 	memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
 
 }
 #endif /* ADD_ISA_HOLE */
 
 /*
  * Build a directory of the pages that make up our Physical to Machine
  * mapping table. The Xen suspend/restore code uses this to find our
  * mapping table.
  *
  * One page is allocated for the top-level list.  For every FPP entries
  * of xen_phys_machine, the machine frame number of the page holding
  * them is recorded in a second-level list page; a new second-level page
  * is allocated whenever the previous one is full and is itself recorded
  * in the top-level list.  The result is published through the shared
  * info page together with max_pfn.  The "arg" parameter is unused.
  *
  * NOTE(review): "k" is not checked against the 16 slots of
  * xen_pfn_to_mfn_frame_list - confirm nr_pages cannot exceed that.
  */
 static void
 init_frame_list_list(void *arg)
 {
 	unsigned long nr_pages = xen_start_info->nr_pages;
 #define FPP	(PAGE_SIZE/sizeof(xen_pfn_t))
 	int i, j, k;
 
 	xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
 	for (i = 0, j = 0, k = -1; i < nr_pages;
 	     i += FPP, j++) {
 		/* Start a new second-level page when j wraps around. */
 		if ((j & (FPP - 1)) == 0) {
 			k++;
 			xen_pfn_to_mfn_frame_list[k] =
 				malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
 			xen_pfn_to_mfn_frame_list_list[k] =
 				VTOMFN(xen_pfn_to_mfn_frame_list[k]);
 			j = 0;
 		}
 		xen_pfn_to_mfn_frame_list[k][j] = 
 			VTOMFN(&xen_phys_machine[i]);
 	}
 
 	HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
 		= VTOMFN(xen_pfn_to_mfn_frame_list_list);
 }	
 SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
 
 extern unsigned long physfree;
 
 int pdir, curoffset;
 extern int nkpt;
 
 extern uint32_t kernbase;
 
 void
 initvalues(start_info_t *startinfo)
 { 
 	vm_offset_t cur_space, cur_space_pt;
 	struct physdev_set_iopl set_iopl;
 	
 	int l3_pages, l2_pages, l1_pages, offset;
 	vm_paddr_t console_page_ma, xen_store_ma;
 	vm_offset_t tmpva;
 	vm_paddr_t shinfo;
 #ifdef PAE
 	vm_paddr_t IdlePDPTma, IdlePDPTnewma;
 	vm_paddr_t IdlePTDnewma[4];
 	pd_entry_t *IdlePDPTnew, *IdlePTDnew;
 	vm_paddr_t IdlePTDma[4];
 #else
 	vm_paddr_t IdlePTDma[1];
 #endif
 	unsigned long i;
 	int ncpus = MAXCPU;
 
 	nkpt = min(
 		min(
 			max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
 		    NPGPTD*NPDEPG - KPTDI),
 		    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
 
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);	
 #ifdef notyet
 	/*
 	 * need to install handler
 	 */
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);	
 #endif	
 	xen_start_info = startinfo;
 	xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
 
 	IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
 	l1_pages = 0;
 	
 #ifdef PAE
 	l3_pages = 1;
 	l2_pages = 0;
 	IdlePDPT = (pd_entry_t *)startinfo->pt_base;
 	IdlePDPTma = VTOM(startinfo->pt_base);
 	for (i = (KERNBASE >> 30);
 	     (i < 4) && (IdlePDPT[i] != 0); i++)
 			l2_pages++;
 	/*
 	 * Note that only one page directory has been allocated at this point.
 	 * Thus, if KERNBASE
 	 */
 	for (i = 0; i < l2_pages; i++)
 		IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
 
 	l2_pages = (l2_pages == 0) ? 1 : l2_pages;
 #else	
 	l3_pages = 0;
 	l2_pages = 1;
 #endif
 	for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
 	     (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
 		
 		if (IdlePTD[i] == 0)
 			break;
 		l1_pages++;
 	}
 
 	/* number of pages allocated after the pts + 1*/;
 	cur_space = xen_start_info->pt_base +
 	    (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
 
 	printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);
 
 	printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
 	    KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
 	    xen_start_info->nr_pt_frames);
 	xendebug_flags = 0; /* 0xffffffff; */
 
 #ifdef ADD_ISA_HOLE
 	shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
 #endif
 	XENPRINTF("IdlePTD %p\n", IdlePTD);
 	XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
 		  "mod_start: 0x%lx mod_len: 0x%lx\n",
 		  xen_start_info->nr_pages, xen_start_info->shared_info, 
 		  xen_start_info->flags, xen_start_info->pt_base, 
 		  xen_start_info->mod_start, xen_start_info->mod_len);
 
 #ifdef PAE
 	IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
 	bzero(IdlePDPTnew, PAGE_SIZE);
 
 	IdlePDPTnewma =  VTOM(IdlePDPTnew);
 	IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
 	bzero(IdlePTDnew, 4*PAGE_SIZE);
 
 	for (i = 0; i < 4; i++) 
 		IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
 	/*
 	 * L3
 	 *
 	 * Copy the 4 machine addresses of the new PTDs in to the PDPT
 	 * 
 	 */
 	for (i = 0; i < 4; i++)
 		IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
 
 	__asm__("nop;");
 	/*
 	 *
 	 * re-map the new PDPT read-only
 	 */
 	PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
 	/*
 	 * 
 	 * Unpin the current PDPT
 	 */
 	xen_pt_unpin(IdlePDPTma);
 
 #endif  /* PAE */
 
 	/* Map proc0's KSTACK */
 	proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
 	printk("proc0kstack=%u\n", proc0kstack);
 
 	/* vm86/bios stack */
 	cur_space += PAGE_SIZE;
 
 	/* Map space for the vm86 region */
 	vm86paddr = (vm_offset_t)cur_space;
 	cur_space += (PAGE_SIZE * 3);
 
 	/* allocate 4 pages for bootmem allocator */
 	bootmem_start = bootmem_current = (char *)cur_space;
 	cur_space += (4 * PAGE_SIZE);
 	bootmem_end = (char *)cur_space;
 	
 	/* allocate pages for gdt */
 	gdt = (union descriptor *)cur_space;
 	cur_space += PAGE_SIZE*ncpus;
 
         /* allocate page for ldt */
 	ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
 	cur_space += PAGE_SIZE;
 	
 	/* unmap remaining pages from initial chunk
 	 *
 	 */
 	for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
 	     tmpva += PAGE_SIZE) {
 		bzero((char *)tmpva, PAGE_SIZE);
 		PT_SET_MA(tmpva, (vm_paddr_t)0);
 	}
 
 	PT_UPDATES_FLUSH();
 
 	memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
 	    ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
 	    l1_pages*sizeof(pt_entry_t));
 
 	for (i = 0; i < 4; i++) {
 		PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
 		    IdlePTDnewma[i] | PG_V);
 	}
 	xen_load_cr3(VTOP(IdlePDPTnew));
 	xen_pgdpt_pin(VTOM(IdlePDPTnew));
 
 	/* allocate remainder of nkpt pages */
 	cur_space_pt = cur_space;
 	for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
 	     i++, cur_space += PAGE_SIZE) {
 		pdir = (offset + i) / NPDEPG;
 		curoffset = ((offset + i) % NPDEPG);
 		if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
 			break;
 
 		/*
 		 * make sure that all the initial page table pages
 		 * have been zeroed
 		 */
 		PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
 		bzero((char *)cur_space, PAGE_SIZE);
 		PT_SET_MA(cur_space, (vm_paddr_t)0);
 		xen_pt_pin(VTOM(cur_space));
 		xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
 			curoffset*sizeof(vm_paddr_t)), 
 		    VTOM(cur_space) | PG_KERNEL);
 		PT_UPDATES_FLUSH();
 	}
 	
 	for (i = 0; i < 4; i++) {
 		pdir = (PTDPTDI + i) / NPDEPG;
 		curoffset = (PTDPTDI + i) % NPDEPG;
 
 		xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
 			curoffset*sizeof(vm_paddr_t)), 
 		    IdlePTDnewma[i] | PG_V);
 	}
 
 	PT_UPDATES_FLUSH();
 	
 	IdlePTD = IdlePTDnew;
 	IdlePDPT = IdlePDPTnew;
 	IdlePDPTma = IdlePDPTnewma;
 	
 	HYPERVISOR_shared_info = (shared_info_t *)cur_space;
 	cur_space += PAGE_SIZE;
 
 	xen_store = (struct xenstore_domain_interface *)cur_space;
 	cur_space += PAGE_SIZE;
 
 	console_page = (char *)cur_space;
 	cur_space += PAGE_SIZE;
 	
 	/*
 	 * shared_info is an unsigned long so this will randomly break if
 	 * it is allocated above 4GB - I guess people are used to that
 	 * sort of thing with Xen ... sigh
 	 */
 	shinfo = xen_start_info->shared_info;
 	PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
 	
 	printk("#4\n");
 
 	xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
 	PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
 	console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
 	PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
 
 	printk("#5\n");
 
 	set_iopl.iopl = 1;
 	PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
 	printk("#6\n");
 #if 0
 	/* add page table for KERNBASE */
 	xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), 
 			    VTOM(cur_space) | PG_KERNEL);
 	xen_flush_queue();
 #ifdef PAE	
 	xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), 
 			    VTOM(cur_space) | PG_V | PG_A);
 #else
 	xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), 
 			    VTOM(cur_space) | PG_V | PG_A);
 #endif	
 	xen_flush_queue();
 	cur_space += PAGE_SIZE;
 	printk("#6\n");
 #endif /* 0 */	
 #ifdef notyet
 	if (xen_start_info->flags & SIF_INITDOMAIN) {
 		/* Map first megabyte */
 		for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) 
 			PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
 		xen_flush_queue();
 	}
 #endif
 	/*
 	 * re-map kernel text read-only
 	 *
 	 */
 	for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
 	     i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
 		PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
 	
 	printk("#7\n");
 	physfree = VTOP(cur_space);
 	init_first = physfree >> PAGE_SHIFT;
 	IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
 	IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
 	setup_xen_features();
 	printk("#8, proc0kstack=%u\n", proc0kstack);
 }
 
 
 /*
  * Table of trap entry points.  Each row lists, in order: a vector number,
  * a flags value, a code segment selector built with
  * GSEL(GCODE_SEL, SEL_KPL), and the address of the IDTVEC() entry stub.
  * NOTE(review): the field meanings are read off the initializer layout;
  * confirm against the trap_info_t declaration.  The final all-zero row is
  * presumably the end-of-table marker, and the "|4" in some flags values
  * presumably asks for event delivery to be masked on entry -- verify
  * against the Xen interface headers.
  */
 trap_info_t trap_table[] = {
 	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
 	{ 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
 	{ 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
 	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
 	/* This is UPL on Linux and KPL on BSD */
 	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
 	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
 	{ 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
 	/*
 	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
 	 *   no handler for double fault
 	 */
 	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
 	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
 	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
 	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
 	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
 	{14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
 	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
 	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
 	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
 	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
 	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
 	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
 	{  0, 0,           0, 0 }
 };
 
+/*
+ * Perform a multicall and check that the individual calls succeeded.
+ *
+ * call_list is the array of nr_calls entries handed to
+ * _HYPERVISOR_multicall().  Panics if the multicall itself fails or if any
+ * entry reports a negative result; otherwise returns 0.
+ */
+int
+HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
+{
+	int failed = 0;
+	int i;
+
+	/* Perform the multicall. */
+	PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
+
+	/*
+	 * Check the results of individual hypercalls.  The result is
+	 * compared as a signed value: if the field is declared unsigned
+	 * (as in the Xen public interface), an uncast "< 0" test can never
+	 * be true and failed calls would go unnoticed.
+	 */
+	for (i = 0; i < nr_calls; i++)
+		if (unlikely((long)call_list[i].result < 0))
+			failed++;
+	if (unlikely(failed > 0))
+		panic("%d multicall(s) failed: cpu %d\n",
+		    failed, smp_processor_id());
+
+	/* If we didn't panic already, everything succeeded. */
+	return (0);
+}
 
 /********** CODE WORTH KEEPING ABOVE HERE *****************/ 
 
 void xen_failsafe_handler(void);
 
 /*
  * Failsafe handler: unconditionally panics with a fixed message.
  */
 void
 xen_failsafe_handler(void)
 {
 
 	panic("xen_failsafe_handler called!\n");
 }
 
 void xen_handle_thread_switch(struct pcb *pcb);
 
 /* This is called by cpu_switch() when switching threads. */
 /* The pcb arg refers to the process control block of the */
 /* next thread which is to run */
 void
 xen_handle_thread_switch(struct pcb *pcb)
 {
     uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
     uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
     multicall_entry_t mcl[3];
     int i = 0;
 
     /* Notify Xen of task switch */
     mcl[i].op = __HYPERVISOR_stack_switch;
     mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
     mcl[i++].args[1] = (unsigned long)pcb;
 
     /* Check for update of fsd */
     if (*a != *b || *(a+1) != *(b+1)) {
         mcl[i].op = __HYPERVISOR_update_descriptor;
         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
     }    
 
     a += 2;
     b += 2;
 
     /* Check for update of gsd */
     if (*a != *b || *(a+1) != *(b+1)) {
         mcl[i].op = __HYPERVISOR_update_descriptor;
         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
     }    
 
     (void)HYPERVISOR_multicall(mcl, i);
 }
Index: projects/binutils-2.17/sys/kern/kern_tc.c
===================================================================
--- projects/binutils-2.17/sys/kern/kern_tc.c	(revision 215829)
+++ projects/binutils-2.17/sys/kern/kern_tc.c	(revision 215830)
@@ -1,956 +1,956 @@
 /*-
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
  * can do whatever you want with this stuff. If we meet some day, and you think
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ntp.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/timepps.h>
 #include <sys/timetc.h>
 #include <sys/timex.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
  * It is relatively small so that ntp_update_second gets called enough
  * in the typical 'missed a couple of seconds' case, but doesn't loop
  * forever when the time step is large.
  */
 #define LARGE_STEP	200
 
 /*
  * Implement a dummy timecounter which we can use until we get a real one
  * in the air.  This allows the console and other early stuff to use
  * time services.
  */
 
 static u_int
 dummy_get_timecount(struct timecounter *tc)
 {
 	static u_int fake_count;
 
 	/* Each call advances the fake counter by one and reports the new value. */
 	fake_count += 1;
 	return (fake_count);
 }
 
 static struct timecounter dummy_timecounter = {
 	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
 };
 
 /*
  * One snapshot of timekeeping state.  Snapshots are linked into a ring via
  * th_next; tc_windup() fills in the next one and then makes it current.
  */
 struct timehands {
 	/* These fields must be initialized by the driver. */
 	/* Timecounter this snapshot was taken from. */
 	struct timecounter	*th_counter;
 	/* Adjustment handed to ntp_update_second() and folded into th_scale. */
 	int64_t			th_adjustment;
 	/* Multiplied by a counter delta to get a bintime fraction. */
 	uint64_t		th_scale;
 	/* Counter value at the time of the snapshot. */
 	u_int	 		th_offset_count;
 	/* Uptime at the time of the snapshot. */
 	struct bintime		th_offset;
 	/* UTC timestamps returned by getmicrotime()/getnanotime(). */
 	struct timeval		th_microtime;
 	struct timespec		th_nanotime;
 	/* Fields not to be copied in tc_windup start with th_generation. */
 	/* Zero while the snapshot is being rewritten; readers retry on change. */
 	volatile u_int		th_generation;
 	/* Next snapshot in the ring. */
 	struct timehands	*th_next;
 };
 
 /*
  * Statically allocated ring of ten timehands: th0 -> th1 -> ... -> th9 -> th0.
  * th1..th9 start out empty (generation 0); th0 is the initial snapshot,
  * backed by the dummy timecounter, with an uptime of one second and
  * generation 1.
  */
 static struct timehands th0;
 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
 static struct timehands th0 = {
 	&dummy_timecounter,
 	0,
 	/* Scale for the dummy counter: 2^64 fractions per second / 1000000. */
 	(uint64_t)-1 / 1000000,
 	0,
 	{1, 0},
 	{0, 0},
 	{0, 0},
 	1,
 	&th1
 };
 
 static struct timehands *volatile timehands = &th0;
 struct timecounter *timecounter = &dummy_timecounter;
 static struct timecounter *timecounters = &dummy_timecounter;
 
 int tc_min_ticktock_freq = 1;
 
 time_t time_second = 1;
 time_t time_uptime = 1;
 
 struct bintime boottimebin;
 struct timeval boottime;
 static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD,
     NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime");
 
 SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
 SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, "");
 
 static int timestepwarnings;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
     &timestepwarnings, 0, "Log time steps");
 
 static void tc_windup(void);
 static void cpu_tick_calibrate(int);
 
 /*
  * Sysctl handler exporting the boot time.  When SCTL_MASK32 is available
  * and set in the request flags, the value is copied out as two ints
  * instead of a struct timeval.
  */
 static int
 sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
 {
 #ifdef SCTL_MASK32
 	if (req->flags & SCTL_MASK32) {
 		int tv32[2];
 
 		tv32[0] = boottime.tv_sec;
 		tv32[1] = boottime.tv_usec;
 		return (SYSCTL_OUT(req, tv32, sizeof(tv32)));
 	}
 #endif
 	return (SYSCTL_OUT(req, &boottime, sizeof(boottime)));
 }
 
 /* Sysctl handler: read the timecounter passed in arg1 and report its value. */
 static int
 sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
 {
 	struct timecounter *counter;
 	u_int value;
 
 	counter = arg1;
 	value = counter->tc_get_timecount(counter);
 	return (sysctl_handle_int(oidp, &value, 0, req));
 }
 
 static int
 sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
 {
 	uint64_t freq;
 	struct timecounter *tc = arg1;
 
 	freq = tc->tc_frequency;
 	return sysctl_handle_quad(oidp, &freq, 0, req);
 }
 
 /*
  * Return the difference between the timehands' counter value now and what
  * was when we copied it to the timehands' offset_count.
  */
 static __inline u_int
 tc_delta(struct timehands *th)
 {
 	struct timecounter *tc;
 
 	tc = th->th_counter;
 	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
 	    tc->tc_counter_mask);
 }
 
 /*
  * Functions for reading the time.  We have to loop until we are sure that
  * the timehands that we operated on was not updated under our feet.  See
  * the comment in <sys/time.h> for a description of these 12 functions.
  */
 
 void
 binuptime(struct bintime *bt)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/*
 	 * Retry until the snapshot stayed stable for the whole read: a
 	 * generation of zero, or one that changed meanwhile, means the
 	 * timehands was being rewritten.
 	 */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		*bt = snap->th_offset;
 		bintime_addx(bt, snap->th_scale * tc_delta(snap));
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 }
 
 void
 nanouptime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	binuptime(&bt);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 microuptime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	binuptime(&bt);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 bintime(struct bintime *bt)
 {
 
 	/* Current time = uptime + boot time. */
 	binuptime(bt);
 	bintime_add(bt, &boottimebin);
 }
 
 void
 nanotime(struct timespec *tsp)
 {
 	struct bintime bt;
 
 	bintime(&bt);
 	bintime2timespec(&bt, tsp);
 }
 
 void
 microtime(struct timeval *tvp)
 {
 	struct bintime bt;
 
 	bintime(&bt);
 	bintime2timeval(&bt, tvp);
 }
 
 void
 getbinuptime(struct bintime *bt)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/* Copy the cached uptime; retry if the snapshot changed under us. */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		*bt = snap->th_offset;
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 }
 
 void
 getnanouptime(struct timespec *tsp)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/* Convert the cached uptime; retry if the snapshot changed under us. */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		bintime2timespec(&snap->th_offset, tsp);
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 }
 
 void
 getmicrouptime(struct timeval *tvp)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/* Convert the cached uptime; retry if the snapshot changed under us. */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		bintime2timeval(&snap->th_offset, tvp);
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 }
 
 void
 getbintime(struct bintime *bt)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/* Copy the cached uptime; retry if the snapshot changed under us. */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		*bt = snap->th_offset;
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 	/* Uptime plus boot time. */
 	bintime_add(bt, &boottimebin);
 }
 
 void
 getnanotime(struct timespec *tsp)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/* Copy the cached timespec; retry if the snapshot changed under us. */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		*tsp = snap->th_nanotime;
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 }
 
 void
 getmicrotime(struct timeval *tvp)
 {
 	struct timehands *snap;
 	u_int seen;
 
 	/* Copy the cached timeval; retry if the snapshot changed under us. */
 	for (;;) {
 		snap = timehands;
 		seen = snap->th_generation;
 		*tvp = snap->th_microtime;
 		if (seen != 0 && seen == snap->th_generation)
 			break;
 	}
 }
 
 /*
  * Register a new timecounter: add it to the list of known counters,
  * publish its sysctl nodes, and make it the selected counter when its
  * quality (then frequency) is at least as good as the current one.
  */
 void
 tc_init(struct timecounter *tc)
 {
 	struct sysctl_oid *node;
 	u_int min_hz;
 
 	/* XXX: We need some margin here, 10% is a guess */
 	min_hz = tc->tc_frequency / tc->tc_counter_mask;
 	min_hz = min_hz * 11 / 10;
 	if (min_hz > hz && tc->tc_quality >= 0) {
 		/* Demote the counter so that it is never picked automatically. */
 		tc->tc_quality = -2000;
 		if (bootverbose) {
 			printf("Timecounter \"%s\" frequency %ju Hz",
 			    tc->tc_name, (uintmax_t)tc->tc_frequency);
 			printf(" -- Insufficient hz, needs at least %u\n",
 			    min_hz);
 		}
 	} else if (tc->tc_quality >= 0 || bootverbose) {
 		printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
 		    tc->tc_name, (uintmax_t)tc->tc_frequency,
 		    tc->tc_quality);
 	}
 
 	/* Push the counter onto the head of the list of known counters. */
 	tc->tc_next = timecounters;
 	timecounters = tc;
 
 	/*
 	 * Set up sysctl tree for this counter.
 	 */
 	node = SYSCTL_ADD_NODE(NULL,
 	    SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
 	    CTLFLAG_RW, 0, "timecounter description");
 	SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
 	    "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
 	    "mask for implemented bits");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
 	    "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc),
 	    sysctl_kern_timecounter_get, "IU", "current timecounter value");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
 	    "frequency", CTLTYPE_QUAD | CTLFLAG_RD, tc, sizeof(*tc),
 	     sysctl_kern_timecounter_freq, "QU", "timecounter frequency");
 	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
 	    "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
 	    "goodness of time counter");
 
 	/*
 	 * Never automatically use a timecounter with negative quality.
 	 * Even though we run on the dummy counter, switching here may be
 	 * worse since this timecounter may not be monotonous.
 	 * Otherwise keep the current counter if it has a higher quality,
 	 * or the same quality and a higher frequency.
 	 */
 	if (tc->tc_quality < 0 ||
 	    tc->tc_quality < timecounter->tc_quality ||
 	    (tc->tc_quality == timecounter->tc_quality &&
 	    tc->tc_frequency < timecounter->tc_frequency))
 		return;
 
 	/* Read the new counter twice before selecting it. */
 	(void)tc->tc_get_timecount(tc);
 	(void)tc->tc_get_timecount(tc);
 	timecounter = tc;
 }
 
 /* Report the frequency of the current timecounter. */
 uint64_t
 tc_getfrequency(void)
 {
 
 	return (timehands->th_counter->tc_frequency);
 }
 
 /*
  * Step our concept of UTC.  This is done by modifying our estimate of
  * when we booted.
  * XXX: not locked.
  */
 void
 tc_setclock(struct timespec *ts)
 {
 	struct timespec tbef, taft;
 	struct bintime bt, bt2;
 
 	cpu_tick_calibrate(1);
 	nanotime(&tbef);
 	timespec2bintime(ts, &bt);
 	binuptime(&bt2);
 	bintime_sub(&bt, &bt2);
 	bintime_add(&bt2, &boottimebin);
 	boottimebin = bt;
 	bintime2timeval(&bt, &boottime);
 
 	/* XXX fiddle all the little crinkly bits around the fiords... */
 	tc_windup();
 	nanotime(&taft);
 	if (timestepwarnings) {
 		log(LOG_INFO,
 		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
 		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
 		    (intmax_t)taft.tv_sec, taft.tv_nsec,
 		    (intmax_t)ts->tv_sec, ts->tv_nsec);
 	}
 	cpu_tick_calibrate(1);
 }
 
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
  * timecounter and/or do seconds processing in NTP.  Slightly magic.
  *
  * Takes no arguments and returns nothing.  Besides the ring entry it
  * updates the globals timehands, time_second and time_uptime, possibly
  * tc_min_ticktock_freq, and boottimebin when ntp_update_second() moves
  * the second.
  */
 static void
 tc_windup(void)
 {
 	struct bintime bt;
 	struct timehands *th, *tho;
 	uint64_t scale;
 	u_int delta, ncount, ogen;
 	int i;
 	time_t t;
 
 	/*
 	 * Make the next timehands a copy of the current one, but do not
 	 * overwrite the generation or next pointer.  While we update
 	 * the contents, the generation must be zero.
 	 */
 	tho = timehands;
 	th = tho->th_next;
 	ogen = th->th_generation;
 	th->th_generation = 0;
 	bcopy(tho, th, offsetof(struct timehands, th_generation));
 
 	/*
 	 * Capture a timecounter delta on the current timecounter and if
 	 * changing timecounters, a counter value from the new timecounter.
 	 * Update the offset fields accordingly.
 	 */
 	delta = tc_delta(th);
 	if (th->th_counter != timecounter)
 		ncount = timecounter->tc_get_timecount(timecounter);
 	else
 		ncount = 0;
 	th->th_offset_count += delta;
 	th->th_offset_count &= th->th_counter->tc_counter_mask;
 	while (delta > th->th_counter->tc_frequency) {
 		/* Eat complete unadjusted seconds. */
 		delta -= th->th_counter->tc_frequency;
 		th->th_offset.sec++;
 	}
 	if ((delta > th->th_counter->tc_frequency / 2) &&
-	    (th->th_scale * delta < (uint64_t)1 << 63)) {
+	    (th->th_scale * delta < ((uint64_t)1 << 63))) {
 		/* The product th_scale * delta just barely overflows. */
 		th->th_offset.sec++;
 	}
 	bintime_addx(&th->th_offset, th->th_scale * delta);
 
 	/*
 	 * Hardware latching timecounters may not generate interrupts on
 	 * PPS events, so instead we poll them.  There is a finite risk that
 	 * the hardware might capture a count which is later than the one we
 	 * got above, and therefore possibly in the next NTP second which might
 	 * have a different rate than the current NTP second.  It doesn't
 	 * matter in practice.
 	 */
 	if (tho->th_counter->tc_poll_pps)
 		tho->th_counter->tc_poll_pps(tho->th_counter);
 
 	/*
 	 * Deal with NTP second processing.  The for loop normally
 	 * iterates at most once, but in extreme situations it might
 	 * keep NTP sane if timeouts are not run for several seconds.
 	 * At boot, the time step can be large when the TOD hardware
 	 * has been read, so on really large steps, we call
 	 * ntp_update_second only twice.  We need to call it twice in
 	 * case we missed a leap second.
 	 */
 	bt = th->th_offset;
 	bintime_add(&bt, &boottimebin);
 	/* Whole seconds elapsed since the previous snapshot's UTC time. */
 	i = bt.sec - tho->th_microtime.tv_sec;
 	if (i > LARGE_STEP)
 		i = 2;
 	for (; i > 0; i--) {
 		t = bt.sec;
 		ntp_update_second(&th->th_adjustment, &bt.sec);
 		/* If the second was changed, move the boot time by as much. */
 		if (bt.sec != t)
 			boottimebin.sec += bt.sec - t;
 	}
 	/* Update the UTC timestamps used by the get*() functions. */
 	/* XXX shouldn't do this here.  Should force non-`get' versions. */
 	bintime2timeval(&bt, &th->th_microtime);
 	bintime2timespec(&bt, &th->th_nanotime);
 
 	/* Now is a good time to change timecounters. */
 	if (th->th_counter != timecounter) {
 		th->th_counter = timecounter;
 		th->th_offset_count = ncount;
 		/*
 		 * Counter frequency divided by a third of the counter's
 		 * range (mask + 1), but never less than 1.
 		 */
 		tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
 		    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
 	}
 
 	/*-
 	 * Recalculate the scaling factor.  We want the number of 1/2^64
 	 * fractions of a second per period of the hardware counter, taking
 	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
 	 * processing provides us with.
 	 *
 	 * The th_adjustment is nanoseconds per second with 32 bit binary
 	 * fraction and we want 64 bit binary fraction of second:
 	 *
 	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
 	 *
 	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
 	 * we can only multiply by about 850 without overflowing, that
 	 * leaves no suitably precise fractions for multiply before divide.
 	 *
 	 * Divide before multiply with a fraction of 2199/512 results in a
 	 * systematic undercompensation of 10PPM of th_adjustment.  On a
 	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
  	 *
 	 * We happily sacrifice the lowest of the 64 bits of our result
 	 * to the goddess of code clarity.
 	 *
 	 */
 	scale = (uint64_t)1 << 63;
 	scale += (th->th_adjustment / 1024) * 2199;
 	scale /= th->th_counter->tc_frequency;
 	/* The computation started from 2^63 rather than 2^64; double it. */
 	th->th_scale = scale * 2;
 
 	/*
 	 * Now that the struct timehands is again consistent, set the new
 	 * generation number, making sure to not make it zero.
 	 */
 	if (++ogen == 0)
 		ogen = 1;
 	th->th_generation = ogen;
 
 	/* Go live with the new struct timehands. */
 	time_second = th->th_microtime.tv_sec;
 	time_uptime = th->th_offset.sec;
 	timehands = th;
 }
 
 /* Report or change the active timecounter hardware. */
 static int
 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
 {
 	char newname[32];
 	struct timecounter *newtc, *tc;
 	int error;
 
 	tc = timecounter;
 	strlcpy(newname, tc->tc_name, sizeof(newname));
 
 	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
 	if (error != 0 || req->newptr == NULL ||
 	    strcmp(newname, tc->tc_name) == 0)
 		return (error);
 	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
 		if (strcmp(newname, newtc->tc_name) != 0)
 			continue;
 
 		/* Warm up new timecounter. */
 		(void)newtc->tc_get_timecount(newtc);
 		(void)newtc->tc_get_timecount(newtc);
 
 		timecounter = newtc;
 		return (0);
 	}
 	return (EINVAL);
 }
 
 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
     0, 0, sysctl_kern_timecounter_hardware, "A",
     "Timecounter hardware selected");
 
 
 /*
  * Report every known timecounter as a space-separated list of
  * "name(quality)" entries.  Stops at the first SYSCTL_OUT() error and
  * returns it.
  */
 static int
 sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
 {
 	char buf[32], *spc;
 	struct timecounter *tc;
 	int error;
 
 	spc = "";
 	error = 0;
 	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
 		/*
 		 * Bounded formatting: a long counter name is truncated
 		 * instead of overrunning buf.
 		 */
 		snprintf(buf, sizeof(buf), "%s%s(%d)",
 		    spc, tc->tc_name, tc->tc_quality);
 		error = SYSCTL_OUT(req, buf, strlen(buf));
 		spc = " ";
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD,
     0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected");
 
 /*
  * RFC 2783 PPS-API implementation.
  */
 
 /*
  * Handle the PPS_IOC_* requests for the given pps_state.  Returns 0 on
  * success, an errno value on failure, and ENOIOCTL for unknown commands.
  */
 int
 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
 {
 	pps_params_t *params;
 	struct pps_fetch_args *fetch;
 #ifdef PPS_SYNC
 	struct pps_kcbind_args *bind;
 #endif
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
 	switch (cmd) {
 	case PPS_IOC_CREATE:
 	case PPS_IOC_DESTROY:
 		/* Nothing to do for either request. */
 		return (0);
 	case PPS_IOC_SETPARAMS:
 		params = (pps_params_t *)data;
 		/* Refuse mode bits that are not in the capability mask. */
 		if ((params->mode & ~pps->ppscap) != 0)
 			return (EINVAL);
 		pps->ppsparam = *params;
 		return (0);
 	case PPS_IOC_GETPARAMS:
 		params = (pps_params_t *)data;
 		*params = pps->ppsparam;
 		params->api_version = PPS_API_VERS_1;
 		return (0);
 	case PPS_IOC_GETCAP:
 		*(int*)data = pps->ppscap;
 		return (0);
 	case PPS_IOC_FETCH:
 		fetch = (struct pps_fetch_args *)data;
 		if (fetch->tsformat != 0 &&
 		    fetch->tsformat != PPS_TSFMT_TSPEC)
 			return (EINVAL);
 		/* Only a zero timeout is supported. */
 		if (fetch->timeout.tv_sec != 0 || fetch->timeout.tv_nsec != 0)
 			return (EOPNOTSUPP);
 		pps->ppsinfo.current_mode = pps->ppsparam.mode;
 		fetch->pps_info_buf = pps->ppsinfo;
 		return (0);
 	case PPS_IOC_KCBIND:
 #ifdef PPS_SYNC
 		bind = (struct pps_kcbind_args *)data;
 		/* XXX Only root should be able to do this */
 		if (bind->tsformat != 0 && bind->tsformat != PPS_TSFMT_TSPEC)
 			return (EINVAL);
 		if (bind->kernel_consumer != PPS_KC_HARDPPS)
 			return (EINVAL);
 		if ((bind->edge & ~pps->ppscap) != 0)
 			return (EINVAL);
 		pps->kcmode = bind->edge;
 		return (0);
 #else
 		return (EOPNOTSUPP);
 #endif
 	default:
 		return (ENOIOCTL);
 	}
 }
 
 /*
  * Complete the capability mask: the timespec format is always offered,
  * and each capture capability implies the matching offset capability.
  */
 void
 pps_init(struct pps_state *pps)
 {
 
 	pps->ppscap |= PPS_TSFMT_TSPEC;
 	if ((pps->ppscap & PPS_CAPTUREASSERT) != 0)
 		pps->ppscap |= PPS_OFFSETASSERT;
 	if ((pps->ppscap & PPS_CAPTURECLEAR) != 0)
 		pps->ppscap |= PPS_OFFSETCLEAR;
 }
 
 /*
  * Record the current timehands, its generation and a counter reading in
  * the pps_state.  The recorded generation is zeroed if the timehands
  * changed while the counter was being read.
  */
 void
 pps_capture(struct pps_state *pps)
 {
 	struct timehands *snap;
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
 	snap = timehands;
 	pps->capgen = snap->th_generation;
 	pps->capth = snap;
 	pps->capcount = snap->th_counter->tc_get_timecount(snap->th_counter);
 	if (pps->capgen != snap->th_generation)
 		pps->capgen = 0;
 }
 
 void
 pps_event(struct pps_state *pps, int event)
 {
 	struct bintime bt;
 	struct timespec ts, *tsp, *osp;
 	u_int tcount, *pcount;
 	int foff, fhard;
 	pps_seq_t *pseq;
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
 	/* If the timecounter was wound up underneath us, bail out. */
 	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
 		return;
 
 	/* Things would be easier with arrays. */
 	if (event == PPS_CAPTUREASSERT) {
 		tsp = &pps->ppsinfo.assert_timestamp;
 		osp = &pps->ppsparam.assert_offset;
 		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
 		fhard = pps->kcmode & PPS_CAPTUREASSERT;
 		pcount = &pps->ppscount[0];
 		pseq = &pps->ppsinfo.assert_sequence;
 	} else {
 		tsp = &pps->ppsinfo.clear_timestamp;
 		osp = &pps->ppsparam.clear_offset;
 		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
 		fhard = pps->kcmode & PPS_CAPTURECLEAR;
 		pcount = &pps->ppscount[1];
 		pseq = &pps->ppsinfo.clear_sequence;
 	}
 
 	/*
 	 * If the timecounter changed, we cannot compare the count values, so
 	 * we have to drop the rest of the PPS-stuff until the next event.
 	 */
 	if (pps->ppstc != pps->capth->th_counter) {
 		pps->ppstc = pps->capth->th_counter;
 		*pcount = pps->capcount;
 		pps->ppscount[2] = pps->capcount;
 		return;
 	}
 
 	/* Convert the count to a timespec. */
 	tcount = pps->capcount - pps->capth->th_offset_count;
 	tcount &= pps->capth->th_counter->tc_counter_mask;
 	bt = pps->capth->th_offset;
 	bintime_addx(&bt, pps->capth->th_scale * tcount);
 	bintime_add(&bt, &boottimebin);
 	bintime2timespec(&bt, &ts);
 
 	/* If the timecounter was wound up underneath us, bail out. */
 	if (pps->capgen != pps->capth->th_generation)
 		return;
 
 	*pcount = pps->capcount;
 	(*pseq)++;
 	*tsp = ts;
 
 	if (foff) {
 		timespecadd(tsp, osp);
 		if (tsp->tv_nsec < 0) {
 			tsp->tv_nsec += 1000000000;
 			tsp->tv_sec -= 1;
 		}
 	}
 #ifdef PPS_SYNC
 	if (fhard) {
 		uint64_t scale;
 
 		/*
 		 * Feed the NTP PLL/FLL.
 		 * The FLL wants to know how many (hardware) nanoseconds
 		 * elapsed since the previous event.
 		 */
 		tcount = pps->capcount - pps->ppscount[2];
 		pps->ppscount[2] = pps->capcount;
 		tcount &= pps->capth->th_counter->tc_counter_mask;
 		scale = (uint64_t)1 << 63;
 		scale /= pps->capth->th_counter->tc_frequency;
 		scale *= 2;
 		bt.sec = 0;
 		bt.frac = 0;
 		bintime_addx(&bt, scale * tcount);
 		bintime2timespec(&bt, &ts);
 		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
 	}
 #endif
 }
 
 /*
  * Timecounters need to be updated every so often to prevent the hardware
  * counter from overflowing.  Updating also recalculates the cached values
  * used by the get*() family of functions, so their precision depends on
  * the update frequency.
  */
 
 static int tc_tick;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
     "Approximate number of hardclock ticks in a millisecond");
 
 /*
  * Accumulate cnt ticks; once at least tc_tick of them have piled up,
  * reset the tally and wind up the timecounter.
  */
 void
 tc_ticktock(int cnt)
 {
 	static int pending;
 
 	pending += cnt;
 	if (pending >= tc_tick) {
 		pending = 0;
 		tc_windup();
 	}
 }
 
 /*
  * SYSINIT hook: choose tc_tick, report the resulting update interval,
  * warm up the selected timecounter and do the first windup.  The dummy
  * argument is unused.
  */
 static void
 inittimecounter(void *dummy)
 {
 	u_int p;
 
 	/*
 	 * Set the initial timeout to
 	 * max(1, <approx. number of hardclock ticks in a millisecond>).
 	 * People should probably not use the sysctl to set the timeout
 	 * to smaller than its initial value, since that value is the
 	 * smallest reasonable one.  If they want better timestamps they
 	 * should use the non-"get"* functions.
 	 */
 	if (hz > 1000)
 		tc_tick = (hz + 500) / 1000;
 	else
 		tc_tick = 1;
 	/* Update interval in microseconds, printed as msec.usec. */
 	p = (tc_tick * 1000000) / hz;
 	/* Both arguments are u_int, so both conversions are %u. */
 	printf("Timecounters tick every %u.%03u msec\n", p / 1000, p % 1000);
 
 	/* warm up new timecounter (again) and get rolling. */
 	(void)timecounter->tc_get_timecount(timecounter);
 	(void)timecounter->tc_get_timecount(timecounter);
 	tc_windup();
 }
 
 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
 
 /* Cpu tick handling -------------------------------------------------*/
 
 static int cpu_tick_variable;
 static uint64_t	cpu_tick_frequency;
 
 /*
  * cpu_ticks implementation built on the active timecounter: extends the
  * masked counter reading to 64 bits by adding one full counter period
  * every time a reading is lower than the previous one.
  */
 static uint64_t
 tc_cpu_ticks(void)
 {
 	static uint64_t wrapped;
 	static unsigned prev;
 	struct timecounter *counter;
 	unsigned cur;
 
 	counter = timehands->th_counter;
 	cur = counter->tc_get_timecount(counter) & counter->tc_counter_mask;
 	/* A reading below the previous one is treated as a wrap-around. */
 	if (cur < prev)
 		wrapped += (uint64_t)counter->tc_counter_mask + 1;
 	prev = cur;
 	return (cur + wrapped);
 }
 
 /*
  * Run cpu_tick_calibrate(0) once per uptime second whose low four bits
  * are zero, i.e. every 16 seconds of uptime.
  */
 void
 cpu_tick_calibration(void)
 {
 	static time_t last_calib;
 
 	if (time_uptime == last_calib || (time_uptime & 0xf) != 0)
 		return;
 	cpu_tick_calibrate(0);
 	last_calib = time_uptime;
 }
 
 /*
  * This function gets called every 16 seconds on only one designated
  * CPU in the system from hardclock() via cpu_tick_calibration().
  *
  * Whenever the real time clock is stepped we get called with reset=1
  * to make sure we handle suspend/resume and similar events correctly.
  */
 
 /*
  * Re-estimate cpu_tick_frequency from the cpu ticks and the uptime that
  * elapsed since the previous call.  The estimate is only ever raised.
  *
  * reset != 0 discards the previous sample (the clock was stepped) and
  * returns without measuring.  Nothing is done for fixed-rate tickers.
  */
 static void
 cpu_tick_calibrate(int reset)
 {
 	static uint64_t c_last;
 	uint64_t c_this, c_delta;
 	static struct bintime  t_last;
 	struct bintime t_this, t_delta;
 	uint32_t divi;
 
 	if (reset) {
 		/* The clock was stepped, abort & reset */
 		t_last.sec = 0;
 		return;
 	}
 
 	/* we don't calibrate fixed rate cputicks */
 	if (!cpu_tick_variable)
 		return;
 
 	getbinuptime(&t_this);
 	c_this = cpu_ticks();
 	/* t_last.sec == 0 means there is no previous sample to compare to. */
 	if (t_last.sec != 0) {
 		c_delta = c_this - c_last;
 		t_delta = t_this;
 		bintime_sub(&t_delta, &t_last);
 		/*
 		 * Headroom:
 		 * 	2^(64-20) / 16[s] =
 		 * 	2^(44) / 16[s] =
 		 * 	17.592.186.044.416 / 16 =
 		 * 	1.099.511.627.776 [Hz]
 		 */
 		/* Elapsed time as a fixed-point value with 20 fraction bits. */
 		divi = t_delta.sec << 20;
 		divi |= t_delta.frac >> (64 - 20);
 		c_delta <<= 20;
 		c_delta /= divi;
 		if (c_delta > cpu_tick_frequency) {
 			/* Cast so the argument matches %ju, as elsewhere here. */
 			if (0 && bootverbose)
 				printf("cpu_tick increased to %ju Hz\n",
 				    (uintmax_t)c_delta);
 			cpu_tick_frequency = c_delta;
 		}
 	}
 	c_last = c_this;
 	t_last = t_this;
 }
 
 /*
  * Install a cpu tick source.  A NULL func selects the timecounter-based
  * tc_cpu_ticks and leaves freq/var unused; otherwise func is installed
  * together with its frequency and "variable rate" flag.
  */
 void
 set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
 {
 
 	if (func != NULL) {
 		cpu_tick_frequency = freq;
 		cpu_tick_variable = var;
 		cpu_ticks = func;
 		return;
 	}
 	cpu_ticks = tc_cpu_ticks;
 }
 
 /*
  * Frequency of the current cpu tick source: the timecounter frequency
  * when tc_cpu_ticks is in use, cpu_tick_frequency otherwise.
  */
 uint64_t
 cpu_tickrate(void)
 {
 
 	return (cpu_ticks == tc_cpu_ticks ?
 	    tc_getfrequency() : cpu_tick_frequency);
 }
 
 /*
  * We need to be slightly careful converting cputicks to microseconds.
  * There is plenty of margin in 64 bits of microseconds (half a million
  * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
  * before divide conversion (to retain precision) we find that the
  * margin shrinks to 1.5 hours (one millionth of 146y).
  * With a three prong approach we never lose significant bits, no
  * matter what the cputick rate and length of timeinterval is.
  */
 
 uint64_t
 cputick2usec(uint64_t tick)
 {
 
 	/* Small enough to multiply by 10^6 before dividing. */
 	if (tick <= 18446744073709LL)		/* floor(2^64 / 1000000) */
 		return ((tick * 1000000LL) / cpu_tickrate());
 	/* Small enough to multiply by 10^3 before dividing. */
 	if (tick <= 18446744073709551LL)	/* floor(2^64 / 1000) */
 		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
 	/* Too large to scale up at all: scale the divisor down instead. */
 	return (tick / (cpu_tickrate() / 1000000LL));
 }
 
 cpu_tick_f	*cpu_ticks = tc_cpu_ticks;
Index: projects/binutils-2.17/sys/kern/subr_taskqueue.c
===================================================================
--- projects/binutils-2.17/sys/kern/subr_taskqueue.c	(revision 215829)
+++ projects/binutils-2.17/sys/kern/subr_taskqueue.c	(revision 215830)
@@ -1,487 +1,485 @@
 /*-
  * Copyright (c) 2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/taskqueue.h>
 #include <sys/unistd.h>
 #include <machine/stdarg.h>
 
 /* malloc type used for every allocation made in this file. */
 static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues");
 /* Handles filled in by swi_add() for the swi_giant and swi task queues. */
 static void	*taskqueue_giant_ih;
 static void	*taskqueue_ih;
 
 /*
  * Record linked on tq_active for the duration of taskqueue_run_locked();
  * tb_running is the task currently being executed by that runner, or NULL
  * between tasks.
  */
 struct taskqueue_busy {
 	struct task	*tb_running;
 	TAILQ_ENTRY(taskqueue_busy) tb_link;
 };
 
 /*
  * A task queue: pending tasks (tq_queue), the enqueue notification hook
  * and its argument (tq_enqueue, tq_context), the runners currently
  * executing tasks (tq_active), the protecting mutex (tq_mutex; a spin
  * mutex when tq_spin is set), the service threads and their count
  * (tq_threads, tq_tcount) and the TQ_FLAGS_* state bits (tq_flags).
  */
 struct taskqueue {
 	STAILQ_HEAD(, task)	tq_queue;
-	const char		*tq_name;
 	taskqueue_enqueue_fn	tq_enqueue;
 	void			*tq_context;
 	TAILQ_HEAD(, taskqueue_busy) tq_active;
 	struct mtx		tq_mutex;
 	struct thread		**tq_threads;
 	int			tq_tcount;
 	int			tq_spin;
 	int			tq_flags;
 };
 
 /*
  * tq_flags bits:
  *   ACTIVE  - set at creation, cleared by taskqueue_free() to stop threads.
  *   BLOCKED - set by taskqueue_block(); enqueue notifications are deferred.
  *   PENDING - a notification was deferred while the queue was blocked.
  */
 #define	TQ_FLAGS_ACTIVE		(1 << 0)
 #define	TQ_FLAGS_BLOCKED	(1 << 1)
 #define	TQ_FLAGS_PENDING	(1 << 2)
 
 /* Acquire tq_mutex with the primitive matching the queue's mutex kind. */
 #define	TQ_LOCK(tq)							\
 	do {								\
 		if ((tq)->tq_spin)					\
 			mtx_lock_spin(&(tq)->tq_mutex);			\
 		else							\
 			mtx_lock(&(tq)->tq_mutex);			\
 	} while (0)
 
 /* Release tq_mutex with the primitive matching the queue's mutex kind. */
 #define	TQ_UNLOCK(tq)							\
 	do {								\
 		if ((tq)->tq_spin)					\
 			mtx_unlock_spin(&(tq)->tq_mutex);		\
 		else							\
 			mtx_unlock(&(tq)->tq_mutex);			\
 	} while (0)
 
 /*
  * Sleep on channel p with mutex m, using msleep_spin() for spin-mutex
  * queues (which takes no priority argument) and msleep() otherwise.
  */
 static __inline int
 TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
     int t)
 {
 	int rv;
 
 	if (!tq->tq_spin)
 		rv = msleep(p, m, pri, wm, t);
 	else
 		rv = msleep_spin(p, m, wm, t);
 	return (rv);
 }
 
 /*
  * Allocate and initialise a task queue.  mflags is passed to malloc()
  * (with M_ZERO added); mtxflags and mtxname are passed to mtx_init(), and
  * MTX_SPIN in mtxflags marks the queue as a spin-mutex queue.  Returns
  * NULL if the allocation fails.
  */
 static struct taskqueue *
-_taskqueue_create(const char *name, int mflags,
+_taskqueue_create(const char *name __unused, int mflags,
 		 taskqueue_enqueue_fn enqueue, void *context,
 		 int mtxflags, const char *mtxname)
 {
 	struct taskqueue *queue;
 
 	queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO);
 	if (!queue)
 		return NULL;
 
 	STAILQ_INIT(&queue->tq_queue);
 	TAILQ_INIT(&queue->tq_active);
-	queue->tq_name = name;
 	queue->tq_enqueue = enqueue;
 	queue->tq_context = context;
 	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 	queue->tq_flags |= TQ_FLAGS_ACTIVE;
 	mtx_init(&queue->tq_mutex, mtxname, NULL, mtxflags);
 
 	return queue;
 }
 
 /*
  * Create a task queue protected by a default (MTX_DEF) mutex.
  */
 struct taskqueue *
 taskqueue_create(const char *name, int mflags,
 		 taskqueue_enqueue_fn enqueue, void *context)
 {
 	struct taskqueue *tq;
 
 	tq = _taskqueue_create(name, mflags, enqueue, context, MTX_DEF,
 	    "taskqueue");
 	return tq;
 }
 
 /*
  * Wake the queue's threads and sleep on pp until tq_tcount drops to
  * zero.  Called with the queue locked.
  */
 static void
 taskqueue_terminate(struct thread **pp, struct taskqueue *tq)
 {
 
 	for (;;) {
 		if (tq->tq_tcount <= 0)
 			break;
 		wakeup(tq);
 		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
 	}
 }
 
 /*
  * Tear down a task queue: clear TQ_FLAGS_ACTIVE so the service threads
  * leave their loop, wait for them to exit, then destroy the mutex and
  * release the thread array and the queue itself.
  */
 void
 taskqueue_free(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 	taskqueue_terminate(queue->tq_threads, queue);
 	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
 	/* The mutex is still held here; it is destroyed rather than unlocked. */
 	mtx_destroy(&queue->tq_mutex);
 	free(queue->tq_threads, M_TASKQUEUE);
 	free(queue, M_TASKQUEUE);
 }
 
 /*
  * Queue a task.  A task that is already pending only has its pending
  * count bumped.  Otherwise it is inserted in descending ta_priority
  * order (after existing tasks of equal priority) and the queue's enqueue
  * hook is invoked, or deferred via TQ_FLAGS_PENDING while the queue is
  * blocked.  Always returns 0.
  */
 int
 taskqueue_enqueue(struct taskqueue *queue, struct task *task)
 {
 	struct task *cur, *before, *tail;
 
 	TQ_LOCK(queue);
 
 	if (task->ta_pending == 0) {
 		/*
 		 * Fast path: appending is correct whenever the last task
 		 * does not have a lower priority than the new one.
 		 */
 		tail = STAILQ_LAST(&queue->tq_queue, task, ta_link);
 		if (tail == NULL || tail->ta_priority >= task->ta_priority) {
 			STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
 		} else {
 			/* Find the first task with a lower priority. */
 			before = NULL;
 			cur = STAILQ_FIRST(&queue->tq_queue);
 			while (cur != NULL &&
 			    cur->ta_priority >= task->ta_priority) {
 				before = cur;
 				cur = STAILQ_NEXT(cur, ta_link);
 			}
 
 			if (before == NULL)
 				STAILQ_INSERT_HEAD(&queue->tq_queue, task,
 				    ta_link);
 			else
 				STAILQ_INSERT_AFTER(&queue->tq_queue, before,
 				    task, ta_link);
 		}
 
 		task->ta_pending = 1;
 		if ((queue->tq_flags & TQ_FLAGS_BLOCKED) != 0)
 			queue->tq_flags |= TQ_FLAGS_PENDING;
 		else
 			queue->tq_enqueue(queue->tq_context);
 	} else {
 		/* Count multiple enqueues. */
 		task->ta_pending++;
 	}
 
 	TQ_UNLOCK(queue);
 
 	return 0;
 }
 
 /*
  * Mark the queue blocked: while TQ_FLAGS_BLOCKED is set,
  * taskqueue_enqueue() records TQ_FLAGS_PENDING instead of calling the
  * enqueue hook.
  */
 void
 taskqueue_block(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	queue->tq_flags |= TQ_FLAGS_BLOCKED;
 	TQ_UNLOCK(queue);
 }
 
 /*
  * Clear the blocked state and, if an enqueue notification was deferred
  * while blocked, deliver it now.
  */
 void
 taskqueue_unblock(struct taskqueue *queue)
 {
 	int deferred;
 
 	TQ_LOCK(queue);
 	deferred = (queue->tq_flags & TQ_FLAGS_PENDING) != 0;
 	queue->tq_flags &= ~(TQ_FLAGS_BLOCKED | TQ_FLAGS_PENDING);
 	if (deferred)
 		queue->tq_enqueue(queue->tq_context);
 	TQ_UNLOCK(queue);
 }
 
 /*
  * Run every queued task.  Entered and left with the queue mutex held;
  * the mutex is dropped around each task function call.  A stack-local
  * taskqueue_busy record is linked on tq_active for the duration so that
  * task_is_running() can see which task this runner is executing.
  */
 static void
 taskqueue_run_locked(struct taskqueue *queue)
 {
 	struct taskqueue_busy tb;
 	struct task *task;
 	int pending;
 
 	mtx_assert(&queue->tq_mutex, MA_OWNED);
 	tb.tb_running = NULL;
 	TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
 
 	while (STAILQ_FIRST(&queue->tq_queue)) {
 		/*
 		 * Carefully remove the first task from the queue and
 		 * zero its pending count.
 		 */
 		task = STAILQ_FIRST(&queue->tq_queue);
 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 		pending = task->ta_pending;
 		task->ta_pending = 0;
 		tb.tb_running = task;
 		TQ_UNLOCK(queue);
 
 		/* The handler receives the number of coalesced enqueues. */
 		task->ta_func(task->ta_context, pending);
 
 		TQ_LOCK(queue);
 		tb.tb_running = NULL;
 		/* Wake anyone sleeping on this task in taskqueue_drain(). */
 		wakeup(task);
 	}
 	TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
 }
 
 /*
  * Lock the queue, run all queued tasks, and unlock it again.
  */
 void
 taskqueue_run(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	taskqueue_run_locked(queue);
 	TQ_UNLOCK(queue);
 }
 
 /*
  * Return 1 if any active runner on the queue is currently executing
  * task, 0 otherwise.  The queue mutex must be held.
  */
 static int
 task_is_running(struct taskqueue *queue, struct task *task)
 {
 	struct taskqueue_busy *runner;
 	int busy;
 
 	mtx_assert(&queue->tq_mutex, MA_OWNED);
 	busy = 0;
 	TAILQ_FOREACH(runner, &queue->tq_active, tb_link) {
 		if (runner->tb_running == task) {
 			busy = 1;
 			break;
 		}
 	}
 	return (busy);
 }
 
 /*
  * Cancel a task: unlink it if it is queued and reset its pending count.
  * The previous pending count is stored in *pendp when pendp is not
  * NULL.  Returns EBUSY if the task is being executed right now, else 0.
  */
 int
 taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp)
 {
 	u_int was_pending;
 	int rv;
 
 	TQ_LOCK(queue);
 	was_pending = task->ta_pending;
 	if (was_pending > 0)
 		STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
 	task->ta_pending = 0;
 	if (task_is_running(queue, task))
 		rv = EBUSY;
 	else
 		rv = 0;
 	TQ_UNLOCK(queue);
 
 	if (pendp != NULL)
 		*pendp = was_pending;
 	return (rv);
 }
 
 /*
  * Wait until task is neither pending on the queue nor being executed.
  * Sleeps on the task address, which taskqueue_run_locked() wakes after
  * each run of the task.
  */
 void
 taskqueue_drain(struct taskqueue *queue, struct task *task)
 {
 
 	/* The sleepability check is only issued for non-spin queues. */
 	if (!queue->tq_spin)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 
 	TQ_LOCK(queue);
 	while (task->ta_pending != 0 || task_is_running(queue, task))
 		TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
 	TQ_UNLOCK(queue);
 }
 
 /* Enqueue hook for taskqueue_swi: schedule its software interrupt. */
 static void
 taskqueue_swi_enqueue(void *context)
 {
 	swi_sched(taskqueue_ih, 0);
 }
 
 /* Software interrupt handler: run the tasks queued on taskqueue_swi. */
 static void
 taskqueue_swi_run(void *dummy)
 {
 	taskqueue_run(taskqueue_swi);
 }
 
 /* Enqueue hook for taskqueue_swi_giant: schedule its software interrupt. */
 static void
 taskqueue_swi_giant_enqueue(void *context)
 {
 	swi_sched(taskqueue_giant_ih, 0);
 }
 
 /* Software interrupt handler: run the tasks queued on taskqueue_swi_giant. */
 static void
 taskqueue_swi_giant_run(void *dummy)
 {
 	taskqueue_run(taskqueue_swi_giant);
 }
 
 /*
  * Create count service threads for the queue *tqp at priority pri.  The
  * printf-style name is used for the thread names; with more than one
  * thread a "_<index>" suffix is appended.
  *
  * Returns EINVAL if count is not positive, ENOMEM if the thread array
  * cannot be allocated, and 0 otherwise -- including when some or all
  * kthread_add() calls failed; such failures are only logged.
  */
 int
 taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
 			const char *name, ...)
 {
 	va_list ap;
 	struct thread *td;
 	struct taskqueue *tq;
 	int i, error;
 	char ktname[MAXCOMLEN + 1];
 
 	if (count <= 0)
 		return (EINVAL);
 
 	tq = *tqp;
 
 	va_start(ap, name);
 	vsnprintf(ktname, sizeof(ktname), name, ap);
 	va_end(ap);
 
 	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE,
 	    M_NOWAIT | M_ZERO);
 	if (tq->tq_threads == NULL) {
 		printf("%s: no memory for %s threads\n", __func__, ktname);
 		return (ENOMEM);
 	}
 
 	/* Create the threads stopped (RFSTOPPED); they are started below. */
 	for (i = 0; i < count; i++) {
 		if (count == 1)
 			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
 			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 		else
 			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
 			    &tq->tq_threads[i], RFSTOPPED, 0,
 			    "%s_%d", ktname, i);
 		if (error) {
 			/* should be ok to continue, taskqueue_free will dtrt */
 			printf("%s: kthread_add(%s): error %d", __func__,
 			    ktname, error);
 			tq->tq_threads[i] = NULL;		/* paranoid */
 		} else
 			tq->tq_tcount++;
 	}
 	/* Set the priority of each created thread and make it runnable. */
 	for (i = 0; i < count; i++) {
 		if (tq->tq_threads[i] == NULL)
 			continue;
 		td = tq->tq_threads[i];
 		thread_lock(td);
 		sched_prio(td, pri);
 		sched_add(td, SRQ_BORING);
 		thread_unlock(td);
 	}
 
 	return (0);
 }
 
 /*
  * Body of a task queue service thread.  arg is a pointer to the queue
  * pointer.  Runs queued tasks and sleeps on the queue until
  * TQ_FLAGS_ACTIVE is cleared, then runs any remaining tasks, drops the
  * thread count, wakes the thread waiting in taskqueue_terminate() and
  * exits.
  */
 void
 taskqueue_thread_loop(void *arg)
 {
 	struct taskqueue **tqp, *tq;
 
 	tqp = arg;
 	tq = *tqp;
 	TQ_LOCK(tq);
 	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 		taskqueue_run_locked(tq);
 		/*
 		 * Because taskqueue_run_locked() can drop tq_mutex, we need
 		 * to check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 		 * meantime, which means we missed a wakeup.
 		 */
 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 			break;
 		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
 	}
 	taskqueue_run_locked(tq);
 
 	/* rendezvous with thread that asked us to terminate */
 	tq->tq_tcount--;
 	wakeup_one(tq->tq_threads);
 	TQ_UNLOCK(tq);
 	kthread_exit();
 }
 
 /*
  * Enqueue hook for thread-backed queues: context points at the queue
  * pointer; wake one thread sleeping on the queue.  The queue mutex must
  * be held.
  */
 void
 taskqueue_thread_enqueue(void *context)
 {
 	struct taskqueue **tqp = context;
 	struct taskqueue *tq = *tqp;
 
 	mtx_assert(&tq->tq_mutex, MA_OWNED);
 	wakeup_one(tq);
 }
 
 /* The "swi" queue: serviced by an INTR_MPSAFE SWI_TQ software interrupt. */
 TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
 		 swi_add(NULL, "task queue", taskqueue_swi_run, NULL, SWI_TQ,
 		     INTR_MPSAFE, &taskqueue_ih)); 
 
 /* The "swi_giant" queue: SWI_TQ_GIANT handler registered without INTR_MPSAFE. */
 TASKQUEUE_DEFINE(swi_giant, taskqueue_swi_giant_enqueue, NULL,
 		 swi_add(NULL, "Giant taskq", taskqueue_swi_giant_run,
 		     NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih)); 
 
 /* The "thread" queue, defined via TASKQUEUE_DEFINE_THREAD. */
 TASKQUEUE_DEFINE_THREAD(thread);
 
 /*
  * Create a task queue protected by a spin (MTX_SPIN) mutex.
  */
 struct taskqueue *
 taskqueue_create_fast(const char *name, int mflags,
 		 taskqueue_enqueue_fn enqueue, void *context)
 {
 	struct taskqueue *tq;
 
 	tq = _taskqueue_create(name, mflags, enqueue, context, MTX_SPIN,
 	    "fast_taskqueue");
 	return tq;
 }
 
 /*
  * NB: for backwards compatibility.  Identical to taskqueue_enqueue(),
  * which handles spin-mutex queues itself via TQ_LOCK()/TQ_UNLOCK().
  */
 int
 taskqueue_enqueue_fast(struct taskqueue *queue, struct task *task)
 {
 	return taskqueue_enqueue(queue, task);
 }
 
 static void	*taskqueue_fast_ih;
 
 static void
 taskqueue_fast_enqueue(void *context)
 {
 	swi_sched(taskqueue_fast_ih, 0);
 }
 
 /* Software interrupt handler: run the tasks queued on taskqueue_fast. */
 static void
 taskqueue_fast_run(void *dummy)
 {
 	taskqueue_run(taskqueue_fast);
 }
 
 /* The "fast" queue: serviced by an INTR_MPSAFE SWI_TQ_FAST software interrupt. */
 TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL,
 	swi_add(NULL, "Fast task queue", taskqueue_fast_run, NULL,
 	SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih));
 
 /*
  * Return 1 if td is one of the queue's service threads, 0 otherwise.
  *
  * The size of tq_threads is not recorded, so the scan is bounded by
  * counting non-NULL slots against tq_tcount.  That bound only holds when
  * there is at least one thread: a queue whose threads were never started
  * (tq_threads == NULL) or that has no live threads (tq_tcount <= 0) is
  * reported as having no members instead of being scanned.
  */
 int
 taskqueue_member(struct taskqueue *queue, struct thread *td)
 {
 	int i, j, ret = 0;
 
 	TQ_LOCK(queue);
 	if (queue->tq_threads != NULL && queue->tq_tcount > 0) {
 		for (i = 0, j = 0; ; i++) {
 			/* Slots whose kthread_add() failed are NULL. */
 			if (queue->tq_threads[i] == NULL)
 				continue;
 			if (queue->tq_threads[i] == td) {
 				ret = 1;
 				break;
 			}
 			if (++j >= queue->tq_tcount)
 				break;
 		}
 	}
 	TQ_UNLOCK(queue);
 	return (ret);
 }
Index: projects/binutils-2.17/sys/kern/vfs_mount.c
===================================================================
--- projects/binutils-2.17/sys/kern/vfs_mount.c	(revision 215829)
+++ projects/binutils-2.17/sys/kern/vfs_mount.c	(revision 215830)
@@ -1,1972 +1,1974 @@
 /*-
  * Copyright (c) 1999-2004 Poul-Henning Kamp
  * Copyright (c) 1999 Michael Smith
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/filedesc.h>
 #include <sys/reboot.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 #include <vm/uma.h>
 
 #include <geom/geom.h>
 
 #include <machine/stdarg.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 /*
  * Upper bound (64 KiB) that vfs_buildopts() applies to each option name,
  * each option value, and the running total of memory used by the options.
  */
 #define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
 
 static int	vfs_domount(struct thread *td, const char *fstype,
 		    char *fspath, int fsflags, void *fsdata);
 static void	free_mntarg(struct mntarg *ma);
 
 static int	usermount = 0;
 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
     "Unprivileged users may mount and unmount file systems");
 
 /* malloc types defined by this file, and the UMA zone for struct mount. */
 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
 static uma_zone_t mount_zone;
 
 /* List of mounted filesystems. */
 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
 
 /* For any iteration/modification of mountlist */
 struct mtx mountlist_mtx;
 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
 
 /*
  * Global opts, taken by all filesystems
  */
 static const char *global_opts[] = {
 	"errmsg",
 	"fstype",
 	"fspath",
 	"ro",
 	"rw",
 	"nosuid",
 	"noexec",
 	NULL
 };
 
 /*
  * Init callback for mount_zone: set up the locks embedded in a struct
  * mount.  size and flags are unused.  Always returns 0.
  */
 static int
 mount_init(void *mem, int size, int flags)
 {
 	struct mount *mp = mem;
 
 	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
 	lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
 	return (0);
 }
 
 /*
  * Fini callback for mount_zone: destroy the locks set up by mount_init(),
  * in reverse order.  size is unused.
  */
 static void
 mount_fini(void *mem, int size)
 {
 	struct mount *mp = mem;
 
 	lockdestroy(&mp->mnt_explock);
 	mtx_destroy(&mp->mnt_mtx);
 }
 
 /*
  * Create the UMA zone from which struct mount is allocated, with
  * mount_init()/mount_fini() as its item init/fini callbacks.  Registered
  * below to run at SI_SUB_VFS.
  */
 static void
 vfs_mount_init(void *dummy __unused)
 {
 
 	mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL,
 	    NULL, mount_init, mount_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 }
 SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);
 
 /*
  * ---------------------------------------------------------------------
  * Functions for building and sanitizing the mount options
  */
 
 /*
  * Remove one mount option: unlink opt from opts and free its name, its
  * value (if any) and the option structure itself.
  */
 static void
 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
 {
 
 	TAILQ_REMOVE(opts, opt, link);
 	free(opt->name, M_MOUNT);
 	if (opt->value != NULL)
 		free(opt->value, M_MOUNT);
 	free(opt, M_MOUNT);
 }
 
 /*
  * Release all resources related to the mount options: every option on
  * the list, then the list head itself.
  */
 void
 vfs_freeopts(struct vfsoptlist *opts)
 {
 	struct vfsopt *head;
 
 	while ((head = TAILQ_FIRST(opts)) != NULL)
 		vfs_freeopt(opts, head);
 	free(opts, M_MOUNT);
 }
 
 /*
  * Remove every option called name from opts.  A NULL list is accepted
  * and ignored.
  */
 void
 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
 {
 	struct vfsopt *cur, *next;
 
 	if (opts == NULL)
 		return;
 	/* The _SAFE variant is needed because entries are freed in the loop. */
 	TAILQ_FOREACH_SAFE(cur, opts, link, next)  {
 		if (strcmp(cur->name, name) != 0)
 			continue;
 		vfs_freeopt(opts, cur);
 	}
 }
 
 /*
  * Check if options are equal (with or without the "no" prefix).
  *
  * Two names match when they are identical, or when one is the other with
  * "no" prepended.  The "no" comparison is repeated after stripping each
  * leading dot-terminated component that both names share, so that
  * "foo.noopt" matches "foo.opt".  Returns 1 on a match, 0 otherwise.
  */
 static int
 vfs_equalopts(const char *opt1, const char *opt2)
 {
 	const char *dot;
 	size_t prefixlen;
 
 	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
 	if (strcmp(opt1, opt2) == 0)
 		return (1);
 
 	for (;;) {
 		/* "noopt" vs. "opt" (possibly below a shared "foo." prefix) */
 		if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
 			return (1);
 		/* "opt" vs. "noopt" (possibly below a shared "foo." prefix) */
 		if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
 			return (1);
 
 		/* Descend one component if both names share it. */
 		dot = strchr(opt1, '.');
 		if (dot == NULL)
 			break;
 		prefixlen = (size_t)(dot - opt1) + 1;
 		if (strncmp(opt1, opt2, prefixlen) != 0)
 			break;
 		opt1 += prefixlen;
 		opt2 += prefixlen;
 	}
 	return (0);
 }
 
 /*
  * If a mount option is specified several times,
  * (with or without the "no" prefix) only keep
  * the last occurrence of it.
  */
 static void
 vfs_sanitizeopts(struct vfsoptlist *opts)
 {
 	struct vfsopt *opt, *opt2, *tmp;
 
 	/*
 	 * Walk from the tail; for each option, delete every earlier option
 	 * that vfs_equalopts() considers the same.  Only entries before the
 	 * current one are removed, so the outer iteration stays valid.
 	 */
 	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
 		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
 		while (opt2 != NULL) {
 			if (vfs_equalopts(opt->name, opt2->name)) {
 				/* Fetch the predecessor before opt2 is freed. */
 				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
 				vfs_freeopt(opts, opt2);
 				opt2 = tmp;
 			} else {
 				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
 			}
 		}
 	}
 }
 
 /*
  * Build a linked list of mount options from a struct uio.
  *
  * The iovecs are consumed in (name, value) pairs.  Each name must be a
  * non-empty NUL-terminated string; a zero-length value leaves the
  * option's value NULL.  The data is copied with bcopy() for
  * UIO_SYSSPACE and with copyin() otherwise.  On success the sanitized
  * list is stored in *options and 0 is returned; on failure everything
  * built so far is freed and an errno value is returned.
  */
 int
 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
 {
 	struct vfsoptlist *opts;
 	struct vfsopt *opt;
 	size_t memused, namelen, optlen;
 	unsigned int i, iovcnt;
 	int error;
 
 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 	TAILQ_INIT(opts);
 	memused = 0;
 	iovcnt = auio->uio_iovcnt;
 	/* NOTE(review): reads iov[i + 1], so iovcnt is presumably even -- nmount() checks this. */
 	for (i = 0; i < iovcnt; i += 2) {
 		namelen = auio->uio_iov[i].iov_len;
 		optlen = auio->uio_iov[i + 1].iov_len;
 		memused += sizeof(struct vfsopt) + optlen + namelen;
 		/*
 		 * Avoid consuming too much memory, and attempts to overflow
 		 * memused.
 		 */
 		if (memused > VFS_MOUNTARG_SIZE_MAX ||
 		    optlen > VFS_MOUNTARG_SIZE_MAX ||
 		    namelen > VFS_MOUNTARG_SIZE_MAX) {
 			error = EINVAL;
 			goto bad;
 		}
 
 		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
 		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
 		opt->value = NULL;
 		opt->len = 0;
 		/* Index of this (name, value) pair within the uio. */
 		opt->pos = i / 2;
 		opt->seen = 0;
 
 		/*
 		 * Do this early, so jumps to "bad" will free the current
 		 * option.
 		 */
 		TAILQ_INSERT_TAIL(opts, opt, link);
 
 		if (auio->uio_segflg == UIO_SYSSPACE) {
 			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
 		} else {
 			error = copyin(auio->uio_iov[i].iov_base, opt->name,
 			    namelen);
 			if (error)
 				goto bad;
 		}
 		/* Ensure names are null-terminated strings. */
 		if (namelen == 0 || opt->name[namelen - 1] != '\0') {
 			error = EINVAL;
 			goto bad;
 		}
 		if (optlen != 0) {
 			opt->len = optlen;
 			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
 			if (auio->uio_segflg == UIO_SYSSPACE) {
 				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
 				    optlen);
 			} else {
 				error = copyin(auio->uio_iov[i + 1].iov_base,
 				    opt->value, optlen);
 				if (error)
 					goto bad;
 			}
 		}
 	}
 	vfs_sanitizeopts(opts);
 	*options = opts;
 	return (0);
 bad:
 	vfs_freeopts(opts);
 	return (error);
 }
 
 /*
  * Merge the old mount options with the new ones passed
  * in the MNT_UPDATE case.
  *
  * Each option in opts is copied onto the tail of toopts unless toopts
  * already holds an option of the same name, or holds the "no"-prefixed
  * form of it; in the latter case the "no" option is removed from toopts
  * and the old option is dropped as well.
  *
  * XXX This function will keep a "nofoo" option in the
  *     new options if there is no matching "foo" option
  *     to be cancelled in the old options.  This is a bug
  *     if the option's canonical name is "foo".  E.g., "noro"
  *     shouldn't end up in the mount point's active options,
  *     but it can.
  */
 static void
 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
 {
 	struct vfsopt *opt, *opt2, *new;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		/*
 		 * Check that this option hasn't been redefined
 		 * nor cancelled with a "no" mount option.
 		 */
 		opt2 = TAILQ_FIRST(toopts);
 		while (opt2 != NULL) {
 			if (strcmp(opt2->name, opt->name) == 0)
 				goto next;
 			if (strncmp(opt2->name, "no", 2) == 0 &&
 			    strcmp(opt2->name + 2, opt->name) == 0) {
 				vfs_freeopt(toopts, opt2);
 				goto next;
 			}
 			opt2 = TAILQ_NEXT(opt2, link);
 		}
 		/* We want this option, duplicate it. */
 		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
 		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
 		strcpy(new->name, opt->name);
 		if (opt->len != 0) {
 			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
 			bcopy(opt->value, new->value, opt->len);
 		} else {
 			new->value = NULL;
 		}
 		new->len = opt->len;
 		/*
 		 * The structure is not zeroed on allocation, so every field
 		 * must be assigned; carry the position over with the rest.
 		 */
 		new->pos = opt->pos;
 		new->seen = opt->seen;
 		TAILQ_INSERT_TAIL(toopts, new, link);
 next:
 		continue;
 	}
 }
 
 /*
  * Mount a filesystem.
  *
  * System call entry point: validates the iovec count, copies the iovec
  * array in from userspace and hands the options to vfs_donmount().
  * Returns 0 or an errno value.
  */
 int
 nmount(td, uap)
 	struct thread *td;
 	struct nmount_args /* {
 		struct iovec *iovp;
 		unsigned int iovcnt;
 		int flags;
 	} */ *uap;
 {
 	struct uio *auio;
 	int error;
 	u_int iovcnt;
 
 	AUDIT_ARG_FFLAGS(uap->flags);
 	CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__,
 	    uap->iovp, uap->iovcnt, uap->flags);
 
 	/*
 	 * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
 	 * userspace to set this flag, but we must filter it out if we want
 	 * MNT_UPDATE on the root file system to work.
-	 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
+	 * MNT_ROOTFS should only be set by the kernel when mounting its
+	 * root file system.
 	 */
 	uap->flags &= ~MNT_ROOTFS;
 
 	iovcnt = uap->iovcnt;
 	/*
 	 * Check that we have an even number of iovec's
 	 * and that we have at least two options.
 	 */
 	if ((iovcnt & 1) || (iovcnt < 4)) {
 		CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__,
 		    uap->iovcnt);
 		return (EINVAL);
 	}
 
 	error = copyinuio(uap->iovp, iovcnt, &auio);
 	if (error) {
 		CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno",
 		    __func__, error);
 		return (error);
 	}
 	error = vfs_donmount(td, uap->flags, auio);
 
 	/* The uio from copyinuio() is released on success and failure alike. */
 	free(auio, M_IOV);
 	return (error);
 }
 
 /*
  * ---------------------------------------------------------------------
  * Various utility functions
  */
 
 /*
  * Take a reference on a mount point (MNT_REF) under its interlock.
  */
 void
 vfs_ref(struct mount *mp)
 {
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 	MNT_ILOCK(mp);
 	MNT_REF(mp);
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Drop a reference on a mount point (MNT_REL) under its interlock.
  */
 void
 vfs_rel(struct mount *mp)
 {
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 	MNT_ILOCK(mp);
 	MNT_REL(mp);
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Allocate and initialize the mount point struct.
  *
  * vp is recorded as the covered vnode, vfsp as the file system type
  * (its reference count is bumped), fspath as the mount-on name, and a
  * copy of cred as the mount credential.  The allocation uses M_WAITOK;
  * the new mount is returned busied via vfs_busy().
  */
 struct mount *
 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
     struct ucred *cred)
 {
 	struct mount *mp;
 
 	mp = uma_zalloc(mount_zone, M_WAITOK);
 	/* Only the mnt_startzero..mnt_endzero range is cleared. */
 	bzero(&mp->mnt_startzero,
 	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
 	mp->mnt_ref = 0;
 	(void) vfs_busy(mp, MBF_NOWAIT);
 	mp->mnt_op = vfsp->vfc_vfsops;
 	mp->mnt_vfc = vfsp;
 	vfsp->vfc_refcount++;	/* XXX Unlocked */
 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
 	mp->mnt_gen++;
 	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 	mp->mnt_vnodecovered = vp;
 	mp->mnt_cred = crdup(cred);
 	mp->mnt_stat.f_owner = cred->cr_uid;
 	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
 	mp->mnt_iosize_max = DFLTPHYS;
 #ifdef MAC
 	mac_mount_init(mp);
 	mac_mount_create(cred, mp);
 #endif
 	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
 	return (mp);
 }
 
 /*
  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  *
  * Waits for all references to drain, panics if write counts, vnodes or
  * lock references are still outstanding, then releases the options, the
  * credential and the structure itself.
  */
 void
 vfs_mount_destroy(struct mount *mp)
 {
 
 	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= MNTK_REFEXPIRE;
 	if (mp->mnt_kern_flag & MNTK_MWAIT) {
 		mp->mnt_kern_flag &= ~MNTK_MWAIT;
 		wakeup(mp);
 	}
 	/* Sleep on mp until the reference count reaches zero. */
 	while (mp->mnt_ref)
 		msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
 	KASSERT(mp->mnt_ref == 0,
 	    ("%s: invalid refcount in the drain path @ %s:%d", __func__,
 	    __FILE__, __LINE__));
 	if (mp->mnt_writeopcount != 0)
 		panic("vfs_mount_destroy: nonzero writeopcount");
 	if (mp->mnt_secondary_writes != 0)
 		panic("vfs_mount_destroy: nonzero secondary_writes");
 	mp->mnt_vfc->vfc_refcount--;
 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
 		struct vnode *vp;
 
 		/* Print every leftover vnode before panicking. */
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
 			vprint("", vp);
 		panic("unmount: dangling vnode");
 	}
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
 	if (mp->mnt_lockref != 0)
 		panic("vfs_mount_destroy: nonzero lock refcount");
 	MNT_IUNLOCK(mp);
 #ifdef MAC
 	mac_mount_destroy(mp);
 #endif
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
 	crfree(mp->mnt_cred);
 	uma_zfree(mount_zone, mp);
 }
 
 /*
  * Common back end for option-based mounts.
  *
  * Builds an option list from fsoptions, extracts the mandatory "fstype"
  * and "fspath" options, folds the well-known option names into fsflags,
  * and calls vfs_domount().  If the caller supplied an "errmsg" option,
  * the error text buffer is copied back to it on the way out.  The option
  * list is freed here only when an error is returned.
  *
  * Returns 0 or an errno value.
  */
 int
 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
 {
 	struct vfsoptlist *optlist;
 	struct vfsopt *opt, *noro_opt, *tmp_opt;
 	char *fstype, *fspath, *errmsg;
 	int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
 	int has_rw, has_noro;
 
 	errmsg = fspath = NULL;
 	errmsg_len = has_noro = has_rw = fspathlen = 0;
 	errmsg_pos = -1;
 
 	error = vfs_buildopts(fsoptions, &optlist);
 	if (error)
 		return (error);
 
 	if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
 		errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
 
 	/*
 	 * We need these two options before the others,
 	 * and they are mandatory for any filesystem.
 	 * Ensure they are NUL terminated as well.
 	 *
 	 * The length must be checked before the terminator: an option
 	 * supplied with a zero-length value has no buffer, so indexing
 	 * [len - 1] on it would read outside any valid object.
 	 */
 	fstypelen = 0;
 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
 	if (error || fstypelen <= 0 || fstype[fstypelen - 1] != '\0') {
 		error = EINVAL;
 		if (errmsg != NULL)
 			strncpy(errmsg, "Invalid fstype", errmsg_len);
 		goto bail;
 	}
 	fspathlen = 0;
 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
 	if (error || fspathlen <= 0 || fspath[fspathlen - 1] != '\0') {
 		error = EINVAL;
 		if (errmsg != NULL)
 			strncpy(errmsg, "Invalid fspath", errmsg_len);
 		goto bail;
 	}
 
 	/*
 	 * We need to see if we have the "update" option
 	 * before we call vfs_domount(), since vfs_domount() has special
 	 * logic based on MNT_UPDATE.  This is very important
 	 * when we want to update the root filesystem.
 	 *
 	 * Positive forms of the "no*" flag options ("atime", "exec", ...)
 	 * are renamed to "nono*" rather than being mapped to a flag.
 	 */
 	TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
 		if (strcmp(opt->name, "update") == 0) {
 			fsflags |= MNT_UPDATE;
 			vfs_freeopt(optlist, opt);
 		}
 		else if (strcmp(opt->name, "async") == 0)
 			fsflags |= MNT_ASYNC;
 		else if (strcmp(opt->name, "force") == 0) {
 			fsflags |= MNT_FORCE;
 			vfs_freeopt(optlist, opt);
 		}
 		else if (strcmp(opt->name, "reload") == 0) {
 			fsflags |= MNT_RELOAD;
 			vfs_freeopt(optlist, opt);
 		}
 		else if (strcmp(opt->name, "multilabel") == 0)
 			fsflags |= MNT_MULTILABEL;
 		else if (strcmp(opt->name, "noasync") == 0)
 			fsflags &= ~MNT_ASYNC;
 		else if (strcmp(opt->name, "noatime") == 0)
 			fsflags |= MNT_NOATIME;
 		else if (strcmp(opt->name, "atime") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoatime", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noclusterr") == 0)
 			fsflags |= MNT_NOCLUSTERR;
 		else if (strcmp(opt->name, "clusterr") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoclusterr", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noclusterw") == 0)
 			fsflags |= MNT_NOCLUSTERW;
 		else if (strcmp(opt->name, "clusterw") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoclusterw", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noexec") == 0)
 			fsflags |= MNT_NOEXEC;
 		else if (strcmp(opt->name, "exec") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoexec", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "nosuid") == 0)
 			fsflags |= MNT_NOSUID;
 		else if (strcmp(opt->name, "suid") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonosuid", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "nosymfollow") == 0)
 			fsflags |= MNT_NOSYMFOLLOW;
 		else if (strcmp(opt->name, "symfollow") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonosymfollow", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noro") == 0) {
 			fsflags &= ~MNT_RDONLY;
 			has_noro = 1;
 		}
 		else if (strcmp(opt->name, "rw") == 0) {
 			fsflags &= ~MNT_RDONLY;
 			has_rw = 1;
 		}
 		else if (strcmp(opt->name, "ro") == 0)
 			fsflags |= MNT_RDONLY;
 		else if (strcmp(opt->name, "rdonly") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("ro", M_MOUNT);
 			fsflags |= MNT_RDONLY;
 		}
 		else if (strcmp(opt->name, "suiddir") == 0)
 			fsflags |= MNT_SUIDDIR;
 		else if (strcmp(opt->name, "sync") == 0)
 			fsflags |= MNT_SYNCHRONOUS;
 		else if (strcmp(opt->name, "union") == 0)
 			fsflags |= MNT_UNION;
 	}
 
 	/*
 	 * If "rw" was specified as a mount option, and we
 	 * are trying to update a mount-point from "ro" to "rw",
 	 * we need a mount option "noro", since in vfs_mergeopts(),
 	 * "noro" will cancel "ro", but "rw" will not do anything.
 	 */
 	if (has_rw && !has_noro) {
 		noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
 		noro_opt->name = strdup("noro", M_MOUNT);
 		noro_opt->value = NULL;
 		noro_opt->len = 0;
 		noro_opt->pos = -1;
 		noro_opt->seen = 1;
 		TAILQ_INSERT_TAIL(optlist, noro_opt, link);
 	}
 
 	/*
 	 * Be ultra-paranoid about making sure the type and fspath
 	 * variables will fit in our mp buffers, including the
 	 * terminating NUL.
 	 */
 	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
 		error = ENAMETOOLONG;
 		goto bail;
 	}
 
 	error = vfs_domount(td, fstype, fspath, fsflags, optlist);
 bail:
 	/* copyout the errmsg */
 	if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
 	    && errmsg_len > 0 && errmsg != NULL) {
 		if (fsoptions->uio_segflg == UIO_SYSSPACE) {
 			bcopy(errmsg,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
 		} else {
 			copyout(errmsg,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
 		}
 	}
 
 	if (error != 0)
 		vfs_freeopts(optlist);
 	return (error);
 }
 
 /*
  * Old mount API.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mount_args {
 	char	*type;
 	char	*path;
 	int	flags;
 	caddr_t	data;
 };
 #endif
 /*
  * Old-style mount(2) entry point.  Looks up the file system type by
  * name, converts the type, path and a few flags into a mntarg list, and
  * hands that to the file system's vfs_cmount method together with the
  * opaque data pointer.  Returns ENOENT if the type is unknown and
  * EOPNOTSUPP if it has no vfs_cmount method.
  */
 /* ARGSUSED */
 int
 mount(td, uap)
 	struct thread *td;
 	struct mount_args /* {
 		char *type;
 		char *path;
 		int flags;
 		caddr_t data;
 	} */ *uap;
 {
 	char *fstype;
 	struct vfsconf *vfsp = NULL;
 	struct mntarg *ma = NULL;
 	int error;
 
 	AUDIT_ARG_FFLAGS(uap->flags);
 
 	/*
 	 * Filter out MNT_ROOTFS.  We do not want clients of mount() in
 	 * userspace to set this flag, but we must filter it out if we want
 	 * MNT_UPDATE on the root file system to work.
-	 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
+	 * MNT_ROOTFS should only be set by the kernel when mounting its
+	 * root file system.
 	 */
 	uap->flags &= ~MNT_ROOTFS;
 
 	/* Copy the type name in to a temporary buffer for the lookup. */
 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
 	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
 	if (error) {
 		free(fstype, M_TEMP);
 		return (error);
 	}
 
 	AUDIT_ARG_TEXT(fstype);
 	mtx_lock(&Giant);
 	vfsp = vfs_byname_kld(fstype, td, &error);
 	free(fstype, M_TEMP);
 	/* The error stored by vfs_byname_kld() is not returned; ENOENT is. */
 	if (vfsp == NULL) {
 		mtx_unlock(&Giant);
 		return (ENOENT);
 	}
 	if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
 		mtx_unlock(&Giant);
 		return (EOPNOTSUPP);
 	}
 
 	/* NOTE(review): mount_argb() semantics are not visible here -- confirm the flag/name polarity. */
 	ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
 	ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
 	ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
 	ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
 	ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
 
 	error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags);
 	mtx_unlock(&Giant);
 	return (error);
 }
 
 /*
  * vfs_domount_first(): first file system mount (not update)
  *
  * Called with Giant held and vp exclusively locked; fsflags must not
  * contain MNT_UPDATE.  On every return path the lock on vp has been
  * dropped and the caller's reference on vp released (vput() or vrele()).
  *
  * Returns 0 on success.  Fails with the VOP_GETATTR(), priv_check_cred(),
  * vinvalbuf() or VFS_MOUNT() error, ENOTDIR if vp is not a directory, or
  * EBUSY if a mount is already in progress on, or present at, vp.
  */
 static int
 vfs_domount_first(
 	struct thread *td,	/* Calling thread. */
 	struct vfsconf *vfsp,	/* File system type. */
 	char *fspath,		/* Mount path. */
 	struct vnode *vp,	/* Vnode to be covered. */
 	int fsflags,		/* Flags common to all filesystems. */
 	void *fsdata		/* Options local to the filesystem. */
 	)
 {
 	struct vattr va;
 	struct mount *mp;
 	struct vnode *newdp;
 	int error;
 
 	mtx_assert(&Giant, MA_OWNED);
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));
 
 	/*
 	 * If the user is not root, ensure that they own the directory
 	 * onto which we are attempting to mount.
 	 */
 	error = VOP_GETATTR(vp, &va, td->td_ucred);
 	if (error == 0 && va.va_uid != td->td_ucred->cr_uid)
 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 0);
 	if (error == 0)
 		error = vinvalbuf(vp, V_SAVE, 0, 0);
 	if (error == 0 && vp->v_type != VDIR)
 		error = ENOTDIR;
 	if (error == 0) {
 		/*
 		 * Claim the vnode for this mount by setting VI_MOUNT, unless
 		 * another mount is in progress or already present here.
 		 */
 		VI_LOCK(vp);
 		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
 			vp->v_iflag |= VI_MOUNT;
 		else
 			error = EBUSY;
 		VI_UNLOCK(vp);
 	}
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	VOP_UNLOCK(vp, 0);
 
 	/* Allocate and initialize the filesystem. */
 	mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred);
 	/* XXXMAC: pass to vfs_mount_alloc? */
 	mp->mnt_optnew = fsdata;
 	/* Set the mount level flags. */
 	mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY));
 
 	/*
 	 * Mount the filesystem.
 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 	 * get.  No freeing of cn_pnbuf.
 	 */
         error = VFS_MOUNT(mp);
 	if (error != 0) {
 		/*
 		 * Tear the half-built mount down again and release the claim
 		 * on the covered vnode.  (The vfs_unbusy() presumably pairs
 		 * with a busy reference taken in vfs_mount_alloc() --
 		 * confirm.)
 		 */
 		vfs_unbusy(mp);
 		vfs_mount_destroy(mp);
 		VI_LOCK(vp);
 		vp->v_iflag &= ~VI_MOUNT;
 		VI_UNLOCK(vp);
 		vrele(vp);
 		return (error);
 	}
 
 	/* The new option list becomes the mount's current option list. */
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
 	mp->mnt_opt = mp->mnt_optnew;
 	(void)VFS_STATFS(mp, &mp->mnt_stat);
 
 	/*
 	 * Prevent external consumers of mount options from reading mnt_optnew.
 	 */
 	mp->mnt_optnew = NULL;
 
 	/* Keep MNTK_ASYNC in step with MNT_ASYNC and mnt_noasync. */
 	MNT_ILOCK(mp);
 	if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 		mp->mnt_kern_flag |= MNTK_ASYNC;
 	else
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 
 	/* Relock the covered vnode and attach the new mount to it. */
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	cache_purge(vp);
 	VI_LOCK(vp);
 	vp->v_iflag &= ~VI_MOUNT;
 	VI_UNLOCK(vp);
 	vp->v_mountedhere = mp;
 	/* Place the new filesystem at the end of the mount list. */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	vfs_event_signal(NULL, VQ_MOUNT, 0);
 	if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp))
 		panic("mount: lost mount");
 	VOP_UNLOCK(newdp, 0);
 	VOP_UNLOCK(vp, 0);
 	mountcheckdirs(vp, newdp);
 	vrele(newdp);
 	/* Only writable mounts get a syncer vnode. */
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		vfs_allocate_syncvnode(mp);
 	vfs_unbusy(mp);
 	return (0);
 }
 
 /*
  * vfs_domount_update(): update of mounted file system
  *
  * Called with Giant held and vp exclusively locked; fsflags must contain
  * MNT_UPDATE.  vp must be the root vnode of the file system being updated
  * (VV_ROOT), otherwise EINVAL is returned.  On every return path the lock
  * on vp has been dropped and the caller's reference released.
  *
  * Returns 0 on success; EINVAL, EOPNOTSUPP (MNT_RELOAD on a mount that is
  * not read-only), the vfs_suser() error, EBUSY, or the error from
  * VFS_MOUNT()/vfs_export() otherwise.  When VFS_MOUNT() or vfs_export()
  * fails the previous mount flags are restored, except for MNT_QUOTA.
  */
 static int
 vfs_domount_update(
 	struct thread *td,	/* Calling thread. */
 	struct vnode *vp,	/* Mount point vnode. */
 	int fsflags,		/* Flags common to all filesystems. */
 	void *fsdata		/* Options local to the filesystem. */
 	)
 {
 	struct oexport_args oexport;
 	struct export_args export;
 	struct mount *mp;
 	int error, flag;
 
 	mtx_assert(&Giant, MA_OWNED);
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
 
 	if ((vp->v_vflag & VV_ROOT) == 0) {
 		vput(vp);
 		return (EINVAL);
 	}
 	mp = vp->v_mount;
 	/*
 	 * We only allow the filesystem to be reloaded if it
 	 * is currently mounted read-only.
 	 */
 	flag = mp->mnt_flag;	/* Saved so a failed update can be undone. */
 	if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) {
 		vput(vp);
 		return (EOPNOTSUPP);	/* Needs translation */
 	}
 	/*
 	 * Only privileged root, or (if MNT_USER is set) the user that
 	 * did the original mount is permitted to update it.
 	 */
 	error = vfs_suser(mp, td);
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	if (vfs_busy(mp, MBF_NOWAIT)) {
 		vput(vp);
 		return (EBUSY);
 	}
 	/* Refuse if another mount operation is using this vnode. */
 	VI_LOCK(vp);
 	if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
 		VI_UNLOCK(vp);
 		vfs_unbusy(mp);
 		vput(vp);
 		return (EBUSY);
 	}
 	vp->v_iflag |= VI_MOUNT;
 	VI_UNLOCK(vp);
 	VOP_UNLOCK(vp, 0);
 
 	/* Replace the updatable flags with the requested ones. */
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_UPDATEMASK;
 	mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE |
 	    MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY);
 	if ((mp->mnt_flag & MNT_ASYNC) == 0)
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 	mp->mnt_optnew = fsdata;
 	vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
 
 	/*
 	 * Mount the filesystem.
 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 	 * get.  No freeing of cn_pnbuf.
 	 */
         error = VFS_MOUNT(mp);
 
 	if (error == 0) {
 		/* Process the export option. */
 		if (vfs_copyopt(mp->mnt_optnew, "export", &export,
 		    sizeof(export)) == 0) {
 			error = vfs_export(mp, &export);
 		} else if (vfs_copyopt(mp->mnt_optnew, "export", &oexport,
 		    sizeof(oexport)) == 0) {
 			/*
 			 * The option has the size of the old export_args
 			 * layout: convert it field by field, with no
 			 * security flavors.
 			 */
 			export.ex_flags = oexport.ex_flags;
 			export.ex_root = oexport.ex_root;
 			export.ex_anon = oexport.ex_anon;
 			export.ex_addr = oexport.ex_addr;
 			export.ex_addrlen = oexport.ex_addrlen;
 			export.ex_mask = oexport.ex_mask;
 			export.ex_masklen = oexport.ex_masklen;
 			export.ex_indexfile = oexport.ex_indexfile;
 			export.ex_numsecflavors = 0;
 			error = vfs_export(mp, &export);
 		}
 	}
 
 	MNT_ILOCK(mp);
 	if (error == 0) {
 		/* The one-shot request flags must not stay set on the mount. */
 		mp->mnt_flag &=	~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE |
 		    MNT_SNAPSHOT);
 	} else {
 		/*
 		 * If we fail, restore old mount flags. MNT_QUOTA is special,
 		 * because it is not part of MNT_UPDATEMASK, but it could have
 		 * changed in the meantime if quotactl(2) was called.
 		 * All in all we want current value of MNT_QUOTA, not the old
 		 * one.
 		 */
 		mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
 	}
 	if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 		mp->mnt_kern_flag |= MNTK_ASYNC;
 	else
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 
 	if (error != 0)
 		goto end;
 
 	/* Success: the merged option list replaces the old one. */
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
 	mp->mnt_opt = mp->mnt_optnew;
 	(void)VFS_STATFS(mp, &mp->mnt_stat);
 	/*
 	 * Prevent external consumers of mount options from reading
 	 * mnt_optnew.
 	 */
 	mp->mnt_optnew = NULL;
 
 	/* The syncer vnode follows the (possibly changed) read-only state. */
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		vfs_allocate_syncvnode(mp);
 	else
 		vfs_deallocate_syncvnode(mp);
 end:
 	/* Common exit: undo vfs_busy() and the VI_MOUNT claim, drop vp. */
 	vfs_unbusy(mp);
 	VI_LOCK(vp);
 	vp->v_iflag &= ~VI_MOUNT;
 	VI_UNLOCK(vp);
 	vrele(vp);
 	return (error);
 }
 
 /*
  * vfs_domount(): actually attempt a filesystem mount.
  *
  * Validates the name lengths and the caller's privileges, resolves the
  * file system type (for a new mount) and the mount point path, then
  * dispatches to vfs_domount_first() or vfs_domount_update() depending on
  * MNT_UPDATE in fsflags.
  *
  * Returns 0 on success, ENAMETOOLONG for over-long names, a priv_check()
  * error, ENODEV for an unknown file system type, EPERM for a jailed
  * caller and a type without VFCF_JAIL, the namei() error, or the error
  * from the dispatched helper.
  */
 static int
 vfs_domount(
 	struct thread *td,	/* Calling thread. */
 	const char *fstype,	/* Filesystem type. */
 	char *fspath,		/* Mount path. */
 	int fsflags,		/* Flags common to all filesystems. */
 	void *fsdata		/* Options local to the filesystem. */
 	)
 {
 	struct vfsconf *vfsp;
 	struct nameidata nd;
 	struct vnode *vp;
 	int error;
 
 	/*
 	 * Be ultra-paranoid about making sure the type and fspath
 	 * variables will fit in our mp buffers, including the
 	 * terminating NUL.
 	 */
 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
 		return (ENAMETOOLONG);
 
 	/* Jailed callers, and all callers when usermount is 0, need privilege. */
 	if (jailed(td->td_ucred) || usermount == 0) {
 		if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
 			return (error);
 	}
 
 	/*
 	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
 	 */
 	if (fsflags & MNT_EXPORTED) {
 		error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
 		if (error)
 			return (error);
 	}
 	if (fsflags & MNT_SUIDDIR) {
 		error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
 		if (error)
 			return (error);
 	}
 	/*
 	 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
 	 */
 	if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
 		if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
 			fsflags |= MNT_NOSUID | MNT_USER;
 	}
 
 	/* Load KLDs before we lock the covered vnode to avoid reversals. */
 	vfsp = NULL;	/* Stays NULL for MNT_UPDATE; the update path ignores it. */
 	if ((fsflags & MNT_UPDATE) == 0) {
 		/* Don't try to load KLDs if we're mounting the root. */
 		if (fsflags & MNT_ROOTFS)
 			vfsp = vfs_byname(fstype);
 		else
 			vfsp = vfs_byname_kld(fstype, td, &error);
 		if (vfsp == NULL)
 			return (ENODEV);
 		if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
 			return (EPERM);
 	}
 
 	/*
 	 * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE.
 	 */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
 	    UIO_SYSSPACE, fspath, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	/*
 	 * Both helpers assert that Giant is held, so take it here unless
 	 * NDHASGIANT() reports that the lookup already holds it.
 	 */
 	if (!NDHASGIANT(&nd))
 		mtx_lock(&Giant);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 	if ((fsflags & MNT_UPDATE) == 0) {
 		error = vfs_domount_first(td, vfsp, fspath, vp, fsflags,
 		    fsdata);
 	} else {
 		error = vfs_domount_update(td, vp, fsflags, fsdata);
 	}
 	mtx_unlock(&Giant);
 
 	/* Both helpers unlock vp and drop its reference before returning. */
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	ASSERT_VOP_UNLOCKED(vp, __func__);
 
 	return (error);
 }
 
 /*
  * Unmount a filesystem.
  *
  * Note: unmount takes a path to the vnode mounted on as argument, not
  * special file (as before).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct unmount_args {
 	char	*path;
 	int	flags;
 };
 #endif
 /*
  * unmount system call.
  *
  * Finds the mount to remove, either by its file system ID (MNT_BYFSID,
  * with uap->path of the form "FSID:val0:val1") or by comparing uap->path
  * with each mount's f_mntonname, and passes it to dounmount().  The mount
  * list is searched from the tail, so the most recently added matching
  * mount wins.
  *
  * Returns the priv_check() or copyinstr() error, EINVAL for a malformed
  * FSID string, for a path that is not a mount point, or for the root
  * file system, ENOENT for an unknown FSID, otherwise the dounmount()
  * result.
  */
 /* ARGSUSED */
 int
 unmount(td, uap)
 	struct thread *td;
 	register struct unmount_args /* {
 		char *path;
 		int flags;
 	} */ *uap;
 {
 	struct mount *mp;
 	char *pathbuf;
 	int error, id0, id1;
 
 	AUDIT_ARG_VALUE(uap->flags);
 	/* Jailed callers, and all callers when usermount is 0, need privilege. */
 	if (jailed(td->td_ucred) || usermount == 0) {
 		error = priv_check(td, PRIV_VFS_UNMOUNT);
 		if (error)
 			return (error);
 	}
 
 	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
 	if (error) {
 		free(pathbuf, M_TEMP);
 		return (error);
 	}
 	mtx_lock(&Giant);
 	if (uap->flags & MNT_BYFSID) {
 		AUDIT_ARG_TEXT(pathbuf);
 		/* Decode the filesystem ID. */
 		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 			mtx_unlock(&Giant);
 			free(pathbuf, M_TEMP);
 			return (EINVAL);
 		}
 
 		/* mp is left NULL when the loop finds no match. */
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 			    mp->mnt_stat.f_fsid.val[1] == id1)
 				break;
 		}
 		mtx_unlock(&mountlist_mtx);
 	} else {
 		AUDIT_ARG_UPATH1(td, pathbuf);
 		/* Plain string comparison; no path lookup is performed. */
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
 				break;
 		}
 		mtx_unlock(&mountlist_mtx);
 	}
 	free(pathbuf, M_TEMP);
 	if (mp == NULL) {
 		/*
 		 * Previously we returned ENOENT for a nonexistent path and
 		 * EINVAL for a non-mountpoint.  We cannot tell these apart
 		 * now, so in the !MNT_BYFSID case return the more likely
 		 * EINVAL for compatibility.
 		 */
 		mtx_unlock(&Giant);
 		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
 	}
 
 	/*
 	 * Don't allow unmounting the root filesystem.
 	 */
 	if (mp->mnt_flag & MNT_ROOTFS) {
 		mtx_unlock(&Giant);
 		return (EINVAL);
 	}
 	error = dounmount(mp, uap->flags, td);
 	mtx_unlock(&Giant);
 	return (error);
 }
 
 /*
  * Do the actual filesystem unmount.
  *
  * Must be called with Giant held.  If the mount covers a vnode, that
  * vnode is locked exclusively for the duration of the operation.
  *
  * Returns 0 once the mount has been removed from the mount list and
  * destroyed.  Returns EBUSY if the mount changed while waiting for the
  * covered vnode lock, if an unmount is already in progress, or if the
  * mount has outstanding lock references and MNT_FORCE is not given;
  * the vfs_suser() error if the caller may not unmount it; or the
  * VFS_SYNC()/VFS_UNMOUNT() error (other than ENXIO), in which case the
  * mount is put back into service.
  */
 int
 dounmount(mp, flags, td)
 	struct mount *mp;
 	int flags;
 	struct thread *td;
 {
 	struct vnode *coveredvp, *fsrootvp;
 	int error;
 	int async_flag;
 	int mnt_gen_r;
 
 	mtx_assert(&Giant, MA_OWNED);
 
 	if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
 		mnt_gen_r = mp->mnt_gen;
 		/*
 		 * Hold the vnode across the lock acquisition; LK_INTERLOCK
 		 * hands the vnode interlock taken here to vn_lock().
 		 */
 		VI_LOCK(coveredvp);
 		vholdl(coveredvp);
 		vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
 		vdrop(coveredvp);
 		/*
 		 * Check for mp being unmounted while waiting for the
 		 * covered vnode lock.
 		 */
 		if (coveredvp->v_mountedhere != mp ||
 		    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 			VOP_UNLOCK(coveredvp, 0);
 			return (EBUSY);
 		}
 	}
 	/*
 	 * Only privileged root, or (if MNT_USER is set) the user that did the
 	 * original mount is permitted to unmount this filesystem.
 	 */
 	error = vfs_suser(mp, td);
 	if (error) {
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp, 0);
 		return (error);
 	}
 
 	MNT_ILOCK(mp);
 	/* Only one unmount of a given mount may be in progress. */
 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 		MNT_IUNLOCK(mp);
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp, 0);
 		return (EBUSY);
 	}
 	mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
 	/* Allow filesystems to detect that a forced unmount is in progress. */
 	if (flags & MNT_FORCE)
 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
 	error = 0;
 	if (mp->mnt_lockref) {
 		if ((flags & MNT_FORCE) == 0) {
 			/*
 			 * Non-forced unmount of a mount that still has lock
 			 * references: back out the flags set above, wake any
 			 * MNTK_MWAIT sleepers and fail.
 			 */
 			mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
 			    MNTK_UNMOUNTF);
 			if (mp->mnt_kern_flag & MNTK_MWAIT) {
 				mp->mnt_kern_flag &= ~MNTK_MWAIT;
 				wakeup(mp);
 			}
 			MNT_IUNLOCK(mp);
 			if (coveredvp)
 				VOP_UNLOCK(coveredvp, 0);
 			return (EBUSY);
 		}
 		/* Forced: sleep until the lock references are drained. */
 		mp->mnt_kern_flag |= MNTK_DRAINING;
 		error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
 		    "mount drain", 0);
 	}
 	MNT_IUNLOCK(mp);
 	KASSERT(mp->mnt_lockref == 0,
 	    ("%s: invalid lock refcount in the drain path @ %s:%d",
 	    __func__, __FILE__, __LINE__));
 	KASSERT(error == 0,
 	    ("%s: invalid return value for msleep in the drain path @ %s:%d",
 	    __func__, __FILE__, __LINE__));
 	vn_start_write(NULL, &mp, V_WAIT);
 
 	if (mp->mnt_flag & MNT_EXPUBLIC)
 		vfs_setpublicfs(NULL, NULL, NULL);
 
 	vfs_msync(mp, MNT_WAIT);
 	MNT_ILOCK(mp);
 	/* Remember MNT_ASYNC so it can be restored if the unmount fails. */
 	async_flag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
 	vfs_deallocate_syncvnode(mp);
 	/*
 	 * For forced unmounts, move process cdir/rdir refs on the fs root
 	 * vnode to the covered vnode.  For non-forced unmounts we want
 	 * such references to cause an EBUSY error.
 	 */
 	if ((flags & MNT_FORCE) &&
 	    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) {
 		if (mp->mnt_vnodecovered != NULL)
 			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
 		if (fsrootvp == rootvnode) {
 			vrele(rootvnode);
 			rootvnode = NULL;
 		}
 		vput(fsrootvp);
 	}
 	/*
 	 * VFS_UNMOUNT() is attempted when the mount is read-only, when the
 	 * final VFS_SYNC() succeeded, or unconditionally for MNT_FORCE.
 	 */
 	if (((mp->mnt_flag & MNT_RDONLY) ||
 	     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || (flags & MNT_FORCE) != 0)
 		error = VFS_UNMOUNT(mp, flags);
 	vn_finished_write(mp);
 	/*
 	 * If we failed to flush the dirty blocks for this mount point,
 	 * undo all the cdir/rdir and rootvnode changes we made above.
 	 * Unless we failed to do so because the device is reporting that
 	 * it doesn't exist anymore.
 	 */
 	if (error && error != ENXIO) {
 		if ((flags & MNT_FORCE) &&
 		    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) {
 			if (mp->mnt_vnodecovered != NULL)
 				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
 			if (rootvnode == NULL) {
 				rootvnode = fsrootvp;
 				vref(rootvnode);
 			}
 			vput(fsrootvp);
 		}
 		MNT_ILOCK(mp);
 		mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 			/* The interlock is dropped around the allocation. */
 			MNT_IUNLOCK(mp);
 			vfs_allocate_syncvnode(mp);
 			MNT_ILOCK(mp);
 		}
 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 		mp->mnt_flag |= async_flag;
 		if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 			mp->mnt_kern_flag |= MNTK_ASYNC;
 		if (mp->mnt_kern_flag & MNTK_MWAIT) {
 			mp->mnt_kern_flag &= ~MNTK_MWAIT;
 			wakeup(mp);
 		}
 		MNT_IUNLOCK(mp);
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp, 0);
 		return (error);
 	}
 	/* Success (or ENXIO): unlink the mount and release everything. */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	if (coveredvp != NULL) {
 		coveredvp->v_mountedhere = NULL;
 		vput(coveredvp);
 	}
 	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 	vfs_mount_destroy(mp);
 	return (0);
 }
 
 /*
  * Format a printf-style message into the "errmsg" option of the option
  * list currently being processed for mp (mp->mnt_optnew).  Nothing is
  * written when that option is absent, has no buffer, or has a
  * non-positive length.
  */
 void
 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 {
 	va_list args;
 	char *msgbuf;
 	int buflen;
 
 	if (vfs_getopt(mp->mnt_optnew, "errmsg", (void **)&msgbuf,
 	    &buflen) != 0)
 		return;
 	if (msgbuf == NULL || buflen <= 0)
 		return;
 
 	va_start(args, fmt);
 	vsnprintf(msgbuf, (size_t)buflen, fmt, args);
 	va_end(args);
 }
 
 /*
  * Format a printf-style message into the "errmsg" option of opts.
  * Nothing is written when that option is absent, has no buffer, or has
  * a non-positive length.
  */
 void
 vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...)
 {
 	va_list args;
 	char *msgbuf;
 	int buflen;
 
 	if (vfs_getopt(opts, "errmsg", (void **)&msgbuf, &buflen) != 0)
 		return;
 	if (msgbuf == NULL || buflen <= 0)
 		return;
 
 	va_start(args, fmt);
 	vsnprintf(msgbuf, (size_t)buflen, fmt, args);
 	va_end(args);
 }
 
 /*
  * ---------------------------------------------------------------------
  * Functions for querying mount options/arguments from filesystems.
  */
 
 /*
  * Check that no unknown options are given.
  *
  * Every option in opts must appear, either under its own name or with a
  * leading "no" stripped, in global_opts or in the caller-supplied
  * NULL-terminated array legal.
  *
  * Returns 0 if all options are known, EINVAL otherwise.  On failure a
  * message naming the last unknown option is copied into the "errmsg"
  * option when opts has one, or printed on the console when it does not.
  */
 int
 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 {
 	struct vfsopt *opt;
 	char errmsg[255];
 	const char **t, *p, *q;
 	int ret = 0;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		p = opt->name;
 		/* q is the name without its "no" prefix, if it has one. */
 		q = NULL;
 		if (p[0] == 'n' && p[1] == 'o')
 			q = p + 2;
 		for (t = global_opts; *t != NULL; t++) {
 			if (strcmp(*t, p) == 0)
 				break;
 			if (q != NULL && strcmp(*t, q) == 0)
 				break;
 		}
 		if (*t != NULL)
 			continue;
 		for (t = legal; *t != NULL; t++) {
 			if (strcmp(*t, p) == 0)
 				break;
 			if (q != NULL && strcmp(*t, q) == 0)
 				break;
 		}
 		if (*t != NULL)
 			continue;
 		snprintf(errmsg, sizeof(errmsg),
 		    "mount option <%s> is unknown", p);
 		ret = EINVAL;
 	}
 	if (ret != 0) {
 		TAILQ_FOREACH(opt, opts, link) {
 			if (strcmp(opt->name, "errmsg") == 0) {
 				/*
 				 * strlcpy() always NUL-terminates, even when
 				 * the buffer is shorter than the message
 				 * (strncpy() would not).  Skip the copy when
 				 * there is no buffer to write into.
 				 */
 				if (opt->value != NULL && opt->len > 0)
 					strlcpy((char *)opt->value, errmsg,
 					    opt->len);
 				break;
 			}
 		}
 		/* opt is NULL here only if no "errmsg" option was found. */
 		if (opt == NULL)
 			printf("%s\n", errmsg);
 	}
 	return (ret);
 }
 
 /*
  * Look up a mount option by name.
  *
  * On a match the option is marked as seen; its length is stored through
  * len and its value pointer through buf (each only when non-NULL), and
  * 0 is returned.  ENOENT is returned when no option has that name.
  */
 int
 vfs_getopt(opts, name, buf, len)
 	struct vfsoptlist *opts;
 	const char *name;
 	void **buf;
 	int *len;
 {
 	struct vfsopt *cur;
 
 	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) != 0)
 			continue;
 		cur->seen = 1;
 		if (len != NULL)
 			*len = cur->len;
 		if (buf != NULL)
 			*buf = cur->value;
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Return the recorded position (opt->pos) of the option called name and
  * mark it as seen.  Returns -1 when opts is NULL or holds no such option.
  */
 int
 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 {
 	struct vfsopt *cur;
 
 	if (opts != NULL) {
 		TAILQ_FOREACH(cur, opts, link) {
 			if (strcmp(name, cur->name) != 0)
 				continue;
 			cur->seen = 1;
 			return (cur->pos);
 		}
 	}
 	return (-1);
 }
 
 /*
  * Get a mount option as a C string.
  *
  * Returns the option's value and sets *error to 0 when the option exists
  * and its value is a NUL-terminated buffer.  Returns NULL with *error set
  * to EINVAL when the option exists but is empty, has no value buffer, or
  * is not NUL-terminated, and NULL with *error set to ENOENT when the
  * option does not exist.  A matching option is marked as seen.
  */
 char *
 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 {
 	struct vfsopt *opt;
 
 	*error = 0;
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		/*
 		 * An option can have a non-zero length but no buffer (see
 		 * vfs_setopt()), so check the pointer before looking at the
 		 * last byte, as vfs_scanopt() does.
 		 */
 		if (opt->len == 0 || opt->value == NULL ||
 		    ((char *)opt->value)[opt->len - 1] != '\0') {
 			*error = EINVAL;
 			return (NULL);
 		}
 		return (opt->value);
 	}
 	*error = ENOENT;
 	return (NULL);
 }
 
 /*
  * Translate the presence of option name into a flag bit.
  *
  * If the option is present it is marked as seen, val is set in *w and 1
  * is returned; otherwise val is cleared from *w and 0 is returned.  w
  * may be NULL, in which case only the return value is produced.
  */
 int
 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 {
 	struct vfsopt *cur;
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) != 0)
 			continue;
 		cur->seen = 1;
 		if (w != NULL)
 			*w |= val;
 		return (1);
 	}
 	if (w != NULL)
 		*w &= ~val;
 	return (0);
 }
 
 /*
  * Parse the value of option name with a scanf-style format.
  *
  * Returns the vsscanf() result for the option's value.  Returns 0 when
  * the option does not exist, is empty, has no value buffer, or is not
  * NUL-terminated.  A matching option is marked as seen.
  */
 int
 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 {
 	va_list ap;
 	struct vfsopt *opt;
 	int ret;
 
 	/* Name this function, not vfs_getopt, in the assertion message. */
 	KASSERT(opts != NULL, ("vfs_scanopt: caller passed 'opts' as NULL"));
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->len == 0 || opt->value == NULL)
 			return (0);
 		/* Only scan buffers that are proper C strings. */
 		if (((char *)opt->value)[opt->len - 1] != '\0')
 			return (0);
 		va_start(ap, fmt);
 		ret = vsscanf(opt->value, fmt, ap);
 		va_end(ap);
 		return (ret);
 	}
 	return (0);
 }
 
 int
 vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len)
 {
 	struct vfsopt *opt;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->value == NULL)
 			opt->len = len;
 		else {
 			if (opt->len != len)
 				return (EINVAL);
 			bcopy(value, opt->value, len);
 		}
 		return (0);
 	}
 	return (ENOENT);
 }
 
 int
 vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len)
 {
 	struct vfsopt *opt;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->value == NULL)
 			opt->len = len;
 		else {
 			if (opt->len < len)
 				return (EINVAL);
 			opt->len = len;
 			bcopy(value, opt->value, len);
 		}
 		return (0);
 	}
 	return (ENOENT);
 }
 
 int
 vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value)
 {
 	struct vfsopt *opt;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->value == NULL)
 			opt->len = strlen(value) + 1;
 		else if (strlcpy(opt->value, value, opt->len) >= opt->len)
 			return (EINVAL);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Copy the value of a mount option into a caller-supplied buffer.
  *
  * len is the size of dest and must match the option's length exactly,
  * otherwise EINVAL is returned and nothing is copied.  ENOENT is
  * returned when no option is called name.  A matching option is marked
  * as seen even when the lengths differ.
  */
 int
 vfs_copyopt(opts, name, dest, len)
 	struct vfsoptlist *opts;
 	const char *name;
 	void *dest;
 	int len;
 {
 	struct vfsopt *cur;
 
 	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) != 0)
 			continue;
 		cur->seen = 1;
 		if (cur->len != len)
 			return (EINVAL);
 		bcopy(cur->value, dest, cur->len);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * This is a helper function for filesystems to traverse their
  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
  *
  * Advance the iteration: return the next non-marker vnode after the
  * marker *mvp on mp's vnode list and move the marker behind it.  When
  * the end of the list is reached the marker is freed through
  * __mnt_vnode_markerfree() (which sets *mvp to NULL) and NULL is
  * returned.
  *
  * Must be called with the mount interlock held; it is held on return,
  * but may have been dropped and retaken in between.
  */
 
 struct vnode *
 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 {
 	struct vnode *vp;
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 	/*
 	 * Once the marker's v_yield counter has reached 500, reset it and
 	 * yield with the interlock dropped.
 	 */
 	if ((*mvp)->v_yield++ == 500) {
 		MNT_IUNLOCK(mp);
 		(*mvp)->v_yield = 0;
 		uio_yield();
 		MNT_ILOCK(mp);
 	}
 	/* Skip over markers (VMARKER entries on the same list). */
 	vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
 	while (vp != NULL && vp->v_type == VMARKER)
 		vp = TAILQ_NEXT(vp, v_nmntvnodes);
 
 	/* Check if we are done */
 	if (vp == NULL) {
 		__mnt_vnode_markerfree(mvp, mp);
 		return (NULL);
 	}
 	/* Re-queue our marker directly after the vnode being returned. */
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 	return (vp);
 }
 
 /*
  * Start a traversal of mp's vnode list: return the first non-marker
  * vnode and insert a freshly allocated marker vnode, returned through
  * *mvp, directly after it.  Returns NULL with *mvp set to NULL when the
  * list holds no real vnodes.
  *
  * Must be called with the mount interlock held; it is held on return,
  * but is dropped while the marker is allocated.  A reference on mp
  * (MNT_REF) is taken for as long as the marker exists.
  */
 struct vnode *
 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
 {
 	struct vnode *vp;
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 
 	vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 	while (vp != NULL && vp->v_type == VMARKER)
 		vp = TAILQ_NEXT(vp, v_nmntvnodes);
 
 	/* Check if we are done */
 	if (vp == NULL) {
 		*mvp = NULL;
 		return (NULL);
 	}
 	/* The marker is allocated with the interlock dropped. */
 	MNT_REF(mp);
 	MNT_IUNLOCK(mp);
 	*mvp = (struct vnode *) malloc(sizeof(struct vnode),
 				       M_VNODE_MARKER,
 				       M_WAITOK | M_ZERO);
 	MNT_ILOCK(mp);
 	(*mvp)->v_type = VMARKER;
 
 	/*
 	 * Search again from the head: the result of the first scan is not
 	 * reused after the interlock was dropped (presumably because the
 	 * list may have changed meanwhile).
 	 */
 	vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 	while (vp != NULL && vp->v_type == VMARKER)
 		vp = TAILQ_NEXT(vp, v_nmntvnodes);
 
 	/* Check if we are done */
 	if (vp == NULL) {
 		MNT_IUNLOCK(mp);
 		free(*mvp, M_VNODE_MARKER);
 		MNT_ILOCK(mp);
 		*mvp = NULL;
 		MNT_REL(mp);
 		return (NULL);
 	}
 	(*mvp)->v_mount = mp;
 	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 	return (vp);
 }
 
 
 /*
  * End a vnode list traversal: unlink the marker *mvp from mp's vnode
  * list, free it, set *mvp to NULL and drop the mount reference taken by
  * __mnt_vnode_first().  Does nothing when *mvp is already NULL.
  *
  * When there is a marker, the mount interlock must be held on entry; it
  * is held on return but is dropped around the free().
  */
 void
 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
 {
 
 	if (*mvp == NULL)
 		return;
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 	MNT_IUNLOCK(mp);
 	free(*mvp, M_VNODE_MARKER);
 	MNT_ILOCK(mp);
 	*mvp = NULL;
 	MNT_REL(mp);
 }
 
 
 /*
  * Refresh mp->mnt_stat through the file system's vfs_statfs method and,
  * unless sbp already points at mp->mnt_stat, copy the result to *sbp.
  * The copy is made whether or not the method succeeded; its return
  * value is passed back to the caller.
  */
 int
 __vfs_statfs(struct mount *mp, struct statfs *sbp)
 {
 	int rv;
 
 	rv = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat);
 	if (sbp == &mp->mnt_stat)
 		return (rv);
 	*sbp = mp->mnt_stat;
 	return (rv);
 }
 
 /*
  * Record the name of the object a file system was mounted from in
  * mp->mnt_stat.f_mntfromname.  The field is cleared first and the name
  * is truncated, if necessary, to fit it.
  */
 void
 vfs_mountedfrom(struct mount *mp, const char *from)
 {
 	const size_t cap = sizeof(mp->mnt_stat.f_mntfromname);
 
 	bzero(mp->mnt_stat.f_mntfromname, cap);
 	strlcpy(mp->mnt_stat.f_mntfromname, from, cap);
 }
 
 /*
  * ---------------------------------------------------------------------
  * This is the api for building mount args and mounting filesystems from
  * inside the kernel.
  *
  * The API works by accumulation of individual args.  First error is
  * latched.
  *
  * XXX: should be documented in new manpage kernel_mount(9)
  */
 
 /*
  * A memory allocation which must be freed when we are done.
  *
  * This is only a list header: mount_argf() and mount_argsu() allocate
  * sizeof(struct mntaarg) plus the payload size and store the payload
  * directly behind the header (at maa + 1).
  */
 struct mntaarg {
 	SLIST_ENTRY(mntaarg)	next;	/* linkage on mntarg.list */
 };
 
 /*
  * The header for the mount arguments.
  *
  * Arguments are stored as consecutive name/value iovec pairs in v.
  */
 struct mntarg {
 	struct iovec *v;		/* name/value vector, grown by realloc */
 	int len;			/* number of iovecs in v (two per argument) */
 	int error;			/* first error latched; later adds are no-ops */
 	SLIST_HEAD(, mntaarg)	list;	/* allocations freed by free_mntarg() */
 };
 
 /*
  * Append a boolean (valueless) argument.
  *
  * name must begin with "no".  A true flag adds the option without that
  * prefix; a false flag adds name unchanged.
  */
 struct mntarg *
 mount_argb(struct mntarg *ma, int flag, const char *name)
 {
 	const char *optname;
 
 	KASSERT(name[0] == 'n' && name[1] == 'o',
 	    ("mount_argb(...,%s): name must start with 'no'", name));
 
 	optname = flag ? name + 2 : name;
 	return (mount_arg(ma, optname, NULL, 0));
 }
 
 /*
  * Add an argument printf style
  */
 struct mntarg *
 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 {
 	va_list ap;
 	struct mntaarg *maa;
 	struct sbuf *sb;
 	int len;
 
 	if (ma == NULL) {
 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 		SLIST_INIT(&ma->list);
 	}
 	if (ma->error)
 		return (ma);
 
 	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 	    M_MOUNT, M_WAITOK);
 	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 	ma->v[ma->len].iov_len = strlen(name) + 1;
 	ma->len++;
 
 	sb = sbuf_new_auto();
 	va_start(ap, fmt);
 	sbuf_vprintf(sb, fmt, ap);
 	va_end(ap);
 	sbuf_finish(sb);
 	len = sbuf_len(sb) + 1;
 	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 	SLIST_INSERT_HEAD(&ma->list, maa, next);
 	bcopy(sbuf_data(sb), maa + 1, len);
 	sbuf_delete(sb);
 
 	ma->v[ma->len].iov_base = maa + 1;
 	ma->v[ma->len].iov_len = len;
 	ma->len++;
 
 	return (ma);
 }
 
 /*
  * Add an argument which is a userland string.
  */
 struct mntarg *
 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 {
 	struct mntaarg *maa;
 	char *tbuf;
 
 	if (val == NULL)
 		return (ma);
 	if (ma == NULL) {
 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 		SLIST_INIT(&ma->list);
 	}
 	if (ma->error)
 		return (ma);
 	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 	SLIST_INSERT_HEAD(&ma->list, maa, next);
 	tbuf = (void *)(maa + 1);
 	ma->error = copyinstr(val, tbuf, len, NULL);
 	return (mount_arg(ma, name, tbuf, -1));
 }
 
 /*
  * Plain argument.
  *
  * If length is -1, treat value as a C string.
  */
 struct mntarg *
 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 {
 
 	if (ma == NULL) {
 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 		SLIST_INIT(&ma->list);
 	}
 	if (ma->error)
 		return (ma);
 
 	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 	    M_MOUNT, M_WAITOK);
 	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 	ma->v[ma->len].iov_len = strlen(name) + 1;
 	ma->len++;
 
 	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 	if (len < 0)
 		ma->v[ma->len].iov_len = strlen(val) + 1;
 	else
 		ma->v[ma->len].iov_len = len;
 	ma->len++;
 	return (ma);
 }
 
 /*
  * Release a mntarg structure: every allocation tracked on its list, the
  * iovec array, and the header itself.
  */
 static void
 free_mntarg(struct mntarg *ma)
 {
 	struct mntaarg *item;
 
 	while ((item = SLIST_FIRST(&ma->list)) != NULL) {
 		SLIST_REMOVE_HEAD(&ma->list, next);
 		free(item, M_MOUNT);
 	}
 	free(ma->v, M_MOUNT);
 	free(ma, M_MOUNT);
 }
 
 /*
  * Mount a filesystem from inside the kernel.
  *
  * Hands the accumulated arguments to vfs_donmount() as a kernel-space
  * uio, unless an error was latched while they were being built, in
  * which case that error is returned instead.  ma is always freed.
  */
 int
 kernel_mount(struct mntarg *ma, int flags)
 {
 	struct uio auio;
 	int error;
 
 	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 
 	error = ma->error;
 	if (error == 0) {
 		auio.uio_iov = ma->v;
 		auio.uio_iovcnt = ma->len;
 		auio.uio_segflg = UIO_SYSSPACE;
 		error = vfs_donmount(curthread, flags, &auio);
 	}
 	free_mntarg(ma);
 	return (error);
 }
 
 /*
  * A printflike function to mount a filesystem.
  *
  * The variable arguments are (const char *name, const void *value)
  * pairs ended by a NULL name.  A non-NULL value is added as a C string,
  * a NULL value as a zero-length argument.  The resulting list is passed
  * to kernel_mount().
  */
 int
 kernel_vmount(int flags, ...)
 {
 	struct mntarg *ma = NULL;
 	va_list ap;
 	const char *optname;
 	const void *optval;
 
 	va_start(ap, flags);
 	while ((optname = va_arg(ap, const char *)) != NULL) {
 		optval = va_arg(ap, const void *);
 		ma = mount_arg(ma, optname, optval, (optval != NULL ? -1 : 0));
 	}
 	va_end(ap);
 
 	return (kernel_mount(ma, flags));
 }
 
 /*
  * Convert an old-format export request into the current format: the
  * first sizeof(struct oexport_args) bytes of *exp are filled from *oexp
  * and the number of security flavors is set to zero.
  */
 void
 vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp)
 {
 	const size_t oldsize = sizeof(*oexp);
 
 	bcopy(oexp, exp, oldsize);
 	exp->ex_numsecflavors = 0;
 }
Index: projects/binutils-2.17/sys/kern/vfs_subr.c
===================================================================
--- projects/binutils-2.17/sys/kern/vfs_subr.c	(revision 215829)
+++ projects/binutils-2.17/sys/kern/vfs_subr.c	(revision 215830)
@@ -1,4421 +1,4431 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  */
 
 /*
  * External virtual filesystem routines
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/condvar.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/extattr.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/reboot.h>
 #include <sys/sleepqueue.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <machine/stdarg.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_kern.h>
 #include <vm/uma.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 /* Indices into the syncer_workitem_pending[] array of work queues. */
 #define	WI_MPSAFEQ	0
 #define	WI_GIANTQ	1
 
 static MALLOC_DEFINE(M_NETADDR, "subr_export_host", "Export host address structure");
 
 /* Prototypes for functions that are private to this file. */
 static void	delmntque(struct vnode *vp);
 static int	flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 		    int slpflag, int slptimeo);
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	vbusy(struct vnode *vp);
 static void	vinactive(struct vnode *, struct thread *);
 static void	v_incr_usecount(struct vnode *);
 static void	v_decr_usecount(struct vnode *);
 static void	v_decr_useonly(struct vnode *);
 static void	v_upgrade_usecount(struct vnode *);
 static void	vfree(struct vnode *);
 static void	vnlru_free(int);
 static void	vgonel(struct vnode *);
 static void	vfs_knllock(void *arg);
 static void	vfs_knlunlock(void *arg);
 static void	vfs_knl_assert_locked(void *arg);
 static void	vfs_knl_assert_unlocked(void *arg);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
 
 /*
  * Number of vnodes in existence.  Increased whenever getnewvnode()
  * allocates a new vnode, decreased on vdestroy() called on VI_DOOMed
  * vnode.
  */
 static unsigned long	numvnodes;
 
 SYSCTL_LONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0,
     "Number of vnodes in existence");
 
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
  */
 enum vtype iftovt_tab[16] = {
 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
 };
 int vttoif_tab[10] = {
 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 	S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT
 };
 
 /*
  * List of vnodes that are ready for recycling.
  */
 static TAILQ_HEAD(freelst, vnode) vnode_free_list;
 
 /*
  * Free vnode target.  Free vnodes may simply be files which have been stat'd
  * but not read.  This is somewhat common, and a small cache of such files
  * should be kept to avoid recreation costs.
  */
 static u_long wantfreevnodes;
 SYSCTL_LONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
 /* Number of vnodes in the free list. */
 static u_long freevnodes;
 SYSCTL_LONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0,
     "Number of vnodes in the free list");
 
 static int vlru_allow_cache_src;
 SYSCTL_INT(_vfs, OID_AUTO, vlru_allow_cache_src, CTLFLAG_RW,
     &vlru_allow_cache_src, 0, "Allow vlru to reclaim source vnode");
 
 /*
  * Various variables used for debugging the new implementation of
  * reassignbuf().
  * XXX these are probably of (very) limited utility now.
  */
 static int reassignbufcalls;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0,
     "Number of calls to reassignbuf");
 
 /*
  * Cache for the mount type id assigned to NFS.  This is used for
  * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c.
  */
 int	nfs_mount_type = -1;
 
 /* To keep more than one thread at a time from running vfs_getnewfsid */
 static struct mtx mntid_mtx;
 
 /*
  * Lock for any access to the following:
  *	vnode_free_list
  *	numvnodes
  *	freevnodes
  */
 static struct mtx vnode_free_list_mtx;
 
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
 
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static uma_zone_t vnode_zone;
 static uma_zone_t vnodepoll_zone;
 
 /*
  * The workitem queue.
  *
  * It is useful to delay writes of file data and filesystem metadata
  * for tens of seconds so that quickly created and deleted files need
  * not waste disk bandwidth being created and removed. To realize this,
  * we append vnodes to a "workitem" queue. When running with a soft
  * updates implementation, most pending metadata dependencies should
  * not wait for more than a few seconds. Thus, metadata updates on mounted
  * block devices are delayed only about half the time that file data is
  * delayed.
  * Similarly, directory updates are more critical, so are only delayed
  * about a third the time that file data is delayed. Thus, there are
  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
  * one each second (driven off the filesystem syncer process). The
  * syncer_delayno variable indicates the next queue that is to be processed.
  * Items that need to be processed soon are placed in this queue:
  *
  *	syncer_workitem_pending[syncer_delayno]
  *
  * A delay of fifteen seconds is done by placing the request fifteen
  * entries later in the queue:
  *
  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
  *
  */
 static int syncer_delayno;
 static long syncer_mask;
 LIST_HEAD(synclist, bufobj);
 static struct synclist *syncer_workitem_pending[2];
 /*
  * The sync_mtx protects:
  *	bo->bo_synclist
  *	sync_vnode_count
  *	syncer_delayno
  *	syncer_state
  *	syncer_workitem_pending
  *	syncer_worklist_len
  *	rushjob
  */
 static struct mtx sync_mtx;
 static struct cv sync_wakeup;
 
 #define SYNCER_MAXDELAY		32
 static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
 static int syncdelay = 30;		/* max time to delay syncing data */
 static int filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0,
     "Time to delay syncing files (in seconds)");
 static int dirdelay = 29;		/* time to delay syncing directories */
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0,
     "Time to delay syncing directories (in seconds)");
 static int metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0,
     "Time to delay syncing metadata (in seconds)");
 static int rushjob;		/* number of slots to run ASAP */
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
     "Number of times I/O speeded up (rush requests)");
 
 /*
  * When shutting down the syncer, run it at four times normal speed.
  */
 #define SYNCER_SHUTDOWN_SPEEDUP		4
 static int sync_vnode_count;
 static int syncer_worklist_len;
 static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY }
     syncer_state;
 
 /*
  * Number of vnodes we want to exist at any one time.  This is mostly used
  * to size hash tables in vnode-related code.  It is normally not used in
  * getnewvnode(), as wantfreevnodes is normally nonzero.
  *
  * XXX desiredvnodes is historical cruft and should not exist.
  */
 int desiredvnodes;
 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
     &desiredvnodes, 0, "Maximum number of vnodes");
 /*
  * wantfreevnodes is a u_long, so export it with a long-sized handler (as
  * the vfs.wantfreevnodes export of the same variable does).  SYSCTL_INT
  * would read and write only sizeof(int) bytes of the variable.
  */
 SYSCTL_LONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
     &wantfreevnodes, 0, "Minimum number of vnodes (legacy)");
 /* Incremented by vnlru_proc() each time a pass reclaims nothing. */
 static int vnlru_nowhere;
 SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW,
     &vnlru_nowhere, 0, "Number of times the vnlru process ran without success");
 
 /*
  * Macros to control when a vnode is freed and recycled.  All require
  * the vnode interlock.
  */
 #define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
 #define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
 #define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt)
 
 
 /*
  * Initialize the vnode management data structures.
  *
  * Reevaluate the following cap on the number of vnodes after the physical
  * memory size exceeds 512GB.  In the limit, as the physical memory size
  * grows, the ratio of physical pages to vnodes approaches sixteen to one.
  */
 #ifndef	MAXVNODES_MAX
 #define	MAXVNODES_MAX	(512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16))
 #endif
 /*
  * SYSINIT hook (see the SYSINIT() line below): computes desiredvnodes and
  * wantfreevnodes, then creates the mutexes, the free list, the UMA zones
  * and the syncer work queues used by the rest of this file.
  */
 static void
 vntblinit(void *dummy __unused)
 {
 	int physvnodes, virtvnodes;
 
 	/*
 	 * Desiredvnodes is a function of the physical memory size and the
 	 * kernel's heap size.  Generally speaking, it scales with the
 	 * physical memory size.  The ratio of desiredvnodes to physical pages
 	 * is one to four until desiredvnodes exceeds 98,304.  Thereafter, the
 	 * marginal ratio of desiredvnodes to physical pages is one to
 	 * sixteen.  However, desiredvnodes is limited by the kernel's heap
 	 * size.  The memory required by desiredvnodes vnodes and vm objects
 	 * may not exceed one seventh of the kernel's heap size.
 	 */
 	physvnodes = maxproc + cnt.v_page_count / 16 + 3 * min(98304 * 4,
 	    cnt.v_page_count) / 16;
 	virtvnodes = vm_kmem_size / (7 * (sizeof(struct vm_object) +
 	    sizeof(struct vnode)));
 	desiredvnodes = min(physvnodes, virtvnodes);
 	/* Clamp to the compile-time ceiling, reporting it on verbose boots. */
 	if (desiredvnodes > MAXVNODES_MAX) {
 		if (bootverbose)
 			printf("Reducing kern.maxvnodes %d -> %d\n",
 			    desiredvnodes, MAXVNODES_MAX);
 		desiredvnodes = MAXVNODES_MAX;
 	}
 	/* The free-vnode target starts at a quarter of desiredvnodes. */
 	wantfreevnodes = desiredvnodes / 4;
 	mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
 	TAILQ_INIT(&vnode_free_list);
 	mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
 	vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	/*
 	 * Initialize the filesystem syncer.
 	 *
 	 * Both work queues are requested with the same size, and each
 	 * hashinit() call stores its mask in syncer_mask.
 	 */
 	syncer_workitem_pending[WI_MPSAFEQ] = hashinit(syncer_maxdelay, M_VNODE,
 	    &syncer_mask);
 	syncer_workitem_pending[WI_GIANTQ] = hashinit(syncer_maxdelay, M_VNODE,
 	    &syncer_mask);
 	/*
 	 * syncer_maxdelay becomes mask + 1 (presumably the table size that
 	 * hashinit() actually chose -- confirm against hashinit()).
 	 */
 	syncer_maxdelay = syncer_mask + 1;
 	mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
 	cv_init(&sync_wakeup, "syncer");
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
 
 
 /*
  * Mark a mount point as busy. Used to synchronize access and to delay
  * unmounting. Note that mountlist_mtx is not released on failure.
  */
 int
 vfs_busy(struct mount *mp, int flags)
 {
 
 	MPASS((flags & ~MBF_MASK) == 0);
 	CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags);
 
 	/* Take the mount interlock and a reference before testing flags. */
 	MNT_ILOCK(mp);
 	MNT_REF(mp);
 	/*
 	 * If mount point is currently being unmounted, sleep until the
 	 * mount point fate is decided.  If thread doing the unmounting fails,
 	 * it will clear MNTK_UNMOUNT flag before waking us up, indicating
 	 * that this mount point has survived the unmount attempt and vfs_busy
 	 * should retry.  Otherwise the unmounter thread will set MNTK_REFEXPIRE
 	 * flag in addition to MNTK_UNMOUNT, indicating that mount point is
 	 * about to be really destroyed.  vfs_busy needs to release its
 	 * reference on the mount point in this case and return with ENOENT,
 	 * telling the caller that the mount it tried to busy is no longer
 	 * valid.
 	 */
 	while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 		if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
 			/*
 			 * Undo the reference taken above.  mountlist_mtx is
 			 * not unlocked on this path, so a caller that passed
 			 * MBF_MNTLSTLOCK still holds it.
 			 */
 			MNT_REL(mp);
 			MNT_IUNLOCK(mp);
 			CTR1(KTR_VFS, "%s: failed busying before sleeping",
 			    __func__);
 			return (ENOENT);
 		}
 		/* Do not hold mountlist_mtx across the sleep. */
 		if (flags & MBF_MNTLSTLOCK)
 			mtx_unlock(&mountlist_mtx);
 		/* Record that a waiter exists, then sleep on mp. */
 		mp->mnt_kern_flag |= MNTK_MWAIT;
 		msleep(mp, MNT_MTX(mp), PVFS, "vfs_busy", 0);
 		if (flags & MBF_MNTLSTLOCK)
 			mtx_lock(&mountlist_mtx);
 	}
 	/*
 	 * Success: release mountlist_mtx if the caller handed it in locked
 	 * and count this busy holder in mnt_lockref.
 	 */
 	if (flags & MBF_MNTLSTLOCK)
 		mtx_unlock(&mountlist_mtx);
 	mp->mnt_lockref++;
 	MNT_IUNLOCK(mp);
 	return (0);
 }
 
 /*
  * Free a busy filesystem.
  */
 void
 vfs_unbusy(struct mount *mp)
 {
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 	MNT_ILOCK(mp);
 	/* Give back the reference and the mnt_lockref count. */
 	MNT_REL(mp);
 	KASSERT(mp->mnt_lockref > 0, ("negative mnt_lockref"));
 	mp->mnt_lockref--;
 	/* Only the last holder, with a drain pending, has more to do. */
 	if (mp->mnt_lockref != 0 || (mp->mnt_kern_flag & MNTK_DRAINING) == 0) {
 		MNT_IUNLOCK(mp);
 		return;
 	}
 	MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT);
 	CTR1(KTR_VFS, "%s: waking up waiters", __func__);
 	mp->mnt_kern_flag &= ~MNTK_DRAINING;
 	wakeup(&mp->mnt_lockref);
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Lookup a mount point by filesystem identifier.
  */
 struct mount *
 vfs_getvfs(fsid_t *fsid)
 {
 	struct mount *mp;
 
 	CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid);
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		/* Both words of the fsid have to match. */
 		if (mp->mnt_stat.f_fsid.val[0] != fsid->val[0] ||
 		    mp->mnt_stat.f_fsid.val[1] != fsid->val[1])
 			continue;
 		/* Reference the mount before letting go of the list lock. */
 		vfs_ref(mp);
 		mtx_unlock(&mountlist_mtx);
 		return (mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 	CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid);
 	return (NULL);
 }
 
 /*
  * Lookup a mount point by filesystem identifier, busying it before
  * returning.
  */
 struct mount *
 vfs_busyfs(fsid_t *fsid)
 {
 	struct mount *mp;
 
 	CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid);
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (mp->mnt_stat.f_fsid.val[0] != fsid->val[0] ||
 		    mp->mnt_stat.f_fsid.val[1] != fsid->val[1])
 			continue;
 		/*
 		 * vfs_busy() drops mountlist_mtx itself on success; on
 		 * failure the mutex is still held and is released here.
 		 */
 		if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
 			mtx_unlock(&mountlist_mtx);
 			return (NULL);
 		}
 		return (mp);
 	}
 	CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid);
 	mtx_unlock(&mountlist_mtx);
 	return (NULL);
 }
 
 /*
  * Check if a user can access privileged mount options.
  */
 int
 vfs_suser(struct mount *mp, struct thread *td)
 {
 
 	/*
 	 * A jailed thread is refused outright when the file system type
 	 * is not flagged VFCF_JAIL.
 	 */
 	if ((mp->mnt_vfc->vfc_flags & VFCF_JAIL) == 0 && jailed(td->td_ucred))
 		return (EPERM);
 
 	/*
 	 * Refuse as well when prison_check() rejects the pairing of the
 	 * caller's credential with the credential that did the mount.
 	 */
 	if (prison_check(td->td_ucred, mp->mnt_cred) != 0)
 		return (EPERM);
 
 	/*
 	 * File systems with delegated administration (VFCF_DELEGADMIN) do
 	 * their own verification, and the user who did the original mount
 	 * needs no further privilege.
 	 */
 	if ((mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) != 0 ||
 	    mp->mnt_cred->cr_uid == td->td_ucred->cr_uid)
 		return (0);
 
 	/* Anyone else must hold PRIV_VFS_MOUNT_OWNER. */
 	return (priv_check(td, PRIV_VFS_MOUNT_OWNER));
 }
 
 /*
  * Get a new unique fsid.  Try to make its val[0] unique, since this value
  * will be used to create fake device numbers for stat().  Also try (but
  * not so hard) to make its val[0] unique mod 2^16, since some emulators only
  * support 16-bit device numbers.  We end up with unique val[0]'s for the
  * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
  *
  * Keep in mind that several mounts may be running in parallel.  Starting
  * the search one past where the previous search terminated is both a
  * micro-optimization and a defense against returning the same fsid to
  * different mounts.
  */
 void
 vfs_getnewfsid(struct mount *mp)
 {
 	static uint16_t mntid_base;
 	struct mount *nmp;
 	fsid_t tfsid;
 	int mtype;
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 	mtx_lock(&mntid_mtx);
 	/* val[1] is simply the file system type number. */
 	mtype = mp->mnt_vfc->vfc_typenum;
 	tfsid.val[1] = mtype;
 	/* The low byte of the type number occupies bits 24-31 of val[0]. */
 	mtype = (mtype & 0xFF) << 24;
 	do {
 		/*
 		 * Build a candidate from the running counter and advance
 		 * the counter whether or not the candidate is free.
 		 */
 		tfsid.val[0] = makedev(255,
 		    mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
 		mntid_base++;
 		/* A hit means the id is taken: drop the reference and retry. */
 		nmp = vfs_getvfs(&tfsid);
 		if (nmp != NULL)
 			vfs_rel(nmp);
 	} while (nmp != NULL);
 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
 	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
 	mtx_unlock(&mntid_mtx);
 }
 
 /*
  * Knob to control the precision of file timestamps:
  *
  *   0 = seconds only; nanoseconds zeroed.
  *   1 = seconds and nanoseconds, accurate within 1/HZ.
  *   2 = seconds and nanoseconds, truncated to microseconds.
  * >=3 = seconds and nanoseconds, maximum precision.
  */
 /* Values of timestamp_precision, in the order documented above. */
 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
 
 /*
  * Setting 2 (TSP_USEC) goes through microtime(), i.e. it truncates to
  * microseconds, so the description says "us" rather than "ms".
  */
 static int timestamp_precision = TSP_SEC;
 SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
     &timestamp_precision, 0, "File timestamp precision (0: seconds, "
     "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to us, "
     "3+: sec + ns (max. precision))");
 
 /*
  * Get a current timestamp.
  */
 void
 vfs_timestamp(struct timespec *tsp)
 {
 	struct timeval tv;
 	int precision;
 
 	/* Sample the knob once so a single setting governs this call. */
 	precision = timestamp_precision;
 	if (precision == TSP_SEC) {
 		/* Whole seconds only. */
 		tsp->tv_sec = time_second;
 		tsp->tv_nsec = 0;
 	} else if (precision == TSP_HZ) {
 		getnanotime(tsp);
 	} else if (precision == TSP_USEC) {
 		/* Go through a timeval to drop sub-microsecond digits. */
 		microtime(&tv);
 		TIMEVAL_TO_TIMESPEC(&tv, tsp);
 	} else {
 		/* TSP_NSEC and every other value. */
 		nanotime(tsp);
 	}
 }
 
 /*
  * Set vnode attributes to VNOVAL
  */
 void
 vattr_null(struct vattr *vap)
 {
 
 	/* The type becomes VNON; va_vaflags is cleared at the end. */
 	vap->va_type = VNON;
 
 	/* Ownership, mode and link count. */
 	vap->va_mode = VNOVAL;
 	vap->va_nlink = VNOVAL;
 	vap->va_uid = VNOVAL;
 	vap->va_gid = VNOVAL;
 
 	/* Identity of the file and of its device. */
 	vap->va_fsid = VNOVAL;
 	vap->va_fileid = VNOVAL;
 	vap->va_rdev = VNOVAL;
 	vap->va_gen = VNOVAL;
 
 	/* Sizes. */
 	vap->va_size = VNOVAL;
 	vap->va_bytes = VNOVAL;
 	vap->va_blocksize = VNOVAL;
 
 	/* All four timestamps, seconds and nanoseconds alike. */
 	vap->va_atime.tv_sec = VNOVAL;
 	vap->va_atime.tv_nsec = VNOVAL;
 	vap->va_mtime.tv_sec = VNOVAL;
 	vap->va_mtime.tv_nsec = VNOVAL;
 	vap->va_ctime.tv_sec = VNOVAL;
 	vap->va_ctime.tv_nsec = VNOVAL;
 	vap->va_birthtime.tv_sec = VNOVAL;
 	vap->va_birthtime.tv_nsec = VNOVAL;
 
 	/* Flags. */
 	vap->va_flags = VNOVAL;
 	vap->va_vaflags = 0;
 }
 
 /*
  * This routine is called when we have too many vnodes.  It attempts
  * to free <count> vnodes and will potentially free vnodes that still
  * have VM backing store (VM backing store is typically the cause
  * of a vnode blowout so we want to do this).  Therefore, this operation
  * is not considered cheap.
  *
  * A number of conditions may prevent a vnode from being reclaimed.
  * the buffer cache may have references on the vnode, a directory
  * vnode may still have references due to the namei cache representing
  * underlying files, or the vnode may be in active use.   It is not
  * desirable to reuse such vnodes.  These conditions may cause the
  * number of vnodes to reach some minimum value regardless of what
  * you set kern.maxvnodes to.  Do not set kern.maxvnodes too low.
  */
 static int
 vlrureclaim(struct mount *mp)
 {
 	struct vnode *vp;
 	int done;
 	int trigger;
 	int usevnodes;
 	int count;
 
 	/*
 	 * Calculate the trigger point, don't allow user
 	 * screwups to blow us up.   This prevents us from
 	 * recycling vnodes with lots of resident pages.  We
 	 * aren't trying to free memory, we are trying to
 	 * free vnodes.
 	 */
 	usevnodes = desiredvnodes;
 	if (usevnodes <= 0)
 		usevnodes = 1;
 	trigger = cnt.v_page_count * 2 / usevnodes;
 	done = 0;
 	vn_start_write(NULL, &mp, V_WAIT);
 	MNT_ILOCK(mp);
 	/* Look at no more than a tenth of the mount's vnodes, plus one. */
 	count = mp->mnt_nvnodelistsize / 10 + 1;
 	while (count != 0) {
 		/* Take the first vnode on the list that is not a marker. */
 		vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 		while (vp != NULL && vp->v_type == VMARKER)
 			vp = TAILQ_NEXT(vp, v_nmntvnodes);
 		if (vp == NULL)
 			break;
 		/* Rotate it to the tail so the next pass sees another one. */
 		TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 		TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 		--count;
 		if (!VI_TRYLOCK(vp))
 			goto next_iter;
 		/*
 		 * If it's been deconstructed already, it's still
 		 * referenced, or it exceeds the trigger, skip it.
 		 */
 		if (vp->v_usecount ||
 		    (!vlru_allow_cache_src &&
 			!LIST_EMPTY(&(vp)->v_cache_src)) ||
 		    (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL &&
 		    vp->v_object->resident_page_count > trigger)) {
 			VI_UNLOCK(vp);
 			goto next_iter;
 		}
 		/*
 		 * From here on the mount interlock is dropped; every exit
 		 * below goes through next_iter_mntunlocked, which re-takes it.
 		 */
 		MNT_IUNLOCK(mp);
 		vholdl(vp);
 		if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) {
 			vdrop(vp);
 			goto next_iter_mntunlocked;
 		}
 		VI_LOCK(vp);
 		/*
 		 * v_usecount may have been bumped after VOP_LOCK() dropped
 		 * the vnode interlock and before it was locked again.
 		 *
 		 * It is not necessary to recheck VI_DOOMED because it can
 		 * only be set by another thread that holds both the vnode
 		 * lock and vnode interlock.  If another thread has the
 		 * vnode lock before we get to VOP_LOCK() and obtains the
 		 * vnode interlock after VOP_LOCK() drops the vnode
 		 * interlock, the other thread will be unable to drop the
 		 * vnode lock before our VOP_LOCK() call fails.
 		 */
 		if (vp->v_usecount ||
 		    (!vlru_allow_cache_src &&
 			!LIST_EMPTY(&(vp)->v_cache_src)) ||
 		    (vp->v_object != NULL &&
 		    vp->v_object->resident_page_count > trigger)) {
 			VOP_UNLOCK(vp, LK_INTERLOCK);
 			goto next_iter_mntunlocked;
 		}
 		KASSERT((vp->v_iflag & VI_DOOMED) == 0,
 		    ("VI_DOOMED unexpectedly detected in vlrureclaim()"));
 		vgonel(vp);
 		VOP_UNLOCK(vp, 0);
 		vdropl(vp);
 		done++;
 		/*
 		 * Entered with the mount interlock NOT held: either just
 		 * re-lock it, or yield first when count is a multiple of 256.
 		 */
 next_iter_mntunlocked:
 		if ((count % 256) != 0)
 			goto relock_mnt;
 		goto yield;
 		/*
 		 * Entered with the mount interlock held: keep going unless
 		 * count is a multiple of 256, in which case drop the lock
 		 * and yield.
 		 */
 next_iter:
 		if ((count % 256) != 0)
 			continue;
 		MNT_IUNLOCK(mp);
 yield:
 		uio_yield();
 relock_mnt:
 		MNT_ILOCK(mp);
 	}
 	MNT_IUNLOCK(mp);
 	vn_finished_write(mp);
 	/* Number of vnodes on which vgonel() was called. */
 	return done;
 }
 
 /*
  * Attempt to keep the free list at wantfreevnodes length.
  */
 static void
 vnlru_free(int count)
 {
 	struct vnode *vp;
 	int vfslocked;
 
 	/*
 	 * Called and returns with vnode_free_list_mtx held; the mutex is
 	 * dropped around each recycle attempt.
 	 */
 	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
 	for (; count > 0; count--) {
 		vp = TAILQ_FIRST(&vnode_free_list);
 		/*
 		 * The list can be modified while the free_list_mtx
 		 * has been dropped and vp could be NULL here.
 		 */
 		if (!vp)
 			break;
 		VNASSERT(vp->v_op != NULL, vp,
 		    ("vnlru_free: vnode already reclaimed."));
 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 		/*
 		 * Don't recycle if we can't get the interlock.
 		 */
 		if (!VI_TRYLOCK(vp)) {
 			/* Requeue at the tail; this still uses up one count. */
 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 			continue;
 		}
 		VNASSERT(VCANRECYCLE(vp), vp,
 		    ("vp inconsistent on freelist"));
 		/* Take it off the free accounting and hold it for recycling. */
 		freevnodes--;
 		vp->v_iflag &= ~VI_FREE;
 		vholdl(vp);
 		mtx_unlock(&vnode_free_list_mtx);
 		VI_UNLOCK(vp);
 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 		/* The result is not examined; vdrop() below handles both cases. */
 		vtryrecycle(vp);
 		VFS_UNLOCK_GIANT(vfslocked);
 		/*
 		 * If the recycle succeeded this vdrop will actually free
 		 * the vnode.  If not it will simply place it back on
 		 * the free list.
 		 */
 		vdrop(vp);
 		mtx_lock(&vnode_free_list_mtx);
 	}
 }
 /*
  * Attempt to recycle vnodes in a context that is always safe to block.
  * Calling vlrureclaim() from the bowels of filesystem code has some
  * interesting deadlock problems.
  */
 static struct proc *vnlruproc;
 static int vnlruproc_sig;
 
 /*
  * Body of the "vnlru" kernel process: loops forever trimming the free
  * list and, when numvnodes is above 90% of desiredvnodes, running
  * vlrureclaim() over every mount it can busy.
  */
 static void
 vnlru_proc(void)
 {
 	struct mount *mp, *nmp;
 	int done, vfslocked;
 	struct proc *p = vnlruproc;
 
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p,
 	    SHUTDOWN_PRI_FIRST);
 
 	for (;;) {
 		kproc_suspend_check(p);
 		mtx_lock(&vnode_free_list_mtx);
 		/* Trim the free list back to wantfreevnodes entries. */
 		if (freevnodes > wantfreevnodes)
 			vnlru_free(freevnodes - wantfreevnodes);
 		if (numvnodes <= desiredvnodes * 9 / 10) {
 			/*
 			 * Not under pressure: clear the request flag, wake
 			 * threads sleeping on it in getnewvnode(), and sleep
 			 * for up to hz ticks.  With PDROP the mutex is not
 			 * held afterwards; the loop top takes it again.
 			 */
 			vnlruproc_sig = 0;
 			wakeup(&vnlruproc_sig);
 			msleep(vnlruproc, &vnode_free_list_mtx,
 			    PVFS|PDROP, "vlruwt", hz);
 			continue;
 		}
 		mtx_unlock(&vnode_free_list_mtx);
 		done = 0;
 		mtx_lock(&mountlist_mtx);
 		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 			/*
 			 * On failure vfs_busy() leaves mountlist_mtx held, so
 			 * the list can be walked on directly.
 			 */
 			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
 			/*
 			 * On success vfs_busy() released mountlist_mtx; it is
 			 * re-taken before reading the next list entry and
 			 * before the mount is unbusied.
 			 */
 			vfslocked = VFS_LOCK_GIANT(mp);
 			done += vlrureclaim(mp);
 			VFS_UNLOCK_GIANT(vfslocked);
 			mtx_lock(&mountlist_mtx);
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			vfs_unbusy(mp);
 		}
 		mtx_unlock(&mountlist_mtx);
 		if (done == 0) {
 #if 0
 			/* These messages are temporary debugging aids */
 			if (vnlru_nowhere < 5)
 				printf("vnlru process getting nowhere..\n");
 			else if (vnlru_nowhere == 5)
 				printf("vnlru process messages stopped.\n");
 #endif
 			/* Nothing reclaimed: count it and back off for 3 * hz. */
 			vnlru_nowhere++;
 			tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3);
 		} else
 			uio_yield();
 	}
 }
 
 /*
  * Descriptor passed to kproc_start() by the SYSINIT below.
  * NOTE(review): the fields are presumably the process name, its entry
  * point and where the resulting struct proc pointer is stored -- confirm
  * against the struct kproc_desc definition.
  */
 static struct kproc_desc vnlru_kp = {
 	"vnlru",
 	vnlru_proc,
 	&vnlruproc
 };
 SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start,
     &vnlru_kp);
  
 /*
  * Routines having to do with the management of the vnode table.
  */
 
 /*
  * Tear down a vnode and return it to vnode_zone.  The assertions below
  * spell out the state the vnode has to be in when this is called.
  */
 void
 vdestroy(struct vnode *vp)
 {
 	struct bufobj *bo;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	/* numvnodes is protected by vnode_free_list_mtx. */
 	mtx_lock(&vnode_free_list_mtx);
 	numvnodes--;
 	mtx_unlock(&vnode_free_list_mtx);
 	bo = &vp->v_bufobj;
 	/* Off the free list, no private data, no holds, uses or writers. */
 	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
 	    ("cleaned vnode still on the free list."));
 	VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
 	VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
 	VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
 	VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
 	/* No pending output and no clean or dirty buffers on the bufobj. */
 	VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
 	VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
 	VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL"));
 	VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
 	VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL"));
 	/* No name cache entries in either direction, nor for "..". */
 	VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
 	VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
 	VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
 	/*
 	 * NOTE(review): there is no VI_LOCK() in this function, so the
 	 * caller presumably enters with the vnode interlock held.
 	 */
 	VI_UNLOCK(vp);
 #ifdef MAC
 	mac_vnode_destroy(vp);
 #endif
 	if (vp->v_pollinfo != NULL)
 		destroy_vpollinfo(vp->v_pollinfo);
 #ifdef INVARIANTS
 	/* XXX Elsewhere we can detect an already freed vnode via NULL v_op. */
 	vp->v_op = NULL;
 #endif
 	/* Destroy the vnode lock, both mutexes, then free the memory. */
 	lockdestroy(vp->v_vnlock);
 	mtx_destroy(&vp->v_interlock);
 	mtx_destroy(BO_MTX(bo));
 	uma_zfree(vnode_zone, vp);
 }
 
 /*
  * Try to recycle a freed vnode.  We abort if anyone picks up a reference
  * before we actually vgone().  This function must be called with the vnode
  * held to prevent the vnode from being returned to the free list midway
  * through vgone().
  */
 static int
 vtryrecycle(struct vnode *vp)
 {
 	struct mount *vnmp;
 	int inuse;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	VNASSERT(vp->v_holdcnt, vp,
 	    ("vtryrecycle: Recycling vp %p without a reference.", vp));
 	/*
 	 * This vnode may have been found and locked via some other list;
 	 * if so it cannot be recycled yet.
 	 */
 	if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, vp %p lock is already held",
 		    __func__, vp);
 		return (EWOULDBLOCK);
 	}
 	/*
 	 * Don't recycle if its filesystem is being suspended.
 	 */
 	if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) {
 		VOP_UNLOCK(vp, 0);
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, cannot start the write for %p",
 		    __func__, vp);
 		return (EBUSY);
 	}
 	/*
 	 * Under the interlock, check whether someone picked the vnode up
 	 * from another list.  If nobody did and it is not already DOOMED,
 	 * vgonel() marks it so that later finders skip over it.
 	 */
 	VI_LOCK(vp);
 	inuse = (vp->v_usecount != 0);
 	if (!inuse && (vp->v_iflag & VI_DOOMED) == 0)
 		vgonel(vp);
 	/* Both outcomes release the locks and end the write the same way. */
 	VOP_UNLOCK(vp, LK_INTERLOCK);
 	vn_finished_write(vnmp);
 	if (inuse) {
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, %p is already referenced",
 		    __func__, vp);
 		return (EBUSY);
 	}
 	return (0);
 }
 
 /*
  * Return the next vnode from the free list.
  */
 /*
  * Allocate and initialize a new vnode for mount point mp (which may be
  * NULL), using vops as its operations vector and tag as its v_tag and
  * lock name.  The vnode is stored in *vpp; the function always returns 0.
  */
 int
 getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
     struct vnode **vpp)
 {
 	struct vnode *vp = NULL;
 	struct bufobj *bo;
 
 	CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag);
 	mtx_lock(&vnode_free_list_mtx);
 	/*
 	 * Lend our context to reclaim vnodes if they've exceeded the max.
 	 */
 	if (freevnodes > wantfreevnodes)
 		vnlru_free(1);
 	/*
 	 * Wait for available vnodes.
 	 */
 	if (numvnodes > desiredvnodes) {
 		if (mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)) {
 			/*
 			 * File system is being suspended, we cannot risk a
 			 * deadlock here, so allocate new vnode anyway.
 			 */
 			if (freevnodes > wantfreevnodes)
 				vnlru_free(freevnodes - wantfreevnodes);
 			goto alloc;
 		}
 		if (vnlruproc_sig == 0) {
 			vnlruproc_sig = 1;	/* avoid unnecessary wakeups */
 			wakeup(vnlruproc);
 		}
 		/*
 		 * Sleep for at most hz ticks; vnlru_proc() does the matching
 		 * wakeup on &vnlruproc_sig.  The allocation then proceeds
 		 * whatever numvnodes is, since the ENFILE path below is
 		 * compiled out.
 		 */
 		msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS,
 		    "vlruwk", hz);
 #if 0	/* XXX Not all VFS_VGET/ffs_vget callers check returns. */
 		if (numvnodes > desiredvnodes) {
 			mtx_unlock(&vnode_free_list_mtx);
 			return (ENFILE);
 		}
 #endif
 	}
 alloc:
 	/* Count the vnode while the free-list mutex is still held. */
 	numvnodes++;
 	mtx_unlock(&vnode_free_list_mtx);
 	vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
 	/*
 	 * Setup locks.
 	 */
 	vp->v_vnlock = &vp->v_lock;
 	mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
 	/*
 	 * By default, don't allow shared locks unless filesystems
 	 * opt-in.
 	 */
 	lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE);
 	/*
 	 * Initialize bufobj.
 	 */
 	bo = &vp->v_bufobj;
 	bo->__bo_vnode = vp;
 	mtx_init(BO_MTX(bo), "bufobj interlock", NULL, MTX_DEF);
 	bo->bo_ops = &buf_ops_bio;
 	bo->bo_private = vp;
 	TAILQ_INIT(&bo->bo_clean.bv_hd);
 	TAILQ_INIT(&bo->bo_dirty.bv_hd);
 	/*
 	 * Initialize namecache.
 	 */
 	LIST_INIT(&vp->v_cache_src);
 	TAILQ_INIT(&vp->v_cache_dst);
 	/*
 	 * Finalize various vnode identity bits.
 	 */
 	vp->v_type = VNON;
 	vp->v_tag = tag;
 	vp->v_op = vops;
 	/* presumably the initial use reference handed to the caller */
 	v_incr_usecount(vp);
 	vp->v_data = 0;
 #ifdef MAC
 	mac_vnode_init(vp);
 	if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
 		mac_vnode_associate_singlelabel(mp, vp);
 	else if (mp == NULL && vops != &dead_vnodeops)
 		printf("NULL mp in getnewvnode()\n");
 #endif
 	/* Inherit the I/O size and the no-knote setting from the mount. */
 	if (mp != NULL) {
 		bo->bo_bsize = mp->mnt_stat.f_iosize;
 		if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
 			vp->v_vflag |= VV_NOKNOTE;
 	}
 
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Delete from old mount point vnode list, if on one.
  */
 static void
 delmntque(struct vnode *vp)
 {
 	struct mount *owner;
 
 	/* A vnode that is not on any mount's list needs no work. */
 	if ((owner = vp->v_mount) == NULL)
 		return;
 
 	/*
 	 * Unlink the vnode from the mount's vnode list and release the
 	 * mount reference, all under the mount interlock.
 	 */
 	MNT_ILOCK(owner);
 	vp->v_mount = NULL;
 	VNASSERT(owner->mnt_nvnodelistsize > 0, vp,
 		("bad mount point vnode list size"));
 	TAILQ_REMOVE(&owner->mnt_nvnodelist, vp, v_nmntvnodes);
 	owner->mnt_nvnodelistsize -= 1;
 	MNT_REL(owner);
 	MNT_IUNLOCK(owner);
 }
 
 /*
  * Default destructor handed to insmntque1() by insmntque(): strips the
  * filesystem-private state from the vnode, switches it to the dead
  * vnode operations, and then disposes of it with vgone() + vput().
  * The dtr_arg argument is not used.
  */
 static void
 insmntque_stddtr(struct vnode *vp, void *dtr_arg)
 {
 
 	vp->v_data = NULL;
 	vp->v_op = &dead_vnodeops;
 	/*
 	 * XXX A non mp-safe filesystem may still call insmntque with
 	 * the vnode unlocked, so take the lock here if nobody holds it.
 	 */
 	if (VOP_ISLOCKED(vp) == 0)
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	vgone(vp);
 	vput(vp);
 }
 
 /*
  * Insert into list of vnodes for the new mount point, if available.
  */
 int
 insmntque1(struct vnode *vp, struct mount *mp,
 	void (*dtr)(struct vnode *, void *), void *dtr_arg)
 {
 	int lkstate;
 
 	KASSERT(vp->v_mount == NULL,
 		("insmntque: vnode already on per mount vnode list"));
 	VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)"));
 #ifdef DEBUG_VFS_LOCKS
 	if (!VFS_NEEDSGIANT(mp))
 		ASSERT_VOP_ELOCKED(vp,
 		    "insmntque: mp-safe fs and non-locked vp");
 #endif
 
 	MNT_ILOCK(mp);
 	/*
 	 * The mount refuses new vnodes when MNTK_NOINSMNTQ is set and
 	 * either a forced unmount is in progress or its vnode list is
 	 * already empty.  The insertion is still allowed for a vnode
 	 * that is exclusively locked with VV_FORCEINSMQ set, or that is
 	 * locked in any non-exclusive mode.
 	 */
 	if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 &&
 	    ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 ||
 	    mp->mnt_nvnodelistsize == 0)) {
 		lkstate = VOP_ISLOCKED(vp);
 		if (lkstate == 0 || (lkstate == LK_EXCLUSIVE &&
 		    (vp->v_vflag & VV_FORCEINSMQ) == 0)) {
 			MNT_IUNLOCK(mp);
 			/* Let the caller-supplied destructor clean up. */
 			if (dtr != NULL)
 				dtr(vp, dtr_arg);
 			return (EBUSY);
 		}
 	}
 
 	/* Link the vnode onto the mount and take a mount reference. */
 	vp->v_mount = mp;
 	MNT_REF(mp);
 	TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 	VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
 		("neg mount point vnode list size"));
 	mp->mnt_nvnodelistsize += 1;
 	MNT_IUNLOCK(mp);
 	return (0);
 }
 
 int
 insmntque(struct vnode *vp, struct mount *mp)
 {
 
 	return (insmntque1(vp, mp, insmntque_stddtr, NULL));
 }
 
 /*
  * Flush out and invalidate all buffers associated with a bufobj
  * Called with the underlying object locked.
  */
 /*
  * bo       - buffer object whose clean and dirty lists are flushed.
  * flags    - V_SAVE: wait for pending writes and sync dirty buffers
  *            before invalidating.  V_ALT / V_NORMAL: passed through to
  *            flushbuflist(); when either is set the VM pages are left
  *            alone and the final emptiness check is skipped.
  * slpflag, slptimeo - handed to bufobj_wwait() and flushbuflist().
  *
  * Returns 0 on success, otherwise the error from bufobj_wwait(),
  * BO_SYNC() or flushbuflist() (EAGAIN from the latter is retried here).
  */
 int
 bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
 {
 	int error;
 
 	BO_LOCK(bo);
 	if (flags & V_SAVE) {
 		/* Let writes already in flight drain first. */
 		error = bufobj_wwait(bo, slpflag, slptimeo);
 		if (error) {
 			BO_UNLOCK(bo);
 			return (error);
 		}
 		if (bo->bo_dirty.bv_cnt > 0) {
 			/* BO_SYNC is called with the bufobj lock dropped. */
 			BO_UNLOCK(bo);
 			if ((error = BO_SYNC(bo, MNT_WAIT)) != 0)
 				return (error);
 			/*
 			 * XXX We could save a lock/unlock if this was only
 			 * enabled under INVARIANTS
 			 */
 			BO_LOCK(bo);
 			if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)
 				panic("vinvalbuf: dirty bufs");
 		}
 	}
 	/*
 	 * If you alter this loop please notice that interlock is dropped and
 	 * reacquired in flushbuflist.  Special care is needed to ensure that
 	 * no race conditions occur from this.
 	 */
 	do {
 		/* Clean list first; the dirty list only if that succeeded. */
 		error = flushbuflist(&bo->bo_clean,
 		    flags, bo, slpflag, slptimeo);
 		if (error == 0)
 			error = flushbuflist(&bo->bo_dirty,
 			    flags, bo, slpflag, slptimeo);
 		/* EAGAIN means "made progress, scan again"; anything else fails. */
 		if (error != 0 && error != EAGAIN) {
 			BO_UNLOCK(bo);
 			return (error);
 		}
 	} while (error != 0);
 
 	/*
 	 * Wait for I/O to complete.  XXX needs cleaning up.  The vnode can
 	 * have write I/O in-progress but if there is a VM object then the
 	 * VM object can also have read-I/O in-progress.
 	 */
 	do {
 		bufobj_wwait(bo, 0, 0);
 		BO_UNLOCK(bo);
 		if (bo->bo_object != NULL) {
 			VM_OBJECT_LOCK(bo->bo_object);
 			vm_object_pip_wait(bo->bo_object, "bovlbx");
 			VM_OBJECT_UNLOCK(bo->bo_object);
 		}
 		BO_LOCK(bo);
 	} while (bo->bo_numoutput > 0);
 	BO_UNLOCK(bo);
 
 	/*
 	 * Destroy the copy in the VM cache, too.
 	 */
 	if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL)) == 0) {
 		VM_OBJECT_LOCK(bo->bo_object);
 		vm_object_page_remove(bo->bo_object, 0, 0,
 			(flags & V_SAVE) ? TRUE : FALSE);
 		VM_OBJECT_UNLOCK(bo->bo_object);
 	}
 
 #ifdef INVARIANTS
 	/* A full (non V_ALT / V_NORMAL) flush must leave both lists empty. */
 	BO_LOCK(bo);
 	if ((flags & (V_ALT | V_NORMAL)) == 0 &&
 	    (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
 		panic("vinvalbuf: flush failed");
 	BO_UNLOCK(bo);
 #endif
 	return (0);
 }
 
 /*
  * Flush out and invalidate all buffers associated with a vnode.
  * Called with the underlying object locked.
  */
 int
 vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
 {
 	struct bufobj *bo;
 
 	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
 	ASSERT_VOP_LOCKED(vp, "vinvalbuf");
 
 	/* All of the work is done on the vnode's embedded buffer object. */
 	bo = &vp->v_bufobj;
 	return (bufobj_invalbuf(bo, flags, slpflag, slptimeo));
 }
 
 /*
  * Flush out buffers on the specified list.
  *
  */
 /*
  * Must be entered with the bufobj lock held (asserted below); every
  * return path re-takes that lock before returning.
  *
  * Returns 0 when no buffer on the list was eligible, EAGAIN when at
  * least one buffer was processed (or a buffer lock attempt failed with
  * ENOLCK) so the caller should rescan, or another error reported by
  * BUF_TIMELOCK().
  */
 static int
 flushbuflist( struct bufv *bufv, int flags, struct bufobj *bo, int slpflag,
     int slptimeo)
 {
 	struct buf *bp, *nbp;
 	int retval, error;
 	daddr_t lblkno;
 	b_xflags_t xflags;
 
 	ASSERT_BO_LOCKED(bo);
 
 	retval = 0;
 	TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) {
 		/*
 		 * V_NORMAL skips buffers marked BX_ALTDATA; V_ALT skips
 		 * buffers that are not.
 		 */
 		if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) ||
 		    ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) {
 			continue;
 		}
 		/*
 		 * Remember the identity of the next buffer so that it can be
 		 * revalidated after the bufobj lock has been dropped.
 		 */
 		lblkno = 0;
 		xflags = 0;
 		if (nbp != NULL) {
 			lblkno = nbp->b_lblkno;
 			xflags = nbp->b_xflags &
 				(BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN);
 		}
 		retval = EAGAIN;
 		/* LK_INTERLOCK: the bufobj mutex is released by this call. */
 		error = BUF_TIMELOCK(bp,
 		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo),
 		    "flushbuf", slpflag, slptimeo);
 		if (error) {
 			BO_LOCK(bo);
 			return (error != ENOLCK ? error : EAGAIN);
 		}
 		KASSERT(bp->b_bufobj == bo,
 		    ("bp %p wrong b_bufobj %p should be %p",
 		    bp, bp->b_bufobj, bo));
 		if (bp->b_bufobj != bo) {	/* XXX: necessary ? */
 			BUF_UNLOCK(bp);
 			BO_LOCK(bo);
 			return (EAGAIN);
 		}
 		/*
 		 * XXX Since there are no node locks for NFS, I
 		 * believe there is a slight chance that a delayed
 		 * write will occur while sleeping just above, so
 		 * check for it.
 		 */
 		if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
 		    (flags & V_SAVE)) {
 			BO_LOCK(bo);
 			bremfree(bp);
 			BO_UNLOCK(bo);
 			bp->b_flags |= B_ASYNC;
 			bwrite(bp);
 			BO_LOCK(bo);
 			return (EAGAIN);	/* XXX: why not loop ? */
 		}
 		/* Invalidate the buffer and release it. */
 		BO_LOCK(bo);
 		bremfree(bp);
 		BO_UNLOCK(bo);
 		bp->b_flags |= (B_INVAL | B_RELBUF);
 		bp->b_flags &= ~B_ASYNC;
 		brelse(bp);
 		BO_LOCK(bo);
 		/*
 		 * Stop the scan if the saved successor no longer matches
 		 * what was recorded before the lock was dropped.
 		 */
 		if (nbp != NULL &&
 		    (nbp->b_bufobj != bo ||
 		     nbp->b_lblkno != lblkno ||
 		     (nbp->b_xflags &
 		      (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags))
 			break;			/* nbp invalid */
 	}
 	return (retval);
 }
 
 /*
  * Truncate a file's buffer and pages to a specified length.  This
  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
  * sync activity.
  *
  * vp      - vnode being truncated; must be locked by the caller.
  * cred, td - only used for tracing here.
  * length  - new file length in bytes.
  * blksize - logical block size used to convert length to a block number.
  *
  * Every clean and dirty buffer at or beyond the first block past the new
  * length is invalidated.  When length is non-zero, dirty buffers with a
  * logical block number <= 0 are additionally written out asynchronously.
  * Always returns 0.
  */
 int
 vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
     off_t length, int blksize)
 {
 	struct buf *bp, *nbp;
 	int anyfreed;
 	/*
 	 * Block numbers are daddr_t; keeping this as an int would truncate
 	 * the result for lengths beyond INT_MAX blocks.
 	 */
 	daddr_t trunclbn;
 	struct bufobj *bo;
 
 	CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__,
 	    vp, cred, blksize, (uintmax_t)length);
 
 	/*
 	 * Round up to the *next* lbn.
 	 */
 	trunclbn = (length + blksize - 1) / blksize;
 
 	ASSERT_VOP_LOCKED(vp, "vtruncbuf");
 restart:
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	anyfreed = 1;
 	for (;anyfreed;) {
 		anyfreed = 0;
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
 			if (bp->b_lblkno < trunclbn)
 				continue;
 			/* LK_INTERLOCK: the bufobj mutex is dropped here. */
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_MTX(bo)) == ENOLCK)
 				goto restart;
 
 			BO_LOCK(bo);
 			bremfree(bp);
 			BO_UNLOCK(bo);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
 			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
 			anyfreed = 1;
 
 			/*
 			 * Revalidate the saved successor with the bufobj
 			 * lock held, as flushbuflist() does, before the
 			 * list walk continues from it.
 			 */
 			BO_LOCK(bo);
 			if (nbp != NULL &&
 			    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI))) {
 				BO_UNLOCK(bo);
 				goto restart;
 			}
 		}
 
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bp->b_lblkno < trunclbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_MTX(bo)) == ENOLCK)
 				goto restart;
 			BO_LOCK(bo);
 			bremfree(bp);
 			BO_UNLOCK(bo);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
 			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
 			anyfreed = 1;
 
 			/* Same revalidation as above, for the dirty list. */
 			BO_LOCK(bo);
 			if (nbp != NULL &&
 			    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI) == 0)) {
 				BO_UNLOCK(bo);
 				goto restart;
 			}
 		}
 	}
 
 	if (length > 0) {
 restartsync:
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bp->b_lblkno > 0)
 				continue;
 			/*
 			 * Since we hold the vnode lock this should only
 			 * fail if we're racing with the buf daemon.
 			 */
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_MTX(bo)) == ENOLCK) {
 				goto restart;
 			}
 			VNASSERT((bp->b_flags & B_DELWRI), vp,
 			    ("buf(%p) on dirty queue without DELWRI", bp));
 
 			BO_LOCK(bo);
 			bremfree(bp);
 			BO_UNLOCK(bo);
 			bawrite(bp);
 			BO_LOCK(bo);
 			goto restartsync;
 		}
 	}
 
 	/* Wait for outstanding writes, then shrink the pager's view. */
 	bufobj_wwait(bo, 0, 0);
 	BO_UNLOCK(bo);
 	vnode_pager_setsize(vp, length);
 
 	return (0);
 }
 
 /*
  * buf_splay() - splay tree core for the clean/dirty list of buffers in
  *		 a vnode.
  *
  *	NOTE: We have to deal with the special case of a background bitmap
  *	buffer, a situation where two buffers will have the same logical
  *	block offset.  We want (1) only the foreground buffer to be accessed
  *	in a lookup and (2) must differentiate between the foreground and
  *	background buffer in the splay tree algorithm because the splay
  *	tree cannot normally handle multiple entities with the same 'index'.
  *	We accomplish this by adding differentiating flags to the splay tree's
  *	numerical domain.
  */
 /*
  * lblkno, xflags - key to splay on; only the BX_BKGRDMARKER bit of
  *                  xflags takes part in the comparison, as a tie-break
  *                  for equal block numbers.
  * root           - current root of the tree, may be NULL.
  *
  * Returns the new root of the reorganized tree, or NULL if the tree was
  * empty.  When no node matches the key exactly, the returned root is the
  * node at which the downward search stopped.
  */
 static
 struct buf *
 buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root)
 {
 	struct buf dummy;
 	struct buf *lefttreemax, *righttreemin, *y;
 
 	if (root == NULL)
 		return (NULL);
 	/*
 	 * dummy collects the two side trees: its b_right ends up heading
 	 * the left tree and its b_left the right tree.
 	 */
 	lefttreemax = righttreemin = &dummy;
 	for (;;) {
 		if (lblkno < root->b_lblkno ||
 		    (lblkno == root->b_lblkno &&
 		    (xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) {
 			/* Key sorts before root: descend to the left. */
 			if ((y = root->b_left) == NULL)
 				break;
 			if (lblkno < y->b_lblkno) {
 				/* Rotate right. */
 				root->b_left = y->b_right;
 				y->b_right = root;
 				root = y;
 				if ((y = root->b_left) == NULL)
 					break;
 			}
 			/* Link into the new root's right tree. */
 			righttreemin->b_left = root;
 			righttreemin = root;
 		} else if (lblkno > root->b_lblkno ||
 		    (lblkno == root->b_lblkno &&
 		    (xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) {
 			/* Key sorts after root: descend to the right. */
 			if ((y = root->b_right) == NULL)
 				break;
 			if (lblkno > y->b_lblkno) {
 				/* Rotate left. */
 				root->b_right = y->b_left;
 				y->b_left = root;
 				root = y;
 				if ((y = root->b_right) == NULL)
 					break;
 			}
 			/* Link into the new root's left tree. */
 			lefttreemax->b_right = root;
 			lefttreemax = root;
 		} else {
 			/* Exact match on block number and marker bit. */
 			break;
 		}
 		root = y;
 	}
 	/* Assemble the new root. */
 	lefttreemax->b_right = root->b_left;
 	righttreemin->b_left = root->b_right;
 	root->b_left = dummy.b_right;
 	root->b_right = dummy.b_left;
 	return (root);
 }
 
 /*
  * Take a buffer off whichever of its bufobj's lists (dirty or clean) its
  * xflags say it is on, updating both the splay tree and the tail queue,
  * and clear the list-membership bits.  The bufobj lock must be held.
  */
 static void
 buf_vlist_remove(struct buf *bp)
 {
 	struct bufv *list;
 	struct buf *newroot;
 
 	KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
 	ASSERT_BO_LOCKED(bp->b_bufobj);
 	KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) !=
 	    (BX_VNDIRTY|BX_VNCLEAN),
 	    ("buf_vlist_remove: Buf %p is on two lists", bp));
 
 	list = (bp->b_xflags & BX_VNDIRTY) ? &bp->b_bufobj->bo_dirty :
 	    &bp->b_bufobj->bo_clean;
 
 	/* Bring the buffer to the root unless it is already there. */
 	if (list->bv_root != bp) {
 		newroot = buf_splay(bp->b_lblkno, bp->b_xflags, list->bv_root);
 		KASSERT(newroot == bp, ("splay lookup failed in remove"));
 	}
 
 	/*
 	 * Join the two subtrees: splay the left one on the buffer's key
 	 * and hang the right one off the resulting root.
 	 */
 	if (bp->b_left != NULL) {
 		newroot = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left);
 		newroot->b_right = bp->b_right;
 	} else {
 		newroot = bp->b_right;
 	}
 	list->bv_root = newroot;
 
 	TAILQ_REMOVE(&list->bv_hd, bp, b_bobufs);
 	list->bv_cnt -= 1;
 	bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 }
 
 /*
  * Add the buffer to the sorted clean or dirty block list using a
  * splay tree algorithm.
  *
  * NOTE: xflags is passed as a constant, optimizing this inline function!
  */
 static void
 buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
 {
 	struct bufv *list;
 	struct buf *near;
 
 	ASSERT_BO_LOCKED(bo);
 	KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
 	    ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
 	bp->b_xflags |= xflags;
 	list = (xflags & BX_VNDIRTY) ? &bo->bo_dirty : &bo->bo_clean;
 
 	/*
 	 * Splay on the new key so the current root becomes a neighbour
 	 * of the buffer being inserted.
 	 */
 	near = buf_splay(bp->b_lblkno, bp->b_xflags, list->bv_root);
 	if (near == NULL) {
 		/* Empty list: the buffer is the only node. */
 		bp->b_left = NULL;
 		bp->b_right = NULL;
 		TAILQ_INSERT_TAIL(&list->bv_hd, bp, b_bobufs);
 	} else if (bp->b_lblkno > near->b_lblkno ||
 	    (bp->b_lblkno == near->b_lblkno &&
 	    (bp->b_xflags & BX_BKGRDMARKER) >=
 	    (near->b_xflags & BX_BKGRDMARKER))) {
 		/* New buffer does not sort before the old root. */
 		bp->b_right = near->b_right;
 		bp->b_left = near;
 		near->b_right = NULL;
 		TAILQ_INSERT_AFTER(&list->bv_hd, near, bp, b_bobufs);
 	} else {
 		/* New buffer sorts before the old root. */
 		bp->b_left = near->b_left;
 		bp->b_right = near;
 		near->b_left = NULL;
 		TAILQ_INSERT_BEFORE(near, bp, b_bobufs);
 	}
 	list->bv_cnt += 1;
 	list->bv_root = bp;
 }
 
 /*
  * Lookup a buffer using the splay tree.  Note that we specifically avoid
  * shadow buffers used in background bitmap writes.
  *
  * This code isn't quite as efficient as it could be because we are maintaining
  * two sorted lists and do not know which list the block resides in.
  *
  * During a "make buildworld" the desired buffer is found at one of
  * the roots more than 60% of the time.  Thus, checking both roots
  * before performing either splay eliminates unnecessary splays on the
  * first tree splayed.
  */
 struct buf *
 gbincore(struct bufobj *bo, daddr_t lblkno)
 {
 	struct buf *cand;
 
 	ASSERT_BO_LOCKED(bo);
 
 	/* Cheap checks first: is the block already at either root? */
 	cand = bo->bo_clean.bv_root;
 	if (cand != NULL && cand->b_lblkno == lblkno &&
 	    (cand->b_xflags & BX_BKGRDMARKER) == 0)
 		return (cand);
 	cand = bo->bo_dirty.bv_root;
 	if (cand != NULL && cand->b_lblkno == lblkno &&
 	    (cand->b_xflags & BX_BKGRDMARKER) == 0)
 		return (cand);
 
 	/* Not at a root: splay the clean tree, then the dirty tree. */
 	cand = bo->bo_clean.bv_root;
 	if (cand != NULL) {
 		cand = buf_splay(lblkno, 0, cand);
 		bo->bo_clean.bv_root = cand;
 		if (cand->b_lblkno == lblkno &&
 		    (cand->b_xflags & BX_BKGRDMARKER) == 0)
 			return (cand);
 	}
 	cand = bo->bo_dirty.bv_root;
 	if (cand != NULL) {
 		cand = buf_splay(lblkno, 0, cand);
 		bo->bo_dirty.bv_root = cand;
 		if (cand->b_lblkno == lblkno &&
 		    (cand->b_xflags & BX_BKGRDMARKER) == 0)
 			return (cand);
 	}
 	return (NULL);
 }
 
 /*
  * Associate a buffer with a vnode.
  */
 void
 bgetvp(struct vnode *vp, struct buf *bp)
 {
 	struct bufobj *bo = &vp->v_bufobj;
 
 	ASSERT_BO_LOCKED(bo);
 	VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free"));
 
 	CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags);
 	VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp,
 	    ("bgetvp: bp already attached! %p", bp));
 
 	/* The buffer keeps a hold on the vnode while it is attached. */
 	vhold(vp);
 	if (VFS_NEEDSGIANT(vp->v_mount) || (bo->bo_flag & BO_NEEDSGIANT))
 		bp->b_flags |= B_NEEDSGIANT;
 	bp->b_vp = vp;
 	bp->b_bufobj = bo;
 
 	/* A freshly attached buffer starts out on the clean list. */
 	buf_vlist_add(bp, bo, BX_VNCLEAN);
 }
 
 /*
  * Disassociate a buffer from a vnode.
  */
 void
 brelvp(struct buf *bp)
 {
 	struct vnode *owner;
 	struct bufobj *bo;
 
 	CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
 	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
 
 	owner = bp->b_vp;		/* XXX */
 	bo = bp->b_bufobj;
 
 	BO_LOCK(bo);
 	/* The buffer must currently be on the clean or the dirty list. */
 	if ((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0)
 		panic("brelvp: Buffer %p not on queue.", bp);
 	buf_vlist_remove(bp);
 
 	/*
 	 * With no dirty buffers left, the bufobj no longer belongs on
 	 * the syncer worklist.
 	 */
 	if ((bo->bo_flag & BO_ONWORKLST) != 0 && bo->bo_dirty.bv_cnt == 0) {
 		bo->bo_flag &= ~BO_ONWORKLST;
 		mtx_lock(&sync_mtx);
 		LIST_REMOVE(bo, bo_synclist);
 		syncer_worklist_len -= 1;
 		mtx_unlock(&sync_mtx);
 	}
 
 	/* Sever the buffer's links to the vnode and its bufobj. */
 	bp->b_flags &= ~B_NEEDSGIANT;
 	bp->b_vp = NULL;
 	bp->b_bufobj = NULL;
 	BO_UNLOCK(bo);
 
 	/* Release the hold the buffer had on the vnode. */
 	vdrop(owner);
 }
 
 /*
  * Add an item to the syncer work queue.
  */
 static void
 vn_syncer_add_to_worklist(struct bufobj *bo, int delay)
 {
 	int slot, which;
 
 	ASSERT_BO_LOCKED(bo);
 
 	mtx_lock(&sync_mtx);
 	if ((bo->bo_flag & BO_ONWORKLST) == 0) {
 		/* First time on the worklist: account for it. */
 		bo->bo_flag |= BO_ONWORKLST;
 		syncer_worklist_len += 1;
 	} else {
 		/* Already queued: unlink so it can be re-slotted below. */
 		LIST_REMOVE(bo, bo_synclist);
 	}
 
 	/* Clamp the delay, then pick the slot relative to the current one. */
 	if (delay > syncer_maxdelay - 2)
 		delay = syncer_maxdelay - 2;
 	slot = (syncer_delayno + delay) & syncer_mask;
 
 	/* Giant-requiring filesystems get a queue of their own. */
 	if (VFS_NEEDSGIANT(bo->__bo_vnode->v_mount))
 		which = WI_GIANTQ;
 	else
 		which = WI_MPSAFEQ;
 	LIST_INSERT_HEAD(&syncer_workitem_pending[which][slot], bo,
 	    bo_synclist);
 	mtx_unlock(&sync_mtx);
 }
 
 /*
  * Sysctl handler: report the syncer worklist length with the
  * sync_vnode_count entries subtracted, sampled under sync_mtx.
  */
 static int
 sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS)
 {
 	int pending;
 
 	mtx_lock(&sync_mtx);
 	pending = syncer_worklist_len - sync_vnode_count;
 	mtx_unlock(&sync_mtx);
 
 	return (SYSCTL_OUT(req, &pending, sizeof(pending)));
 }
 
 /* Read-only integer node "worklist_len" under _vfs, served by the handler. */
 SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
     sysctl_vfs_worklist_len, "I", "Syncer thread worklist length");
 
 /*
  * Descriptor for the "syncer" kernel process, whose entry point is
  * sched_sync().  It is handed to kproc_start() by the SYSINIT below;
  * updateproc presumably receives the created process - confirm against
  * struct kproc_desc.
  */
 static struct proc *updateproc;
 static void sched_sync(void);
 static struct kproc_desc up_kp = {
 	"syncer",
 	sched_sync,
 	&updateproc
 };
 SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp);
 
 /*
  * Try to lazily fsync the vnode behind the first bufobj on the given
  * syncer slot list.  Entered with sync_mtx held and returns with it
  * held, although it is dropped while the vnode is being synced.
  *
  * slp - worklist slot to take the first entry from.
  * bo  - out: the bufobj that was examined (NULL if the slot was empty).
  * td  - thread passed to VOP_FSYNC().
  *
  * Returns 0 when the slot was empty or the vnode was processed, and
  * non-zero when the bufobj could not be handled now and is still the
  * caller's to move (the callers in sched_sync() shift it to the next
  * slot).
  */
 static int
 sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
 {
 	struct vnode *vp;
 	struct mount *mp;
 
 	*bo = LIST_FIRST(slp);
 	if (*bo == NULL)
 		return (0);
 	vp = (*bo)->__bo_vnode;	/* XXX */
 	/* Skip vnodes that are locked or whose interlock is contended. */
 	if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0)
 		return (1);
 	/*
 	 * We use vhold in case the vnode does not
 	 * successfully sync.  vhold prevents the vnode from
 	 * going away when we unlock the sync_mtx so that
 	 * we can acquire the vnode interlock.
 	 */
 	vholdl(vp);
 	mtx_unlock(&sync_mtx);
 	VI_UNLOCK(vp);
 	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 		vdrop(vp);
 		mtx_lock(&sync_mtx);
 		/* Only ask for a move if the bufobj still heads the slot. */
 		return (*bo == LIST_FIRST(slp));
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	(void) VOP_FSYNC(vp, MNT_LAZY, td);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	BO_LOCK(*bo);
 	if (((*bo)->bo_flag & BO_ONWORKLST) != 0) {
 		/*
 		 * Put us back on the worklist.  The worklist
 		 * routine will remove us from our current
 		 * position and then add us back in at a later
 		 * position.
 		 */
 		vn_syncer_add_to_worklist(*bo, syncdelay);
 	}
 	BO_UNLOCK(*bo);
 	vdrop(vp);
 	mtx_lock(&sync_mtx);
 	return (0);
 }
 
 /*
  * System filesystem synchronizer daemon.
  */
 /*
  * Main loop of the syncer process; never returns.  Each iteration
  * takes the worklist slot at syncer_delayno, advances syncer_delayno,
  * and runs sync_vnode() over the MP-safe list and then (under Giant)
  * the Giant list for that slot.  Entries that cannot be processed are
  * moved to the next slot.  sync_mtx is held across the loop except
  * where it is explicitly dropped.
  */
 static void
 sched_sync(void)
 {
 	struct synclist *gnext, *next;
 	struct synclist *gslp, *slp;
 	struct bufobj *bo;
 	long starttime;
 	struct thread *td = curthread;
 	int last_work_seen;
 	int net_worklist_len;
 	int syncer_final_iter;
 	int first_printf;
 	int error;
 
 	last_work_seen = 0;
 	syncer_final_iter = 0;
 	first_printf = 1;
 	syncer_state = SYNCER_RUNNING;
 	starttime = time_uptime;
 	td->td_pflags |= TDP_NORUNNINGBUF;
 
 	/* syncer_shutdown() is invoked on the shutdown_pre_sync event. */
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
 	    SHUTDOWN_PRI_LAST);
 
 	mtx_lock(&sync_mtx);
 	for (;;) {
 		/* Final-delay iterations are used up: allow suspension. */
 		if (syncer_state == SYNCER_FINAL_DELAY &&
 		    syncer_final_iter == 0) {
 			mtx_unlock(&sync_mtx);
 			kproc_suspend_check(td->td_proc);
 			mtx_lock(&sync_mtx);
 		}
 		net_worklist_len = syncer_worklist_len - sync_vnode_count;
 		/* While shutting down, print progress when the second changes. */
 		if (syncer_state != SYNCER_RUNNING &&
 		    starttime != time_uptime) {
 			if (first_printf) {
 				printf("\nSyncing disks, vnodes remaining...");
 				first_printf = 0;
 			}
 			printf("%d ", net_worklist_len);
 		}
 		starttime = time_uptime;
 
 		/*
 		 * Push files whose dirty time has expired.  Be careful
 		 * of interrupt race on slp queue.
 		 *
 		 * Skip over empty worklist slots when shutting down.
 		 */
 		do {
 			slp = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno];
 			gslp = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno];
 			syncer_delayno += 1;
 			if (syncer_delayno == syncer_maxdelay)
 				syncer_delayno = 0;
 			next = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno];
 			gnext = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno];
 			/*
 			 * If the worklist has wrapped since it was
 			 * emptied of all but syncer vnodes,
 			 * switch to the FINAL_DELAY state and run
 			 * for one more second.
 			 */
 			if (syncer_state == SYNCER_SHUTTING_DOWN &&
 			    net_worklist_len == 0 &&
 			    last_work_seen == syncer_delayno) {
 				syncer_state = SYNCER_FINAL_DELAY;
 				syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP;
 			}
 		} while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) &&
 		    LIST_EMPTY(gslp) && syncer_worklist_len > 0);
 
 		/*
 		 * Keep track of the last time there was anything
 		 * on the worklist other than syncer vnodes.
 		 * Return to the SHUTTING_DOWN state if any
 		 * new work appears.
 		 */
 		if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING)
 			last_work_seen = syncer_delayno;
 		if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY)
 			syncer_state = SYNCER_SHUTTING_DOWN;
 		/* Drain the MP-safe slot; deferred entries go to the next slot. */
 		while (!LIST_EMPTY(slp)) {
 			error = sync_vnode(slp, &bo, td);
 			if (error == 1) {
 				LIST_REMOVE(bo, bo_synclist);
 				LIST_INSERT_HEAD(next, bo, bo_synclist);
 				continue;
 			}
 		}
 		if (!LIST_EMPTY(gslp)) {
 			/* Giant is taken with sync_mtx dropped, then sync_mtx re-taken. */
 			mtx_unlock(&sync_mtx);
 			mtx_lock(&Giant);
 			mtx_lock(&sync_mtx);
 			while (!LIST_EMPTY(gslp)) {
 				error = sync_vnode(gslp, &bo, td);
 				if (error == 1) {
 					LIST_REMOVE(bo, bo_synclist);
 					LIST_INSERT_HEAD(gnext, bo,
 					    bo_synclist);
 					continue;
 				}
 			}
 			mtx_unlock(&Giant);
 		}
 		if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0)
 			syncer_final_iter--;
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process. A rushjob
 		 * value of N tells the filesystem syncer to process the next
 		 * N seconds worth of work on its queue ASAP. Currently rushjob
 		 * is used by the soft update code to speed up the filesystem
 		 * syncer process when the incore state is getting so far
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
 		if (rushjob > 0) {
 			rushjob -= 1;
 			continue;
 		}
 		/*
 		 * Just sleep for a short period of time between
 		 * iterations when shutting down to allow some I/O
 		 * to happen.
 		 *
 		 * If it has taken us less than a second to process the
 		 * current work, then wait. Otherwise start right over
 		 * again. We can still lose time if any single round
 		 * takes more than two seconds, but it does not really
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
 		if (syncer_state != SYNCER_RUNNING)
 			cv_timedwait(&sync_wakeup, &sync_mtx,
 			    hz / SYNCER_SHUTDOWN_SPEEDUP);
 		else if (time_uptime == starttime)
 			cv_timedwait(&sync_wakeup, &sync_mtx, hz);
 	}
 }
 
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
  */
 int
 speedup_syncer(void)
 {
 	int bumped;
 
 	mtx_lock(&sync_mtx);
 	/* Only grant the request while rushjob is below half of syncdelay. */
 	bumped = (rushjob < syncdelay / 2);
 	if (bumped) {
 		rushjob++;
 		stat_rush_requests++;
 	}
 	mtx_unlock(&sync_mtx);
 
 	/* Wake the syncer whether or not the request was granted. */
 	cv_broadcast(&sync_wakeup);
 	return (bumped);
 }
 
 /*
  * Tell the syncer to speed up its work and run through its work
  * list several times, then tell it to shut down.
  */
 static void
 syncer_shutdown(void *arg, int howto)
 {
 
 	/* With RB_NOSYNC set there is nothing to do at all. */
 	if ((howto & RB_NOSYNC) == 0) {
 		/* Switch the syncer into shutdown mode and cancel any rush. */
 		mtx_lock(&sync_mtx);
 		syncer_state = SYNCER_SHUTTING_DOWN;
 		rushjob = 0;
 		mtx_unlock(&sync_mtx);
 
 		/* Wake the syncer, then run the generic kproc shutdown. */
 		cv_broadcast(&sync_wakeup);
 		kproc_shutdown(arg, howto);
 	}
 }
 
 /*
  * Reassign a buffer from one vnode to another.
  * Used to assign file specific control information
  * (indirect blocks) to the vnode to which they belong.
  */
 void
 reassignbuf(struct buf *bp)
 {
 	struct vnode *vp = bp->b_vp;
 	struct bufobj *bo = bp->b_bufobj;
 	int delay;
 #ifdef INVARIANTS
 	struct bufv *bv;
 #endif
 
 	++reassignbufcalls;
 
 	CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X",
 	    bp, bp->b_vp, bp->b_flags);
 	/*
 	 * Paging buffers are refused: their vp is not fully linked in.
 	 */
 	if (bp->b_flags & B_PAGING)
 		panic("cannot reassign paging buffer");
 
 	/*
 	 * Take the buffer off the list it is currently on; it must be
 	 * on one of them.
 	 */
 	BO_LOCK(bo);
 	if ((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0)
 		panic("reassignbuf: Buffer %p not on queue.", bp);
 	buf_vlist_remove(bp);
 
 	if ((bp->b_flags & B_DELWRI) == 0) {
 		/* Clean buffer: goes on the clean list. */
 		buf_vlist_add(bp, bo, BX_VNCLEAN);
 
 		/* No dirty buffers remain: leave the syncer worklist. */
 		if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) {
 			mtx_lock(&sync_mtx);
 			LIST_REMOVE(bo, bo_synclist);
 			syncer_worklist_len--;
 			mtx_unlock(&sync_mtx);
 			bo->bo_flag &= ~BO_ONWORKLST;
 		}
 	} else {
 		/*
 		 * Delayed-write buffer: make sure the bufobj is queued
 		 * for the syncer, with a delay chosen by vnode type, then
 		 * put the buffer on the dirty list.
 		 */
 		if ((bo->bo_flag & BO_ONWORKLST) == 0) {
 			if (vp->v_type == VDIR)
 				delay = dirdelay;
 			else if (vp->v_type == VCHR)
 				delay = metadelay;
 			else
 				delay = filedelay;
 			vn_syncer_add_to_worklist(bo, delay);
 		}
 		buf_vlist_add(bp, bo, BX_VNDIRTY);
 	}
 #ifdef INVARIANTS
 	/* Sanity-check the first and last entries of both lists. */
 	bv = &bo->bo_clean;
 	bp = TAILQ_FIRST(&bv->bv_hd);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 	bp = TAILQ_LAST(&bv->bv_hd, buflists);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 	bv = &bo->bo_dirty;
 	bp = TAILQ_FIRST(&bv->bv_hd);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 	bp = TAILQ_LAST(&bv->bv_hd, buflists);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 #endif
 	BO_UNLOCK(bo);
 }
 
 /*
  * Increment the use and hold counts on the vnode, taking care to reference
  * the driver's usecount if this is a chardev.  The vholdl() will remove
  * the vnode from the free list if it is presently free.  Requires the
  * vnode interlock and returns with it held.
  */
 static void
 v_incr_usecount(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vp->v_usecount += 1;
 	/* A character device also counts the use on its cdev. */
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount += 1;
 		dev_unlock();
 	}
 	/* Every use reference is backed by a hold reference. */
 	vholdl(vp);
 }
 
 /*
  * Turn a holdcnt into a use+holdcnt such that only one call to
  * v_decr_usecount is needed.
  */
 static void
 v_upgrade_usecount(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	/* The hold count is left alone; only the use count is added. */
 	vp->v_usecount += 1;
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount += 1;
 		dev_unlock();
 	}
 }
 
 /*
  * Decrement the vnode use and hold count along with the driver's usecount
  * if this is a chardev.  The vdropl() below releases the vnode interlock
  * as it may free the vnode.
  */
 static void
 v_decr_usecount(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __FUNCTION__);
 	VNASSERT(vp->v_usecount > 0, vp,
 	    ("v_decr_usecount: negative usecount"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
 	vp->v_usecount -= 1;
 	/* Mirror the drop on the cdev for character devices. */
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount -= 1;
 		dev_unlock();
 	}
 	/* Give back the hold reference that backed this use. */
 	vdropl(vp);
 }
 
 /*
  * Decrement only the use count and driver use count.  This is intended to
  * be paired with a follow on vdropl() to release the remaining hold count.
  * In this way we may vgone() a vnode with a 0 usecount without risk of
  * having it end up on a free list because the hold count is kept above 0.
  */
 static void
 v_decr_useonly(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __FUNCTION__);
 	VNASSERT(vp->v_usecount > 0, vp,
 	    ("v_decr_useonly: negative usecount"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
 	/* The hold count is deliberately not touched here. */
 	vp->v_usecount -= 1;
 	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
 		dev_lock();
 		vp->v_rdev->si_usecount -= 1;
 		dev_unlock();
 	}
 }
 
 /*
  * Grab a particular vnode from the free list, increment its
  * reference count and lock it.  VI_DOOMED is set if the vnode
  * is being destroyed.  Only callers who specify LK_RETRY will
  * see doomed vnodes.  If inactive processing was delayed in
  * vput try to do it here.
  */
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
 	int error;
 
 	error = 0;
 	VFS_ASSERT_GIANT(vp->v_mount);
 	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
 	    ("vget: invalid lock operation"));
 	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
 
 	if ((flags & LK_INTERLOCK) == 0)
 		VI_LOCK(vp);
 	vholdl(vp);
 	if ((error = vn_lock(vp, flags | LK_INTERLOCK)) != 0) {
 		vdrop(vp);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
 		return (error);
 	}
 	if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0)
 		panic("vget: vn_lock failed to return ENOENT\n");
 	VI_LOCK(vp);
 	/* Upgrade our holdcnt to a usecount. */
 	v_upgrade_usecount(vp);
 	/*
 	 * We don't guarantee that any particular close will
 	 * trigger inactive processing so just make a best effort
 	 * here at preventing a reference to a removed file.  If
 	 * we don't succeed no harm is done.
 	 */
 	if (vp->v_iflag & VI_OWEINACT) {
 		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
 		    (flags & LK_NOWAIT) == 0)
 			vinactive(vp, td);
 		vp->v_iflag &= ~VI_OWEINACT;
 	}
 	VI_UNLOCK(vp);
 	return (0);
 }
 
 /*
  * Increase the reference count of a vnode.
  */
 void
 vref(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	/* The use count is bumped with the vnode interlock held. */
 	VI_LOCK(vp);
 	v_incr_usecount(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Return reference count of a vnode.
  *
  * The results of this call are only guaranteed when some mechanism other
  * than the VI lock is used to stop other processes from gaining references
  * to the vnode.  This may be the case if the caller holds the only reference.
  * This is also useful when stale data is acceptable as race conditions may
  * be accounted for by some other means.
  */
 int
 vrefcnt(struct vnode *vp)
 {
 	int count;
 
 	/* Sample the use count under the vnode interlock. */
 	VI_LOCK(vp);
 	count = vp->v_usecount;
 	VI_UNLOCK(vp);
 	return (count);
 }
 
 /*
  * Selector values for vputx(), one per public entry point:
  * vrele(), vput() and vunref() respectively.
  */
 #define	VPUTX_VRELE	1
 #define	VPUTX_VPUT	2
 #define	VPUTX_VUNREF	3
 
 /*
  * Common implementation behind vrele(), vput() and vunref(): drop one
  * use reference on vp and, when it was the last one, try to run the
  * inactive processing.  func is one of the VPUTX_* values and selects
  * the locking behaviour: VPUTX_VRELE takes the vnode lock itself,
  * VPUTX_VPUT expects the vnode locked and unlocks it, VPUTX_VUNREF
  * expects the vnode locked and leaves it locked.
  */
 static void
 vputx(struct vnode *vp, int func)
 {
 	int error;
 
 	KASSERT(vp != NULL, ("vputx: null vp"));
 	if (func == VPUTX_VUNREF)
-		ASSERT_VOP_ELOCKED(vp, "vunref");
+		ASSERT_VOP_LOCKED(vp, "vunref");
 	else if (func == VPUTX_VPUT)
 		ASSERT_VOP_LOCKED(vp, "vput");
 	else
 		KASSERT(func == VPUTX_VRELE, ("vputx: wrong func"));
 	VFS_ASSERT_GIANT(vp->v_mount);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	VI_LOCK(vp);
 
 	/* Skip this v_writecount check if we're going to panic below. */
 	VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp,
 	    ("vputx: missed vn_close"));
 	error = 0;
 
 	/*
 	 * Not the last use (or inactive processing is already running):
 	 * just drop the reference.  v_decr_usecount() ends in vdropl().
 	 */
 	if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) &&
 	    vp->v_usecount == 1)) {
 		if (func == VPUTX_VPUT)
 			VOP_UNLOCK(vp, 0);
 		v_decr_usecount(vp);
 		return;
 	}
 
 	if (vp->v_usecount != 1) {
 		vprint("vputx: negative ref count", vp);
 		panic("vputx: negative ref cnt");
 	}
 	CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp);
 	/*
 	 * We want to hold the vnode until the inactive finishes to
 	 * prevent vgone() races.  We drop the use count here and the
 	 * hold count below when we're done.
 	 */
 	v_decr_useonly(vp);
 	/*
 	 * We must call VOP_INACTIVE with the node locked. Mark
 	 * as VI_DOINGINACT to avoid recursion.
 	 */
 	vp->v_iflag |= VI_OWEINACT;
-	if (func == VPUTX_VRELE) {
+	switch (func) {
+	case VPUTX_VRELE:
 		error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK);
 		VI_LOCK(vp);
-	} else if (func == VPUTX_VPUT && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
-		error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT);
-		VI_LOCK(vp);
+		break;
+	case VPUTX_VPUT:
+		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+			error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK |
+			    LK_NOWAIT);
+			VI_LOCK(vp);
+		}
+		break;
+	case VPUTX_VUNREF:
+		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
+			error = EBUSY;
+		break;
 	}
 	/* Someone took a new use reference meanwhile: nothing is owed. */
 	if (vp->v_usecount > 0)
 		vp->v_iflag &= ~VI_OWEINACT;
 	/* error != 0 means no exclusive lock; VI_OWEINACT then stays set. */
 	if (error == 0) {
 		if (vp->v_iflag & VI_OWEINACT)
 			vinactive(vp, curthread);
 		if (func != VPUTX_VUNREF)
 			VOP_UNLOCK(vp, 0);
 	}
 	vdropl(vp);
 }
 
 /*
  * Vnode put/release.
  * If count drops to zero, call inactive routine and return to freelist.
  *
  * Thin wrapper around vputx(VPUTX_VRELE); on that path vputx() acquires
  * the vnode lock itself before calling the inactive routine.
  */
 void
 vrele(struct vnode *vp)
 {
 
 	vputx(vp, VPUTX_VRELE);
 }
 
 /*
  * Release an already locked vnode.  This gives the same effect as
  * unlock+vrele(), but takes less time and avoids releasing and
  * re-acquiring the lock (as vrele() acquires the lock internally).
  *
  * Thin wrapper around vputx(VPUTX_VPUT).
  */
 void
 vput(struct vnode *vp)
 {
 
 	vputx(vp, VPUTX_VPUT);
 }
 
 /*
  * Release a locked vnode.  Do not unlock the vnode lock.
  *
  * vputx() only asserts that the vnode lock is held; when the lock is not
  * held exclusively and this is the last use reference, the inactive call
  * is skipped (vputx() treats that case as EBUSY).
  */
 void
 vunref(struct vnode *vp)
 {
 
 	vputx(vp, VPUTX_VUNREF);
 }
 
 /*
  * Somebody doesn't want the vnode recycled.
  *
  * Takes the vnode interlock around vholdl(), which does the actual
  * hold-count increment.
  */
 void
 vhold(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	vholdl(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Increment the hold count of the vnode.  vhold() calls this with the
  * vnode interlock held, and vbusy() asserts that it is held.
  */
 void
 vholdl(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vp->v_holdcnt++;
 	/* Mark the vnode in use when VSHOULDBUSY() says so. */
 	if (VSHOULDBUSY(vp))
 		vbusy(vp);
 }
 
 /*
  * Note that there is one less who cares about this vnode.  vdrop() is the
  * opposite of vhold().
  *
  * The interlock taken here is not released in this function; vdropl()
  * unlocks it (NOTE(review): on the vdestroy() path vdropl() returns
  * without unlocking, presumably leaving that to vdestroy() - confirm).
  */
 void
 vdrop(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	vdropl(vp);
 }
 
 /*
  * Release one hold reference on a vnode whose interlock the caller holds.
  * When the last hold goes away, a vgone'd (doomed) vnode is destroyed and
  * any other vnode is placed on the free list.  The interlock is released
  * here on every path except the destroy path, which returns right after
  * vdestroy().
  */
 void
 vdropl(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, "vdropl");
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	if (vp->v_holdcnt <= 0)
 		panic("vdrop: holdcnt %d", vp->v_holdcnt);
 
 	/* Other holders remain: nothing more to do. */
 	if (--vp->v_holdcnt != 0) {
 		VI_UNLOCK(vp);
 		return;
 	}
 
 	/* Last hold on a doomed vnode: tear it down. */
 	if ((vp->v_iflag & VI_DOOMED) != 0) {
 		CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__,
 		    vp);
 		vdestroy(vp);
 		return;
 	}
 
 	/* Last hold on a live vnode: put it on the free list. */
 	vfree(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT
  * flags.  DOINGINACT prevents us from recursing in calls to vinactive.
  * OWEINACT tracks whether a vnode missed a call to inactive due to a
  * failed lock upgrade.
  *
  * Entered with the vnode exclusively locked and the interlock held; the
  * interlock is dropped around VOP_INACTIVE() and held again on return.
  */
 static void
 vinactive(struct vnode *vp, struct thread *td)
 {
 
 	ASSERT_VOP_ELOCKED(vp, "vinactive");
 	ASSERT_VI_LOCKED(vp, "vinactive");
 	VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
 	    ("vinactive: recursed on VI_DOINGINACT"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	/* The owed inactive call is being made now. */
 	vp->v_iflag |= VI_DOINGINACT;
 	vp->v_iflag &= ~VI_OWEINACT;
 	VI_UNLOCK(vp);
 	VOP_INACTIVE(vp, td);
 	VI_LOCK(vp);
 	VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
 	    ("vinactive: lost VI_DOINGINACT"));
 	vp->v_iflag &= ~VI_DOINGINACT;
 }
 
 /*
  * Remove any vnodes in the vnode table belonging to mount point mp.
  *
  * If FORCECLOSE is not specified, there should not be any active ones,
  * return error if any are found (nb: this is a user error, not a
  * system error). If FORCECLOSE is specified, detach any active vnodes
  * that are found.
  *
  * If WRITECLOSE is set, only flush out regular file vnodes open for
  * writing.
  *
  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
  *
  * `rootrefs' specifies the base reference count for the root vnode
  * of this filesystem. The root vnode is considered busy if its
  * v_usecount exceeds this value. On a successful return, vflush()
  * will have called vrele() on the root vnode exactly rootrefs times.
  * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must
  * be zero.
  */
 #ifdef DIAGNOSTIC
 static int busyprt = 0;		/* print out busy vnodes */
 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes");
 #endif
 
 /*
  * Flush the vnodes of mount point mp; see the description preceding the
  * busyprt declaration for the meaning of rootrefs and flags.
  * Returns 0 on success, EBUSY if busy vnodes remain, or the error from
  * VFS_ROOT().
  */
 int
 vflush(struct mount *mp, int rootrefs, int flags, struct thread *td)
 {
 	struct vnode *vp, *mvp, *rootvp = NULL;
 	struct vattr vattr;
 	int busy = 0, error;
 
 	CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp,
 	    rootrefs, flags);
 	if (rootrefs > 0) {
 		KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0,
 		    ("vflush: bad args"));
 		/*
 		 * Get the filesystem root vnode. We can vput() it
 		 * immediately, since with rootrefs > 0, it won't go away.
 		 */
 		if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) {
 			CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d",
 			    __func__, error);
 			return (error);
 		}
 		vput(rootvp);
 	}
 	MNT_ILOCK(mp);
 loop:
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		/*
 		 * Take a hold reference before dropping the mount interlock,
 		 * then lock the vnode exclusively (handing over the vnode
 		 * interlock).
 		 */
 		VI_LOCK(vp);
 		vholdl(vp);
 		MNT_IUNLOCK(mp);
 		error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE);
 		if (error) {
 			/* Could not lock it: abort this pass and rescan. */
 			vdrop(vp);
 			MNT_ILOCK(mp);
 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
 			goto loop;
 		}
 		/*
 		 * Skip over a vnodes marked VV_SYSTEM.
 		 */
 		if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
 			VOP_UNLOCK(vp, 0);
 			vdrop(vp);
 			MNT_ILOCK(mp);
 			continue;
 		}
 		/*
 		 * If WRITECLOSE is set, flush out unlinked but still open
 		 * files (even if open only for reading) and regular file
 		 * vnodes open for writing.
 		 */
 		if (flags & WRITECLOSE) {
 			error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 			VI_LOCK(vp);
 
 			if ((vp->v_type == VNON ||
 			    (error == 0 && vattr.va_nlink > 0)) &&
 			    (vp->v_writecount == 0 || vp->v_type != VREG)) {
 				VOP_UNLOCK(vp, 0);
 				vdropl(vp);
 				MNT_ILOCK(mp);
 				continue;
 			}
 		} else
 			VI_LOCK(vp);
 		/*
 		 * With v_usecount == 0, all we need to do is clear out the
 		 * vnode data structures and we are done.
 		 *
 		 * If FORCECLOSE is set, forcibly close the vnode.
 		 */
 		if (vp->v_usecount == 0 || (flags & FORCECLOSE)) {
 			VNASSERT(vp->v_usecount == 0 ||
 			    (vp->v_type != VCHR && vp->v_type != VBLK), vp,
 			    ("device VNODE %p is FORCECLOSED", vp));
 			vgonel(vp);
 		} else {
 			busy++;
 #ifdef DIAGNOSTIC
 			if (busyprt)
 				vprint("vflush: busy vnode", vp);
 #endif
 		}
 		/* Unlock, drop the hold (and interlock), resume the scan. */
 		VOP_UNLOCK(vp, 0);
 		vdropl(vp);
 		MNT_ILOCK(mp);
 	}
 	MNT_IUNLOCK(mp);
 	if (rootrefs > 0 && (flags & FORCECLOSE) == 0) {
 		/*
 		 * If just the root vnode is busy, and if its refcount
 		 * is equal to `rootrefs', then go ahead and kill it.
 		 */
 		VI_LOCK(rootvp);
 		KASSERT(busy > 0, ("vflush: not busy"));
 		VNASSERT(rootvp->v_usecount >= rootrefs, rootvp,
 		    ("vflush: usecount %d < rootrefs %d",
 		     rootvp->v_usecount, rootrefs));
 		if (busy == 1 && rootvp->v_usecount == rootrefs) {
 			VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK);
 			vgone(rootvp);
 			VOP_UNLOCK(rootvp, 0);
 			busy = 0;
 		} else
 			VI_UNLOCK(rootvp);
 	}
 	if (busy) {
 		CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__,
 		    busy);
 		return (EBUSY);
 	}
 	/* Success: release the caller's rootrefs references on the root. */
 	for (; rootrefs > 0; rootrefs--)
 		vrele(rootvp);
 	return (0);
 }
 
 /*
  * Reclaim the vnode via vgonel() if it has no users.  The caller holds
  * the vnode lock exclusively.  Returns 1 if the vnode was reclaimed and
  * 0 if it was still in use.
  */
 int
 vrecycle(struct vnode *vp, struct thread *td)
 {
 	int unused;
 
 	ASSERT_VOP_ELOCKED(vp, "vrecycle");
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	VI_LOCK(vp);
 	unused = (vp->v_usecount == 0);
 	if (unused)
 		vgonel(vp);
 	VI_UNLOCK(vp);
 	return (unused);
 }
 
 /*
  * Eliminate all activity associated with a vnode
  * in preparation for reuse.
  *
  * Wrapper that takes the vnode interlock around vgonel(); vgonel()
  * asserts that the vnode lock is held exclusively.
  */
 void
 vgone(struct vnode *vp)
 {
 	VI_LOCK(vp);
 	vgonel(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * vgone, with the vp interlock held.
  *
  * The vnode must be exclusively locked and held.  The interlock is
  * dropped while the vnode is cleaned out and is held again on return
  * (on the early "already doomed" return it is never dropped).
  */
 void
 vgonel(struct vnode *vp)
 {
 	struct thread *td;
 	int oweinact;
 	int active;
 	struct mount *mp;
 
 	ASSERT_VOP_ELOCKED(vp, "vgonel");
 	ASSERT_VI_LOCKED(vp, "vgonel");
 	VNASSERT(vp->v_holdcnt, vp,
 	    ("vgonel: vp %p has no reference.", vp));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	td = curthread;
 
 	/*
 	 * Don't vgonel if we're already doomed.
 	 */
 	if (vp->v_iflag & VI_DOOMED)
 		return;
 	vp->v_iflag |= VI_DOOMED;
 	/*
 	 * Check to see if the vnode is in use.  If so, we have to call
 	 * VOP_CLOSE() and VOP_INACTIVE().
 	 */
 	active = vp->v_usecount;
 	oweinact = (vp->v_iflag & VI_OWEINACT);
 	VI_UNLOCK(vp);
 	/*
 	 * Clean out any buffers associated with the vnode.
 	 * If the flush fails, just toss the buffers.
 	 */
 	mp = NULL;
 	if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd))
 		(void) vn_start_secondary_write(vp, &mp, V_WAIT);
 	if (vinvalbuf(vp, V_SAVE, 0, 0) != 0)
 		vinvalbuf(vp, 0, 0, 0);
 
 	/*
 	 * If purging an active vnode, it must be closed and
 	 * deactivated before being reclaimed.
 	 */
 	if (active)
 		VOP_CLOSE(vp, FNONBLOCK, NOCRED, td);
 	if (oweinact || active) {
 		VI_LOCK(vp);
 		/* Skip if an inactive call is already in progress. */
 		if ((vp->v_iflag & VI_DOINGINACT) == 0)
 			vinactive(vp, td);
 		VI_UNLOCK(vp);
 	}
 	/*
 	 * Reclaim the vnode.
 	 */
 	if (VOP_RECLAIM(vp, td))
 		panic("vgone: cannot reclaim");
 	/* Pairs with vn_start_secondary_write() above, if it set mp. */
 	if (mp != NULL)
 		vn_finished_secondary_write(mp);
 	VNASSERT(vp->v_object == NULL, vp,
 	    ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag));
 	/*
 	 * Clear the advisory locks and wake up waiting threads.
 	 */
 	(void)VOP_ADVLOCKPURGE(vp);
 	/*
 	 * Delete from old mount point vnode list.
 	 */
 	delmntque(vp);
 	cache_purge(vp);
 	/*
 	 * Done with purge, reset to the standard lock and invalidate
 	 * the vnode.
 	 */
 	VI_LOCK(vp);
 	vp->v_vnlock = &vp->v_lock;
 	vp->v_op = &dead_vnodeops;
 	vp->v_tag = "none";
 	vp->v_type = VBAD;
 }
 
 /*
  * Return the si_usecount of the special device behind vp->v_rdev,
  * sampled under dev_lock().
  */
 int
 vcount(struct vnode *vp)
 {
 	int nrefs;
 
 	dev_lock();
 	nrefs = vp->v_rdev->si_usecount;
 	dev_unlock();
 	return (nrefs);
 }
 
 /*
  * Same as vcount(), but takes the struct cdev * directly.
  */
 int
 count_dev(struct cdev *dev)
 {
 	int nrefs;
 
 	dev_lock();
 	nrefs = dev->si_usecount;
 	dev_unlock();
 	return (nrefs);
 }
 
 /*
  * Print out a description of a vnode.
  */
 /*
  * Printable vnode type names; vn_printf() indexes this table with
  * vp->v_type (NOTE(review): entries presumably follow the order of
  * enum vtype - confirm against the declaration).
  */
 static char *typename[] =
 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD",
  "VMARKER"};
 
 void
 vn_printf(struct vnode *vp, const char *fmt, ...)
 {
 	va_list ap;
 	char buf[256], buf2[16];
 	u_long flags;
 
 	va_start(ap, fmt);
 	vprintf(fmt, ap);
 	va_end(ap);
 	printf("%p: ", (void *)vp);
 	printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]);
 	printf("    usecount %d, writecount %d, refcount %d mountedhere %p\n",
 	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_mountedhere);
 	buf[0] = '\0';
 	buf[1] = '\0';
 	if (vp->v_vflag & VV_ROOT)
 		strlcat(buf, "|VV_ROOT", sizeof(buf));
 	if (vp->v_vflag & VV_ISTTY)
 		strlcat(buf, "|VV_ISTTY", sizeof(buf));
 	if (vp->v_vflag & VV_NOSYNC)
 		strlcat(buf, "|VV_NOSYNC", sizeof(buf));
 	if (vp->v_vflag & VV_CACHEDLABEL)
 		strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf));
 	if (vp->v_vflag & VV_TEXT)
 		strlcat(buf, "|VV_TEXT", sizeof(buf));
 	if (vp->v_vflag & VV_COPYONWRITE)
 		strlcat(buf, "|VV_COPYONWRITE", sizeof(buf));
 	if (vp->v_vflag & VV_SYSTEM)
 		strlcat(buf, "|VV_SYSTEM", sizeof(buf));
 	if (vp->v_vflag & VV_PROCDEP)
 		strlcat(buf, "|VV_PROCDEP", sizeof(buf));
 	if (vp->v_vflag & VV_NOKNOTE)
 		strlcat(buf, "|VV_NOKNOTE", sizeof(buf));
 	if (vp->v_vflag & VV_DELETED)
 		strlcat(buf, "|VV_DELETED", sizeof(buf));
 	if (vp->v_vflag & VV_MD)
 		strlcat(buf, "|VV_MD", sizeof(buf));
 	flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC |
 	    VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP |
 	    VV_NOKNOTE | VV_DELETED | VV_MD);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
 	if (vp->v_iflag & VI_MOUNT)
 		strlcat(buf, "|VI_MOUNT", sizeof(buf));
 	if (vp->v_iflag & VI_AGE)
 		strlcat(buf, "|VI_AGE", sizeof(buf));
 	if (vp->v_iflag & VI_DOOMED)
 		strlcat(buf, "|VI_DOOMED", sizeof(buf));
 	if (vp->v_iflag & VI_FREE)
 		strlcat(buf, "|VI_FREE", sizeof(buf));
 	if (vp->v_iflag & VI_DOINGINACT)
 		strlcat(buf, "|VI_DOINGINACT", sizeof(buf));
 	if (vp->v_iflag & VI_OWEINACT)
 		strlcat(buf, "|VI_OWEINACT", sizeof(buf));
 	flags = vp->v_iflag & ~(VI_MOUNT | VI_AGE | VI_DOOMED | VI_FREE |
 	    VI_DOINGINACT | VI_OWEINACT);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
 	printf("    flags (%s)\n", buf + 1);
 	if (mtx_owned(VI_MTX(vp)))
 		printf(" VI_LOCKed");
 	if (vp->v_object != NULL)
 		printf("    v_object %p ref %d pages %d\n",
 		    vp->v_object, vp->v_object->ref_count,
 		    vp->v_object->resident_page_count);
 	printf("    ");
 	lockmgr_printinfo(vp->v_vnlock);
 	if (vp->v_data != NULL)
 		VOP_PRINT(vp);
 }
 
 #ifdef DDB
 /*
  * List all of the locked vnodes in the system.
  * Called when debugging the kernel.
  *
  * Walks every mount on mountlist and prints, via vprint(), each vnode
  * (markers excluded) for which VOP_ISLOCKED() is non-zero.
  */
 DB_SHOW_COMMAND(lockedvnods, lockedvnodes)
 {
 	struct mount *mp;
 	struct vnode *vp;
 
 	/*
 	 * Note: because this is DDB, we can't obey the locking semantics
 	 * for these structures, which means we could catch an inconsistent
 	 * state and dereference a nasty pointer.  Not much to be done
 	 * about that.
 	 */
 	db_printf("Locked vnodes\n");
 	/*
 	 * Nothing is removed from the list while iterating, so a plain
 	 * TAILQ_FOREACH is sufficient; no saved next pointer is needed.
 	 */
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 			if (vp->v_type != VMARKER &&
 			    VOP_ISLOCKED(vp))
 				vprint("", vp);
 		}
 	}
 }
 
 /*
  * DDB "show vnode <addr>": describe the vnode at the given address.
  * Does nothing when no address was supplied.
  */
 DB_SHOW_COMMAND(vnode, db_show_vnode)
 {
 
 	if (have_addr)
 		vn_printf((struct vnode *)addr, "vnode ");
 }
 
 /*
  * Show details about the given mount point.
  *
  * Without an address, print a one-line summary of every mount on
  * mountlist.  With an address, print that mount's flags (symbolically,
  * unknown bits in hex), options, statfs data, credentials, counters and
  * finally every non-marker vnode on its vnode list.
  */
 DB_SHOW_COMMAND(mount, db_show_mount)
 {
 	struct mount *mp;
 	struct vfsopt *opt;
 	struct statfs *sp;
 	struct vnode *vp;
 	char buf[512];
 	u_int flags;
 
 	if (!have_addr) {
 		/* No address given, print short info about all mount points. */
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			db_printf("%p %s on %s (%s)\n", mp,
 			    mp->mnt_stat.f_mntfromname,
 			    mp->mnt_stat.f_mntonname,
 			    mp->mnt_stat.f_fstypename);
 			if (db_pager_quit)
 				break;
 		}
 		db_printf("\nMore info: show mount <addr>\n");
 		return;
 	}
 
 	mp = (struct mount *)addr;
 	db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname,
 	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename);
 
 	buf[0] = '\0';
 	flags = mp->mnt_flag;
 	/*
 	 * Append the flag's name minus its "MNT_" prefix (hence the + 4) and
 	 * clear the bit, so that only unrecognised bits remain in flags.
 	 */
 #define	MNT_FLAG(flag)	do {						\
 	if (flags & (flag)) {						\
 		if (buf[0] != '\0')					\
 			strlcat(buf, ", ", sizeof(buf));		\
 		strlcat(buf, (#flag) + 4, sizeof(buf));			\
 		flags &= ~(flag);					\
 	}								\
 } while (0)
 	MNT_FLAG(MNT_RDONLY);
 	MNT_FLAG(MNT_SYNCHRONOUS);
 	MNT_FLAG(MNT_NOEXEC);
 	MNT_FLAG(MNT_NOSUID);
 	MNT_FLAG(MNT_UNION);
 	MNT_FLAG(MNT_ASYNC);
 	MNT_FLAG(MNT_SUIDDIR);
 	MNT_FLAG(MNT_SOFTDEP);
 	MNT_FLAG(MNT_NOSYMFOLLOW);
 	MNT_FLAG(MNT_GJOURNAL);
 	MNT_FLAG(MNT_MULTILABEL);
 	MNT_FLAG(MNT_ACLS);
 	MNT_FLAG(MNT_NOATIME);
 	MNT_FLAG(MNT_NOCLUSTERR);
 	MNT_FLAG(MNT_NOCLUSTERW);
 	MNT_FLAG(MNT_NFS4ACLS);
 	MNT_FLAG(MNT_EXRDONLY);
 	MNT_FLAG(MNT_EXPORTED);
 	MNT_FLAG(MNT_DEFEXPORTED);
 	MNT_FLAG(MNT_EXPORTANON);
 	MNT_FLAG(MNT_EXKERB);
 	MNT_FLAG(MNT_EXPUBLIC);
 	MNT_FLAG(MNT_LOCAL);
 	MNT_FLAG(MNT_QUOTA);
 	MNT_FLAG(MNT_ROOTFS);
 	MNT_FLAG(MNT_USER);
 	MNT_FLAG(MNT_IGNORE);
 	MNT_FLAG(MNT_UPDATE);
 	MNT_FLAG(MNT_DELEXPORT);
 	MNT_FLAG(MNT_RELOAD);
 	MNT_FLAG(MNT_FORCE);
 	MNT_FLAG(MNT_SNAPSHOT);
 	MNT_FLAG(MNT_BYFSID);
 #undef MNT_FLAG
 	if (flags != 0) {
 		if (buf[0] != '\0')
 			strlcat(buf, ", ", sizeof(buf));
 		snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
 		    "0x%08x", flags);
 	}
 	db_printf("    mnt_flag = %s\n", buf);
 
 	buf[0] = '\0';
 	flags = mp->mnt_kern_flag;
 	/* As MNT_FLAG above, but strips the five-character "MNTK_" prefix. */
 #define	MNT_KERN_FLAG(flag)	do {					\
 	if (flags & (flag)) {						\
 		if (buf[0] != '\0')					\
 			strlcat(buf, ", ", sizeof(buf));		\
 		strlcat(buf, (#flag) + 5, sizeof(buf));			\
 		flags &= ~(flag);					\
 	}								\
 } while (0)
 	MNT_KERN_FLAG(MNTK_UNMOUNTF);
 	MNT_KERN_FLAG(MNTK_ASYNC);
 	MNT_KERN_FLAG(MNTK_SOFTDEP);
 	MNT_KERN_FLAG(MNTK_NOINSMNTQ);
 	MNT_KERN_FLAG(MNTK_DRAINING);
 	MNT_KERN_FLAG(MNTK_REFEXPIRE);
 	MNT_KERN_FLAG(MNTK_EXTENDED_SHARED);
 	MNT_KERN_FLAG(MNTK_SHARED_WRITES);
 	MNT_KERN_FLAG(MNTK_UNMOUNT);
 	MNT_KERN_FLAG(MNTK_MWAIT);
 	MNT_KERN_FLAG(MNTK_SUSPEND);
 	MNT_KERN_FLAG(MNTK_SUSPEND2);
 	MNT_KERN_FLAG(MNTK_SUSPENDED);
 	MNT_KERN_FLAG(MNTK_MPSAFE);
 	MNT_KERN_FLAG(MNTK_LOOKUP_SHARED);
 	MNT_KERN_FLAG(MNTK_NOKNOTE);
 #undef MNT_KERN_FLAG
 	if (flags != 0) {
 		if (buf[0] != '\0')
 			strlcat(buf, ", ", sizeof(buf));
 		snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
 		    "0x%08x", flags);
 	}
 	db_printf("    mnt_kern_flag = %s\n", buf);
 
 	/* Comma-separated list of the mount option names. */
 	db_printf("    mnt_opt = ");
 	opt = TAILQ_FIRST(mp->mnt_opt);
 	if (opt != NULL) {
 		db_printf("%s", opt->name);
 		opt = TAILQ_NEXT(opt, link);
 		while (opt != NULL) {
 			db_printf(", %s", opt->name);
 			opt = TAILQ_NEXT(opt, link);
 		}
 	}
 	db_printf("\n");
 
 	sp = &mp->mnt_stat;
 	db_printf("    mnt_stat = { version=%u type=%u flags=0x%016jx "
 	    "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju "
 	    "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju "
 	    "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n",
 	    (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags,
 	    (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize,
 	    (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree,
 	    (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files,
 	    (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites,
 	    (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads,
 	    (uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax,
 	    (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]);
 
 	db_printf("    mnt_cred = { uid=%u ruid=%u",
 	    (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid);
 	if (jailed(mp->mnt_cred))
 		db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id);
 	db_printf(" }\n");
 	db_printf("    mnt_ref = %d\n", mp->mnt_ref);
 	db_printf("    mnt_gen = %d\n", mp->mnt_gen);
 	db_printf("    mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
 	db_printf("    mnt_writeopcount = %d\n", mp->mnt_writeopcount);
 	db_printf("    mnt_noasync = %u\n", mp->mnt_noasync);
 	db_printf("    mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
 	db_printf("    mnt_iosize_max = %d\n", mp->mnt_iosize_max);
 	db_printf("    mnt_hashseed = %u\n", mp->mnt_hashseed);
 	db_printf("    mnt_secondary_writes = %d\n", mp->mnt_secondary_writes);
 	db_printf("    mnt_secondary_accwrites = %d\n",
 	    mp->mnt_secondary_accwrites);
 	db_printf("    mnt_gjprovider = %s\n",
 	    mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
 	db_printf("\n");
 
 	/* Dump every real vnode on the mount until the pager is quit. */
 	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 		if (vp->v_type != VMARKER) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
 }
 #endif	/* DDB */
 
 /*
  * Populate the exported struct xvfsconf *xvfsp from the kernel's
  * struct vfsconf *vfsp.
  */
 static void
 vfsconf2x(struct vfsconf *vfsp, struct xvfsconf *xvfsp)
 {
 
 	/*
 	 * Pointer members are of no use to userland; they exist only so
 	 * that the structure layout stays binary compatible.
 	 */
 	xvfsp->vfc_next = NULL;
 	xvfsp->vfc_vfsops = NULL;
 
 	xvfsp->vfc_flags = vfsp->vfc_flags;
 	xvfsp->vfc_refcount = vfsp->vfc_refcount;
 	xvfsp->vfc_typenum = vfsp->vfc_typenum;
 	strcpy(xvfsp->vfc_name, vfsp->vfc_name);
 }
 
 /*
  * Top level filesystem related information gathering.
  */
 static int
 sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS)
 {
 	struct vfsconf *vfsp;
 	struct xvfsconf xvfsp;
 	int error;
 
 	error = 0;
 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
 		bzero(&xvfsp, sizeof(xvfsp));
 		vfsconf2x(vfsp, &xvfsp);
 		error = SYSCTL_OUT(req, &xvfsp, sizeof xvfsp);
 		if (error)
 			break;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLFLAG_RD, NULL, 0, sysctl_vfs_conflist,
     "S,xvfsconf", "List of all configured filesystems");
 
 #ifndef BURN_BRIDGES
 static int	sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS);
 
 static int
 vfs_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1 - 1;	/* XXX */
 	u_int namelen = arg2 + 1;	/* XXX */
 	struct vfsconf *vfsp;
 	struct xvfsconf xvfsp;
 
 	printf("WARNING: userland calling deprecated sysctl, "
 	    "please rebuild world\n");
 
 #if 1 || defined(COMPAT_PRELITE2)
 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
 	if (namelen == 1)
 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
 #endif
 
 	switch (name[1]) {
 	case VFS_MAXTYPENUM:
 		if (namelen != 2)
 			return (ENOTDIR);
 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
 	case VFS_CONF:
 		if (namelen != 3)
 			return (ENOTDIR);	/* overloaded */
 		TAILQ_FOREACH(vfsp, &vfsconf, vfc_list)
 			if (vfsp->vfc_typenum == name[2])
 				break;
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 		bzero(&xvfsp, sizeof(xvfsp));
 		vfsconf2x(vfsp, &xvfsp);
 		return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp)));
 	}
 	return (EOPNOTSUPP);
 }
 
 static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP,
     vfs_sysctl, "Generic filesystem");
 
 #if 1 || defined(COMPAT_PRELITE2)
 
 static int
 sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct vfsconf *vfsp;
 	struct ovfsconf ovfs;
 
 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
 		bzero(&ovfs, sizeof(ovfs));
 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
 		ovfs.vfc_index = vfsp->vfc_typenum;
 		ovfs.vfc_refcount = vfsp->vfc_refcount;
 		ovfs.vfc_flags = vfsp->vfc_flags;
 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
 		if (error)
 			return error;
 	}
 	return 0;
 }
 
 #endif /* 1 || COMPAT_PRELITE2 */
 #endif /* !BURN_BRIDGES */
 
 #define KINFO_VNODESLOP		10
 #ifdef notyet
 /*
  * Dump vnode list (via sysctl).
  *
  * Copies out an array of struct xvnode, one per vnode found on the mount
  * lists, limited to numvnodes + KINFO_VNODESLOP entries.  With no output
  * buffer only the size estimate is reported.
  *
  * NOTE(review): this code is compiled only under "notyet".  vref() and
  * vrele() are called with the mount interlock held, and mountlist_mtx is
  * re-locked at the bottom of every iteration; both need review before
  * the code is enabled.
  */
 /* ARGSUSED */
 static int
 sysctl_vnode(SYSCTL_HANDLER_ARGS)
 {
 	struct xvnode *xvn;
 	struct mount *mp;
 	struct vnode *vp;
 	int error, len, maxn, n;
 
 	/*
 	 * Stale numvnodes access is not fatal here.
 	 */
 	req->lock = 0;
 	/*
 	 * maxn is the capacity of xvn[] in entries and len the same in
 	 * bytes.  The fill loop must be bounded by maxn: comparing the
 	 * entry counter against the byte length would overrun the buffer.
 	 */
 	maxn = numvnodes + KINFO_VNODESLOP;
 	len = maxn * sizeof *xvn;
 	if (!req->oldptr)
 		/* Make an estimate */
 		return (SYSCTL_OUT(req, 0, len));
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK);
 	n = 0;
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 			continue;
 		MNT_ILOCK(mp);
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 			if (n == maxn)
 				break;
 			vref(vp);
 			xvn[n].xv_size = sizeof *xvn;
 			xvn[n].xv_vnode = vp;
 			xvn[n].xv_id = 0;	/* XXX compat */
 #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field
 			XV_COPY(usecount);
 			XV_COPY(writecount);
 			XV_COPY(holdcnt);
 			XV_COPY(mount);
 			XV_COPY(numoutput);
 			XV_COPY(type);
 #undef XV_COPY
 			xvn[n].xv_flag = vp->v_vflag;
 
 			/*
 			 * Type-specific member.  A skipped vnode leaves n
 			 * unchanged, so its slot is reused by the next one.
 			 */
 			switch (vp->v_type) {
 			case VREG:
 			case VDIR:
 			case VLNK:
 				break;
 			case VBLK:
 			case VCHR:
 				if (vp->v_rdev == NULL) {
 					vrele(vp);
 					continue;
 				}
 				xvn[n].xv_dev = dev2udev(vp->v_rdev);
 				break;
 			case VSOCK:
 				xvn[n].xv_socket = vp->v_socket;
 				break;
 			case VFIFO:
 				xvn[n].xv_fifo = vp->v_fifoinfo;
 				break;
 			case VNON:
 			case VBAD:
 			default:
 				/* shouldn't happen? */
 				vrele(vp);
 				continue;
 			}
 			vrele(vp);
 			++n;
 		}
 		MNT_IUNLOCK(mp);
 		mtx_lock(&mountlist_mtx);
 		vfs_unbusy(mp);
 		if (n == maxn)
 			break;
 	}
 	mtx_unlock(&mountlist_mtx);
 
 	error = SYSCTL_OUT(req, xvn, n * sizeof *xvn);
 	free(xvn, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
     0, 0, sysctl_vnode, "S,xvnode", "");
 #endif
 
 /*
  * Force-unmount every filesystem, always taking the last entry on the
  * mount list so that filesystems go away in reverse order of mounting
  * (which avoids dependencies).
  */
 void
 vfs_unmountall(void)
 {
 	struct thread *td;
 	struct mount *mp;
 	int error;
 
 	KASSERT(curthread != NULL, ("vfs_unmountall: NULL curthread"));
 	CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__);
 	td = curthread;
 
 	/*
 	 * Since this only runs when rebooting, it is not interlocked.
 	 */
 	while (!TAILQ_EMPTY(&mountlist)) {
 		mp = TAILQ_LAST(&mountlist, mntlist);
 		error = dounmount(mp, MNT_FORCE, td);
 		/* On success the unmount has removed mp from the mountlist. */
 		if (error == 0)
 			continue;
 
 		/* Failed: unlink it by hand so the loop still terminates. */
 		TAILQ_REMOVE(&mountlist, mp, mnt_list);
 		/*
 		 * XXX: Due to the way in which we mount the root
 		 * file system off of devfs, devfs will generate a
 		 * "busy" warning when we try to unmount it before
 		 * the root.  Don't print a warning as a result in
 		 * order to avoid false positive errors that may
 		 * cause needless upset.
 		 */
 		if (strcmp(mp->mnt_vfc->vfc_name, "devfs") == 0)
 			continue;
 		printf("unmount of %s failed (",
 		    mp->mnt_stat.f_mntonname);
 		if (error == EBUSY)
 			printf("BUSY)\n");
 		else
 			printf("%d)\n", error);
 	}
 }
 
 /*
  * Perform msync on all vnodes under a mount point.
  * The mount point must be locked.
  *
  * flags is compared against MNT_WAIT: with MNT_WAIT pages are cleaned
  * synchronously (OBJPC_SYNC) and locked vnodes are waited for; otherwise
  * cleaning uses OBJPC_NOSYNC and vnodes that are currently locked are
  * skipped.
  */
 void
 vfs_msync(struct mount *mp, int flags)
 {
 	struct vnode *vp, *mvp;
 	struct vm_object *obj;
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 	MNT_ILOCK(mp);
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		VI_LOCK(vp);
 		obj = vp->v_object;
 		if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 &&
 		    (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) {
 			/*
 			 * Drop the mount interlock before vget(); the vnode
 			 * interlock is handed to vget() via LK_INTERLOCK.
 			 */
 			MNT_IUNLOCK(mp);
 			if (!vget(vp,
 			    LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK,
 			    curthread)) {
 				if (vp->v_vflag & VV_NOSYNC) {	/* unlinked */
 					vput(vp);
 					MNT_ILOCK(mp);
 					continue;
 				}
 
 				/* Re-read v_object now that vp is locked. */
 				obj = vp->v_object;
 				if (obj != NULL) {
 					VM_OBJECT_LOCK(obj);
 					vm_object_page_clean(obj, 0, 0,
 					    flags == MNT_WAIT ?
 					    OBJPC_SYNC : OBJPC_NOSYNC);
 					VM_OBJECT_UNLOCK(obj);
 				}
 				vput(vp);
 			}
 			MNT_ILOCK(mp);
 		} else
 			VI_UNLOCK(vp);
 	}
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Put a vnode on the free list, making it a candidate for recycling.
  * The caller holds the vnode interlock.  A vnode flagged VI_AGE goes to
  * the head of the list, any other to the tail; VI_AGE is then cleared
  * and VI_FREE set.
  */
 static void
 vfree(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, "vfree");
 	mtx_lock(&vnode_free_list_mtx);
 	VNASSERT(vp->v_op != NULL, vp, ("vfree: vnode already reclaimed."));
 	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free"));
 	VNASSERT(VSHOULDFREE(vp), vp, ("vfree: freeing when we shouldn't"));
 	VNASSERT((vp->v_iflag & VI_DOOMED) == 0, vp,
 	    ("vfree: Freeing doomed vnode"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
 	if ((vp->v_iflag & VI_AGE) != 0) {
 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 	} else {
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 	}
 	vp->v_iflag = (vp->v_iflag & ~VI_AGE) | VI_FREE;
 	freevnodes++;
 	mtx_unlock(&vnode_free_list_mtx);
 }
 
 /*
  * Opposite of vfree() - mark a vnode as in use.
  *
  * Removes the vnode from the free list, decrements freevnodes and clears
  * VI_FREE and VI_AGE.  The caller holds the vnode interlock.
  */
 static void
 vbusy(struct vnode *vp)
 {
 	ASSERT_VI_LOCKED(vp, "vbusy");
 	VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free"));
 	VNASSERT(vp->v_op != NULL, vp, ("vbusy: vnode already reclaimed."));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
 	mtx_lock(&vnode_free_list_mtx);
 	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 	freevnodes--;
 	vp->v_iflag &= ~(VI_FREE|VI_AGE);
 	mtx_unlock(&vnode_free_list_mtx);
 }
 
 /*
  * Tear down a struct vpollinfo: destroy its knote list and mutex, then
  * return it to vnodepoll_zone.
  */
 static void
 destroy_vpollinfo(struct vpollinfo *vi)
 {
 	knlist_destroy(&vi->vpi_selinfo.si_note);
 	mtx_destroy(&vi->vpi_lock);
 	uma_zfree(vnodepoll_zone, vi);
 }
 
 /*
  * Initialize the per-vnode helper structure that holds poll-related
  * state, unless the vnode already has one.  The structure is built
  * before the vnode interlock is taken, so if another thread installed
  * one in the meantime the fresh copy is destroyed again.
  */
 void
 v_addpollinfo(struct vnode *vp)
 {
 	struct vpollinfo *fresh;
 
 	if (vp->v_pollinfo != NULL)
 		return;
 
 	fresh = uma_zalloc(vnodepoll_zone, M_WAITOK);
 	mtx_init(&fresh->vpi_lock, "vnode pollinfo", NULL, MTX_DEF);
 	knlist_init(&fresh->vpi_selinfo.si_note, vp, vfs_knllock,
 	    vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked);
 
 	VI_LOCK(vp);
 	if (vp->v_pollinfo == NULL) {
 		/* Still unset: install ours. */
 		vp->v_pollinfo = fresh;
 		VI_UNLOCK(vp);
 		return;
 	}
 	/* Lost the race: discard the copy we built. */
 	VI_UNLOCK(vp);
 	destroy_vpollinfo(fresh);
 }
 
 /*
  * Record a process's interest in events which might happen to
  * a vnode.  Because poll uses the historic select-style interface
  * internally, this routine serves as both the ``check for any
  * pending events'' and the ``record my interest in future events''
  * functions.  (These are done together, while the lock is held,
  * to avoid race conditions.)
  *
  * Returns the subset of events already pending, or 0 after the
  * interest has been recorded.
  */
 int
 vn_pollrecord(struct vnode *vp, struct thread *td, int events)
 {
 
 	v_addpollinfo(vp);
 	mtx_lock(&vp->v_pollinfo->vpi_lock);
 	if ((vp->v_pollinfo->vpi_revents & events) == 0) {
 		/* Nothing pending yet: register for future notification. */
 		vp->v_pollinfo->vpi_events |= events;
 		selrecord(td, &vp->v_pollinfo->vpi_selinfo);
 		mtx_unlock(&vp->v_pollinfo->vpi_lock);
 		return (0);
 	}
 
 	/*
 	 * Consume only the pending events the caller asked for.  Events
 	 * we are not interested in stay available for the other process
 	 * which presumably requested them (otherwise they would never
 	 * have been recorded).
 	 */
 	events &= vp->v_pollinfo->vpi_revents;
 	vp->v_pollinfo->vpi_revents &= ~events;
 	mtx_unlock(&vp->v_pollinfo->vpi_lock);
 	return (events);
 }
 
 /*
  * Routine to create and manage a filesystem syncer vnode.
  */
 #define sync_close ((int (*)(struct  vop_close_args *))nullop)
 static int	sync_fsync(struct  vop_fsync_args *);
 static int	sync_inactive(struct  vop_inactive_args *);
 static int	sync_reclaim(struct  vop_reclaim_args *);
 
 static struct vop_vector sync_vnodeops = {
 	.vop_bypass =	VOP_EOPNOTSUPP,
 	.vop_close =	sync_close,		/* close */
 	.vop_fsync =	sync_fsync,		/* fsync */
 	.vop_inactive =	sync_inactive,	/* inactive */
 	.vop_reclaim =	sync_reclaim,	/* reclaim */
 	.vop_lock1 =	vop_stdlock,	/* lock */
 	.vop_unlock =	vop_stdunlock,	/* unlock */
 	.vop_islocked =	vop_stdislocked,	/* islocked */
 };
 
 /*
  * Create a new filesystem syncer vnode for the specified mount point.
  *
  * The vnode is put on the syncer worklist and installed as
  * mp->mnt_syncer; if the mount already has a syncer vnode by then, the
  * new one is reclaimed and released again.  Allocation or insmntque()
  * failure panics.
  */
 void
 vfs_allocate_syncvnode(struct mount *mp)
 {
 	struct vnode *vp;
 	struct bufobj *bo;
 	/* Persist across calls to spread worklist slots (see below). */
 	static long start, incr, next;
 	int error;
 
 	/* Allocate a new vnode */
 	error = getnewvnode("syncer", mp, &sync_vnodeops, &vp);
 	if (error != 0)
 		panic("vfs_allocate_syncvnode: getnewvnode() failed");
 	vp->v_type = VNON;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	/* VV_FORCEINSMQ is set only for the duration of insmntque(). */
 	vp->v_vflag |= VV_FORCEINSMQ;
 	error = insmntque(vp, mp);
 	if (error != 0)
 		panic("vfs_allocate_syncvnode: insmntque() failed");
 	vp->v_vflag &= ~VV_FORCEINSMQ;
 	VOP_UNLOCK(vp, 0);
 	/*
 	 * Place the vnode onto the syncer worklist. We attempt to
 	 * scatter them about on the list so that they will go off
 	 * at evenly distributed times even if all the filesystems
 	 * are mounted at once.
 	 */
 	next += incr;
 	if (next == 0 || next > syncer_maxdelay) {
 		start /= 2;
 		incr /= 2;
 		if (start == 0) {
 			start = syncer_maxdelay / 2;
 			incr = syncer_maxdelay;
 		}
 		next = start;
 	}
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	/* Guard the modulo against a zero syncdelay. */
 	vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0);
 	/* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */
 	mtx_lock(&sync_mtx);
 	sync_vnode_count++;
 	if (mp->mnt_syncer == NULL) {
 		/* Installed; clear vp so it is not torn down below. */
 		mp->mnt_syncer = vp;
 		vp = NULL;
 	}
 	mtx_unlock(&sync_mtx);
 	BO_UNLOCK(bo);
 	if (vp != NULL) {
 		/* The mount already had a syncer vnode: discard ours. */
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		vgone(vp);
 		vput(vp);
 	}
 }
 
 /*
  * Detach the syncer vnode from the mount point under sync_mtx and, if
  * there was one, drop a reference on it with vrele().
  */
 void
 vfs_deallocate_syncvnode(struct mount *mp)
 {
 	struct vnode *syncvp;
 
 	mtx_lock(&sync_mtx);
 	syncvp = mp->mnt_syncer;
 	if (syncvp == NULL) {
 		mtx_unlock(&sync_mtx);
 		return;
 	}
 	mp->mnt_syncer = NULL;
 	mtx_unlock(&sync_mtx);
 	vrele(syncvp);
 }
 
 /*
  * Do a lazy sync of the filesystem.
  *
  * VOP_FSYNC implementation for the syncer vnode.  Returns 0 without
  * doing anything unless a_waitfor is MNT_LAZY, and also when the mount
  * cannot be busied or a write cannot be started without waiting;
  * otherwise returns the result of VFS_SYNC().
  */
 static int
 sync_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *syncvp = ap->a_vp;
 	struct mount *mp = syncvp->v_mount;
 	int error;
 	struct bufobj *bo;
 
 	/*
 	 * We only need to do something if this is a lazy evaluation.
 	 */
 	if (ap->a_waitfor != MNT_LAZY)
 		return (0);
 
 	/*
 	 * Move ourselves to the back of the sync list.
 	 */
 	bo = &syncvp->v_bufobj;
 	BO_LOCK(bo);
 	vn_syncer_add_to_worklist(bo, syncdelay);
 	BO_UNLOCK(bo);
 
 	/*
 	 * Walk the list of vnodes pushing all that are dirty and
 	 * not already on the sync list.
 	 */
 	mtx_lock(&mountlist_mtx);
 	/*
 	 * NOTE(review): mountlist_mtx is unlocked here only on failure; on
 	 * success vfs_busy() presumably drops it because MBF_MNTLSTLOCK is
 	 * passed - confirm.
 	 */
 	if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
 		mtx_unlock(&mountlist_mtx);
 		return (0);
 	}
 	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
 		vfs_unbusy(mp);
 		return (0);
 	}
 	/* Turn MNTK_ASYNC off for the duration of the sync. */
 	MNT_ILOCK(mp);
 	mp->mnt_noasync++;
 	mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 	vfs_msync(mp, MNT_NOWAIT);
 	error = VFS_SYNC(mp, MNT_LAZY);
 	/* Restore MNTK_ASYNC once the last noasync user is gone. */
 	MNT_ILOCK(mp);
 	mp->mnt_noasync--;
 	if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 		mp->mnt_kern_flag |= MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 	vn_finished_write(mp);
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * The syncer vnode is no longer referenced; hand it to vgone().
  */
 static int
 sync_inactive(struct vop_inactive_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	vgone(vp);
 	return (0);
 }
 
 /*
  * The syncer vnode is no longer needed and is being decommissioned.
  *
  * Modifications to the worklist must be protected by sync_mtx.
  */
 static int
 sync_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct bufobj *bo;
 
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	mtx_lock(&sync_mtx);
 	if (vp->v_mount->mnt_syncer == vp)
 		vp->v_mount->mnt_syncer = NULL;
 	if (bo->bo_flag & BO_ONWORKLST) {
 		LIST_REMOVE(bo, bo_synclist);
 		syncer_worklist_len--;
 		sync_vnode_count--;
 		bo->bo_flag &= ~BO_ONWORKLST;
 	}
 	mtx_unlock(&sync_mtx);
 	BO_UNLOCK(bo);
 
 	return (0);
 }
 
 /*
  * Check if vnode represents a disk device.
  *
  * Returns non-zero if it does, 0 otherwise.  When errp is not NULL the
  * reason is stored through it: 0 on success, ENOTBLK when the vnode is
  * not VCHR or its device switch lacks D_DISK, ENXIO when the vnode has
  * no device or the device has no device switch.
  */
 int
 vn_isdisk(struct vnode *vp, int *errp)
 {
 	int error;
 
 	dev_lock();
 	if (vp->v_type != VCHR) {
 		error = ENOTBLK;
 	} else if (vp->v_rdev == NULL || vp->v_rdev->si_devsw == NULL) {
 		error = ENXIO;
 	} else if ((vp->v_rdev->si_devsw->d_flags & D_DISK) == 0) {
 		error = ENOTBLK;
 	} else {
 		error = 0;
 	}
 	dev_unlock();
 
 	if (errp != NULL)
 		*errp = error;
 	return (error == 0);
 }
 
 /*
  * Common filesystem object access control check routine.  Accepts a
  * vnode's type, "mode", uid and gid, requested access mode, credentials,
  * and optional call-by-reference privused argument allowing vaccess()
  * to indicate to the caller whether privilege was used to satisfy the
  * request (obsoleted).  Returns 0 on success, or an errno on failure.
  *
  * The ifdef'd CAPABILITIES version is here for reference, but is not
  * actually used.
  */
 int
 vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid,
     accmode_t accmode, struct ucred *cred, int *privused)
 {
 	accmode_t dac_granted;
 	accmode_t priv_granted;
 
 	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0,
 	    ("invalid bit in accmode"));
 	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE),
 	    ("VAPPEND without VWRITE"));
 
 	if (privused != NULL)
 		*privused = 0;
 
 	/*
 	 * Look for a normal, non-privileged way to access the object.
 	 * Exactly one class of mode bits is consulted: owner, otherwise
 	 * group (first match), otherwise everyone else.
 	 */
 	dac_granted = 0;
 	if (cred->cr_uid == file_uid) {
 		/* The owner is always granted VADMIN. */
 		dac_granted |= VADMIN;
 		if ((file_mode & S_IXUSR) != 0)
 			dac_granted |= VEXEC;
 		if ((file_mode & S_IRUSR) != 0)
 			dac_granted |= VREAD;
 		if ((file_mode & S_IWUSR) != 0)
 			dac_granted |= (VWRITE | VAPPEND);
 	} else if (groupmember(file_gid, cred)) {
 		if ((file_mode & S_IXGRP) != 0)
 			dac_granted |= VEXEC;
 		if ((file_mode & S_IRGRP) != 0)
 			dac_granted |= VREAD;
 		if ((file_mode & S_IWGRP) != 0)
 			dac_granted |= (VWRITE | VAPPEND);
 	} else {
 		if ((file_mode & S_IXOTH) != 0)
 			dac_granted |= VEXEC;
 		if ((file_mode & S_IROTH) != 0)
 			dac_granted |= VREAD;
 		if ((file_mode & S_IWOTH) != 0)
 			dac_granted |= (VWRITE | VAPPEND);
 	}
 	if ((accmode & dac_granted) == accmode)
 		return (0);
 
 	/*
 	 * Discretionary access was not sufficient.  Build a mask of what
 	 * privilege can add: a privilege is only checked for a requested
 	 * bit that the mode bits did not already grant.
 	 */
 	priv_granted = 0;
 
 	if ((accmode & VEXEC) != 0 && (dac_granted & VEXEC) == 0) {
 		if (type == VDIR) {
 			/*
 			 * For directories, PRIV_VFS_LOOKUP satisfies VEXEC
 			 * requests, instead of PRIV_VFS_EXEC.
 			 */
 			if (priv_check_cred(cred, PRIV_VFS_LOOKUP, 0) == 0)
 				priv_granted |= VEXEC;
 		} else if ((file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 &&
 		    priv_check_cred(cred, PRIV_VFS_EXEC, 0) == 0) {
 			/*
 			 * At least one execute bit must be on.  Otherwise a
 			 * privileged user would always succeed, which must
 			 * not happen unless the file really is executable.
 			 */
 			priv_granted |= VEXEC;
 		}
 	}
 
 	if ((accmode & VREAD) != 0 && (dac_granted & VREAD) == 0 &&
 	    priv_check_cred(cred, PRIV_VFS_READ, 0) == 0)
 		priv_granted |= VREAD;
 
 	if ((accmode & VWRITE) != 0 && (dac_granted & VWRITE) == 0 &&
 	    priv_check_cred(cred, PRIV_VFS_WRITE, 0) == 0)
 		priv_granted |= (VWRITE | VAPPEND);
 
 	if ((accmode & VADMIN) != 0 && (dac_granted & VADMIN) == 0 &&
 	    priv_check_cred(cred, PRIV_VFS_ADMIN, 0) == 0)
 		priv_granted |= VADMIN;
 
 	if ((accmode & (priv_granted | dac_granted)) != accmode)
 		return ((accmode & VADMIN) ? EPERM : EACCES);
 
 	/* XXX audit: privilege used */
 	if (privused != NULL)
 		*privused = 1;
 	return (0);
 }
 
 /*
  * Credential check based on process requesting service, and per-attribute
  * permissions.
  */
 int
 extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred,
     struct thread *td, accmode_t accmode)
 {
 
 	/* Kernel-invoked requests always succeed. */
 	if (cred == NOCRED)
 		return (0);
 
 	/*
 	 * Do not allow privileged processes in jail to directly manipulate
 	 * system attributes: the system namespace is gated by a privilege
 	 * check, the user namespace by the vnode's own access check, and
 	 * any other namespace is refused.
 	 */
 	if (attrnamespace == EXTATTR_NAMESPACE_SYSTEM) {
 		/* Potentially should be: return (EPERM); */
 		return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0));
 	}
 	if (attrnamespace == EXTATTR_NAMESPACE_USER)
 		return (VOP_ACCESS(vp, accmode, cred, td));
 	return (EPERM);
 }
 
 #ifdef DEBUG_VFS_LOCKS
 /*
  * This only exists to suppress warnings from unlocked specfs accesses.  It is
  * no longer ok to have an unlocked VFS.
  */
 #define	IGNORE_LOCK(vp) (panicstr != NULL || (vp) == NULL ||		\
 	(vp)->v_type == VCHR ||	(vp)->v_type == VBAD)
 
 int vfs_badlock_ddb = 1;	/* Drop into debugger on violation. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0,
     "Drop into debugger on lock violation");
 
 int vfs_badlock_mutex = 1;	/* Check for interlock across VOPs. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex,
     0, "Check for interlock across VOPs");
 
 int vfs_badlock_print = 1;	/* Print lock violations. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print,
     0, "Print lock violations");
 
 #ifdef KDB
 int vfs_badlock_backtrace = 1;	/* Print backtrace at lock violations. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW,
     &vfs_badlock_backtrace, 0, "Print backtrace at lock violations");
 #endif
 
 /*
  * Report a vnode lock violation.  Each action (backtrace, console
  * message, debugger entry) is gated by its own vfs_badlock_* knob.
  */
 static void
 vfs_badlock(const char *msg, const char *str, struct vnode *vp)
 {
 
 #ifdef KDB
 	if (vfs_badlock_backtrace != 0) {
 		kdb_backtrace();
 	}
 #endif
 	if (vfs_badlock_print != 0) {
 		printf("%s: %p %s\n", str, (void *)vp, msg);
 	}
 	if (vfs_badlock_ddb != 0) {
 		kdb_enter(KDB_WHY_VFSLOCK, "lock violation");
 	}
 }
 
 /* Complain unless the current thread owns the vnode interlock. */
 void
 assert_vi_locked(struct vnode *vp, const char *str)
 {
 
 	if (!vfs_badlock_mutex)
 		return;
 	if (!mtx_owned(VI_MTX(vp)))
 		vfs_badlock("interlock is not locked but should be", str, vp);
 }
 
 /* Complain if the current thread owns the vnode interlock. */
 void
 assert_vi_unlocked(struct vnode *vp, const char *str)
 {
 
 	if (!vfs_badlock_mutex)
 		return;
 	if (mtx_owned(VI_MTX(vp)))
 		vfs_badlock("interlock is locked but should not be", str, vp);
 }
 
 /* Complain when VOP_ISLOCKED() reports the vnode as not locked at all. */
 void
 assert_vop_locked(struct vnode *vp, const char *str)
 {
 
 	if (IGNORE_LOCK(vp))
 		return;
 	if (VOP_ISLOCKED(vp) == 0)
 		vfs_badlock("is not locked but should be", str, vp);
 }
 
 /* Complain when VOP_ISLOCKED() reports LK_EXCLUSIVE for the vnode. */
 void
 assert_vop_unlocked(struct vnode *vp, const char *str)
 {
 
 	if (IGNORE_LOCK(vp))
 		return;
 	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
 		vfs_badlock("is locked but should not be", str, vp);
 }
 
 /* Complain unless VOP_ISLOCKED() reports LK_EXCLUSIVE for the vnode. */
 void
 assert_vop_elocked(struct vnode *vp, const char *str)
 {
 
 	if (IGNORE_LOCK(vp))
 		return;
 	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
 		vfs_badlock("is not exclusive locked but should be", str, vp);
 }
 
 #if 0
 void
 assert_vop_elocked_other(struct vnode *vp, const char *str)
 {
 
 	if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLOTHER)
 		vfs_badlock("is not exclusive locked by another thread",
 		    str, vp);
 }
 
 void
 assert_vop_slocked(struct vnode *vp, const char *str)
 {
 
 	if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_SHARED)
 		vfs_badlock("is not locked shared but should be", str, vp);
 }
 #endif /* 0 */
 #endif /* DEBUG_VFS_LOCKS */
 
 /*
  * Release the four vnodes of a rename request: the target vnode (if
  * any) is vput(), the target directory is vrele()'d when it is the
  * same vnode as the target and vput() otherwise, and both source
  * vnodes are vrele()'d.
  */
 void
 vop_rename_fail(struct vop_rename_args *ap)
 {
 	struct vnode *tdvp, *tvp;
 
 	tdvp = ap->a_tdvp;
 	tvp = ap->a_tvp;
 
 	if (tvp != NULL)
 		vput(tvp);
 	if (tdvp != tvp)
 		vput(tdvp);
 	else
 		vrele(tdvp);
 	vrele(ap->a_fdvp);
 	vrele(ap->a_fvp);
 }
 
 void
 vop_rename_pre(void *ap)
 {
 	struct vop_rename_args *a = ap;
 
 #ifdef DEBUG_VFS_LOCKS
 	if (a->a_tvp)
 		ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME");
 	ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME");
 	ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME");
 	ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME");
 
 	/* Check the source (from). */
 	if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock &&
 	    (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock))
 		ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked");
 	if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock)
 		ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked");
 
 	/* Check the target. */
 	if (a->a_tvp)
 		ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked");
 	ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked");
 #endif
 	if (a->a_tdvp != a->a_fdvp)
 		vhold(a->a_fdvp);
 	if (a->a_tvp != a->a_fvp)
 		vhold(a->a_fvp);
 	vhold(a->a_tdvp);
 	if (a->a_tvp)
 		vhold(a->a_tvp);
 }
 
 /*
  * Pre-VOP_STRATEGY hook: with DEBUG_VFS_LOCKS, report a buffer that is
  * handed to the strategy routine without being locked.
  */
 void
 vop_strategy_pre(void *ap)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_strategy_args *a = ap;
 	struct buf *bp = a->a_bp;
 
 	/*
 	 * Cluster ops lock their component buffers but not the IO container.
 	 */
 	if ((bp->b_flags & B_CLUSTER) != 0)
 		return;
 	if (BUF_ISLOCKED(bp))
 		return;
 
 	if (vfs_badlock_print)
 		printf("VOP_STRATEGY: bp is not locked but should be\n");
 	if (vfs_badlock_ddb)
 		kdb_enter(KDB_WHY_VFSLOCK, "lock violation");
 #endif
 }
 
 /*
  * Pre-VOP_LOOKUP hook: with DEBUG_VFS_LOCKS, check that a_dvp is
  * locked and that its interlock is not held.
  */
 void
 vop_lookup_pre(void *ap)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_lookup_args *a = ap;
 	struct vnode *dvp = a->a_dvp;
 
 	ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP");
 	ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP");
 #endif
 }
 
 /*
  * Post-VOP_LOOKUP hook: with DEBUG_VFS_LOCKS, re-check a_dvp and, when
  * the lookup succeeded (rc == 0), check that the vnode returned through
  * a_vpp is locked.
  */
 void
 vop_lookup_post(void *ap, int rc)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_lookup_args *a = ap;
 	struct vnode *dvp = a->a_dvp;
 	struct vnode *vp = *(a->a_vpp);
 
 	ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP");
 	ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP");
 
 	if (rc == 0) {
 		ASSERT_VOP_LOCKED(vp, "VOP_LOOKUP (child)");
 	}
 #endif
 }
 
 /*
  * Pre-VOP_LOCK hook: with DEBUG_VFS_LOCKS, the interlock must be held
  * exactly when LK_INTERLOCK is passed in a_flags.
  */
 void
 vop_lock_pre(void *ap)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_lock1_args *a = ap;
 
 	if ((a->a_flags & LK_INTERLOCK) != 0) {
 		ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK");
 	} else {
 		ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
 	}
 #endif
 }
 
 /*
  * Post-VOP_LOCK hook: with DEBUG_VFS_LOCKS, the interlock must be
  * released, and on success (rc == 0) the vnode must be locked.
  */
 void
 vop_lock_post(void *ap, int rc)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_lock1_args *a = ap;
 	struct vnode *vp = a->a_vp;
 
 	ASSERT_VI_UNLOCKED(vp, "VOP_LOCK");
 	if (rc != 0)
 		return;
 	ASSERT_VOP_LOCKED(vp, "VOP_LOCK");
 #endif
 }
 
 /*
  * Pre-VOP_UNLOCK hook: with DEBUG_VFS_LOCKS, the vnode must be locked,
  * and the interlock must be held when LK_INTERLOCK is passed.
  */
 void
 vop_unlock_pre(void *ap)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_unlock_args *a = ap;
 	struct vnode *vp = a->a_vp;
 
 	if ((a->a_flags & LK_INTERLOCK) != 0) {
 		ASSERT_VI_LOCKED(vp, "VOP_UNLOCK");
 	}
 	ASSERT_VOP_LOCKED(vp, "VOP_UNLOCK");
 #endif
 }
 
 /*
  * Post-VOP_UNLOCK hook: with DEBUG_VFS_LOCKS, when LK_INTERLOCK was
  * passed the interlock must have been released.  rc is not examined.
  */
 void
 vop_unlock_post(void *ap, int rc)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vop_unlock_args *a = ap;
 
 	if ((a->a_flags & LK_INTERLOCK) == 0)
 		return;
 	ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK");
 #endif
 }
 
 /* Post-VOP_CREATE hook: on success, post NOTE_WRITE on a_dvp. */
 void
 vop_create_post(void *ap, int rc)
 {
 	struct vop_create_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_dvp, NOTE_WRITE);
 }
 
 /*
  * Post-VOP_LINK hook: on success, post NOTE_LINK on a_vp and
  * NOTE_WRITE on a_tdvp.
  */
 void
 vop_link_post(void *ap, int rc)
 {
 	struct vop_link_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_vp, NOTE_LINK);
 	VFS_KNOTE_LOCKED(args->a_tdvp, NOTE_WRITE);
 }
 
 /* Post-VOP_MKDIR hook: on success, post NOTE_WRITE | NOTE_LINK on a_dvp. */
 void
 vop_mkdir_post(void *ap, int rc)
 {
 	struct vop_mkdir_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_dvp, NOTE_WRITE | NOTE_LINK);
 }
 
 /* Post-VOP_MKNOD hook: on success, post NOTE_WRITE on a_dvp. */
 void
 vop_mknod_post(void *ap, int rc)
 {
 	struct vop_mknod_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_dvp, NOTE_WRITE);
 }
 
 /*
  * Post-VOP_REMOVE hook: on success, post NOTE_WRITE on a_dvp and
  * NOTE_DELETE on a_vp.
  */
 void
 vop_remove_post(void *ap, int rc)
 {
 	struct vop_remove_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_dvp, NOTE_WRITE);
 	VFS_KNOTE_LOCKED(args->a_vp, NOTE_DELETE);
 }
 
 void
 vop_rename_post(void *ap, int rc)
 {
 	struct vop_rename_args *a = ap;
 
 	if (!rc) {
 		VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE);
 		VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE);
 		VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME);
 		if (a->a_tvp)
 			VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE);
 	}
 	if (a->a_tdvp != a->a_fdvp)
 		vdrop(a->a_fdvp);
 	if (a->a_tvp != a->a_fvp)
 		vdrop(a->a_fvp);
 	vdrop(a->a_tdvp);
 	if (a->a_tvp)
 		vdrop(a->a_tvp);
 }
 
 /*
  * Post-VOP_RMDIR hook: on success, post NOTE_WRITE | NOTE_LINK on
  * a_dvp and NOTE_DELETE on a_vp.
  */
 void
 vop_rmdir_post(void *ap, int rc)
 {
 	struct vop_rmdir_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_dvp, NOTE_WRITE | NOTE_LINK);
 	VFS_KNOTE_LOCKED(args->a_vp, NOTE_DELETE);
 }
 
 /* Post-VOP_SETATTR hook: on success, post NOTE_ATTRIB on a_vp. */
 void
 vop_setattr_post(void *ap, int rc)
 {
 	struct vop_setattr_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_vp, NOTE_ATTRIB);
 }
 
 /* Post-VOP_SYMLINK hook: on success, post NOTE_WRITE on a_dvp. */
 void
 vop_symlink_post(void *ap, int rc)
 {
 	struct vop_symlink_args *args = ap;
 
 	if (rc != 0)
 		return;
 	VFS_KNOTE_LOCKED(args->a_dvp, NOTE_WRITE);
 }
 
 static struct knlist fs_knlist;
 
 /*
  * SYSINIT callback: initialize fs_knlist, the knote list that
  * vfs_event_signal() posts to.  arg is unused.
  */
 static void
 vfs_event_init(void *arg)
 {
 	/* NULL is passed as the mutex argument of knlist_init_mtx(). */
 	knlist_init_mtx(&fs_knlist, NULL);
 }
 /* XXX - correct order? */
 SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL);
 
 /*
  * Post a filesystem event to every knote attached to fs_knlist.
  * Only 'event' is used; 'fsid' and 'data' are not examined here.
  */
 void
 vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused)
 {
 
 	KNOTE_UNLOCKED(&fs_knlist, event);
 }
 
 static int	filt_fsattach(struct knote *kn);
 static void	filt_fsdetach(struct knote *kn);
 static int	filt_fsevent(struct knote *kn, long hint);
 
 struct filterops fs_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_fsattach,
 	.f_detach = filt_fsdetach,
 	.f_event = filt_fsevent
 };
 
 /*
  * f_attach handler of fs_filtops: mark the knote EV_CLEAR and add it
  * to fs_knlist.  Always returns 0.
  */
 static int
 filt_fsattach(struct knote *kn)
 {
 
 	kn->kn_flags |= EV_CLEAR;
 	knlist_add(&fs_knlist, kn, 0);
 	return (0);
 }
 
 /* f_detach handler of fs_filtops: remove the knote from fs_knlist. */
 static void
 filt_fsdetach(struct knote *kn)
 {
 
 	knlist_remove(&fs_knlist, kn, 0);
 }
 
 /*
  * f_event handler of fs_filtops: accumulate the hint bits into
  * kn_fflags and report the knote as active when any bit is set.
  */
 static int
 filt_fsevent(struct knote *kn, long hint)
 {
 
 	kn->kn_fflags |= hint;
 	if (kn->kn_fflags == 0)
 		return (0);
 	return (1);
 }
 
 /*
  * Handler for the vfs.ctl sysctl: copy in a struct vfsidctl, find the
  * mount it names by fsid and forward the request to VFS_SYSCTL().
  *
  * Returns the SYSCTL_IN() error, EINVAL for an unknown version or a
  * filesystem type mismatch, ENOENT when no mount matches the fsid,
  * and otherwise the result of VFS_SYSCTL().
  */
 static int
 sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS)
 {
 	struct vfsidctl vc;
 	struct mount *mp;
 	int error;
 
 	error = SYSCTL_IN(req, &vc, sizeof(vc));
 	if (error != 0)
 		return (error);
 	if (vc.vc_vers != VFS_CTL_VERS1)
 		return (EINVAL);
 
 	mp = vfs_getvfs(&vc.vc_fsid);
 	if (mp == NULL)
 		return (ENOENT);
 
 	/*
 	 * Ensure that a specific sysctl goes to the right filesystem:
 	 * the type name must be "*" or match the mount's vfc_name.
 	 */
 	if (strcmp(vc.vc_fstypename, "*") == 0 ||
 	    strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) == 0) {
 		VCTLTOREQ(&vc, req);
 		error = VFS_SYSCTL(mp, vc.vc_op, req);
 	} else {
 		error = EINVAL;
 	}
 
 	/* Drop the mount obtained from vfs_getvfs(). */
 	vfs_rel(mp);
 	return (error);
 }
 
 SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "",
     "Sysctl by fsid");
 
 /*
  * Function to initialize a va_filerev field sensibly.
  * XXX: Wouldn't a random number make a lot more sense ??
  */
 u_quad_t
 init_va_filerev(void)
 {
 	struct bintime bt;
 
 	getbinuptime(&bt);
 	return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL));
 }
 
 static int	filt_vfsread(struct knote *kn, long hint);
 static int	filt_vfswrite(struct knote *kn, long hint);
 static int	filt_vfsvnode(struct knote *kn, long hint);
 static void	filt_vfsdetach(struct knote *kn);
 static struct filterops vfsread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
 	.f_event = filt_vfsread
 };
 static struct filterops vfswrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
 	.f_event = filt_vfswrite
 };
 static struct filterops vfsvnode_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
 	.f_event = filt_vfsvnode
 };
 
 /* Lock callback: take the vnode lock exclusively on the vnode in arg. */
 static void
 vfs_knllock(void *arg)
 {
 	struct vnode *vp;
 
 	vp = arg;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 }
 
 /* Unlock callback: release the vnode lock on the vnode in arg. */
 static void
 vfs_knlunlock(void *arg)
 {
 	struct vnode *vp;
 
 	vp = arg;
 	VOP_UNLOCK(vp, 0);
 }
 
 /*
  * Assertion callback: with DEBUG_VFS_LOCKS, check that the vnode in
  * arg is locked; otherwise a no-op.
  */
 static void
 vfs_knl_assert_locked(void *arg)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vnode *vp;
 
 	vp = arg;
 	ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked");
 #endif
 }
 
 /*
  * Assertion callback: with DEBUG_VFS_LOCKS, check that the vnode in
  * arg is unlocked; otherwise a no-op.
  */
 static void
 vfs_knl_assert_unlocked(void *arg)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vnode *vp;
 
 	vp = arg;
 	ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked");
 #endif
 }
 
 /*
  * Attach a knote to a vnode.  Picks the filterops matching the
  * requested filter and adds the knote to the vnode's pollinfo knote
  * list.
  *
  * Returns 0 on success, EINVAL for an unsupported filter, or ENOMEM
  * when the vnode has no pollinfo after v_addpollinfo().
  */
 int
 vfs_kqfilter(struct vop_kqfilter_args *ap)
 {
 	struct vnode *vp;
 	struct knote *kn;
 
 	vp = ap->a_vp;
 	kn = ap->a_kn;
 
 	if (kn->kn_filter == EVFILT_READ)
 		kn->kn_fop = &vfsread_filtops;
 	else if (kn->kn_filter == EVFILT_WRITE)
 		kn->kn_fop = &vfswrite_filtops;
 	else if (kn->kn_filter == EVFILT_VNODE)
 		kn->kn_fop = &vfsvnode_filtops;
 	else
 		return (EINVAL);
 
 	/* The filter routines recover the vnode from kn_hook. */
 	kn->kn_hook = (caddr_t)vp;
 
 	v_addpollinfo(vp);
 	if (vp->v_pollinfo == NULL)
 		return (ENOMEM);
 	knlist_add(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0);
 
 	return (0);
 }
 
 /*
  * Detach knote from vnode: remove it from the knote list of the vnode
  * stored in kn_hook.
  */
 static void
 filt_vfsdetach(struct knote *kn)
 {
 	struct vnode *vp;
 	struct knlist *knl;
 
 	vp = (struct vnode *)kn->kn_hook;
 	KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo"));
 	knl = &vp->v_pollinfo->vpi_selinfo.si_note;
 	knlist_remove(knl, kn, 0);
 }
 
 /*
  * EVFILT_READ event routine.  Sets kn_data to the file size minus the
  * current file offset and reports the knote as active when that
  * difference is non-zero.  Returns 0 when the attributes cannot be
  * obtained.
  */
 /*ARGSUSED*/
 static int
 filt_vfsread(struct knote *kn, long hint)
 {
 	struct vnode *vp;
 	struct vattr va;
 	int ready;
 
 	vp = (struct vnode *)kn->kn_hook;
 
 	/*
 	 * filesystem is gone, so set the EOF flag and schedule
 	 * the knote for deletion.
 	 */
 	if (hint == NOTE_REVOKE) {
 		VI_LOCK(vp);
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		VI_UNLOCK(vp);
 		return (1);
 	}
 
 	if (VOP_GETATTR(vp, &va, curthread->td_ucred) != 0)
 		return (0);
 
 	VI_LOCK(vp);
 	kn->kn_data = va.va_size - kn->kn_fp->f_offset;
 	ready = (kn->kn_data != 0);
 	VI_UNLOCK(vp);
 
 	return (ready);
 }
 
 /*
  * EVFILT_WRITE event routine.  Always reports the knote as active with
  * kn_data set to 0.
  */
 /*ARGSUSED*/
 static int
 filt_vfswrite(struct knote *kn, long hint)
 {
 	struct vnode *vp;
 
 	vp = (struct vnode *)kn->kn_hook;
 
 	VI_LOCK(vp);
 	if (hint == NOTE_REVOKE) {
 		/*
 		 * filesystem is gone, so set the EOF flag and schedule
 		 * the knote for deletion.
 		 */
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 	}
 	kn->kn_data = 0;
 	VI_UNLOCK(vp);
 
 	return (1);
 }
 
 /*
  * EVFILT_VNODE event routine.  A hint is recorded in kn_fflags only if
  * it was requested in kn_sfflags.  NOTE_REVOKE always activates the
  * knote and marks it EV_EOF; otherwise the knote is active when any
  * recorded flag is set.
  */
 static int
 filt_vfsvnode(struct knote *kn, long hint)
 {
 	struct vnode *vp;
 	int active;
 
 	vp = (struct vnode *)kn->kn_hook;
 
 	VI_LOCK(vp);
 	if ((kn->kn_sfflags & hint) != 0)
 		kn->kn_fflags |= hint;
 	if (hint == NOTE_REVOKE) {
 		kn->kn_flags |= EV_EOF;
 		active = 1;
 	} else {
 		active = (kn->kn_fflags != 0);
 	}
 	VI_UNLOCK(vp);
 
 	return (active);
 }
 
 /*
  * Copy one directory entry out to the caller's uio and, when the
  * caller asked for cookies (a_ncookies != NULL), append 'off' to the
  * cookie array.
  *
  * Returns ENAMETOOLONG when the entry does not fit in the remaining
  * uio space, the uiomove() error if the copy fails, and 0 otherwise.
  * On a copy failure the cookie array is freed, the caller's array
  * pointer is cleared and the cookie count is reset to 0.
  */
 int
 vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off)
 {
 	int error;
 
 	if (dp->d_reclen > ap->a_uio->uio_resid)
 		return (ENAMETOOLONG);
 	error = uiomove(dp, dp->d_reclen, ap->a_uio);
 	if (error) {
 		if (ap->a_ncookies != NULL) {
 			if (ap->a_cookies != NULL) {
 				/*
 				 * a_cookies points at the caller's array
 				 * pointer; the allocation to release is the
 				 * array itself, not that pointer slot.
 				 */
 				free(*ap->a_cookies, M_TEMP);
 				*ap->a_cookies = NULL;
 			}
 			ap->a_cookies = NULL;
 			*ap->a_ncookies = 0;
 		}
 		return (error);
 	}
 	if (ap->a_ncookies == NULL)
 		return (0);
 
 	KASSERT(ap->a_cookies,
 	    ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!"));
 
 	/* Grow the array by one slot and record this entry's offset. */
 	*ap->a_cookies = realloc(*ap->a_cookies,
 	    (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO);
 	(*ap->a_cookies)[*ap->a_ncookies] = off;
 	/*
 	 * Account for the new cookie; without this every call would
 	 * overwrite slot 0 and the caller would see a count of 0.
 	 */
 	(*ap->a_ncookies)++;
 	return (0);
 }
 
 /*
  * Mark for update the access time of the file if the filesystem
  * supports VOP_MARKATIME.  This functionality is used by execve and
  * mmap, so we want to avoid the I/O implied by directly setting
  * va_atime for the sake of efficiency.
  */
 void
 vfs_mark_atime(struct vnode *vp, struct ucred *cred)
 {
 	struct mount *mp;
 
 	mp = vp->v_mount;
 	VFS_ASSERT_GIANT(mp);
 	ASSERT_VOP_LOCKED(vp, "vfs_mark_atime");
 
 	/* Nothing to mark without a mount, or on noatime/read-only mounts. */
 	if (mp == NULL)
 		return;
 	if ((mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) != 0)
 		return;
 	(void)VOP_MARKATIME(vp);
 }
 
 /*
  * The purpose of this routine is to remove granularity from accmode_t,
  * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE,
  * VADMIN and VAPPEND.
  *
  * If it returns 0, the caller is supposed to continue with the usual
  * access checks using 'accmode' as modified by this routine.  If it
  * returns nonzero value, the caller is supposed to return that value
  * as errno.
  *
  * Note that after this routine runs, accmode may be zero.
  */
 int
 vfs_unixify_accmode(accmode_t *accmode)
 {
 	accmode_t mode;
 
 	mode = *accmode;
 
 	/*
 	 * There is no way to specify explicit "deny" rule using
 	 * file mode or POSIX.1e ACLs.
 	 */
 	if ((mode & VEXPLICIT_DENY) != 0) {
 		*accmode = 0;
 		return (0);
 	}
 
 	/*
 	 * None of these can be translated into usual access bits.
 	 * Also, the common case for NFSv4 ACLs is to not contain
 	 * either of these bits. Caller should check for VWRITE
 	 * on the containing directory instead.
 	 */
 	if ((mode & (VDELETE_CHILD | VDELETE)) != 0)
 		return (EPERM);
 
 	/* Fold the fine-grained admin bits into plain VADMIN. */
 	if ((mode & VADMIN_PERMS) != 0)
 		mode = (mode & ~VADMIN_PERMS) | VADMIN;
 
 	/*
 	 * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL
 	 * or VSYNCHRONIZE using file mode or POSIX.1e ACL.
 	 */
 	mode &= ~(VSTAT_PERMS | VSYNCHRONIZE);
 
 	*accmode = mode;
 	return (0);
 }
Index: projects/binutils-2.17/sys/net/bpf_buffer.c
===================================================================
--- projects/binutils-2.17/sys/net/bpf_buffer.c	(revision 215829)
+++ projects/binutils-2.17/sys/net/bpf_buffer.c	(revision 215830)
@@ -1,210 +1,210 @@
 /*-
  * Copyright (c) 2007 Seccuris Inc.
  * All rights reserved.
  *
  * This sofware was developed by Robert N. M. Watson under contract to
  * Seccuris Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from the Stanford/CMU enet packet filter,
  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  * Berkeley Laboratory.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bpf.h"
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/uio.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #include <net/bpfdesc.h>
 
 /*
  * Implement historical kernel memory buffering model for BPF: two malloc(9)
  * kernel buffers are hung off of the descriptor.  The size is fixed prior to
  * attaching to an ifnet, and cannot be changed after that.  read(2) simply
  * copies the data to user space using uiomove(9).
  */
 
 static int bpf_bufsize = 4096;
 SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
-    &bpf_bufsize, 0, "Maximum capture buffer size in bytes");
+    &bpf_bufsize, 0, "Default capture buffer size in bytes");
 static int bpf_maxbufsize = BPF_MAXBUFSIZE;
 SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
-    &bpf_maxbufsize, 0, "Default capture buffer in bytes");
+    &bpf_maxbufsize, 0, "Maximum capture buffer in bytes");
 
 void
 bpf_buffer_alloc(struct bpf_d *d)
 {
 
 	KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
 	KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
 	KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
 
 	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
 	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
 	d->bd_hbuf = NULL;
 	d->bd_slen = 0;
 	d->bd_hlen = 0;
 }
 
 /*
  * Simple data copy to the current kernel buffer: copy len bytes from
  * src to buf + offset.  The descriptor argument is not used.
  */
 void
 bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
     void *src, u_int len)
 {
 	caddr_t dst;
 
 	dst = buf + offset;
 	bcopy(src, dst, len);
 }
 
 /*
  * Scatter-gather data copy from an mbuf chain to the current kernel
  * buffer: copy len bytes from the chain starting at src to
  * buf + offset.  Panics if the chain ends before len bytes are copied.
  */
 void
 bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
     u_int len)
 {
 	const struct mbuf *m;
 	u_char *dst;
 	u_int count;
 
 	dst = (u_char *)buf + offset;
 	for (m = (struct mbuf *)src; len > 0; m = m->m_next) {
 		if (m == NULL)
 			panic("bpf_mcopy");
 		/* Take no more from this mbuf than is still wanted. */
 		count = min(m->m_len, len);
 		bcopy(mtod(m, void *), dst, count);
 		dst += count;
 		len -= count;
 	}
 }
 
 /*
  * Free BPF kernel buffers on device close.  Each of the three buffer
  * pointers is freed if set; with INVARIANTS they are then overwritten
  * with an all-ones pattern.
  */
 void
 bpf_buffer_free(struct bpf_d *d)
 {
 
 	if (d->bd_sbuf != NULL) {
 		free(d->bd_sbuf, M_BPF);
 	}
 	if (d->bd_hbuf != NULL) {
 		free(d->bd_hbuf, M_BPF);
 	}
 	if (d->bd_fbuf != NULL) {
 		free(d->bd_fbuf, M_BPF);
 	}
 
 #ifdef INVARIANTS
 	d->bd_fbuf = (caddr_t)~0;
 	d->bd_hbuf = d->bd_fbuf;
 	d->bd_sbuf = d->bd_hbuf;
 #endif
 }
 
 /*
  * This is a historical initialization that occurs when the BPF descriptor is
  * first opened.  It does not imply selection of a buffer mode, so we don't
  * allocate buffers here.
  */
 void
 bpf_buffer_init(struct bpf_d *d)
 {
 
 	/* Start from the current value of the bpf_bufsize tunable. */
 	d->bd_bufsize = bpf_bufsize;
 }
 
 /*
  * Set the buffer size of a descriptor.  The value requested through
  * *i is clamped to [BPF_MINBUFSIZE, bpf_maxbufsize]; when clamping
  * occurs the adjusted value is written back through *i.
  *
  * Returns EINVAL if the descriptor is already attached to an interface
  * (bd_bif != NULL), 0 otherwise.
  */
 int
 bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
 {
 	u_int size;
 
 	BPFD_LOCK(d);
 	if (d->bd_bif != NULL) {
 		BPFD_UNLOCK(d);
 		return (EINVAL);
 	}
 
 	size = *i;
 	if (size > bpf_maxbufsize) {
 		size = bpf_maxbufsize;
 		*i = size;
 	} else if (size < BPF_MINBUFSIZE) {
 		size = BPF_MINBUFSIZE;
 		*i = size;
 	}
 	d->bd_bufsize = size;
 
 	BPFD_UNLOCK(d);
 	return (0);
 }
 
 /*
  * Copy buffer storage to user space in read().
  *
  * Thin wrapper that passes buf, len and uio straight to uiomove() and
  * returns its result; the descriptor argument is not used.
  */
 int
 bpf_buffer_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
 {
 
 	return (uiomove(buf, len, uio));
 }
Index: projects/binutils-2.17/sys/net/if_vlan.c
===================================================================
--- projects/binutils-2.17/sys/net/if_vlan.c	(revision 215829)
+++ projects/binutils-2.17/sys/net/if_vlan.c	(revision 215830)
@@ -1,1499 +1,1541 @@
 /*-
  * Copyright 1998 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  * 
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
  * Might be extended some day to also handle IEEE 802.1p priority
  * tagging.  This is sort of sneaky in the implementation, since
  * we need to pretend to be enough of an Ethernet implementation
  * to make arp work.  The way we do this is by telling everyone
  * that we are an Ethernet, and then catch the packets that
  * ether_output() left on our output queue when it calls
  * if_start(), rewrite them for use by the real outgoing interface,
  * and ask it to send them.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vlan.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rwlock.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/vnet.h>
 
 #define VLANNAME	"vlan"
 #define	VLAN_DEF_HWIDTH	4
 #define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)
 
 #define	UP_AND_RUNNING(ifp) \
     ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
 
 LIST_HEAD(ifvlanhead, ifvlan);
 
 /*
  * Per-parent state shared by all vlans configured on one parent
  * interface.  The vlans are looked up by tag, either through a static
  * array (VLAN_ARRAY) or through a resizable hash of lists.
  */
 struct ifvlantrunk {
 	struct	ifnet   *parent;	/* parent interface of this trunk */
 	struct	rwlock	rw;		/* per-trunk lock, see TRUNK_*LOCK() */
 #ifdef VLAN_ARRAY
 #define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
 	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
 #else
 	struct	ifvlanhead *hash;	/* dynamic hash-list table */
 	uint16_t	hmask;		/* number of buckets minus one */
 	uint16_t	hwidth;		/* log2 of the number of buckets */
 #endif
 	int		refcnt;		/* bumped for every vlan inserted */
 };
 
 /*
  * One multicast address remembered on a vlan; entries are chained on
  * the vlan's vlan_mc_listhead.
  */
 struct vlan_mc_entry {
 	struct ether_addr		mc_addr;	/* the multicast address */
 	SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* list linkage */
 };
 
 /*
  * Software state of one vlan interface (stored in ifp->if_softc).
  */
 struct	ifvlan {
 	struct	ifvlantrunk *ifv_trunk;	/* trunk we are attached to, or NULL */
 	struct	ifnet *ifv_ifp;		/* our own ifnet */
 #define	TRUNK(ifv)	((ifv)->ifv_trunk)
 #define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
 	int	ifv_pflags;	/* special flags we have set on parent */
 	struct	ifv_linkmib {
 		int	ifvm_encaplen;	/* encapsulation length */
 		int	ifvm_mtufudge;	/* MTU fudged by this much */
 		int	ifvm_mintu;	/* min transmission unit */
 		uint16_t ifvm_proto;	/* encapsulation ethertype */
 		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
 	}	ifv_mib;
 	/* Multicast addresses that were pushed down to the parent. */
 	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
 #ifndef VLAN_ARRAY
 	LIST_ENTRY(ifvlan) ifv_list;	/* linkage in the trunk hash bucket */
 #endif
 };
 /* Shorthands for the members of the embedded link MIB. */
 #define	ifv_proto	ifv_mib.ifvm_proto
 #define	ifv_tag		ifv_mib.ifvm_tag
 #define	ifv_encaplen	ifv_mib.ifvm_encaplen
 #define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
 #define	ifv_mintu	ifv_mib.ifvm_mintu
 
 /* Special flags we should propagate to parent. */
 static struct {
 	int flag;
 	int (*func)(struct ifnet *, int);
 } vlan_pflags[] = {
 	{IFF_PROMISC, ifpromisc},
 	{IFF_ALLMULTI, if_allmulti},
 	{0, NULL}
 };
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN");
 SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency");
 
 static int soft_pad = 0;
 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
 	   "pad short frames before tagging");
 
 static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
 
 static eventhandler_tag ifdetach_tag;
 static eventhandler_tag iflladdr_tag;
 
 /*
  * We have a global mutex, that is used to serialize configuration
  * changes and isn't used in normal packet delivery.
  *
  * We also have a per-trunk rwlock, that is locked shared on packet
  * processing and exclusive when configuration is changed.
  *
  * VLAN_ARRAY replaces the dynamic hash with a static array of 4096
  * entries.  In theory this can speed up processing; in practice it
  * does not, probably because the array is too big to fit into the
  * CPU cache.
  */
 static struct mtx ifv_mtx;
 #define	VLAN_LOCK_INIT()	mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF)
 #define	VLAN_LOCK_DESTROY()	mtx_destroy(&ifv_mtx)
 #define	VLAN_LOCK_ASSERT()	mtx_assert(&ifv_mtx, MA_OWNED)
 #define	VLAN_LOCK()		mtx_lock(&ifv_mtx)
 #define	VLAN_UNLOCK()		mtx_unlock(&ifv_mtx)
 #define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, VLANNAME)
 #define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
 #define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)
 #define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
 #define	TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
 #define	TRUNK_RLOCK(trunk)	rw_rlock(&(trunk)->rw)
 #define	TRUNK_RUNLOCK(trunk)	rw_runlock(&(trunk)->rw)
 #define	TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
 
 #ifndef VLAN_ARRAY
 static	void vlan_inithash(struct ifvlantrunk *trunk);
 static	void vlan_freehash(struct ifvlantrunk *trunk);
 static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
 static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
 static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
 static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
 	uint16_t tag);
 #endif
 static	void trunk_destroy(struct ifvlantrunk *trunk);
 
 static	void vlan_start(struct ifnet *ifp);
 static	void vlan_init(void *foo);
 static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
 static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
 static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
     int (*func)(struct ifnet *, int));
 static	int vlan_setflags(struct ifnet *ifp, int status);
 static	int vlan_setmulti(struct ifnet *ifp);
 static	void vlan_unconfig(struct ifnet *ifp);
 static	void vlan_unconfig_locked(struct ifnet *ifp);
 static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
 static	void vlan_link_state(struct ifnet *ifp);
 static	void vlan_capabilities(struct ifvlan *ifv);
 static	void vlan_trunk_capabilities(struct ifnet *ifp);
 
 static	struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
     const char *, int *);
 static	int vlan_clone_match(struct if_clone *, const char *);
 static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);
 
 static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
 static  void vlan_iflladdr(void *arg, struct ifnet *ifp);
 
 static	struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
     IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
 
+#ifdef VIMAGE
+/*
+ * Per-vnet instance of the vlan cloner; V_vlan_cloner names the instance
+ * belonging to the current vnet.
+ */
+static VNET_DEFINE(struct if_clone, vlan_cloner);
+#define	V_vlan_cloner	VNET(vlan_cloner)
+#endif
+
 #ifndef VLAN_ARRAY
 #define HASH(n, m)	((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m))
 
 /*
  * Give a fresh trunk its initial hash table: 2^VLAN_DEF_HWIDTH empty
  * buckets.
  */
 static void
 vlan_inithash(struct ifvlantrunk *trunk)
 {
 	int bucket, nbuckets;
 
 	/*
 	 * malloc(M_WAITOK) is used below, so the trunk must not be locked.
 	 * That is fine as long as this runs before the trunk struct is
 	 * hooked up and becomes visible to other threads.
 	 */
 	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
 	    ("%s: hash already initialized", __func__));
 
 	trunk->hwidth = VLAN_DEF_HWIDTH;
 	nbuckets = 1 << trunk->hwidth;
 	trunk->hmask = nbuckets - 1;
 	trunk->hash = malloc(sizeof(struct ifvlanhead) * nbuckets, M_VLAN,
 	    M_WAITOK);
 	for (bucket = 0; bucket < nbuckets; bucket++) {
 		LIST_INIT(&trunk->hash[bucket]);
 	}
 }
 
 /*
  * Free a trunk's hash table and reset the hash bookkeeping fields.
  * INVARIANTS kernels first assert that every bucket is empty.
  */
 static void
 vlan_freehash(struct ifvlantrunk *trunk)
 {
 #ifdef INVARIANTS
 	int bucket;
 
 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
 	for (bucket = 0; bucket < (1 << trunk->hwidth); bucket++) {
 		KASSERT(LIST_EMPTY(&trunk->hash[bucket]),
 		    ("%s: hash table not empty", __func__));
 	}
 #endif
 	free(trunk->hash, M_VLAN);
 	trunk->hash = NULL;
 	trunk->hmask = 0;
 	trunk->hwidth = 0;
 }
 
 static int
 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
 {
 	int i, b;
 	struct ifvlan *ifv2;
 
 	TRUNK_LOCK_ASSERT(trunk);
 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
 
 	b = 1 << trunk->hwidth;
 	i = HASH(ifv->ifv_tag, trunk->hmask);
 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
 		if (ifv->ifv_tag == ifv2->ifv_tag)
 			return (EEXIST);
 
 	/*
 	 * Grow the hash when the number of vlans exceeds half of the number of
 	 * hash buckets squared. This will make the average linked-list length
 	 * buckets/2.
 	 */
 	if (trunk->refcnt > (b * b) / 2) {
 		vlan_growhash(trunk, 1);
 		i = HASH(ifv->ifv_tag, trunk->hmask);
 	}
 	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
 	trunk->refcnt++;
 
 	return (0);
 }
 
 static int
 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
 {
 	int i, b;
 	struct ifvlan *ifv2;
 
 	TRUNK_LOCK_ASSERT(trunk);
 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
 	
 	b = 1 << trunk->hwidth;
 	i = HASH(ifv->ifv_tag, trunk->hmask);
 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
 		if (ifv2 == ifv) {
 			trunk->refcnt--;
 			LIST_REMOVE(ifv2, ifv_list);
 			if (trunk->refcnt < (b * b) / 2)
 				vlan_growhash(trunk, -1);
 			return (0);
 		}
 
 	panic("%s: vlan not found\n", __func__);
 	return (ENOENT); /*NOTREACHED*/
 }
 
 /*
  * Resize the trunk hash by "howmuch" bits of width (positive grows,
  * negative shrinks).  The table is never shrunk below VLAN_DEF_HWIDTH,
  * and if the new bucket array cannot be allocated the old table is
  * simply kept.  The trunk must be write-locked.
  */
 static void
 vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
 {
 	struct ifvlanhead *newhash;
 	struct ifvlan *entry;
 	int newwidth, oldn, newn, src, dst;
 
 	TRUNK_LOCK_ASSERT(trunk);
 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
 
 	if (howmuch == 0) {
 		/* Harmless yet obvious coding error */
 		printf("%s: howmuch is 0\n", __func__);
 		return;
 	}
 
 	newwidth = trunk->hwidth + howmuch;
 	oldn = 1 << trunk->hwidth;
 	newn = 1 << newwidth;
 	/* Do not shrink the table below the default */
 	if (newwidth < VLAN_DEF_HWIDTH)
 		return;
 
 	/* M_NOWAIT because the trunk lock is held (asserted above). */
 	newhash = malloc(sizeof(struct ifvlanhead) * newn, M_VLAN, M_NOWAIT);
 	if (newhash == NULL) {
 		printf("%s: out of memory -- hash size not changed\n",
 		    __func__);
 		return;		/* We can live with the old hash table */
 	}
 	for (dst = 0; dst < newn; dst++) {
 		LIST_INIT(&newhash[dst]);
 	}
 	/* Drain every old bucket into the bucket its tag maps to now. */
 	for (src = 0; src < oldn; src++) {
 		for (;;) {
 			entry = LIST_FIRST(&trunk->hash[src]);
 			if (entry == NULL)
 				break;
 			LIST_REMOVE(entry, ifv_list);
 			dst = HASH(entry->ifv_tag, newn - 1);
 			LIST_INSERT_HEAD(&newhash[dst], entry, ifv_list);
 		}
 	}
 	free(trunk->hash, M_VLAN);
 	trunk->hash = newhash;
 	trunk->hwidth = newwidth;
 	trunk->hmask = newn - 1;
 
 	if (bootverbose) {
 		if_printf(trunk->parent,
 		    "VLAN hash table resized from %d to %d buckets\n",
 		    oldn, newn);
 	}
 }
 
 /*
  * Find the vlan with the given tag in the trunk hash; NULL if there is
  * none.  The trunk must be read-locked.
  */
 static __inline struct ifvlan *
 vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
 {
 	struct ifvlanhead *bucket;
 	struct ifvlan *cur;
 
 	TRUNK_LOCK_RASSERT(trunk);
 
 	bucket = &trunk->hash[HASH(tag, trunk->hmask)];
 	LIST_FOREACH(cur, bucket, ifv_list) {
 		if (cur->ifv_tag == tag)
 			return (cur);
 	}
 	return (NULL);
 }
 
 #if 0
 /*
  * Debugging aid (compiled out): print each bucket index followed by the
  * names of the vlan interfaces chained on it.
  */
 static void
 vlan_dumphash(struct ifvlantrunk *trunk)
 {
 	struct ifvlan *cur;
 	int bucket, nbuckets;
 
 	nbuckets = 1 << trunk->hwidth;
 	for (bucket = 0; bucket < nbuckets; bucket++) {
 		printf("%d: ", bucket);
 		LIST_FOREACH(cur, &trunk->hash[bucket], ifv_list) {
 			printf("%s ", cur->ifv_ifp->if_xname);
 		}
 		printf("\n");
 	}
 }
 #endif /* 0 */
 #endif /* !VLAN_ARRAY */
 
 /*
  * Tear down a trunk: free its hash (hash build only), detach it from
  * the parent ifnet, destroy its lock and free the structure.  The
  * global VLAN lock must be held by the caller.
  */
 static void
 trunk_destroy(struct ifvlantrunk *trunk)
 {
 	VLAN_LOCK_ASSERT();
 
 	TRUNK_LOCK(trunk);
 #ifndef VLAN_ARRAY
 	vlan_freehash(trunk);
 #endif
 	/* From here on the parent no longer points at this trunk. */
 	trunk->parent->if_vlantrunk = NULL;
 	TRUNK_UNLOCK(trunk);
 	TRUNK_LOCK_DESTROY(trunk);
 	free(trunk, M_VLAN);
 }
 
 /*
  * Program our multicast filter. What we're actually doing is
  * programming the multicast filter of the parent. This has the
  * side effect of causing the parent interface to receive multicast
  * traffic that it doesn't really want, which ends up being discarded
  * later by the upper protocol layers. Unfortunately, there's no way
  * to avoid this: there really is only one physical interface.
  *
  * XXX: There is a possible race here if more than one thread is
  *      modifying the multicast state of the vlan interface at the same time.
  */
 static int
 vlan_setmulti(struct ifnet *ifp)
 {
 	struct ifnet		*ifp_p;
 	struct ifmultiaddr	*ifma, *rifma = NULL;
 	struct ifvlan		*sc;
 	struct vlan_mc_entry	*mc;
 	struct sockaddr_dl	sdl;
 	int			error;
 
 	/*VLAN_LOCK_ASSERT();*/
 
 	/* Find the parent. */
 	sc = ifp->if_softc;
 	ifp_p = PARENT(sc);
 
 	CURVNET_SET_QUIET(ifp_p->if_vnet);
 
 	bzero((char *)&sdl, sizeof(sdl));
 	sdl.sdl_len = sizeof(sdl);
 	sdl.sdl_family = AF_LINK;
 	sdl.sdl_index = ifp_p->if_index;
 	sdl.sdl_type = IFT_ETHER;
 	sdl.sdl_alen = ETHER_ADDR_LEN;
 
 	/* First, remove any existing filter entries. */
 	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
 		bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
 		error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
 		if (error)
 			return (error);
 		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
 		free(mc, M_VLAN);
 	}
 
 	/* Now program new ones. */
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
 		if (mc == NULL)
 			return (ENOMEM);
 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 		    (char *)&mc->mc_addr, ETHER_ADDR_LEN);
 		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 		    LLADDR(&sdl), ETHER_ADDR_LEN);
 		error = if_addmulti(ifp_p, (struct sockaddr *)&sdl, &rifma);
 		if (error)
 			return (error);
 	}
 
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * A handler for parent interface link layer address changes.
  * If the parent interface link layer address is changed we
  * should also change it on all children vlans.
  *
  * Registered for iflladdr_event in vlan_modevent(); ifp is the
  * interface whose address changed.
  */
 static void
 vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
 {
 	struct ifvlan *ifv;
 #ifndef VLAN_ARRAY
 	struct ifvlan *next;	/* lookahead cursor for LIST_FOREACH_SAFE */
 #endif
 	int i;
 
 	/*
 	 * Check if it's a trunk interface first of all
 	 * to avoid needless locking.
 	 */
 	if (ifp->if_vlantrunk == NULL)
 		return;
 
 	VLAN_LOCK();
 	/*
 	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
 	 *
 	 * Both variants below open a brace that is closed by the shared
 	 * loop body after the #endif.
 	 */
 #ifdef VLAN_ARRAY
 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
 		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
 #else /* VLAN_ARRAY */
 	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
 		LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
 #endif /* VLAN_ARRAY */
 			/*
 			 * The global VLAN lock is released around
 			 * if_setlladdr() and retaken afterwards.
 			 * NOTE(review): the trunk and its table are read
 			 * again after the lock was dropped -- confirm
 			 * nothing can change them in that window.
 			 */
 			VLAN_UNLOCK();
 			if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 			VLAN_LOCK();
 		}
 	VLAN_UNLOCK();
 
 }
 
 /*
  * A handler for network interface departure events.
  * Track departure of trunks here so that we don't access invalid
  * pointers or whatever if a trunk is ripped from under us, e.g.,
  * by ejecting its hot-plug card.  However, if an ifnet is simply
  * being renamed, then there's no need to tear down the state.
  *
  * Registered for ifnet_departure_event in vlan_modevent(); ifp is the
  * departing interface.
  */
 static void
 vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
 {
 	struct ifvlan *ifv;
 	int i;
 
 	/*
 	 * Check if it's a trunk interface first of all
 	 * to avoid needless locking.
 	 */
 	if (ifp->if_vlantrunk == NULL)
 		return;
 
 	/* If the ifnet is just being renamed, don't do anything. */
 	if (ifp->if_flags & IFF_RENAMING)
 		return;
 
 	VLAN_LOCK();
 	/*
 	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
 	 * Check trunk pointer after each vlan_unconfig() as it will
 	 * free it and set to NULL after the last vlan was detached.
 	 */
 #ifdef VLAN_ARRAY
 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
 		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
 			vlan_unconfig_locked(ifv->ifv_ifp);
 			if (ifp->if_vlantrunk == NULL)
 				break;
 		}
 #else /* VLAN_ARRAY */
 	/*
 	 * Hash variant: after every removal the scan starts over from
 	 * bucket 0, because removing a vlan may resize the table.
 	 */
 restart:
 	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
 		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
 			vlan_unconfig_locked(ifv->ifv_ifp);
 			if (ifp->if_vlantrunk)
 				goto restart;	/* trunk->hwidth can change */
 			else
 				break;
 		}
 #endif /* VLAN_ARRAY */
 	/* Trunk should have been destroyed in vlan_unconfig(). */
 	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
 	VLAN_UNLOCK();
 }
 
 /*
  * VLAN support can be loaded as a module.  The only place in the
  * system that's intimately aware of this is ether_input.  We hook
  * into this code through vlan_input_p which is defined there and
  * set here.  Noone else in the system should be aware of this so
  * we use an explicit reference here.
  */
 extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);
 
 /* For if_link_state_change() eyes only... */
 extern	void (*vlan_link_state_p)(struct ifnet *);
 
 /*
  * Module event handler.
  *
  * MOD_LOAD registers the interface departure and lladdr event handlers,
  * initializes the global VLAN lock, installs the vlan hooks
  * (vlan_input_p, vlan_link_state_p, vlan_trunk_cap_p) and, in
  * non-VIMAGE kernels, attaches the cloner.  MOD_UNLOAD undoes these
  * steps.  Any other event type yields EOPNOTSUPP.
  *
  * NOTE(review): when the second EVENTHANDLER_REGISTER() fails, ENOMEM is
  * returned while ifdetach_tag is still registered -- confirm that the
  * module framework cleans this up.
  */
 static int
 vlan_modevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
 		if (ifdetach_tag == NULL)
 			return (ENOMEM);
 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
 		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
 		if (iflladdr_tag == NULL)
 			return (ENOMEM);
 		VLAN_LOCK_INIT();
 		vlan_input_p = vlan_input;
 		vlan_link_state_p = vlan_link_state;
 		vlan_trunk_cap_p = vlan_trunk_capabilities;
+#ifndef VIMAGE
 		if_clone_attach(&vlan_cloner);
+#endif
 		if (bootverbose)
 			printf("vlan: initialized, using "
 #ifdef VLAN_ARRAY
 			       "full-size arrays"
 #else
 			       "hash tables with chaining"
 #endif
 			
 			       "\n");
 		break;
 	case MOD_UNLOAD:
+#ifndef VIMAGE
 		if_clone_detach(&vlan_cloner);
+#endif
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
 		vlan_input_p = NULL;
 		vlan_link_state_p = NULL;
 		vlan_trunk_cap_p = NULL;
 		VLAN_LOCK_DESTROY();
 		if (bootverbose)
 			printf("vlan: unloaded\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t vlan_mod = {
 	"if_vlan",
 	vlan_modevent,
 	0
 };
 
 DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_vlan, 3);
 
+#ifdef VIMAGE
+/*
+ * Per-vnet setup: copy the template cloner into this vnet's instance
+ * and attach that copy.
+ */
+static void
+vnet_vlan_init(const void *unused __unused)
+{
+
+	V_vlan_cloner = vlan_cloner;
+	if_clone_attach(&V_vlan_cloner);
+}
+VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_vlan_init, NULL);
+
+/*
+ * Per-vnet teardown: detach this vnet's cloner instance.
+ */
+static void
+vnet_vlan_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(&V_vlan_cloner);
+}
+VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
+    vnet_vlan_uninit, NULL);
+#endif
+
 /*
  * Look for an Ethernet interface whose name, followed by '.' and a
  * non-empty run of decimal digits, spells out "name" exactly
  * (<etherif>.<vlan> style).
  *
  * Returns the matching ifnet, or NULL when no interface matches.  When
  * a match is found and tag is not NULL, *tag receives the parsed number.
  * Digits stop being accumulated once the value exceeds EVL_VLID_MASK,
  * so an arbitrarily long digit string cannot overflow the int; the
  * value stored then still has bits outside EVL_VLID_MASK set, which is
  * what callers test for to reject it.
  *
  * NOTE(review): the ifnet pointer is returned after the ifnet list lock
  * has been dropped -- confirm callers may rely on it staying valid.
  */
 static struct ifnet *
 vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
 {
 	const char *cp;
 	struct ifnet *ifp;
 	int t;
 
 	/* Check for <etherif>.<vlan> style interface names. */
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (ifp->if_type != IFT_ETHER)
 			continue;
 		if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
 			continue;
 		cp = name + strlen(ifp->if_xname);
 		if (*cp++ != '.')
 			continue;
 		if (*cp == '\0')
 			continue;
 		t = 0;
 		for (; *cp >= '0' && *cp <= '9'; cp++) {
 			/*
 			 * Saturate instead of overflowing: once t is out of
 			 * range it stays out of range, and it can never get
 			 * larger than EVL_VLID_MASK * 10 + 9.
 			 */
 			if (t <= EVL_VLID_MASK)
 				t = (t * 10) + (*cp - '0');
 		}
 		if (*cp != '\0')
 			continue;
 		if (tag != NULL)
 			*tag = t;
 		break;
 	}
 	IFNET_RUNLOCK_NOSLEEP();
 
 	/* ifp is NULL here when the list was walked without a match. */
 	return (ifp);
 }
 
 /*
  * Cloner match routine.  A name is accepted (1) when it has the
  * <etherif>.<vlan> form, or when it is VLANNAME followed by nothing but
  * decimal digits (possibly none); everything else is rejected (0).
  */
 static int
 vlan_clone_match(struct if_clone *ifc, const char *name)
 {
 	const char *digit;
 
 	if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
 		return (1);
 
 	if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
 		return (0);
 
 	/* Whatever follows the prefix must consist of digits only. */
 	digit = name + strlen(VLANNAME);
 	while (*digit != '\0') {
 		if (*digit < '0' || *digit > '9')
 			return (0);
 		digit++;
 	}
 
 	return (1);
 }
 
 /*
  * Cloner create routine: allocate a unit, build the vlan softc and
  * ifnet, attach it as an Ethernet-like interface and, when a parent and
  * tag were supplied, configure it on that parent right away.
  *
  * name is the requested interface name (len bytes of storage); in the
  * wildcard case the allocated unit number is appended to it in place.
  * params, when not NULL, is copied in as a struct vlanreq.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	char *dp;
 	int wildcard;
 	int unit;
 	int error;
 	int tag;
 	int ethertag;
 	struct ifvlan *ifv;
 	struct ifnet *ifp;
 	struct ifnet *p;
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	struct vlanreq vlr;
 	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
 
 	/*
 	 * There are 3 (ugh) ways to specify the cloned device:
 	 * o pass a parameter block with the clone request.
 	 * o specify parameters in the text of the clone device name
 	 * o specify no parameters and get an unattached device that
 	 *   must be configured separately.
 	 * The first technique is preferred; the latter two are
 	 * supported for backwards compatibility.
 	 */
 	if (params) {
 		error = copyin(params, &vlr, sizeof(vlr));
 		if (error)
 			return error;
 		p = ifunit(vlr.vlr_parent);
 		if (p == NULL)
 			return ENXIO;
 		/*
 		 * Don't let the caller set up a VLAN tag with
 		 * anything except VLID bits.
 		 */
 		if (vlr.vlr_tag & ~EVL_VLID_MASK)
 			return (EINVAL);
 		error = ifc_name2unit(name, &unit);
 		if (error != 0)
 			return (error);
 
 		ethertag = 1;
 		tag = vlr.vlr_tag;
 		wildcard = (unit < 0);
 	} else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
 		ethertag = 1;
 		unit = -1;
 		wildcard = 0;
 
 		/*
 		 * Don't let the caller set up a VLAN tag with
 		 * anything except VLID bits.
 		 */
 		if (tag & ~EVL_VLID_MASK)
 			return (EINVAL);
 	} else {
 		/* No parent given: create an unconfigured vlan. */
 		ethertag = 0;
 
 		error = ifc_name2unit(name, &unit);
 		if (error != 0)
 			return (error);
 
 		wildcard = (unit < 0);
 	}
 
 	error = ifc_alloc_unit(ifc, &unit);
 	if (error != 0)
 		return (error);
 
 	/* In the wildcard case, we need to update the name. */
 	if (wildcard) {
 		/* Find the terminating NUL and print the unit over it. */
 		for (dp = name; *dp != '\0'; dp++);
 		if (snprintf(dp, len - (dp-name), "%d", unit) >
 		    len - (dp-name) - 1) {
 			panic("%s: interface name too long", __func__);
 		}
 	}
 
 	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
 	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		ifc_free_unit(ifc, unit);
 		free(ifv, M_VLAN);
 		return (ENOSPC);
 	}
 	SLIST_INIT(&ifv->vlan_mc_listhead);
 
 	ifp->if_softc = ifv;
 	/*
 	 * Set the name manually rather than using if_initname because
 	 * we don't conform to the default naming convention for interfaces.
 	 */
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
 	ifp->if_dname = ifc->ifc_name;
 	ifp->if_dunit = unit;
 	/* NB: flags are not set here */
 	ifp->if_linkmib = &ifv->ifv_mib;
 	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
 	/* NB: mtu is not set here */
 
 	ifp->if_init = vlan_init;
 	ifp->if_start = vlan_start;
 	ifp->if_ioctl = vlan_ioctl;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	ifp->if_flags = VLAN_IFFLAGS;
 	ether_ifattach(ifp, eaddr);
 	/* Now undo some of the damage... */
 	ifp->if_baudrate = 0;
 	ifp->if_type = IFT_L2VLAN;
 	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
 	ifa = ifp->if_addr;
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_L2VLAN;
 
 	if (ethertag) {
 		error = vlan_config(ifv, p, tag);
 		if (error != 0) {
 			/*
 			 * Since we've partially failed, we need to back
 			 * out all the way, otherwise userland could get
 			 * confused.  Thus, we destroy the interface.
 			 */
 			ether_ifdetach(ifp);
 			vlan_unconfig(ifp);
 			if_free_type(ifp, IFT_ETHER);
 			ifc_free_unit(ifc, unit);
 			free(ifv, M_VLAN);
 
 			return (error);
 		}
 
 		/* Update flags on the parent, if necessary. */
 		vlan_setflags(ifp, 1);
 	}
 
 	return (0);
 }
 
 /*
  * Cloner destroy routine: detach the vlan interface, unconfigure it,
  * then release the ifnet, the softc and the unit number.  Always
  * returns 0.
  */
 static int
 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
 	struct ifvlan *softc;
 	int unit;
 
 	/* Remember what is needed from the ifnet before it is freed. */
 	softc = ifp->if_softc;
 	unit = ifp->if_dunit;
 
 	/* First take it off the system-wide lists ... */
 	ether_ifdetach(ifp);
 	/* ... only then can it be unconfigured and freed. */
 	vlan_unconfig(ifp);
 	if_free_type(ifp, IFT_ETHER);
 	free(softc, M_VLAN);
 	ifc_free_unit(ifc, unit);
 
 	return (0);
 }
 
 /*
  * The ifp->if_init entry point for vlan(4) is a no-op; it is installed
  * as if_init by vlan_clone_create().
  */
 static void
 vlan_init(void *foo __unused)
 {
 }
 
 /*
  * The if_start method for vlan(4) interface. It doesn't
  * raise the IFF_DRV_OACTIVE flag, since it is called
  * only from IFQ_HANDOFF() macro in ether_output_frame().
  * If the interface queue is full, and vlan_start() is
  * not called, the queue would never get emptied and
  * interface would stall forever.
  *
  * Drains ifp's send queue: each packet is tapped for BPF, optionally
  * padded, tagged (out of band or by encapsulation) and handed to the
  * parent's if_transmit routine.
  */
 static void
 vlan_start(struct ifnet *ifp)
 {
 	struct ifvlan *ifv;
 	struct ifnet *p;
 	struct mbuf *m;
 	int error;
 
 	ifv = ifp->if_softc;
 	p = PARENT(ifv);
 
 	for (;;) {
 		IF_DEQUEUE(&ifp->if_snd, m);
 		if (m == NULL)
 			break;
 		BPF_MTAP(ifp, m);
 
 		/*
 		 * Do not run parent's if_start() if the parent is not up,
 		 * or parent's driver will cause a system crash.
 		 */
 		if (!UP_AND_RUNNING(p)) {
 			m_freem(m);
 			/* Such drops are accounted in if_collisions. */
 			ifp->if_collisions++;
 			continue;
 		}
 
 		/*
 		 * Pad the frame to the minimum size allowed if told to.
 		 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
 		 * paragraph C.4.4.3.b.  It can help to work around buggy
 		 * bridges that violate paragraph C.4.4.3.a from the same
 		 * document, i.e., fail to pad short frames after untagging.
 		 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
 		 * untagging it will produce a 62-byte frame, which is a runt
 		 * and requires padding.  There are VLAN-enabled network
 		 * devices that just discard such runts instead or mishandle
 		 * them somehow.
 		 */
 		if (soft_pad) {
 			static char pad[8];	/* just zeros */
 			int n;
 
 			/*
 			 * Append zeros in chunks of at most sizeof(pad)
 			 * bytes; n is what is still missing.
 			 */
 			for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
 			     n > 0; n -= sizeof(pad))
 				if (!m_append(m, min(n, sizeof(pad)), pad))
 					break;
 
 			/* n is still positive only if m_append() failed. */
 			if (n > 0) {
 				if_printf(ifp, "cannot pad short frame\n");
 				ifp->if_oerrors++;
 				m_freem(m);
 				continue;
 			}
 		}
 
 		/*
 		 * If underlying interface can do VLAN tag insertion itself,
 		 * just pass the packet along. However, we need some way to
 		 * tell the interface where the packet came from so that it
 		 * knows how to find the VLAN tag to use, so we attach a
 		 * packet tag that holds it.
 		 */
 		if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
 			m->m_pkthdr.ether_vtag = ifv->ifv_tag;
 			m->m_flags |= M_VLANTAG;
 		} else {
 			m = ether_vlanencap(m, ifv->ifv_tag);
 			if (m == NULL) {
 				if_printf(ifp,
 				    "unable to prepend VLAN header\n");
 				ifp->if_oerrors++;
 				continue;
 			}
 		}
 
 		/*
 		 * Send it, precisely as ether_output() would have.
 		 * We are already running at splimp.
 		 */
 		error = (p->if_transmit)(p, m);
 		if (!error)
 			ifp->if_opackets++;
 		else
 			ifp->if_oerrors++;
 	}
 }
 
 /*
  * Input hook (installed as vlan_input_p): take a tagged frame received
  * on trunk interface ifp, determine its VLAN tag, find the vlan
  * interface configured for that tag and feed the frame to it.  Frames
  * for unknown or not-running vlans are dropped and counted in the
  * parent's if_noproto.
  */
 static void
 vlan_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
 	struct ifvlan *ifv;
 	uint16_t tag;
 
 	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
 
 	if (m->m_flags & M_VLANTAG) {
 		/*
 		 * Packet is tagged, but m contains a normal
 		 * Ethernet frame; the tag is stored out-of-band.
 		 */
 		tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
 		m->m_flags &= ~M_VLANTAG;
 	} else {
 		struct ether_vlan_header *evl;
 
 		/*
 		 * Packet is tagged in-band as specified by 802.1q.
 		 */
 		switch (ifp->if_type) {
 		case IFT_ETHER:
 			/* The whole VLAN header must be contiguous. */
 			if (m->m_len < sizeof(*evl) &&
 			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
 				if_printf(ifp, "cannot pullup VLAN header\n");
 				return;
 			}
 			evl = mtod(m, struct ether_vlan_header *);
 			tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
 
 			/*
 			 * Remove the 802.1q header by copying the Ethernet
 			 * addresses over it and adjusting the beginning of
 			 * the data in the mbuf.  The encapsulated Ethernet
 			 * type field is already in place.
 			 */
 			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
 			m_adj(m, ETHER_VLAN_ENCAP_LEN);
 			break;
 
 		default:
 #ifdef INVARIANTS
 			panic("%s: %s has unsupported if_type %u",
 			      __func__, ifp->if_xname, ifp->if_type);
 #endif
 			m_freem(m);
 			ifp->if_noproto++;
 			return;
 		}
 	}
 
 	TRUNK_RLOCK(trunk);
 #ifdef VLAN_ARRAY
 	ifv = trunk->vlans[tag];
 #else
 	ifv = vlan_gethash(trunk, tag);
 #endif
 	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
 		TRUNK_RUNLOCK(trunk);
 		m_freem(m);
 		ifp->if_noproto++;
 		return;
 	}
 	TRUNK_RUNLOCK(trunk);
 
 	/*
 	 * NOTE(review): ifv is used below after the trunk lock has been
 	 * released -- confirm what keeps it alive here.
 	 */
 	m->m_pkthdr.rcvif = ifv->ifv_ifp;
 	ifv->ifv_ifp->if_ipackets++;
 
 	/* Pass it back through the parent's input routine. */
 	(*ifp->if_input)(ifv->ifv_ifp, m);
 }
 
 /*
  * Attach vlan ifv to parent interface p using the given VLAN tag.
  *
  * Creates the parent's trunk on first use, enters the vlan into the
  * trunk's lookup table, derives MTU, baudrate, flags, link state,
  * capabilities and link-level address from the parent and marks the
  * vlan interface running.
  *
  * Returns 0 on success; EINVAL for a reserved tag, EPROTONOSUPPORT for
  * an unsuitable parent, EBUSY if ifv is already configured, EEXIST if
  * the tag is already in use on the trunk.
  */
 static int
 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
 {
 	struct ifvlantrunk *trunk;
 	struct ifnet *ifp;
 	int error = 0;
 
 	/* VID numbers 0x0 and 0xFFF are reserved */
 	if (tag == 0 || tag == 0xFFF)
 		return (EINVAL);
 	if (p->if_type != IFT_ETHER)
 		return (EPROTONOSUPPORT);
 	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
 		return (EPROTONOSUPPORT);
 	if (ifv->ifv_trunk)
 		return (EBUSY);
 
 	if (p->if_vlantrunk == NULL) {
 		/*
 		 * Allocate and set up the trunk before taking the VLAN
 		 * lock: both malloc() here and vlan_inithash() use M_WAITOK.
 		 */
 		trunk = malloc(sizeof(struct ifvlantrunk),
 		    M_VLAN, M_WAITOK | M_ZERO);
 #ifndef VLAN_ARRAY
 		vlan_inithash(trunk);
 #endif
 		VLAN_LOCK();
 		if (p->if_vlantrunk != NULL) {
 			/* A race that is very unlikely to be hit. */
 #ifndef VLAN_ARRAY
 			vlan_freehash(trunk);
 #endif
 			free(trunk, M_VLAN);
 			goto exists;
 		}
 		TRUNK_LOCK_INIT(trunk);
 		TRUNK_LOCK(trunk);
 		p->if_vlantrunk = trunk;
 		trunk->parent = p;
 	} else {
 		VLAN_LOCK();
 exists:
 		trunk = p->if_vlantrunk;
 		TRUNK_LOCK(trunk);
 	}
 
 	/* From here on both the VLAN lock and the trunk lock are held. */
 	ifv->ifv_tag = tag;	/* must set this before vlan_inshash() */
 #ifdef VLAN_ARRAY
 	if (trunk->vlans[tag] != NULL) {
 		error = EEXIST;
 		goto done;
 	}
 	trunk->vlans[tag] = ifv;
 	trunk->refcnt++;
 #else
 	error = vlan_inshash(trunk, ifv);
 	if (error)
 		goto done;
 #endif
 	ifv->ifv_proto = ETHERTYPE_VLAN;
 	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
 	ifv->ifv_mintu = ETHERMIN;
 	ifv->ifv_pflags = 0;
 
 	/*
 	 * If the parent supports the VLAN_MTU capability,
 	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
 	 * use it.
 	 */
 	if (p->if_capenable & IFCAP_VLAN_MTU) {
 		/*
 		 * No need to fudge the MTU since the parent can
 		 * handle extended frames.
 		 */
 		ifv->ifv_mtufudge = 0;
 	} else {
 		/*
 		 * Fudge the MTU by the encapsulation size.  This
 		 * makes us incompatible with strictly compliant
 		 * 802.1Q implementations, but allows us to use
 		 * the feature with other NetBSD implementations,
 		 * which might still be useful.
 		 */
 		ifv->ifv_mtufudge = ifv->ifv_encaplen;
 	}
 
 	ifv->ifv_trunk = trunk;
 	ifp = ifv->ifv_ifp;
 	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
 	ifp->if_baudrate = p->if_baudrate;
 	/*
 	 * Copy only a selected subset of flags from the parent.
 	 * Other flags are none of our business.
 	 */
 #define VLAN_COPY_FLAGS (IFF_SIMPLEX)
 	ifp->if_flags &= ~VLAN_COPY_FLAGS;
 	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
 #undef VLAN_COPY_FLAGS
 
 	ifp->if_link_state = p->if_link_state;
 
 	vlan_capabilities(ifv);
 
 	/*
 	 * Set up our ``Ethernet address'' to reflect the underlying
 	 * physical interface's.
 	 */
 	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN);
 
 	/*
 	 * Configure multicast addresses that may already be
 	 * joined on the vlan device.
 	 */
 	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
 
 	/* We are ready for operation now. */
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 done:
 	TRUNK_UNLOCK(trunk);
 	/* Listeners are told only about successful configurations. */
 	if (error == 0)
 		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag);
 	VLAN_UNLOCK();
 
 	return (error);
 }
 
 /*
  * Unconfigure a vlan interface.  This is the unlocked entry point: it
  * takes the VLAN lock around vlan_unconfig_locked().
  */
 static void
 vlan_unconfig(struct ifnet *ifp)
 {
 
 	VLAN_LOCK();
 	vlan_unconfig_locked(ifp);
 	VLAN_UNLOCK();
 }
 
 /*
  * Detach the vlan interface "ifp" from its trunk, if it has one.
  *
  * The caller must hold the VLAN lock (asserted below).  With a trunk
  * attached, and under the trunk lock, this:
  *  - removes every multicast address recorded in vlan_mc_listhead from
  *    the parent and frees the list entries;
  *  - releases the special flags held on the parent (vlan_setflags(ifp, 0));
  *  - removes the vlan from the trunk and clears ifv_trunk;
  *  - destroys the trunk when its reference count has reached zero.
  * In all cases the interface MTU, link state and IFF_DRV_RUNNING are
  * reset afterwards; the vlan_unconfig event is raised only if a parent
  * was attached.
  */
 static void
 vlan_unconfig_locked(struct ifnet *ifp)
 {
 	struct ifvlantrunk *trunk;
 	struct vlan_mc_entry *mc;
 	struct ifvlan *ifv;
 	struct ifnet  *parent;
 
 	VLAN_LOCK_ASSERT();
 
 	ifv = ifp->if_softc;
 	trunk = ifv->ifv_trunk;
 	parent = NULL;	/* stays NULL when no trunk is attached */
 
 	if (trunk != NULL) {
 		struct sockaddr_dl sdl;
 
 		TRUNK_LOCK(trunk);
 		parent = trunk->parent;
 
 		/*
 		 * Since the interface is being unconfigured, we need to
 		 * empty the list of multicast groups that we may have joined
 		 * while we were alive from the parent's list.
 		 */
 		bzero((char *)&sdl, sizeof(sdl));
 		sdl.sdl_len = sizeof(sdl);
 		sdl.sdl_family = AF_LINK;
 		sdl.sdl_index = parent->if_index;
 		sdl.sdl_type = IFT_ETHER;
 		sdl.sdl_alen = ETHER_ADDR_LEN;
 
 		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
 			bcopy((char *)&mc->mc_addr, LLADDR(&sdl),
 			    ETHER_ADDR_LEN);
 
 			/*
 			 * This may fail if the parent interface is
 			 * being detached.  Regardless, we should do a
 			 * best effort to free this interface as much
 			 * as possible as all callers expect vlan
 			 * destruction to succeed.
 			 */
 			(void)if_delmulti(parent, (struct sockaddr *)&sdl);
 			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
 			free(mc, M_VLAN);
 		}
 
 		vlan_setflags(ifp, 0); /* clear special flags on parent */
 #ifdef VLAN_ARRAY
 		trunk->vlans[ifv->ifv_tag] = NULL;
 		trunk->refcnt--;
 #else
 		vlan_remhash(trunk, ifv);
 #endif
 		ifv->ifv_trunk = NULL;
 
 		/*
 		 * Check if we were the last.
 		 */
 		if (trunk->refcnt == 0) {
 			trunk->parent->if_vlantrunk = NULL;
 			/*
 			 * XXXGL: If some ithread has already entered
 			 * vlan_input() and is now blocked on the trunk
 			 * lock, then it should preempt us right after
 			 * unlock and finish its work. Then we will acquire
 			 * lock again in trunk_destroy().
 			 */
 			TRUNK_UNLOCK(trunk);
 			trunk_destroy(trunk);
 		} else
 			TRUNK_UNLOCK(trunk);
 	}
 
 	/* Disconnect from parent. */
 	if (ifv->ifv_pflags)
 		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_link_state = LINK_STATE_UNKNOWN;
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	/*
 	 * Only dispatch an event if vlan was
 	 * attached, otherwise there is nothing
 	 * to cleanup anyway.
 	 */
 	if (parent != NULL)
 		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag);
 }
 
 /*
  * Handle a reference counted flag that should be set on the parent as well.
  *
  * "flag" is the if_flags bit being propagated.  A non-zero "status" makes
  * the parent follow our current setting of that bit; a zero "status"
  * requests that it be cleared.  "func" is invoked on the parent with the
  * resulting value whenever it differs from what ifv_pflags records.
  *
  * Returns 0 on success or when nothing had to change, otherwise the error
  * from "func"; on error ifv_pflags is left unmodified.
  */
 static int
 vlan_setflag(struct ifnet *ifp, int flag, int status,
 	     int (*func)(struct ifnet *, int))
 {
 	struct ifvlan *ifv;
 	int error;
 
 	/* XXX VLAN_LOCK_ASSERT(); */
 
 	ifv = ifp->if_softc;
 	status = status ? (ifp->if_flags & flag) : 0;
 	/* Now "status" contains the flag value or 0 */
 
 	/*
 	 * See if recorded parent's status is different from what
 	 * we want it to be.  If it is, flip it.  We record parent's
 	 * status in ifv_pflags so that we won't clear parent's flag
 	 * we haven't set.  In fact, we don't clear or set parent's
 	 * flags directly, but get or release references to them.
 	 * That's why we can be sure that recorded flags still are
 	 * in accord with actual parent's flags.
 	 */
 	if (status != (ifv->ifv_pflags & flag)) {
 		error = (*func)(PARENT(ifv), status);
 		if (error)
 			return (error);
 		/* Record the new state only once the parent accepted it. */
 		ifv->ifv_pflags &= ~flag;
 		ifv->ifv_pflags |= status;
 	}
 	return (0);
 }
 
 /*
  * Handle IFF_* flags that require certain changes on the parent:
  * if "status" is true, update parent's flags respective to our if_flags;
  * if "status" is false, forcedly clear the flags set on parent.
  *
  * Walks the vlan_pflags table until an entry with a zero flag is found.
  * Returns 0 if every entry was handled, or the first error reported by
  * vlan_setflag(); entries after the failing one are not processed.
  */
 static int
 vlan_setflags(struct ifnet *ifp, int status)
 {
 	int error, i;
 	
 	for (i = 0; vlan_pflags[i].flag; i++) {
 		error = vlan_setflag(ifp, vlan_pflags[i].flag,
 				     status, vlan_pflags[i].func);
 		if (error)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * Inform all vlans that their parent has changed link state.
  *
  * "ifp" is the parent interface.  Under the trunk lock, every vlan on the
  * trunk gets the parent's baudrate copied and a link state change posted
  * with the parent's current link state.
  *
  * NOTE(review): ifp->if_vlantrunk is dereferenced without a NULL check;
  * presumably callers only invoke this for interfaces that have a trunk
  * attached -- confirm.
  */
 static void
 vlan_link_state(struct ifnet *ifp)
 {
 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
 	struct ifvlan *ifv;
 	int i;
 
 	TRUNK_LOCK(trunk);
 	/*
 	 * The two #ifdef arms below only differ in how the vlans are
 	 * enumerated; both open the brace that the shared body closes.
 	 */
 #ifdef VLAN_ARRAY
 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
 		if (trunk->vlans[i] != NULL) {
 			ifv = trunk->vlans[i];
 #else
 	for (i = 0; i < (1 << trunk->hwidth); i++)
 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
 #endif
 			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
 			if_link_state_change(ifv->ifv_ifp,
 			    trunk->parent->if_link_state);
 		}
 	TRUNK_UNLOCK(trunk);
 }
 
 /*
  * Recompute the vlan interface's if_capabilities, if_capenable and
  * if_hwassist from those of its parent.  The trunk lock must be held
  * (asserted below).
  *
  * NOTE(review): when the parent does not advertise IFCAP_VLAN_HWCSUM,
  * if_capabilities is not reset here, so whatever it held before is kept
  * and the TSO bits below are OR-ed on top -- confirm this is intended.
  */
 static void
 vlan_capabilities(struct ifvlan *ifv)
 {
 	struct ifnet *p = PARENT(ifv);
 	struct ifnet *ifp = ifv->ifv_ifp;
 
 	TRUNK_LOCK_ASSERT(TRUNK(ifv));
 
 	/*
 	 * If the parent interface can do checksum offloading
 	 * on VLANs, then propagate its hardware-assisted
 	 * checksumming flags. Also assert that checksum
 	 * offloading requires hardware VLAN tagging.
 	 */
 	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
 		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
 
 	/* Enabled state: needs both VLAN checksumming and HW tagging on. */
 	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
 	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
 		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
 		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
 		    CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT);
 	} else {
 		ifp->if_capenable = 0;
 		ifp->if_hwassist = 0;
 	}
 	/*
 	 * If the parent interface can do TSO on VLANs then
 	 * propagate the hardware-assisted flag. TSO on VLANs
 	 * does not necessarily require hardware VLAN tagging.
 	 */
 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
 		ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
 		ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
 	} else {
 		/* Strip only the TSO bits the parent currently has set. */
 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
 	}
 }
 
 /*
  * Re-run vlan_capabilities() for every vlan attached to the trunk of the
  * parent interface "ifp", holding the trunk lock for the whole walk.
  *
  * As in vlan_link_state(), the #ifdef arms only select how the vlans are
  * enumerated; the closing brace after the call matches the "if" in the
  * VLAN_ARRAY case and the "for" in the hash case.
  */
 static void
 vlan_trunk_capabilities(struct ifnet *ifp)
 {
 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
 	struct ifvlan *ifv;
 	int i;
 
 	TRUNK_LOCK(trunk);
 #ifdef VLAN_ARRAY
 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
 		if (trunk->vlans[i] != NULL) {
 			ifv = trunk->vlans[i];
 #else
 	for (i = 0; i < (1 << trunk->hwidth); i++) {
 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
 #endif
 			vlan_capabilities(ifv);
 	}
 	TRUNK_UNLOCK(trunk);
 }
 
 /*
  * ioctl handler for vlan interfaces.
  *
  * "data" is interpreted as a struct ifreq (or a struct ifmediareq for
  * SIOCGIFMEDIA).  Commands not listed in the switch are passed on to
  * ether_ioctl().  Returns 0 or an errno value.
  */
 static int
 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifnet *p;
 	struct ifreq *ifr;
 	struct ifvlan *ifv;
 	struct vlanreq vlr;
 	int error = 0;
 
 	ifr = (struct ifreq *)data;
 	ifv = ifp->if_softc;
 
 	switch (cmd) {
 	case SIOCGIFMEDIA:
 		/*
 		 * Media is reported by the parent; without a trunk the
 		 * request fails with EINVAL.  The VLAN lock is dropped
 		 * before calling into the parent's ioctl routine.
 		 */
 		VLAN_LOCK();
 		if (TRUNK(ifv) != NULL) {
 			p = PARENT(ifv);
 			VLAN_UNLOCK();
 			error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
 			/* Limit the result to the parent's current config. */
 			if (error == 0) {
 				struct ifmediareq *ifmr;
 
 				ifmr = (struct ifmediareq *)data;
 				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
 					ifmr->ifm_count = 1;
 					error = copyout(&ifmr->ifm_current,
 						ifmr->ifm_ulist,
 						sizeof(int));
 				}
 			}
 		} else {
 			VLAN_UNLOCK();
 			error = EINVAL;
 		}
 		break;
 
 	case SIOCSIFMEDIA:
 		/* Setting media on the vlan itself is always rejected. */
 		error = EINVAL;
 		break;
 
 	case SIOCSIFMTU:
 		/*
 		 * Set the interface MTU.
 		 * Accepted range, both ends reduced by ifv_mtufudge:
 		 * [ifv_mintu, parent's if_mtu].  Requires a trunk.
 		 */
 		VLAN_LOCK();
 		if (TRUNK(ifv) != NULL) {
 			if (ifr->ifr_mtu >
 			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
 			    ifr->ifr_mtu <
 			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
 				error = EINVAL;
 			else
 				ifp->if_mtu = ifr->ifr_mtu;
 		} else
 			error = EINVAL;
 		VLAN_UNLOCK();
 		break;
 
 	case SIOCSETVLAN:
 		/*
 		 * With VIMAGE, refuse the request with EPERM when the
 		 * interface's current vnet is not its home vnet.
 		 */
+#ifdef VIMAGE
+		if (ifp->if_vnet != ifp->if_home_vnet) {
+			error = EPERM;
+			break;
+		}
+#endif
 		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
 		if (error)
 			break;
 		/* An empty parent name means "detach from the parent". */
 		if (vlr.vlr_parent[0] == '\0') {
 			vlan_unconfig(ifp);
 			break;
 		}
 		p = ifunit(vlr.vlr_parent);
 		if (p == NULL) {
 			error = ENOENT;
 			break;
 		}
 		/*
 		 * Don't let the caller set up a VLAN tag with
 		 * anything except VLID bits.
 		 */
 		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
 			error = EINVAL;
 			break;
 		}
 		error = vlan_config(ifv, p, vlr.vlr_tag);
 		if (error)
 			break;
 
 		/*
 		 * Update flags on the parent, if necessary.
 		 * NOTE(review): the return value of vlan_setflags() is
 		 * ignored here, unlike in the SIOCSIFFLAGS case below.
 		 */
 		vlan_setflags(ifp, 1);
 		break;
 
 	case SIOCGETVLAN:
 		/* Same VIMAGE home-vnet restriction as SIOCSETVLAN. */
+#ifdef VIMAGE
+		if (ifp->if_vnet != ifp->if_home_vnet) {
+			error = EPERM;
+			break;
+		}
+#endif
 		/* An unconfigured vlan is reported as an all-zero vlanreq. */
 		bzero(&vlr, sizeof(vlr));
 		VLAN_LOCK();
 		if (TRUNK(ifv) != NULL) {
 			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
 			    sizeof(vlr.vlr_parent));
 			vlr.vlr_tag = ifv->ifv_tag;
 		}
 		VLAN_UNLOCK();
 		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
 		break;
 		
 	case SIOCSIFFLAGS:
 		/*
 		 * We should propagate selected flags to the parent,
 		 * e.g., promiscuous mode.
 		 */
 		if (TRUNK(ifv) != NULL)
 			error = vlan_setflags(ifp, 1);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/*
 		 * If we don't have a parent, just remember the membership for
 		 * when we do.
 		 */
 		if (TRUNK(ifv) != NULL)
 			error = vlan_setmulti(ifp);
 		break;
 
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 	}
 
 	return (error);
 }
Index: projects/binutils-2.17/sys/netgraph/ng_pipe.c
===================================================================
--- projects/binutils-2.17/sys/netgraph/ng_pipe.c	(revision 215829)
+++ projects/binutils-2.17/sys/netgraph/ng_pipe.c	(revision 215830)
@@ -1,1058 +1,996 @@
 /*-
- * Copyright (c) 2004-2008 University of Zagreb
+ * Copyright (c) 2004-2010 University of Zagreb
  * Copyright (c) 2007-2008 FreeBSD Foundation
  *
  * This software was developed by the University of Zagreb and the
  * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
  * FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * This node permits simple traffic shaping by emulating bandwidth
  * and delay, as well as random packet losses.
  * The node has two hooks, upper and lower. Traffic flowing from upper to
  * lower hook is referenced as downstream, and vice versa. Parameters for 
  * both directions can be set separately, except for delay.
  */
 
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/time.h>
 
 #include <vm/uma.h>
 
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/ng_parse.h>
 #include <netgraph/ng_pipe.h>
 
 static MALLOC_DEFINE(M_NG_PIPE, "ng_pipe", "ng_pipe");
 
-struct mtx ng_pipe_giant;
-
 /*
  * Packet header struct: one per queued frame.  It links the frame into a
  * queue and carries the mbuf together with its due time.
  */
 struct ngp_hdr {
 	TAILQ_ENTRY(ngp_hdr)	ngp_link;	/* next pkt in queue */
 	struct timeval		when;		/* this packet's due time */
 	struct mbuf		*m;		/* ptr to the packet data */
 };
 /* List-head type for queues of ngp_hdr, used by struct ngp_fifo. */
 TAILQ_HEAD(p_head, ngp_hdr);
 
 /*
  * FIFO queue struct: one per flow ("hash") that currently has frames
  * queued on a hook's inbound side.
  *
  * NOTE(review): ngp_rcvdata() allocates these from ngp_zone, which is
  * described as the zone for ngp_hdr-s; confirm that the zone's item size
  * is large enough for this structure.
  */
 struct ngp_fifo {
 	TAILQ_ENTRY(ngp_fifo)	fifo_le;	/* list of active queues only */
 	struct p_head		packet_head;	/* FIFO queue head */
 	u_int32_t		hash;		/* flow signature */
 	struct timeval		vtime;		/* virtual time, for WFQ */
 	u_int32_t		rr_deficit;	/* for DRR */
 	u_int32_t		packets;	/* # of packets in this queue */
 };
 
 /*
  * Per hook info.  A node embeds two of these (see struct node_priv), and
  * each is installed as the private data of its hook by ngp_newhook().
  */
 struct hookinfo {
 	hook_p			hook;		/* the netgraph hook itself */
 	int			noqueue;	/* bypass any processing */
 	TAILQ_HEAD(, ngp_fifo)	fifo_head;	/* FIFO queues */
 	TAILQ_HEAD(, ngp_hdr)	qout_head;	/* delay queue head */
-	LIST_ENTRY(hookinfo)	active_le;	/* active hooks */
 	struct timeval		qin_utime;	/* inbound queue service deadline */
 	struct ng_pipe_hookcfg	cfg;		/* current configuration */
 	struct ng_pipe_hookrun	run;		/* run-time queue counters */
 	struct ng_pipe_hookstat	stats;		/* traffic statistics */
 	uint64_t		*ber_p;		/* loss_p(BER,psize) map */
 };
 
 /*
  * Per node info.  Allocated zeroed by ngp_constructor() and stored as
  * the node's private data.
  */
 struct node_priv {
 	u_int64_t		delay;		/* set via NGM_PIPE_SET_CFG */
 	u_int32_t		overhead;	/* added to frame length in WFQ vtime */
 	u_int32_t		header_offset;	/* offset handed to ip_hash() */
 	struct hookinfo		lower;		/* state of the "lower" hook */
 	struct hookinfo		upper;		/* state of the "upper" hook */
+	struct callout		timer;
+	int			timer_scheduled;
 };
 typedef struct node_priv *priv_p;
 
 /*
  * Macro for calculating the virtual time for packet dequeueing in WFQ.
  *
  * This is not a self-contained macro: besides "plen" (the frame length)
  * it uses the following variables of the expanding function:
  *   hinfo  - struct hookinfo * of the receiving hook,
  *   priv   - priv_p of the node (for priv->overhead),
  *   now    - struct timeval * holding the current time,
  *   ngp_f  - the struct ngp_fifo * to be inserted,
  *   ngp_f1 - scratch struct ngp_fifo * used as the list cursor.
  *
  * When WFQ is configured and a bandwidth is set, ngp_f->vtime is computed
  * and ngp_f is inserted into hinfo->fifo_head before the first queue with
  * a later vtime (or at the tail if there is none).  Otherwise ngp_f is
  * simply appended to the tail.
  *
  * The expansion is a bare if/else statement, not wrapped in
  * do { } while (0), and its last line ends in a continuation backslash.
  */
 #define FIFO_VTIME_SORT(plen)						\
 	if (hinfo->cfg.wfq && hinfo->cfg.bandwidth) {			\
 		ngp_f->vtime.tv_usec = now->tv_usec + ((uint64_t) (plen) \
 			+ priv->overhead ) * hinfo->run.fifo_queues *	\
 			8000000 / hinfo->cfg.bandwidth;			\
 		ngp_f->vtime.tv_sec = now->tv_sec +			\
 			ngp_f->vtime.tv_usec / 1000000;			\
 		ngp_f->vtime.tv_usec = ngp_f->vtime.tv_usec % 1000000;	\
 		TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)	\
 			if (ngp_f1->vtime.tv_sec > ngp_f->vtime.tv_sec || \
 			    (ngp_f1->vtime.tv_sec == ngp_f->vtime.tv_sec && \
 			    ngp_f1->vtime.tv_usec > ngp_f->vtime.tv_usec)) \
 				break;					\
 		if (ngp_f1 == NULL)					\
 			TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \
 		else							\
 			TAILQ_INSERT_BEFORE(ngp_f1, ngp_f, fifo_le);	\
 	} else								\
 		TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le);	\
 
 
 static void	parse_cfg(struct ng_pipe_hookcfg *, struct ng_pipe_hookcfg *,
 			struct hookinfo *, priv_p);
 static void	pipe_dequeue(struct hookinfo *, struct timeval *);
-static void	pipe_scheduler(void *);
-static void	pipe_poll(void);
+static void	ngp_callout(node_p, hook_p, void *, int);
 static int	ngp_modevent(module_t, int, void *);
 
-/* linked list of active "pipe" hooks */
-static LIST_HEAD(, hookinfo) active_head;
-static int active_gen_id = 0;
-
-/* timeout handle for pipe_scheduler */
-static struct callout polling_timer;
-
 /* zone for storing ngp_hdr-s */
 static uma_zone_t ngp_zone;
 
 /* Netgraph methods */
 static ng_constructor_t	ngp_constructor;
 static ng_rcvmsg_t	ngp_rcvmsg;
 static ng_shutdown_t	ngp_shutdown;
 static ng_newhook_t	ngp_newhook;
 static ng_rcvdata_t	ngp_rcvdata;
 static ng_disconnect_t	ngp_disconnect;
 
 /*
  * Parse types for the structures carried in control messages.  Each one
  * pairs ng_parse_struct_type with a field table generated by the
  * matching NG_PIPE_*_INFO macro.  The composite types (stats, run, cfg)
  * hand the corresponding per-hook parse type to their field macro.
  */
 
 /* Parse type for struct ng_pipe_hookstat */
 static const struct ng_parse_struct_field
 	ng_pipe_hookstat_type_fields[] = NG_PIPE_HOOKSTAT_INFO;
 static const struct ng_parse_type ng_pipe_hookstat_type = {
 	&ng_parse_struct_type,
 	&ng_pipe_hookstat_type_fields
 };
 
 /* Parse type for struct ng_pipe_stats */
 static const struct ng_parse_struct_field ng_pipe_stats_type_fields[] =
 	NG_PIPE_STATS_INFO(&ng_pipe_hookstat_type);
 static const struct ng_parse_type ng_pipe_stats_type = {
 	&ng_parse_struct_type,
 	&ng_pipe_stats_type_fields
 };
 
 /* Parse type for struct ng_pipe_hookrun */
 static const struct ng_parse_struct_field
 	ng_pipe_hookrun_type_fields[] = NG_PIPE_HOOKRUN_INFO;
 static const struct ng_parse_type ng_pipe_hookrun_type = {
 	&ng_parse_struct_type,
 	&ng_pipe_hookrun_type_fields
 };
 
 /* Parse type for struct ng_pipe_run */
 static const struct ng_parse_struct_field
 	ng_pipe_run_type_fields[] = NG_PIPE_RUN_INFO(&ng_pipe_hookrun_type);
 static const struct ng_parse_type ng_pipe_run_type = {
 	&ng_parse_struct_type,
 	&ng_pipe_run_type_fields
 };
 
 /* Parse type for struct ng_pipe_hookcfg */
 static const struct ng_parse_struct_field
 	ng_pipe_hookcfg_type_fields[] = NG_PIPE_HOOKCFG_INFO;
 static const struct ng_parse_type ng_pipe_hookcfg_type = {
 	&ng_parse_struct_type,
 	&ng_pipe_hookcfg_type_fields
 };
 
 /* Parse type for struct ng_pipe_cfg */
 static const struct ng_parse_struct_field
 	ng_pipe_cfg_type_fields[] = NG_PIPE_CFG_INFO(&ng_pipe_hookcfg_type);
 static const struct ng_parse_type ng_pipe_cfg_type = {
 	&ng_parse_struct_type,
 	&ng_pipe_cfg_type_fields
 };
 
 /*
  * List of commands and how to convert arguments to/from ASCII.
  *
  * All commands use the NGM_PIPE_COOKIE type cookie.  The "get" commands
  * declare a response type, "setcfg" declares a message (argument) type,
  * and "clrstats" declares neither.  The table ends with a zeroed entry.
  */
 static const struct ng_cmdlist ngp_cmds[] = {
 	{
 		.cookie =	NGM_PIPE_COOKIE,
 		.cmd =		NGM_PIPE_GET_STATS,
 		.name = 	"getstats",
 		.respType =	 &ng_pipe_stats_type
 	},
 	{
 		.cookie =	NGM_PIPE_COOKIE,
 		.cmd =		NGM_PIPE_CLR_STATS,
 		.name =		"clrstats"
 	},
 	{
 		.cookie =	NGM_PIPE_COOKIE,
 		.cmd =		NGM_PIPE_GETCLR_STATS,
 		.name =		"getclrstats",
 		.respType =	&ng_pipe_stats_type
 	},
 	{
 		.cookie =	NGM_PIPE_COOKIE,
 		.cmd =		NGM_PIPE_GET_RUN,
 		.name =		"getrun",
 		.respType =	&ng_pipe_run_type
 	},
 	{
 		.cookie =	NGM_PIPE_COOKIE,
 		.cmd =		NGM_PIPE_GET_CFG,
 		.name =		"getcfg",
 		.respType =	&ng_pipe_cfg_type
 	},
 	{
 		.cookie =	NGM_PIPE_COOKIE,
 		.cmd =		NGM_PIPE_SET_CFG,
 		.name =		"setcfg",
 		.mesgType =	&ng_pipe_cfg_type,
 	},
 	{ 0 }
 };
 
 /*
  * Netgraph type descriptor: binds the node type name to the method
  * implementations and the command list defined in this file, and is
  * registered through NETGRAPH_INIT() below.
  */
 static struct ng_type ng_pipe_typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_PIPE_NODE_TYPE,
 	.mod_event =	ngp_modevent,
 	.constructor =	ngp_constructor,
 	.shutdown =	ngp_shutdown,
 	.rcvmsg =	ngp_rcvmsg,
 	.newhook =	ngp_newhook,
 	.rcvdata =	ngp_rcvdata,
 	.disconnect =	ngp_disconnect,
 	.cmdlist =	ngp_cmds
 };
 NETGRAPH_INIT(pipe, &ng_pipe_typestruct);
 
 /*
  * Node constructor.
  *
  * Allocates the zero-filled private data without sleeping (M_NOWAIT) and
  * attaches it to the node.  Returns ENOMEM if the allocation fails,
  * 0 otherwise.
  */
 static int
 ngp_constructor(node_p node)
 {
 	priv_p priv;
 
 	priv = malloc(sizeof(*priv), M_NG_PIPE, M_ZERO | M_NOWAIT);
 	if (priv == NULL)
 		return (ENOMEM);
 	NG_NODE_SET_PRIVATE(node, priv);
 
+	/* Mark node as single-threaded */
+	NG_NODE_FORCE_WRITER(node);
+
+	ng_callout_init(&priv->timer);
+
 	return (0);
 }
 
 /*
  * Add a hook.
  *
  * Only the names NG_PIPE_HOOK_UPPER and NG_PIPE_HOOK_LOWER are accepted;
  * any other name yields EINVAL.  The matching hookinfo inside the node's
  * private data is zeroed, bound to the hook and given its default
  * configuration.  Returns 0 on success.
  *
  * NOTE(review): the bzero() also clears hookinfo.ber_p; presumably the
  * disconnect path has freed any table before a hook name is reused --
  * confirm.
  */
 static int
 ngp_newhook(node_p node, hook_p hook, const char *name)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct hookinfo *hinfo;
 
 	if (strcmp(name, NG_PIPE_HOOK_UPPER) == 0) {
 		bzero(&priv->upper, sizeof(priv->upper));
 		priv->upper.hook = hook;
 		NG_HOOK_SET_PRIVATE(hook, &priv->upper);
 	} else if (strcmp(name, NG_PIPE_HOOK_LOWER) == 0) {
 		bzero(&priv->lower, sizeof(priv->lower));
 		priv->lower.hook = hook;
 		NG_HOOK_SET_PRIVATE(hook, &priv->lower);
 	} else
 		return (EINVAL);
 
 	/* Load non-zero initial cfg values */
 	hinfo = NG_HOOK_PRIVATE(hook);
 	hinfo->cfg.qin_size_limit = 50;
 	hinfo->cfg.fifo = 1;
 	hinfo->cfg.droptail = 1;
 	TAILQ_INIT(&hinfo->fifo_head);
 	TAILQ_INIT(&hinfo->qout_head);
 	return (0);
 }
 
 /*
  * Receive a control message.
  *
  * Only messages with the NGM_PIPE_COOKIE type cookie are understood;
  * anything else, and any unknown command, yields EINVAL.  A response is
  * built for the "get" commands (ENOMEM if it cannot be allocated).  In
  * the statistics, run and configuration replies the upper hook's data is
  * reported as "downstream" and the lower hook's as "upstream".
  *
  * For NGM_PIPE_SET_CFG a value of -1 resets the setting to zero, a value
  * inside the accepted range replaces it, and any other value leaves the
  * current setting untouched.
  */
 static int
 ngp_rcvmsg(node_p node, item_p item, hook_p lasthook)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct ng_mesg *resp = NULL;
 	struct ng_mesg *msg;
 	struct ng_pipe_stats *stats;
 	struct ng_pipe_run *run;
 	struct ng_pipe_cfg *cfg;
 	int error = 0;
 
-	mtx_lock(&ng_pipe_giant);
-
 	NGI_GET_MSG(item, msg);
 	switch (msg->header.typecookie) {
 	case NGM_PIPE_COOKIE:
 		switch (msg->header.cmd) {
 		case NGM_PIPE_GET_STATS:
 		case NGM_PIPE_CLR_STATS:
 		case NGM_PIPE_GETCLR_STATS:
 			/* Report first (unless CLR), then clear (unless GET). */
 			if (msg->header.cmd != NGM_PIPE_CLR_STATS) {
 				NG_MKRESPONSE(resp, msg,
 				    sizeof(*stats), M_NOWAIT);
 				if (resp == NULL) {
 					error = ENOMEM;
 					break;
 				}
-				stats = (struct ng_pipe_stats *)resp->data;
+				stats = (struct ng_pipe_stats *) resp->data;
 				bcopy(&priv->upper.stats, &stats->downstream,
 				    sizeof(stats->downstream));
 				bcopy(&priv->lower.stats, &stats->upstream,
 				    sizeof(stats->upstream));
 			}
 			if (msg->header.cmd != NGM_PIPE_GET_STATS) {
 				bzero(&priv->upper.stats,
 				    sizeof(priv->upper.stats));
 				bzero(&priv->lower.stats,
 				    sizeof(priv->lower.stats));
 			}
 			break;
 		case NGM_PIPE_GET_RUN:
 			NG_MKRESPONSE(resp, msg, sizeof(*run), M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
 				break;
 			}
-			run = (struct ng_pipe_run *)resp->data;
+			run = (struct ng_pipe_run *) resp->data;
 			bcopy(&priv->upper.run, &run->downstream,
 				sizeof(run->downstream));
 			bcopy(&priv->lower.run, &run->upstream,
 				sizeof(run->upstream));
 			break;
 		case NGM_PIPE_GET_CFG:
 			NG_MKRESPONSE(resp, msg, sizeof(*cfg), M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
 				break;
 			}
-			cfg = (struct ng_pipe_cfg *)resp->data;
+			cfg = (struct ng_pipe_cfg *) resp->data;
 			bcopy(&priv->upper.cfg, &cfg->downstream,
 				sizeof(cfg->downstream));
 			bcopy(&priv->lower.cfg, &cfg->upstream,
 				sizeof(cfg->upstream));
 			cfg->delay = priv->delay;
 			cfg->overhead = priv->overhead;
 			cfg->header_offset = priv->header_offset;
 			/*
 			 * Equal bandwidth in both directions is reported once,
 			 * in the node-wide field, with the per-direction
 			 * values zeroed.
 			 */
 			if (cfg->upstream.bandwidth ==
 			    cfg->downstream.bandwidth) {
 				cfg->bandwidth = cfg->upstream.bandwidth;
 				cfg->upstream.bandwidth = 0;
 				cfg->downstream.bandwidth = 0;
 			} else
 				cfg->bandwidth = 0;
 			break;
 		case NGM_PIPE_SET_CFG:
-			cfg = (struct ng_pipe_cfg *)msg->data;
+			cfg = (struct ng_pipe_cfg *) msg->data;
 			if (msg->header.arglen != sizeof(*cfg)) {
 				error = EINVAL;
 				break;
 			}
 
 			if (cfg->delay == -1)
 				priv->delay = 0;
 			else if (cfg->delay > 0 && cfg->delay < 10000000)
 				priv->delay = cfg->delay;
 
 			/*
 			 * A node-wide bandwidth applies to both directions
 			 * and also picks a default per-frame overhead.
 			 */
 			if (cfg->bandwidth == -1) {
 				priv->upper.cfg.bandwidth = 0;
 				priv->lower.cfg.bandwidth = 0;
 				priv->overhead = 0;
 			} else if (cfg->bandwidth >= 100 &&
 			    cfg->bandwidth <= 1000000000) {
 				priv->upper.cfg.bandwidth = cfg->bandwidth;
 				priv->lower.cfg.bandwidth = cfg->bandwidth;
 				if (cfg->bandwidth >= 10000000)
 					priv->overhead = 8+4+12; /* Ethernet */
 				else
 					priv->overhead = 10; /* HDLC */
 			}
 
 			/* An explicit overhead overrides the default above. */
 			if (cfg->overhead == -1)
 				priv->overhead = 0;
-			else if (cfg->overhead > 0 && cfg->overhead < 256)
+			else if (cfg->overhead > 0 &&
+			    cfg->overhead < MAX_OHSIZE)
 				priv->overhead = cfg->overhead;
 
 			if (cfg->header_offset == -1)
 				priv->header_offset = 0;
 			else if (cfg->header_offset > 0 &&
 			    cfg->header_offset < 64)
 				priv->header_offset = cfg->header_offset;
 
 			/* Per-direction settings: downstream = upper hook. */
 			parse_cfg(&priv->upper.cfg, &cfg->downstream,
-				  &priv->upper, priv);
+			    &priv->upper, priv);
 			parse_cfg(&priv->lower.cfg, &cfg->upstream,
-				  &priv->lower, priv);
+			    &priv->lower, priv);
 			break;
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	NG_RESPOND_MSG(error, node, item, resp);
 	NG_FREE_MSG(msg);
 
-	mtx_unlock(&ng_pipe_giant);
-
 	return (error);
 }
 
 /*
  * Merge the per-hook settings requested in "new" into "current", the
  * live configuration of "hinfo".
  *
  * For the numeric settings -1 resets the value to zero, a value inside
  * the accepted range replaces it, and anything else leaves the current
  * value alone.  The queueing discipline (fifo/wfq/drr) and the drop
  * policy (droptail/drophead) are mutually exclusive groups; when several
  * members of a group are set in "new", the one tested last wins.
  * Finally hinfo->noqueue is recomputed: frames may bypass the queues only
  * when no bandwidth, delay, duplication or BER is configured.
  *
  * The BER table is allocated with M_NOWAIT on first use.  If that
  * allocation fails, BER emulation is left disabled instead of filling
  * the table through a NULL pointer.
  */
 static void
 parse_cfg(struct ng_pipe_hookcfg *current, struct ng_pipe_hookcfg *new,
 	struct hookinfo *hinfo, priv_p priv)
 {
 
 	if (new->ber == -1) {
 		current->ber = 0;
 		if (hinfo->ber_p) {
 			free(hinfo->ber_p, M_NG_PIPE);
 			hinfo->ber_p = NULL;
 		}
 	} else if (new->ber >= 1 && new->ber <= 1000000000000) {
 		static const uint64_t one = 0x1000000000000; /* = 2^48 */
 		uint64_t p0, p;
 		uint32_t fsize, i;
 
 		if (hinfo->ber_p == NULL)
-			hinfo->ber_p = malloc(\
-				(MAX_FSIZE + MAX_OHSIZE)*sizeof(uint64_t), \
-				M_NG_PIPE, M_NOWAIT);
+			hinfo->ber_p =
+			    malloc((MAX_FSIZE + MAX_OHSIZE) * sizeof(uint64_t),
+			    M_NG_PIPE, M_NOWAIT);
 
 		if (hinfo->ber_p == NULL) {
 			/*
 			 * M_NOWAIT allocation failed.  There is no table
 			 * to fill, so keep BER emulation switched off; a
 			 * non-zero ber without a table must never be
 			 * visible to the rest of the node.
 			 */
 			current->ber = 0;
 		} else {
 			current->ber = new->ber;
 
 			/*
 			 * For given BER and each frame size N (in bytes)
 			 * calculate the probability P_OK that the frame
 			 * is clean:
 			 *
 			 * P_OK(BER,N) = (1 - 1/BER)^(N*8)
 			 *
 			 * We use a 64-bit fixed-point format with decimal
 			 * point positioned between bits 47 and 48.
 			 */
 			p0 = one - one / new->ber;
 			p = one;
 			for (fsize = 0; fsize < MAX_FSIZE + MAX_OHSIZE;
 			    fsize++) {
 				hinfo->ber_p[fsize] = p;
 				/*
 				 * Multiply by p0 once per bit; p0 is split
 				 * into 16-bit pieces to keep each partial
 				 * product within 64 bits.
 				 */
-			for (i=0; i<8; i++)
-				p = (p*(p0&0xffff)>>48) + \
-				    (p*((p0>>16)&0xffff)>>32) + \
-				    (p*(p0>>32)>>16);
+				for (i = 0; i < 8; i++)
+					p = (p * (p0 & 0xffff) >> 48) +
+					    (p * ((p0 >> 16) & 0xffff) >> 32) +
+					    (p * (p0 >> 32) >> 16);
 			}
 		}
 	}
 
 	if (new->qin_size_limit == -1)
 		current->qin_size_limit = 0;
 	else if (new->qin_size_limit >= 5)
 		current->qin_size_limit = new->qin_size_limit;
 
 	if (new->qout_size_limit == -1)
 		current->qout_size_limit = 0;
 	else if (new->qout_size_limit >= 5)
 		current->qout_size_limit = new->qout_size_limit;
 
 	if (new->duplicate == -1)
 		current->duplicate = 0;
 	else if (new->duplicate > 0 && new->duplicate <= 50)
 		current->duplicate = new->duplicate;
 
 	if (new->fifo) {
 		current->fifo = 1;
 		current->wfq = 0;
 		current->drr = 0;
 	}
 
 	if (new->wfq) {
 		current->fifo = 0;
 		current->wfq = 1;
 		current->drr = 0;
 	}
 
 	if (new->drr) {
 		current->fifo = 0;
 		current->wfq = 0;
 		/* DRR quantum */
 		if (new->drr >= 32)
 			current->drr = new->drr;
 		else
 			current->drr = 2048;		/* default quantum */
 	}
 
 	if (new->droptail) {
 		current->droptail = 1;
 		current->drophead = 0;
 	}
 
 	if (new->drophead) {
 		current->droptail = 0;
 		current->drophead = 1;
 	}
 
 	/* Without a bandwidth limit only plain FIFO queueing makes sense. */
 	if (new->bandwidth == -1) {
 		current->bandwidth = 0;
 		current->fifo = 1;
 		current->wfq = 0;
 		current->drr = 0;
 	} else if (new->bandwidth >= 100 && new->bandwidth <= 1000000000)
 		current->bandwidth = new->bandwidth;
 
 	if (current->bandwidth | priv->delay |
 	    current->duplicate | current->ber)
 		hinfo->noqueue = 0;
 	else
 		hinfo->noqueue = 1;
 }
 
 /*
  * Compute a hash signature for a packet. This function suffers from the
  * NIH syndrome, so probably it would be wise to look around what other
  * folks have found out to be a good and efficient IP hash function...
  *
  * "offset" is the position of the IP header from the start of the first
  * mbuf's data.  Returns 0 when the first mbuf is too short to hold the
  * header, or the header is not IPv4 or carries options (header length
  * other than sizeof(struct ip)).  Otherwise the source and destination
  * addresses are mixed into a 64-bit value whose two halves are XOR-ed
  * and returned truncated to int.
  */
 static int
 ip_hash(struct mbuf *m, int offset)
 {
 	u_int64_t i;
 	struct ip *ip = (struct ip *)(mtod(m, u_char *) + offset);
 
 	/* Only the first mbuf is inspected; no pullup is attempted. */
 	if (m->m_len < sizeof(struct ip) + offset ||
 	    ip->ip_v != 4 || ip->ip_hl << 2 != sizeof(struct ip))
 		return 0;
 
 	i = ((u_int64_t) ip->ip_src.s_addr ^
 	    ((u_int64_t) ip->ip_src.s_addr << 13) ^
 	    ((u_int64_t) ip->ip_dst.s_addr << 7) ^
 	    ((u_int64_t) ip->ip_dst.s_addr << 19));
 	return (i ^ (i >> 32));
 }
 
 /*
  * Receive data on a hook - both in upstream and downstream direction.
  * We put the frame on the inbound queue, and try to initiate dequeuing
  * sequence immediately. If inbound queue is full, discard one frame
  * depending on dropping policy (from the head or from the tail of the
  * queue).
  *
  * When the hook is in "noqueue" mode and has nothing queued, the frame
  * is forwarded straight to the opposite hook and the forwarding result
  * is returned.  Otherwise the frame is queued and 0 is returned.
  */
 static int
 ngp_rcvdata(hook_p hook, item_p item)
 {
 	struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
 	const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
 	struct timeval uuptime;
 	struct timeval *now = &uuptime;
 	struct ngp_fifo *ngp_f = NULL, *ngp_f1;
 	struct ngp_hdr *ngp_h = NULL;
 	struct mbuf *m;
-	int hash;
+	int hash, plen;
 	int error = 0;
 
-	if (hinfo->noqueue) {
+	/*
+	 * Shortcut from inbound to outbound hook when neither of
+	 * bandwidth, delay, BER or duplication probability is
+	 * configured, nor we have queued frames to drain.
+	 */
+	if (hinfo->run.qin_frames == 0 && hinfo->run.qout_frames == 0 &&
+	    hinfo->noqueue) {
 		struct hookinfo *dest;
 		if (hinfo == &priv->lower)
 			dest = &priv->upper;
 		else
 			dest = &priv->lower;
+
+		/* Send the frame. */
+		plen = NGI_M(item)->m_pkthdr.len;
 		NG_FWD_ITEM_HOOK(error, item, dest->hook);
-		return error;
+
+		/* Update stats. */
+		if (error) {
+			hinfo->stats.out_disc_frames++;
+			hinfo->stats.out_disc_octets += plen;
+		} else {
+			hinfo->stats.fwd_frames++;
+			hinfo->stats.fwd_octets += plen;
+		}
+
+		return (error);
 	}
 
-	mtx_lock(&ng_pipe_giant);
 	microuptime(now);
 
 	/*
-	 * Attach us to the list of active ng_pipes if this was an empty
-	 * one before, and also update the queue service deadline time.
+	 * If this was an empty queue, update service deadline time.
 	 */
 	if (hinfo->run.qin_frames == 0) {
 		struct timeval *when = &hinfo->qin_utime;
 		if (when->tv_sec < now->tv_sec || (when->tv_sec == now->tv_sec
 		    && when->tv_usec < now->tv_usec)) {
 			when->tv_sec = now->tv_sec;
 			when->tv_usec = now->tv_usec;
 		}
-		if (hinfo->run.qout_frames == 0)
-			LIST_INSERT_HEAD(&active_head, hinfo, active_le);
 	}
 
 	/* Populate the packet header */
 	ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
 	KASSERT((ngp_h != NULL), ("ngp_h zalloc failed (1)"));
 	NGI_GET_M(item, m);
 	KASSERT(m != NULL, ("NGI_GET_M failed"));
 	ngp_h->m = m;
 	NG_FREE_ITEM(item);
 
 	if (hinfo->cfg.fifo)
 		hash = 0;	/* all packets go into a single FIFO queue */
 	else
 		hash = ip_hash(m, priv->header_offset);
 
 	/* Find the appropriate FIFO queue for the packet and enqueue it*/
 	TAILQ_FOREACH(ngp_f, &hinfo->fifo_head, fifo_le)
 		if (hash == ngp_f->hash)
 			break;
 	if (ngp_f == NULL) {
 		/* No queue for this flow yet: create one. */
 		ngp_f = uma_zalloc(ngp_zone, M_NOWAIT);
 		/* Check the pointer just allocated, not the packet header. */
 		KASSERT(ngp_f != NULL, ("ngp_f zalloc failed (2)"));
 		TAILQ_INIT(&ngp_f->packet_head);
 		ngp_f->hash = hash;
 		ngp_f->packets = 1;
 		ngp_f->rr_deficit = hinfo->cfg.drr;	/* DRR quantum */
 		hinfo->run.fifo_queues++;
 		TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
 		FIFO_VTIME_SORT(m->m_pkthdr.len);
 	} else {
 		TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
 		ngp_f->packets++;
 	}
 	hinfo->run.qin_frames++;
 	hinfo->run.qin_octets += m->m_pkthdr.len;
 
 	/*
 	 * Discard a frame if inbound queue limit has been reached.
 	 * The victim is taken from the longest per-flow queue, which is
 	 * not necessarily the queue the new frame was just added to.
 	 */
 	if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) {
 		struct mbuf *m1;
 		int longest = 0;
 
 		/* Find the longest queue */
 		TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)
 			if (ngp_f1->packets > longest) {
 				longest = ngp_f1->packets;
 				ngp_f = ngp_f1;
 			}
 
 		/* Drop a frame from the queue head/tail, depending on cfg */
 		if (hinfo->cfg.drophead)
 			ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
 		else
 			ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head);
 		TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
 		m1 = ngp_h->m;
 		uma_zfree(ngp_zone, ngp_h);
 		hinfo->run.qin_octets -= m1->m_pkthdr.len;
 		hinfo->stats.in_disc_octets += m1->m_pkthdr.len;
 		m_freem(m1);
 		/* Release the per-flow queue once it has become empty. */
 		if (--(ngp_f->packets) == 0) {
 			TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
 			uma_zfree(ngp_zone, ngp_f);
 			hinfo->run.fifo_queues--;
 		}
 		hinfo->run.qin_frames--;
 		hinfo->stats.in_disc_frames++;
 	}
 
 	/*
-	 * Try to start the dequeuing process immediately.  We must
-	 * hold the ng_pipe_giant lock here and pipe_dequeue() will
-	 * release it
+	 * Try to start the dequeuing process immediately.
 	 */
 	pipe_dequeue(hinfo, now);
 
 	return (0);
 }
 
 
 /*
  * Dequeueing sequence - we basically do the following:
  *  1) Try to extract the frame from the inbound (bandwidth) queue;
  *  2) In accordance to BER specified, discard the frame randomly;
  *  3) If the frame survives BER, prepend it with delay info and move it
  *     to outbound (delay) queue;
  *  4) Loop to 2) until bandwidth quota for this timeslice is reached, or
  *     inbound queue is flushed completely;
- *  5) Extract the first frame from the outbound queue, if it's time has
- *     come.  Queue the frame for transmission on the outbound hook;
- *  6) Loop to 5) until outbound queue is flushed completely, or the next
- *     frame in the queue is not scheduled to be dequeued yet;
- *  7) Transimit all frames queued in 5)
- *
- * Note: the caller must hold the ng_pipe_giant lock; this function
- * returns with the lock released.
+ *  5) Dequeue frames from the outbound queue and send them downstream until
+ *     the outbound queue is flushed completely, or the next frame in the
+ *     queue is not due to be dequeued yet.
  */
 static void
 pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) {
 	static uint64_t rand, oldrand;
-	const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hinfo->hook));
+	const node_p node = NG_HOOK_NODE(hinfo->hook);
+	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct hookinfo *dest;
 	struct ngp_fifo *ngp_f, *ngp_f1;
 	struct ngp_hdr *ngp_h;
 	struct timeval *when;
-	struct mbuf *q_head = NULL;
-	struct mbuf *q_tail = NULL;
 	struct mbuf *m;
-	int error = 0;
+	int plen, error = 0;
 
 	/* Which one is the destination hook? */
 	if (hinfo == &priv->lower)
 		dest = &priv->upper;
 	else
 		dest = &priv->lower;
 
 	/* Bandwidth queue processing */
 	while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
 		when = &hinfo->qin_utime;
 		if (when->tv_sec > now->tv_sec || (when->tv_sec == now->tv_sec
 		    && when->tv_usec > now->tv_usec))
 			break;
 
 		ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
 		m = ngp_h->m;
 
 		/* Deficit Round Robin (DRR) processing */
 		if (hinfo->cfg.drr) {
 			if (ngp_f->rr_deficit >= m->m_pkthdr.len) {
 				ngp_f->rr_deficit -= m->m_pkthdr.len;
 			} else {
 				ngp_f->rr_deficit += hinfo->cfg.drr;
 				TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
 				TAILQ_INSERT_TAIL(&hinfo->fifo_head,
 				    ngp_f, fifo_le);
 				continue;
 			}
 		}
 
 		/*
 		 * Either create a duplicate and pass it on, or dequeue
 		 * the original packet...
 		 */
 		if (hinfo->cfg.duplicate &&
 		    random() % 100 <= hinfo->cfg.duplicate) {
 			ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
 			KASSERT(ngp_h != NULL, ("ngp_h zalloc failed (3)"));
 			m = m_dup(m, M_NOWAIT);
 			KASSERT(m != NULL, ("m_dup failed"));
 			ngp_h->m = m;
 		} else {
 			TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
 			hinfo->run.qin_frames--;
 			hinfo->run.qin_octets -= m->m_pkthdr.len;
 			ngp_f->packets--;
 		}
 		
 		/* Calculate the serialization delay */
 		if (hinfo->cfg.bandwidth) {
-			hinfo->qin_utime.tv_usec += ((uint64_t) m->m_pkthdr.len
-				+ priv->overhead ) *
-				8000000 / hinfo->cfg.bandwidth;
+			hinfo->qin_utime.tv_usec +=
+			    ((uint64_t) m->m_pkthdr.len + priv->overhead ) *
+			    8000000 / hinfo->cfg.bandwidth;
 			hinfo->qin_utime.tv_sec +=
-				hinfo->qin_utime.tv_usec / 1000000;
+			    hinfo->qin_utime.tv_usec / 1000000;
 			hinfo->qin_utime.tv_usec =
-				hinfo->qin_utime.tv_usec % 1000000;
+			    hinfo->qin_utime.tv_usec % 1000000;
 		}
 		when = &ngp_h->when;
 		when->tv_sec = hinfo->qin_utime.tv_sec;
 		when->tv_usec = hinfo->qin_utime.tv_usec;
 
 		/* Sort / rearrange inbound queues */
 		if (ngp_f->packets) {
 			if (hinfo->cfg.wfq) {
 				TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
 				FIFO_VTIME_SORT(TAILQ_FIRST(
 				    &ngp_f->packet_head)->m->m_pkthdr.len)
 			}
 		} else {
 			TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
 			uma_zfree(ngp_zone, ngp_f);
 			hinfo->run.fifo_queues--;
 		}
 
 		/* Randomly discard the frame, according to BER setting */
 		if (hinfo->cfg.ber) {
 			oldrand = rand;
 			rand = random();
 			if (((oldrand ^ rand) << 17) >=
 			    hinfo->ber_p[priv->overhead + m->m_pkthdr.len]) {
 				hinfo->stats.out_disc_frames++;
 				hinfo->stats.out_disc_octets += m->m_pkthdr.len;
 				uma_zfree(ngp_zone, ngp_h);
 				m_freem(m);
 				continue;
 			}
 		}
 
 		/* Discard frame if outbound queue size limit exceeded */
 		if (hinfo->cfg.qout_size_limit &&
 		    hinfo->run.qout_frames>=hinfo->cfg.qout_size_limit) {
 			hinfo->stats.out_disc_frames++;
 			hinfo->stats.out_disc_octets += m->m_pkthdr.len;
 			uma_zfree(ngp_zone, ngp_h);
 			m_freem(m);
 			continue;
 		}
 
 		/* Calculate the propagation delay */
 		when->tv_usec += priv->delay;
 		when->tv_sec += when->tv_usec / 1000000;
 		when->tv_usec = when->tv_usec % 1000000;
 
 		/* Put the frame into the delay queue */
 		TAILQ_INSERT_TAIL(&hinfo->qout_head, ngp_h, ngp_link);
 		hinfo->run.qout_frames++;
 		hinfo->run.qout_octets += m->m_pkthdr.len;
 	}
 
 	/* Delay queue processing */
 	while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
-		struct mbuf *m = ngp_h->m;
-
 		when = &ngp_h->when;
+		m = ngp_h->m;
 		if (when->tv_sec > now->tv_sec ||
 		    (when->tv_sec == now->tv_sec &&
 		    when->tv_usec > now->tv_usec))
 			break;
 
 		/* Update outbound queue stats */
-		hinfo->stats.fwd_frames++;
-		hinfo->stats.fwd_octets += m->m_pkthdr.len;
+		plen = m->m_pkthdr.len;
 		hinfo->run.qout_frames--;
-		hinfo->run.qout_octets -= m->m_pkthdr.len;
+		hinfo->run.qout_octets -= plen;
 
 		/* Dequeue the packet from qout */
 		TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
 		uma_zfree(ngp_zone, ngp_h);
 
-		/* Enqueue locally for sending downstream */
-		if (q_head == NULL)
-			q_head = m;
-		if (q_tail)
-			q_tail->m_nextpkt = m;
-		q_tail = m;
-		m->m_nextpkt = NULL;
+		NG_SEND_DATA(error, dest->hook, m, meta);
+		if (error) {
+			hinfo->stats.out_disc_frames++;
+			hinfo->stats.out_disc_octets += plen;
+		} else {
+			hinfo->stats.fwd_frames++;
+			hinfo->stats.fwd_octets += plen;
+		}
 	}
 
-	/* If both queues are empty detach us from the list of active queues */
-	if (hinfo->run.qin_frames + hinfo->run.qout_frames == 0) {
-		LIST_REMOVE(hinfo, active_le);
-		active_gen_id++;
+	if ((hinfo->run.qin_frames != 0 || hinfo->run.qout_frames != 0) &&
+	    !priv->timer_scheduled) {
+		ng_callout(&priv->timer, node, NULL, 1, ngp_callout, NULL, 0);
+		priv->timer_scheduled = 1;
 	}
-
-	mtx_unlock(&ng_pipe_giant);
-
-	while ((m = q_head) != NULL) {
-		q_head = m->m_nextpkt;
-		m->m_nextpkt = NULL;
-		NG_SEND_DATA(error, dest->hook, m, meta);
-	}
 }
 
-
 /*
- * This routine is called on every clock tick. We poll all nodes/hooks
+ * Runs one tick after pipe_dequeue() arms the timer.  We poll connected hooks
  * for queued frames by calling pipe_dequeue().
  */
 static void
-pipe_scheduler(void *arg)
+ngp_callout(node_p node, hook_p hook, void *arg1, int arg2)
 {
-	pipe_poll();
-
-	/* Reschedule  */
-	callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
-}
-
-
-/*
- * Traverse the list of all active hooks and attempt to dequeue
- * some packets.  Hooks with empty queues are not traversed since
- * they are not linked into this list.
- */
-static void
-pipe_poll(void)
-{
-	struct hookinfo *hinfo;
+	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct timeval now;
-	int old_gen_id = active_gen_id;
-	
-	mtx_lock(&ng_pipe_giant);
+
+	priv->timer_scheduled = 0;
 	microuptime(&now);
-	LIST_FOREACH(hinfo, &active_head, active_le) {
-		CURVNET_SET(NG_HOOK_NODE(hinfo->hook)->nd_vnet);
-		pipe_dequeue(hinfo, &now);
-		CURVNET_RESTORE();
-		mtx_lock(&ng_pipe_giant);
-		if (old_gen_id != active_gen_id) {
-			/* the list was updated; restart traversing */
-			hinfo = LIST_FIRST(&active_head);
-			if (hinfo == NULL)
-				break;
-			old_gen_id = active_gen_id;
-			continue;
-		}
-	}
-	mtx_unlock(&ng_pipe_giant);
+	if (priv->upper.hook != NULL)
+		pipe_dequeue(&priv->upper, &now);
+	if (priv->lower.hook != NULL)
+		pipe_dequeue(&priv->lower, &now);
 }
 
-
 /*
  * Shutdown processing
  *
  * This is tricky. If we have both a lower and upper hook, then we
  * probably want to extricate ourselves and leave the two peers
  * still linked to each other. Otherwise we should just shut down as
  * a normal node would.
  */
 static int
 ngp_shutdown(node_p node)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 
+	if (priv->timer_scheduled)
+		ng_uncallout(&priv->timer, node);
 	if (priv->lower.hook && priv->upper.hook)
 		ng_bypass(priv->lower.hook, priv->upper.hook);
 	else {
 		if (priv->upper.hook != NULL)
 			ng_rmhook_self(priv->upper.hook);
 		if (priv->lower.hook != NULL)
 			ng_rmhook_self(priv->lower.hook);
 	}
 	NG_NODE_UNREF(node);
 	free(priv, M_NG_PIPE);
 	return (0);
 }
 
 
 /*
  * Hook disconnection
  */
 static int
 ngp_disconnect(hook_p hook)
 {
 	struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
 	struct ngp_fifo *ngp_f;
 	struct ngp_hdr *ngp_h;
-	int removed = 0;
 
-	mtx_lock(&ng_pipe_giant);
-
 	KASSERT(hinfo != NULL, ("%s: null info", __FUNCTION__));
 	hinfo->hook = NULL;
 
 	/* Flush all fifo queues associated with the hook */
 	while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
 		while ((ngp_h = TAILQ_FIRST(&ngp_f->packet_head))) {
 			TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
 			m_freem(ngp_h->m);
 			uma_zfree(ngp_zone, ngp_h);
-			removed++;
 		}
 		TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
 		uma_zfree(ngp_zone, ngp_f);
 	}
 
 	/* Flush the delay queue */
 	while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
 		TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
 		m_freem(ngp_h->m);
 		uma_zfree(ngp_zone, ngp_h);
-		removed++;
 	}
 
-	/*
-	 * Both queues should be empty by now, so detach us from
-	 * the list of active queues
-	 */
-	if (removed) {
-		LIST_REMOVE(hinfo, active_le);
-		active_gen_id++;
-	}
-	if (hinfo->run.qin_frames + hinfo->run.qout_frames != removed)
-		printf("Mismatch: queued=%d but removed=%d !?!",
-		    hinfo->run.qin_frames + hinfo->run.qout_frames, removed);
-
 	/* Release the packet loss probability table (BER) */
 	if (hinfo->ber_p)
 		free(hinfo->ber_p, M_NG_PIPE);
 
-	mtx_unlock(&ng_pipe_giant);
-
 	return (0);
 }
 
 static int
 ngp_modevent(module_t mod, int type, void *unused)
 {
 	int error = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr),
 		    sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 		if (ngp_zone == NULL)
 			panic("ng_pipe: couldn't allocate descriptor zone");
-
-		mtx_init(&ng_pipe_giant, "ng_pipe_giant", NULL, MTX_DEF);
-		LIST_INIT(&active_head);
-		callout_init(&polling_timer, CALLOUT_MPSAFE);
-		callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
 		break;
 	case MOD_UNLOAD:
-		callout_drain(&polling_timer);
 		uma_zdestroy(ngp_zone);
-		mtx_destroy(&ng_pipe_giant);
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
Index: projects/binutils-2.17/sys/netinet/ip_carp.c
===================================================================
--- projects/binutils-2.17/sys/netinet/ip_carp.c	(revision 215829)
+++ projects/binutils-2.17/sys/netinet/ip_carp.c	(revision 215830)
@@ -1,2423 +1,2426 @@
 /*
  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
  * Copyright (c) 2003 Ryan McBride. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bpf.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/time.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/signalvar.h>
 #include <sys/filio.h>
 #include <sys/sockio.h>
 
 #include <sys/socket.h>
 #include <sys/vnode.h>
 
 #include <machine/stdarg.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/fddi.h>
 #include <net/iso88025.h>
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/if_ether.h>
 #include <machine/in_cksum.h>
 #endif
 
 #ifdef INET6
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #include <crypto/sha1.h>
 #include <netinet/ip_carp.h>
 
 #define	CARP_IFNAME	"carp"
 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
 SYSCTL_DECL(_net_inet_carp);
 
 struct carp_softc {
 	struct ifnet	 	*sc_ifp;	/* Interface clue */
 	struct ifnet		*sc_carpdev;	/* Pointer to parent interface */
 	struct in_ifaddr 	*sc_ia;		/* primary iface address */
 	struct ip_moptions 	 sc_imo;
 #ifdef INET6
 	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
 	struct ip6_moptions 	 sc_im6o;
 #endif /* INET6 */
 	TAILQ_ENTRY(carp_softc)	 sc_list;
 
 	enum { INIT = 0, BACKUP, MASTER }	sc_state;
 
 	int			 sc_flags_backup;
 	int			 sc_suppress;
 
 	int			 sc_sendad_errors;
 #define	CARP_SENDAD_MAX_ERRORS	3
 	int			 sc_sendad_success;
 #define	CARP_SENDAD_MIN_SUCCESS 3
 
 	int			 sc_vhid;
 	int			 sc_advskew;
 	int			 sc_naddrs;
 	int			 sc_naddrs6;
 	int			 sc_advbase;	/* seconds */
 	int			 sc_init_counter;
 	u_int64_t		 sc_counter;
 
 	/* authentication */
 #define CARP_HMAC_PAD	64
 	unsigned char sc_key[CARP_KEY_LEN];
 	unsigned char sc_pad[CARP_HMAC_PAD];
 	SHA1_CTX sc_sha1;
 
 	struct callout		 sc_ad_tmo;	/* advertisement timeout */
 	struct callout		 sc_md_tmo;	/* master down timeout */
 	struct callout 		 sc_md6_tmo;	/* master down timeout */
 	
 	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
 };
 #define	SC2IFP(sc)	((sc)->sc_ifp)
 
 int carp_suppress_preempt = 0;
 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };	/* XXX for now */
 SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
     &carp_suppress_preempt, 0, "Preemption is suppressed");
 
 struct carpstats carpstats;
 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
     &carpstats, carpstats,
     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
 
 struct carp_if {
 	TAILQ_HEAD(, carp_softc) vhif_vrs;
 	int vhif_nvrs;
 
 	struct ifnet 	*vhif_ifp;
 	struct mtx	 vhif_mtx;
 };
 
 #define	CARP_INET	0
 #define	CARP_INET6	1
 static int proto_reg[] = {-1, -1};
 
 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
 #define	SC2CIF(sc)		((struct carp_if *)(sc)->sc_carpdev->if_carp)
 
 /* lock per carp_if queue */
 #define	CARP_LOCK_INIT(cif)	mtx_init(&(cif)->vhif_mtx, "carp_if", 	\
 	NULL, MTX_DEF)
 #define	CARP_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->vhif_mtx)
 #define	CARP_LOCK_ASSERT(cif)	mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
 #define	CARP_LOCK(cif)		mtx_lock(&(cif)->vhif_mtx)
 #define	CARP_UNLOCK(cif)	mtx_unlock(&(cif)->vhif_mtx)
 
 #define	CARP_SCLOCK(sc)		mtx_lock(&SC2CIF(sc)->vhif_mtx)
 #define	CARP_SCUNLOCK(sc)	mtx_unlock(&SC2CIF(sc)->vhif_mtx)
 #define	CARP_SCLOCK_ASSERT(sc)	mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
 
 #define	CARP_LOG(...)	do {				\
 	if (carp_opts[CARPCTL_LOG] > 0)			\
 		log(LOG_INFO, __VA_ARGS__);		\
 } while (0)
 
 #define	CARP_DEBUG(...)	do {				\
 	if (carp_opts[CARPCTL_LOG] > 1)			\
 		log(LOG_DEBUG, __VA_ARGS__);		\
 } while (0)
 
 static void	carp_hmac_prepare(struct carp_softc *);
 static void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
 		    unsigned char *);
 static int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
 		    unsigned char *);
 static void	carp_setroute(struct carp_softc *, int);
 static void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
 static int 	carp_clone_create(struct if_clone *, int, caddr_t);
 static void 	carp_clone_destroy(struct ifnet *);
 static void	carpdetach(struct carp_softc *, int);
 static int	carp_prepare_ad(struct mbuf *, struct carp_softc *,
 		    struct carp_header *);
 static void	carp_send_ad_all(void);
 static void	carp_send_ad(void *);
 static void	carp_send_ad_locked(struct carp_softc *);
 static void	carp_send_arp(struct carp_softc *);
 static void	carp_master_down(void *);
 static void	carp_master_down_locked(struct carp_softc *);
 static int	carp_ioctl(struct ifnet *, u_long, caddr_t);
 static int	carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
     		    struct route *);
 static void	carp_start(struct ifnet *);
 static void	carp_setrun(struct carp_softc *, sa_family_t);
 static void	carp_set_state(struct carp_softc *, int);
 static int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
 enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
 
-static void	carp_multicast_cleanup(struct carp_softc *);
+static void	carp_multicast_cleanup(struct carp_softc *, int dofree);
 static int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
 static int	carp_del_addr(struct carp_softc *, struct sockaddr_in *);
 static void	carp_carpdev_state_locked(struct carp_if *);
 static void	carp_sc_state_locked(struct carp_softc *);
 #ifdef INET6
 static void	carp_send_na(struct carp_softc *);
 static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
 static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static void	carp_multicast6_cleanup(struct carp_softc *);
+static void	carp_multicast6_cleanup(struct carp_softc *, int dofree);
 #endif
 
 static LIST_HEAD(, carp_softc) carpif_list;
 static struct mtx carp_mtx;
 IFC_SIMPLE_DECLARE(carp, 0);
 
 static eventhandler_tag if_detach_event_tag;
 
 static __inline u_int16_t
 carp_cksum(struct mbuf *m, int len)
 {
 	return (in_cksum(m, len));
 }
 
 static void
 carp_hmac_prepare(struct carp_softc *sc)
 {
 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
 	u_int8_t vhid = sc->sc_vhid & 0xff;
 	struct ifaddr *ifa;
 	int i, found;
 #ifdef INET
 	struct in_addr last, cur, in;
 #endif
 #ifdef INET6
 	struct in6_addr last6, cur6, in6;
 #endif
 
 	if (sc->sc_carpdev)
 		CARP_SCLOCK(sc);
 
 	/* XXX: possible race here */
 
 	/* compute ipad from key */
 	bzero(sc->sc_pad, sizeof(sc->sc_pad));
 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
 	for (i = 0; i < sizeof(sc->sc_pad); i++)
 		sc->sc_pad[i] ^= 0x36;
 
 	/* precompute first part of inner hash */
 	SHA1Init(&sc->sc_sha1);
 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
 #ifdef INET
 	cur.s_addr = 0;
 	do {
 		found = 0;
 		last = cur;
 		cur.s_addr = 0xffffffff;
 		IF_ADDR_LOCK(SC2IFP(sc));
 		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 			if (ifa->ifa_addr->sa_family == AF_INET &&
 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
 				cur.s_addr = in.s_addr;
 				found++;
 			}
 		}
 		IF_ADDR_UNLOCK(SC2IFP(sc));
 		if (found)
 			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
 	} while (found);
 #endif /* INET */
 #ifdef INET6
 	memset(&cur6, 0, sizeof(cur6));
 	do {
 		found = 0;
 		last6 = cur6;
 		memset(&cur6, 0xff, sizeof(cur6));
 		IF_ADDR_LOCK(SC2IFP(sc));
 		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
 			if (IN6_IS_SCOPE_EMBED(&in6))
 				in6.s6_addr16[1] = 0;
 			if (ifa->ifa_addr->sa_family == AF_INET6 &&
 			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
 				cur6 = in6;
 				found++;
 			}
 		}
 		IF_ADDR_UNLOCK(SC2IFP(sc));
 		if (found)
 			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
 	} while (found);
 #endif /* INET6 */
 
 	/* convert ipad to opad */
 	for (i = 0; i < sizeof(sc->sc_pad); i++)
 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
 
 	if (sc->sc_carpdev)
 		CARP_SCUNLOCK(sc);
 }
 
 static void
 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
     unsigned char md[20])
 {
 	SHA1_CTX sha1ctx;
 
 	/* fetch first half of inner hash */
 	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
 
 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
 	SHA1Final(md, &sha1ctx);
 
 	/* outer hash */
 	SHA1Init(&sha1ctx);
 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
 	SHA1Update(&sha1ctx, md, 20);
 	SHA1Final(md, &sha1ctx);
 }
 
 static int
 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
     unsigned char md[20])
 {
 	unsigned char md2[20];
 
 	CARP_SCLOCK_ASSERT(sc);
 
 	carp_hmac_generate(sc, counter, md2);
 
 	return (bcmp(md, md2, sizeof(md2)));
 }
 
 static void
 carp_setroute(struct carp_softc *sc, int cmd)
 {
 	struct ifaddr *ifa;
 	int s;
 
 	if (sc->sc_carpdev)
 		CARP_SCLOCK_ASSERT(sc);
 
 	s = splnet();
 	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    sc->sc_carpdev != NULL) {
 			int count = carp_addrcount(
 			    (struct carp_if *)sc->sc_carpdev->if_carp,
 			    ifatoia(ifa), CARP_COUNT_MASTER);
 
 			if ((cmd == RTM_ADD && count == 1) ||
 			    (cmd == RTM_DELETE && count == 0))
 				rtinit(ifa, cmd, RTF_UP | RTF_HOST);
 		}
 	}
 	splx(s);
 }
 
 static int
 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 
 	struct carp_softc *sc;
 	struct ifnet *ifp;
 
 	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
 	ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		free(sc, M_CARP);
 		return (ENOSPC);
 	}
 	
 	sc->sc_flags_backup = 0;
 	sc->sc_suppress = 0;
 	sc->sc_advbase = CARP_DFLTINTV;
 	sc->sc_vhid = -1;	/* required setting */
 	sc->sc_advskew = 0;
 	sc->sc_init_counter = 1;
 	sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
 	    M_WAITOK);
 	sc->sc_imo.imo_mfilters = NULL;
 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
 	sc->sc_imo.imo_multicast_vif = -1;
 #ifdef INET6
 	sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
 	    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
 	    M_WAITOK);
 	sc->sc_im6o.im6o_mfilters = NULL;
 	sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
 #endif
 
 	callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
 	callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
 	callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
 	
 	ifp->if_softc = sc;
 	if_initname(ifp, CARP_IFNAME, unit);
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_flags = IFF_LOOPBACK;
 	ifp->if_ioctl = carp_ioctl;
 	ifp->if_output = carp_looutput;
 	ifp->if_start = carp_start;
 	ifp->if_type = IFT_CARP;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	ifp->if_hdrlen = 0;
 	if_attach(ifp);
 	bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	mtx_lock(&carp_mtx);
 	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
 	mtx_unlock(&carp_mtx);
 	return (0);
 }
 
 static void
 carp_clone_destroy(struct ifnet *ifp)
 {
 	struct carp_softc *sc = ifp->if_softc;
 
 	if (sc->sc_carpdev)
 		CARP_SCLOCK(sc);
 	carpdetach(sc, 1);	/* Returns unlocked. */
 
 	mtx_lock(&carp_mtx);
 	LIST_REMOVE(sc, sc_next);
 	mtx_unlock(&carp_mtx);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free_type(ifp, IFT_ETHER);
 	free(sc->sc_imo.imo_membership, M_CARP);
 #ifdef INET6
 	free(sc->sc_im6o.im6o_membership, M_CARP);
 #endif
 	free(sc, M_CARP);
 }
 
 /*
  * This function can be called on CARP interface destroy path,
  * and in case of the removal of the underlying interface as
- * well. We differentiate these two cases. In the latter case
- * we do not cleanup our multicast memberships, since they
- * are already freed. Also, in the latter case we do not
+ * well. We differentiate these two cases: in case of destruction
+ * of the underlying interface, we do not clean up our multicast
+ * memberships, since they are already freed. But we purge pointers
+ * to multicast structures, since they are no longer valid, to
+ * avoid a panic in future calls to carpdetach(). Also, we do not
  * release the lock on return, because the function will be
  * called once more, for another CARP instance on the same
  * interface.
  */
 static void
 carpdetach(struct carp_softc *sc, int unlock)
 {
 	struct carp_if *cif;
 
 	callout_stop(&sc->sc_ad_tmo);
 	callout_stop(&sc->sc_md_tmo);
 	callout_stop(&sc->sc_md6_tmo);
 
 	if (sc->sc_suppress)
 		carp_suppress_preempt--;
 	sc->sc_suppress = 0;
 
 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
 		carp_suppress_preempt--;
 	sc->sc_sendad_errors = 0;
 
 	carp_set_state(sc, INIT);
 	SC2IFP(sc)->if_flags &= ~IFF_UP;
 	carp_setrun(sc, 0);
-	if (unlock)
-		carp_multicast_cleanup(sc);
+	carp_multicast_cleanup(sc, unlock);
 #ifdef INET6
-	carp_multicast6_cleanup(sc);
+	carp_multicast6_cleanup(sc, unlock);
 #endif
 
 	if (sc->sc_carpdev != NULL) {
 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 		CARP_LOCK_ASSERT(cif);
 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 		if (!--cif->vhif_nvrs) {
 			ifpromisc(sc->sc_carpdev, 0);
 			sc->sc_carpdev->if_carp = NULL;
 			CARP_LOCK_DESTROY(cif);
 			free(cif, M_CARP);
 		} else if (unlock)
 			CARP_UNLOCK(cif);
 		sc->sc_carpdev = NULL;
 	}
 }
 
 /* Detach an interface from the carp. */
 static void
 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
 {
 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
 	struct carp_softc *sc, *nextsc;
 
 	if (cif == NULL)
 		return;
 
 	/*
 	 * XXX: At the end of for() cycle the lock will be destroyed.
 	 */
 	CARP_LOCK(cif);
 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
 		nextsc = TAILQ_NEXT(sc, sc_list);
 		carpdetach(sc, 0);
 	}
 }
 
 /*
  * process input packet.
  * we have rearranged checks order compared to the rfc,
  * but it seems more efficient this way or not possible otherwise.
  */
 void
 carp_input(struct mbuf *m, int hlen)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct carp_header *ch;
 	int iplen, len;
 
 	CARPSTATS_INC(carps_ipackets);
 
 	if (!carp_opts[CARPCTL_ALLOW]) {
 		m_freem(m);
 		return;
 	}
 
 	/* check if received on a valid carp interface */
 	if (m->m_pkthdr.rcvif->if_carp == NULL) {
 		CARPSTATS_INC(carps_badif);
 		CARP_DEBUG("carp_input: packet received on non-carp "
 		    "interface: %s\n",
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
 	}
 
 	/* verify that the IP TTL is 255.  */
 	if (ip->ip_ttl != CARP_DFLTTL) {
 		CARPSTATS_INC(carps_badttl);
 		CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
 		    ip->ip_ttl,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
 	}
 
 	iplen = ip->ip_hl << 2;
 
 	if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
 		CARPSTATS_INC(carps_badlen);
 		CARP_DEBUG("carp_input: received len %zd < "
 		    "sizeof(struct carp_header) on %s\n",
 		    m->m_len - sizeof(struct ip),
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
 	}
 
 	if (iplen + sizeof(*ch) < m->m_len) {
 		if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
 			CARPSTATS_INC(carps_hdrops);
 			CARP_DEBUG("carp_input: pullup failed\n");
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 	ch = (struct carp_header *)((char *)ip + iplen);
 
 	/*
 	 * verify that the received packet length is
 	 * equal to the CARP header
 	 */
 	len = iplen + sizeof(*ch);
 	if (len > m->m_pkthdr.len) {
 		CARPSTATS_INC(carps_badlen);
 		CARP_DEBUG("carp_input: packet too short %d on %s\n",
 		    m->m_pkthdr.len,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
 	}
 
 	if ((m = m_pullup(m, len)) == NULL) {
 		CARPSTATS_INC(carps_hdrops);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 	ch = (struct carp_header *)((char *)ip + iplen);
 
 	/* verify the CARP checksum */
 	m->m_data += iplen;
 	if (carp_cksum(m, len - iplen)) {
 		CARPSTATS_INC(carps_badsum);
 		CARP_DEBUG("carp_input: checksum failed on %s\n",
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
 	}
 	m->m_data -= iplen;
 
 	carp_input_c(m, ch, AF_INET);
 }
 
 #ifdef INET6
 /*
  * Input handler for CARP advertisements carried over IPv6.
  *
  * The packet is validated in stages (CARP enabled, receiving interface has
  * CARP state attached, hop limit equals CARP_DFLTTL, CARP header present,
  * checksum good) and is then handed to carp_input_c() with AF_INET6.
  * Every path returns IPPROTO_DONE; the "proto" argument is not used.
  */
 int
 carp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct carp_header *ch;
 	u_int len;
 
 	CARPSTATS_INC(carps_ipackets6);
 
 	/* drop everything while CARP input is administratively disabled */
 	if (!carp_opts[CARPCTL_ALLOW]) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* check if received on a valid carp interface */
 	if (m->m_pkthdr.rcvif->if_carp == NULL) {
 		CARPSTATS_INC(carps_badif);
 		CARP_DEBUG("carp6_input: packet received on non-carp "
 		    "interface: %s\n",
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* verify that the IP TTL is 255 */
 	if (ip6->ip6_hlim != CARP_DFLTTL) {
 		CARPSTATS_INC(carps_badttl);
 		CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
 		    ip6->ip6_hlim,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* verify that we have a complete carp packet */
 	/* len is only used for the debug message below */
 	len = m->m_len;
 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
 	if (ch == NULL) {
 		/*
 		 * NOTE(review): no m_freem() on this path; presumably
 		 * IP6_EXTHDR_GET has already disposed of the mbuf when it
 		 * yields NULL -- confirm against the macro definition.
 		 */
 		CARPSTATS_INC(carps_badlen);
 		CARP_DEBUG("carp6_input: packet size %u too small\n", len);
 		return (IPPROTO_DONE);
 	}
 
 
 	/* verify the CARP checksum */
 	/* temporarily advance m_data so the checksum starts at the CARP header */
 	m->m_data += *offp;
 	if (carp_cksum(m, sizeof(*ch))) {
 		CARPSTATS_INC(carps_badsum);
 		CARP_DEBUG("carp6_input: checksum failed, on %s\n",
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 	/* undo the adjustment before passing the packet on */
 	m->m_data -= *offp;
 
 	carp_input_c(m, ch, AF_INET6);
 	return (IPPROTO_DONE);
 }
 #endif /* INET6 */
 
 /*
  * Common tail of the IPv4 and IPv6 input paths.
  *
  * Finds the virtual host on the receiving interface whose vhid matches
  * ch->carp_vhid, accounts for the packet on that carp interface, taps it
  * to BPF, checks the CARP version and HMAC, records the sender's counter
  * and finally runs the MASTER/BACKUP state machine by comparing our own
  * advertisement interval (sc_tv) with the sender's (ch_tv).
  *
  * "af" is the address family the packet arrived on (AF_INET or AF_INET6).
  * The mbuf is freed on every path.
  */
 static void
 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct carp_softc *sc;
 	u_int64_t tmp_counter;
 	struct timeval sc_tv, ch_tv;
 
 	/* verify that the VHID is valid on the receiving interface */
 	CARP_LOCK(ifp->if_carp);
 	TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
 		if (sc->sc_vhid == ch->carp_vhid)
 			break;
 
 	/* unknown vhid, or the matching carp interface is not up and running */
 	if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
 	    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
 		CARPSTATS_INC(carps_badvhid);
 		CARP_UNLOCK(ifp->if_carp);
 		m_freem(m);
 		return;
 	}
 
 	getmicrotime(&SC2IFP(sc)->if_lastchange);
 	SC2IFP(sc)->if_ipackets++;
 	SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
 
 	if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
 		uint32_t af1 = af;
 
 		/*
 		 * BPF wants net byte order.  Only an IPv4 packet carries a
 		 * struct ip at the front of the mbuf; applying the rewrite
 		 * to an IPv6 packet would overwrite bytes of its ip6_hdr
 		 * before the packet is tapped, so restrict it to AF_INET.
 		 */
 		if (af == AF_INET) {
 			struct ip *ip = mtod(m, struct ip *);
 
 			ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
 			ip->ip_off = htons(ip->ip_off);
 		}
 		bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
 	}
 
 	/* verify the CARP version. */
 	if (ch->carp_version != CARP_VERSION) {
 		CARPSTATS_INC(carps_badver);
 		SC2IFP(sc)->if_ierrors++;
 		CARP_UNLOCK(ifp->if_carp);
 		CARP_DEBUG("%s; invalid version %d\n",
 		    SC2IFP(sc)->if_xname,
 		    ch->carp_version);
 		m_freem(m);
 		return;
 	}
 
 	/* verify the hash */
 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
 		CARPSTATS_INC(carps_badauth);
 		SC2IFP(sc)->if_ierrors++;
 		CARP_UNLOCK(ifp->if_carp);
 		CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
 		m_freem(m);
 		return;
 	}
 
 	/* reassemble the 64-bit counter from its two network-order halves */
 	tmp_counter = ntohl(ch->carp_counter[0]);
 	tmp_counter = tmp_counter<<32;
 	tmp_counter += ntohl(ch->carp_counter[1]);
 
 	/* XXX Replay protection goes here */
 
 	sc->sc_init_counter = 0;
 	sc->sc_counter = tmp_counter;
 
 	/*
 	 * Express both advertisement intervals as timevals: advbase is whole
 	 * seconds, advskew is in 1/256ths of a second.  While preemption is
 	 * suppressed our own skew is treated as at least 240.
 	 */
 	sc_tv.tv_sec = sc->sc_advbase;
 	if (carp_suppress_preempt && sc->sc_advskew <  240)
 		sc_tv.tv_usec = 240 * 1000000 / 256;
 	else
 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 	ch_tv.tv_sec = ch->carp_advbase;
 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
 
 	switch (sc->sc_state) {
 	case INIT:
 		break;
 	case MASTER:
 		/*
 		 * If we receive an advertisement from a master who's going to
 		 * be more frequent than us, go into BACKUP state.
 		 */
 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
 			callout_stop(&sc->sc_ad_tmo);
 			CARP_LOG("%s: MASTER -> BACKUP "
 			   "(more frequent advertisement received)\n",
 			   SC2IFP(sc)->if_xname);
 			carp_set_state(sc, BACKUP);
 			carp_setrun(sc, 0);
 			carp_setroute(sc, RTM_DELETE);
 		}
 		break;
 	case BACKUP:
 		/*
 		 * If we're pre-empting masters who advertise slower than us,
 		 * and this one claims to be slower, treat him as down.
 		 */
 		if (carp_opts[CARPCTL_PREEMPT] &&
 		    timevalcmp(&sc_tv, &ch_tv, <)) {
 			CARP_LOG("%s: BACKUP -> MASTER "
 			    "(preempting a slower master)\n",
 			    SC2IFP(sc)->if_xname);
 			carp_master_down_locked(sc);
 			break;
 		}
 
 		/*
 		 *  If the master is going to advertise at such a low frequency
 		 *  that he's guaranteed to time out, we'd might as well just
 		 *  treat him as timed out now.
 		 */
 		sc_tv.tv_sec = sc->sc_advbase * 3;
 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
 			CARP_LOG("%s: BACKUP -> MASTER "
 			    "(master timed out)\n",
 			    SC2IFP(sc)->if_xname);
 			carp_master_down_locked(sc);
 			break;
 		}
 
 		/*
 		 * Otherwise, we reset the counter and wait for the next
 		 * advertisement.
 		 */
 		carp_setrun(sc, af);
 		break;
 	}
 
 	CARP_UNLOCK(ifp->if_carp);
 
 	m_freem(m);
 	return;
 }
 
 static int
 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
 {
 	struct m_tag *mtag;
 	struct ifnet *ifp = SC2IFP(sc);
 
 	if (sc->sc_init_counter) {
 		/* this could also be seconds since unix epoch */
 		sc->sc_counter = arc4random();
 		sc->sc_counter = sc->sc_counter << 32;
 		sc->sc_counter += arc4random();
 	} else
 		sc->sc_counter++;
 
 	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
 	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
 
 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
 
 	/* Tag packet for carp_output */
 	mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
 	if (mtag == NULL) {
 		m_freem(m);
 		SC2IFP(sc)->if_oerrors++;
 		return (ENOMEM);
 	}
 	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
 	m_tag_prepend(m, mtag);
 
 	return (0);
 }
 
 /*
  * Walk the global list of carp softcs and send an advertisement from each
  * one that is attached to a parent device, is up and running, and is
  * currently MASTER.
  */
 static void
 carp_send_ad_all(void)
 {
 	struct carp_softc *cur;
 	int is_active;
 
 	mtx_lock(&carp_mtx);
 	LIST_FOREACH(cur, &carpif_list, sc_next) {
 		/* softcs without a parent device have no lock to take */
 		if (cur->sc_carpdev != NULL) {
 			CARP_SCLOCK(cur);
 			is_active = (SC2IFP(cur)->if_flags & IFF_UP) &&
 			    (SC2IFP(cur)->if_drv_flags & IFF_DRV_RUNNING);
 			if (is_active && cur->sc_state == MASTER)
 				carp_send_ad_locked(cur);
 			CARP_SCUNLOCK(cur);
 		}
 	}
 	mtx_unlock(&carp_mtx);
 }
 
 /*
  * Callout entry point: take the softc lock around carp_send_ad_locked().
  * "v" is the carp softc that was registered with the callout.
  */
 static void
 carp_send_ad(void *v)
 {
 	struct carp_softc *softc;
 
 	softc = (struct carp_softc *)v;
 
 	CARP_SCLOCK(softc);
 	carp_send_ad_locked(softc);
 	CARP_SCUNLOCK(softc);
 }
 
 /*
  * Build and transmit a CARP advertisement for "sc" over IPv4 and/or IPv6,
  * depending on which of sc_ia / sc_ia6 is set, then re-arm the
  * advertisement callout.  The caller must hold the softc lock.
  *
  * When the carp interface is no longer up and running the advertisement
  * carries advbase = advskew = 255 and the callout is not re-armed.
  *
  * Transmit failures are counted in sc_sendad_errors.  When the count
  * reaches CARP_SENDAD_MAX_ERRORS the global carp_suppress_preempt is
  * incremented; it is decremented again after CARP_SENDAD_MIN_SUCCESS
  * successful sends.
  */
 static void
 carp_send_ad_locked(struct carp_softc *sc)
 {
 	struct carp_header ch;
 	struct timeval tv;
 	struct carp_header *ch_ptr;
 	struct mbuf *m;
 	int len, advbase, advskew;
 
 	CARP_SCLOCK_ASSERT(sc);
 
 	/* bow out if we've lost our UPness or RUNNINGuiness */
 	if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
 	    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
 		/* tv is left unset here; it is only read when not 255/255 */
 		advbase = 255;
 		advskew = 255;
 	} else {
 		advbase = sc->sc_advbase;
 		/* while preemption is suppressed, advertise a skew of >= 240 */
 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
 			advskew = sc->sc_advskew;
 		else
 			advskew = 240;
 		/* interval until the next advertisement */
 		tv.tv_sec = advbase;
 		tv.tv_usec = advskew * 1000000 / 256;
 	}
 
 	/* header template shared by the IPv4 and IPv6 packets */
 	ch.carp_version = CARP_VERSION;
 	ch.carp_type = CARP_ADVERTISEMENT;
 	ch.carp_vhid = sc->sc_vhid;
 	ch.carp_advbase = advbase;
 	ch.carp_advskew = advskew;
 	ch.carp_authlen = 7;	/* XXX DEFINE */
 	ch.carp_pad1 = 0;	/* must be zero */
 	ch.carp_cksum = 0;
 
 #ifdef INET
 	if (sc->sc_ia) {
 		struct ip *ip;
 
 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
 		if (m == NULL) {
 			SC2IFP(sc)->if_oerrors++;
 			CARPSTATS_INC(carps_onomem);
 			/* XXX maybe less ? */
 			if (advbase != 255 || advskew != 255)
 				callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 				    carp_send_ad, sc);
 			return;
 		}
 		len = sizeof(*ip) + sizeof(ch);
 		m->m_pkthdr.len = len;
 		m->m_pkthdr.rcvif = NULL;
 		m->m_len = len;
 		MH_ALIGN(m, m->m_len);
 		m->m_flags |= M_MCAST;
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(*ip) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		ip->ip_len = len;
 		ip->ip_id = ip_newid();
 		ip->ip_off = IP_DF;
 		ip->ip_ttl = CARP_DFLTTL;
 		ip->ip_p = IPPROTO_CARP;
 		ip->ip_sum = 0;
 		ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
 		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
 
 		/* the CARP header sits directly behind the IP header */
 		ch_ptr = (struct carp_header *)(&ip[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
 		/* carp_prepare_ad() frees m itself when it fails */
 		if (carp_prepare_ad(m, sc, ch_ptr))
 			return;
 
 		/* checksum covers the CARP header only: skip the IP header */
 		m->m_data += sizeof(*ip);
 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
 		m->m_data -= sizeof(*ip);
 
 		getmicrotime(&SC2IFP(sc)->if_lastchange);
 		SC2IFP(sc)->if_opackets++;
 		SC2IFP(sc)->if_obytes += len;
 		CARPSTATS_INC(carps_opackets);
 
 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
 			SC2IFP(sc)->if_oerrors++;
 			if (sc->sc_sendad_errors < INT_MAX)
 				sc->sc_sendad_errors++;
 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
 				carp_suppress_preempt++;
 				/*
 				 * First softc to enter suppression: drop our
 				 * lock so carp_send_ad_all() can take each
 				 * softc's lock in turn.
 				 */
 				if (carp_suppress_preempt == 1) {
 					CARP_SCUNLOCK(sc);
 					carp_send_ad_all();
 					CARP_SCLOCK(sc);
 				}
 			}
 			sc->sc_sendad_success = 0;
 		} else {
 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
 				/* leave suppression after enough successes */
 				if (++sc->sc_sendad_success >=
 				    CARP_SENDAD_MIN_SUCCESS) {
 					carp_suppress_preempt--;
 					sc->sc_sendad_errors = 0;
 				}
 			} else
 				sc->sc_sendad_errors = 0;
 		}
 	}
 #endif /* INET */
 #ifdef INET6
 	if (sc->sc_ia6) {
 		struct ip6_hdr *ip6;
 
 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
 		if (m == NULL) {
 			SC2IFP(sc)->if_oerrors++;
 			CARPSTATS_INC(carps_onomem);
 			/* XXX maybe less ? */
 			if (advbase != 255 || advskew != 255)
 				callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 				    carp_send_ad, sc);
 			return;
 		}
 		len = sizeof(*ip6) + sizeof(ch);
 		m->m_pkthdr.len = len;
 		m->m_pkthdr.rcvif = NULL;
 		m->m_len = len;
 		MH_ALIGN(m, m->m_len);
 		m->m_flags |= M_MCAST;
 		ip6 = mtod(m, struct ip6_hdr *);
 		bzero(ip6, sizeof(*ip6));
 		ip6->ip6_vfc |= IPV6_VERSION;
 		ip6->ip6_hlim = CARP_DFLTTL;
 		ip6->ip6_nxt = IPPROTO_CARP;
 		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
 		    sizeof(struct in6_addr));
 		/* set the multicast destination */
 
 		/* ff02::12 (header was zeroed above), scoped to the parent */
 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 		ip6->ip6_dst.s6_addr8[15] = 0x12;
 		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
 			/* note: returns without re-arming sc_ad_tmo */
 			SC2IFP(sc)->if_oerrors++;
 			m_freem(m);
 			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
 			return;
 		}
 
 		/* the CARP header sits directly behind the IPv6 header */
 		ch_ptr = (struct carp_header *)(&ip6[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
 		/* carp_prepare_ad() frees m itself when it fails */
 		if (carp_prepare_ad(m, sc, ch_ptr))
 			return;
 
 		/* checksum covers the CARP header only: skip the IPv6 header */
 		m->m_data += sizeof(*ip6);
 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
 		m->m_data -= sizeof(*ip6);
 
 		getmicrotime(&SC2IFP(sc)->if_lastchange);
 		SC2IFP(sc)->if_opackets++;
 		SC2IFP(sc)->if_obytes += len;
 		CARPSTATS_INC(carps_opackets6);
 
 		/* same error accounting as the IPv4 branch above */
 		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
 			SC2IFP(sc)->if_oerrors++;
 			if (sc->sc_sendad_errors < INT_MAX)
 				sc->sc_sendad_errors++;
 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
 				carp_suppress_preempt++;
 				if (carp_suppress_preempt == 1) {
 					CARP_SCUNLOCK(sc);
 					carp_send_ad_all();
 					CARP_SCLOCK(sc);
 				}
 			}
 			sc->sc_sendad_success = 0;
 		} else {
 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
 				if (++sc->sc_sendad_success >=
 				    CARP_SENDAD_MIN_SUCCESS) {
 					carp_suppress_preempt--;
 					sc->sc_sendad_errors = 0;
 				}
 			} else
 				sc->sc_sendad_errors = 0;
 		}
 	}
 #endif /* INET6 */
 
 	/* schedule the next advertisement unless we just sent the 255/255 one */
 	if (advbase != 255 || advskew != 255)
 		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 		    carp_send_ad, sc);
 
 }
 
 /*
  * Announce the virtual router MAC address: for every IPv4 address
  * configured on the carp interface, call arp_ifinit2() on the parent
  * device with the carp interface's link-level address.
  */
 static void
 carp_send_arp(struct carp_softc *sc)
 {
 	struct ifaddr *cur;
 
 	TAILQ_FOREACH(cur, &SC2IFP(sc)->if_addrlist, ifa_list) {
 		if (cur->ifa_addr->sa_family == AF_INET) {
 			arp_ifinit2(sc->sc_carpdev, cur,
 			    IF_LLADDR(sc->sc_ifp));
 
 			/* brief pause between announcements */
 			DELAY(1000);	/* XXX */
 		}
 	}
 }
 
 #ifdef INET6
 /*
  * IPv6 counterpart of carp_send_arp(): for every IPv6 address configured
  * on the carp interface, send a neighbor advertisement with the override
  * flag set to the link-local all-nodes group via the parent device.
  */
 static void
 carp_send_na(struct carp_softc *sc)
 {
 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 	struct ifaddr *cur;
 	struct in6_addr *target;
 
 	TAILQ_FOREACH(cur, &SC2IFP(sc)->if_addrlist, ifa_list) {
 		if (cur->ifa_addr->sa_family == AF_INET6) {
 			target = &ifatoia6(cur)->ia_addr.sin6_addr;
 			nd6_na_output(sc->sc_carpdev, &mcast, target,
 			    ND_NA_FLAG_OVERRIDE, 1, NULL);
 
 			/* brief pause between announcements */
 			DELAY(1000);	/* XXX */
 		}
 	}
 }
 #endif /* INET6 */
 
 /*
  * Count how many IPv4 addresses equal to ia's address are configured on
  * the virtual hosts of "cif" that qualify under "type":
  *   CARP_COUNT_RUNNING - the carp interface is up and running;
  *   CARP_COUNT_MASTER  - the virtual host is in MASTER state.
  * The caller must hold the carp_if lock.
  */
 static int
 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
 {
 	struct carp_softc *sc;
 	struct ifaddr *cur;
 	int matches = 0;
 	int qualifies;
 
 	CARP_LOCK_ASSERT(cif);
 
 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 		qualifies = (type == CARP_COUNT_RUNNING &&
 		    (SC2IFP(sc)->if_flags & IFF_UP) &&
 		    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) ||
 		    (type == CARP_COUNT_MASTER && sc->sc_state == MASTER);
 		if (!qualifies)
 			continue;
 
 		IF_ADDR_LOCK(SC2IFP(sc));
 		TAILQ_FOREACH(cur, &SC2IFP(sc)->if_addrlist, ifa_list) {
 			if (cur->ifa_addr->sa_family != AF_INET)
 				continue;
 			if (ia->ia_addr.sin_addr.s_addr ==
 			    ifatoia(cur)->ia_addr.sin_addr.s_addr)
 				matches++;
 		}
 		IF_ADDR_UNLOCK(SC2IFP(sc));
 	}
 
 	return (matches);
 }
 
 /*
  * Decide whether this host should answer for the address "ia" on behalf
  * of a CARP virtual host attached to "ifp".
  *
  * Returns 1 and stores the virtual host's link-level address in *enaddr
  * when a virtual host in MASTER state is responsible; returns 0 otherwise.
  * "isaddr" (the requester's address) is only consulted when
  * carp_opts[CARPCTL_ARPBALANCE] is set, to pick one of several virtual
  * hosts that share the address.
  */
 int
 carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
     struct in_addr *isaddr, u_int8_t **enaddr)
 {
 	struct carp_if *cif;
 	struct carp_softc *vh;
 	int index, count = 0;
 	struct ifaddr *ifa;
 
 	cif = ifp->if_carp;
 	CARP_LOCK(cif);
 
 	if (carp_opts[CARPCTL_ARPBALANCE]) {
 		/*
 		 * XXX proof of concept implementation.
 		 * We use the source ip to decide which virtual host should
 		 * handle the request. If we're master of that virtual host,
 		 * then we respond, otherwise, just drop the arp packet on
 		 * the floor.
 		 */
 		count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
 		if (count == 0) {
 			/* should never reach this */
 			CARP_UNLOCK(cif);
 			return (0);
 		}
 
 		/* this should be a hash, like pf_hash() */
 		index = ntohl(isaddr->s_addr) % count;
 		/* count is reused below as the running position of each match */
 		count = 0;
 
 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 			if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 			    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
 				IF_ADDR_LOCK(SC2IFP(vh));
 				TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
 				    ifa_list) {
 					if (ifa->ifa_addr->sa_family ==
 					    AF_INET &&
 					    ia->ia_addr.sin_addr.s_addr ==
 					    ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
 						/* the index-th match decides */
 						if (count == index) {
 							if (vh->sc_state ==
 							    MASTER) {
 								*enaddr = IF_LLADDR(vh->sc_ifp);
 								IF_ADDR_UNLOCK(SC2IFP(vh));
 								CARP_UNLOCK(cif);
 								return (1);
 							} else {
 								IF_ADDR_UNLOCK(SC2IFP(vh));
 								CARP_UNLOCK(cif);
 								return (0);
 							}
 						}
 						count++;
 					}
 				}
 				IF_ADDR_UNLOCK(SC2IFP(vh));
 			}
 		}
 	} else {
 		/* no balancing: answer iff ia lives on a running MASTER carp if */
 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 			if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 			    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 			    ia->ia_ifp == SC2IFP(vh) &&
 			    vh->sc_state == MASTER) {
 				*enaddr = IF_LLADDR(vh->sc_ifp);
 				CARP_UNLOCK(cif);
 				return (1);
 			}
 		}
 	}
 	CARP_UNLOCK(cif);
 	return (0);
 }
 
 #ifdef INET6
 /*
  * Look for an address equal to "taddr" on a virtual host of "ifp" that is
  * up, running and MASTER.  On success a reference is taken on the ifaddr
  * with ifa_ref() and it is returned; otherwise NULL is returned.
  */
 struct ifaddr *
 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 {
 	struct carp_if *cif;
 	struct carp_softc *sc;
 	struct ifaddr *cur, *found = NULL;
 
 	cif = ifp->if_carp;
 	CARP_LOCK(cif);
 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 		IF_ADDR_LOCK(SC2IFP(sc));
 		TAILQ_FOREACH(cur, &SC2IFP(sc)->if_addrlist, ifa_list) {
 			if (!IN6_ARE_ADDR_EQUAL(taddr,
 			    &ifatoia6(cur)->ia_addr.sin6_addr))
 				continue;
 			if (!(SC2IFP(sc)->if_flags & IFF_UP) ||
 			    !(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) ||
 			    sc->sc_state != MASTER)
 				continue;
 
 			/* hand a referenced ifaddr back to the caller */
 			ifa_ref(cur);
 			found = cur;
 			break;
 		}
 		IF_ADDR_UNLOCK(SC2IFP(sc));
 		if (found != NULL)
 			break;
 	}
 	CARP_UNLOCK(cif);
 
 	return (found);
 }
 
 /*
  * Find a virtual host of "ifp" that is up and running and owns the IPv6
  * address "taddr".  When one is found, try to tag "m" with
  * PACKET_TAG_CARP carrying the carp interface pointer, and return the
  * virtual host's link-level address whether or not the tag could be
  * allocated.  Returns NULL when no virtual host matches.
  */
 caddr_t
 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 {
 	struct carp_if *cif;
 	struct carp_softc *sc;
 	struct ifaddr *cur;
 	struct ifnet *carp_ifp;
 	struct m_tag *tag;
 
 	cif = ifp->if_carp;
 	CARP_LOCK(cif);
 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 		IF_ADDR_LOCK(SC2IFP(sc));
 		TAILQ_FOREACH(cur, &SC2IFP(sc)->if_addrlist, ifa_list) {
 			if (!IN6_ARE_ADDR_EQUAL(taddr,
 			    &ifatoia6(cur)->ia_addr.sin6_addr))
 				continue;
 			if (!(SC2IFP(sc)->if_flags & IFF_UP) ||
 			    !(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))
 				continue;
 
 			carp_ifp = SC2IFP(sc);
 			tag = m_tag_get(PACKET_TAG_CARP,
 			    sizeof(struct ifnet *), M_NOWAIT);
 			/* better a bit than nothing: the tag is optional */
 			if (tag != NULL) {
 				bcopy(&carp_ifp, (caddr_t)(tag + 1),
 				    sizeof(struct ifnet *));
 				m_tag_prepend(m, tag);
 			}
 
 			IF_ADDR_UNLOCK(SC2IFP(sc));
 			CARP_UNLOCK(cif);
 			return (IF_LLADDR(sc->sc_ifp));
 		}
 		IF_ADDR_UNLOCK(SC2IFP(sc));
 	}
 	CARP_UNLOCK(cif);
 
 	return (NULL);
 }
 #endif
 
 /*
  * Check whether the destination MAC "dhost" belongs to a virtual host on
  * "ifp" that is up, running and MASTER.  Returns that virtual host's carp
  * interface, or NULL when there is none.
  */
 struct ifnet *
 carp_forus(struct ifnet *ifp, u_char *dhost)
 {
 	struct carp_if *cif;
 	struct carp_softc *sc;
 	struct ifnet *owner = NULL;
 	u_int8_t *mac = dhost;
 
 	/* cheap reject: the first five octets must be 00:00:5e:00:01 */
 	if (mac[0] != 0 || mac[1] != 0 || mac[2] != 0x5e || mac[3] != 0 ||
 	    mac[4] != 1)
 		return (NULL);
 
 	cif = ifp->if_carp;
 	CARP_LOCK(cif);
 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 		if (!(SC2IFP(sc)->if_flags & IFF_UP) ||
 		    !(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) ||
 		    sc->sc_state != MASTER)
 			continue;
 		if (bcmp(dhost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0) {
 			owner = SC2IFP(sc);
 			break;
 		}
 	}
 	CARP_UNLOCK(cif);
 
 	return (owner);
 }
 
 /*
  * Callout entry point for the master-down timers: take the softc lock
  * around carp_master_down_locked().  "v" is the carp softc registered
  * with the callout.
  */
 static void
 carp_master_down(void *v)
 {
 	struct carp_softc *softc;
 
 	softc = (struct carp_softc *)v;
 
 	CARP_SCLOCK(softc);
 	carp_master_down_locked(softc);
 	CARP_SCUNLOCK(softc);
 }
 
 /*
  * Handle a "master is down" event.  Only a virtual host in BACKUP state
  * reacts: it becomes MASTER, advertises itself, announces its addresses
  * and installs its routes.  In INIT state the event is only logged; in
  * MASTER state it is ignored.
  */
 static void
 carp_master_down_locked(struct carp_softc *sc)
 {
 	if (sc->sc_carpdev)
 		CARP_SCLOCK_ASSERT(sc);
 
 	if (sc->sc_state == INIT) {
 		printf("%s: master_down event in INIT state\n",
 		    SC2IFP(sc)->if_xname);
 		return;
 	}
 
 	/* already MASTER (or any other state): nothing to do */
 	if (sc->sc_state != BACKUP)
 		return;
 
 	carp_set_state(sc, MASTER);
 	carp_send_ad_locked(sc);
 	carp_send_arp(sc);
 #ifdef INET6
 	carp_send_na(sc);
 #endif /* INET6 */
 	carp_setrun(sc, 0);
 	carp_setroute(sc, RTM_ADD);
 }
 
 /*
  * Re-evaluate whether the virtual host may run and (re)arm the timer that
  * belongs to its current state.
  *
  * Without a parent device the interface is marked not running and forced
  * to INIT.  Otherwise IFF_DRV_RUNNING is set only when the carp interface
  * is up, has a vhid > 0, has at least one IPv4 or IPv6 address and the
  * parent's link is up; if not, the routes are removed and nothing else
  * happens.
  *
  * When in backup state, af indicates whether to reset the master down timer
  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
  */
 static void
 carp_setrun(struct carp_softc *sc, sa_family_t af)
 {
 	struct timeval tv;
 
 	if (sc->sc_carpdev == NULL) {
 		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 		carp_set_state(sc, INIT);
 		return;
 	} else
 		CARP_SCLOCK_ASSERT(sc);
 
 	if (SC2IFP(sc)->if_flags & IFF_UP &&
 	    sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
 	    sc->sc_carpdev->if_link_state == LINK_STATE_UP)
 		SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
 	else {
 		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 		carp_setroute(sc, RTM_DELETE);
 		return;
 	}
 
 	switch (sc->sc_state) {
 	case INIT:
 		/* leave INIT: straight to MASTER if preempting, else BACKUP */
 		if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
 			carp_send_ad_locked(sc);
 			carp_send_arp(sc);
 #ifdef INET6
 			carp_send_na(sc);
 #endif /* INET6 */
 			CARP_LOG("%s: INIT -> MASTER (preempting)\n",
 			    SC2IFP(sc)->if_xname);
 			carp_set_state(sc, MASTER);
 			carp_setroute(sc, RTM_ADD);
 		} else {
 			CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
 			carp_set_state(sc, BACKUP);
 			carp_setroute(sc, RTM_DELETE);
 			/* recurse once to arm the BACKUP timers below */
 			carp_setrun(sc, 0);
 		}
 		break;
 	case BACKUP:
 		callout_stop(&sc->sc_ad_tmo);
 		/* master-down timeout: 3 * advbase seconds plus skew/256 s */
 		tv.tv_sec = 3 * sc->sc_advbase;
 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 			    carp_master_down, sc);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 			    carp_master_down, sc);
 			break;
 #endif /* INET6 */
 		default:
 			/* af == 0: arm a timer per family that has addresses */
 			if (sc->sc_naddrs)
 				callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 				    carp_master_down, sc);
 			if (sc->sc_naddrs6)
 				callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 				    carp_master_down, sc);
 			break;
 		}
 		break;
 	case MASTER:
 		/* schedule the next advertisement */
 		tv.tv_sec = sc->sc_advbase;
 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 		    carp_send_ad, sc);
 		break;
 	}
 }
 
 /*
  * Forget every IPv4 multicast membership recorded in sc->sc_imo and reset
  * the membership count and multicast interface.
  *
  * In the "+" revision of this diff a "dofree" argument is added: when it
  * is non-zero each membership is released with in_delmulti(); when it is
  * zero the slot is only cleared.
  */
 static void
-carp_multicast_cleanup(struct carp_softc *sc)
+carp_multicast_cleanup(struct carp_softc *sc, int dofree)
 {
 	struct ip_moptions *imo = &sc->sc_imo;
 	u_int16_t n = imo->imo_num_memberships;
 
 	/* Clean up our own multicast memberships */
 	/* walk the slots from the last one down to index 0 */
 	while (n-- > 0) {
 		if (imo->imo_membership[n] != NULL) {
-			in_delmulti(imo->imo_membership[n]);
+			if (dofree)
+				in_delmulti(imo->imo_membership[n]);
 			imo->imo_membership[n] = NULL;
 		}
 	}
 	/* carp does not install source filters on its own memberships */
 	KASSERT(imo->imo_mfilters == NULL,
 	   ("%s: imo_mfilters != NULL", __func__));
 	imo->imo_num_memberships = 0;
 	imo->imo_multicast_ifp = NULL;
 }
 
 #ifdef INET6
 /*
  * Forget every IPv6 multicast membership recorded in sc->sc_im6o and
  * reset the membership count and multicast interface.
  *
  * In the "+" revision of this diff a "dofree" argument is added: when it
  * is non-zero each membership is left with in6_mc_leave(); when it is
  * zero the slot is only cleared.
  */
 static void
-carp_multicast6_cleanup(struct carp_softc *sc)
+carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
 {
 	struct ip6_moptions *im6o = &sc->sc_im6o;
 	u_int16_t n = im6o->im6o_num_memberships;
 
 	/* walk the slots from the last one down to index 0 */
 	while (n-- > 0) {
 		if (im6o->im6o_membership[n] != NULL) {
-			in6_mc_leave(im6o->im6o_membership[n], NULL);
+			if (dofree)
+				in6_mc_leave(im6o->im6o_membership[n], NULL);
 			im6o->im6o_membership[n] = NULL;
 		}
 	}
 	KASSERT(im6o->im6o_mfilters == NULL,
 	   ("%s: im6o_mfilters != NULL", __func__));
 	im6o->im6o_num_memberships = 0;
 	im6o->im6o_multicast_ifp = NULL;
 }
 #endif
 
 /*
  * Configure an IPv4 address on the virtual host "sc".
  *
  * A zero address only re-evaluates the run state.  Otherwise the function
  * looks for another, multicast-capable interface whose subnet contains the
  * address, joins the CARP multicast group on it (first address only),
  * attaches the softc to that interface's carp_if (creating it and turning
  * on promiscuous mode if needed), keeps the vhif_vrs list ordered by vhid
  * and finally brings the carp interface up.
  *
  * Returns 0 on success, EADDRNOTAVAIL when no suitable parent interface
  * exists, ENOBUFS when the multicast join fails, EEXIST when another
  * virtual host on the parent already uses the same vhid, or the error
  * from ifpromisc().
  */
 static int
 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 {
 	struct ifnet *ifp;
 	struct carp_if *cif;
 	struct in_ifaddr *ia, *ia_if;
 	struct ip_moptions *imo = &sc->sc_imo;
 	struct in_addr addr;
 	u_long iaddr = htonl(sin->sin_addr.s_addr);
 	int own, error;
 
 	if (sin->sin_addr.s_addr == 0) {
 		if (!(SC2IFP(sc)->if_flags & IFF_UP))
 			carp_set_state(sc, INIT);
 		if (sc->sc_naddrs)
 			SC2IFP(sc)->if_flags |= IFF_UP;
 		/* the softc lock only exists once a parent is attached */
 		if (sc->sc_carpdev)
 			CARP_SCLOCK(sc);
 		carp_setrun(sc, 0);
 		if (sc->sc_carpdev)
 			CARP_SCUNLOCK(sc);
 		return (0);
 	}
 
 	/* we have to do it by hands to check we won't match on us */
 	ia_if = NULL; own = 0;
 	IN_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		/* and, yeah, we need a multicast-capable iface too */
 		if (ia->ia_ifp != SC2IFP(sc) &&
 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 		    (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
 			/* remember the first candidate only */
 			if (!ia_if)
 				ia_if = ia;
 			/* the address is also configured on a real interface */
 			if (sin->sin_addr.s_addr ==
 			    ia->ia_addr.sin_addr.s_addr)
 				own++;
 		}
 	}
 
 	if (!ia_if) {
 		IN_IFADDR_RUNLOCK();
 		return (EADDRNOTAVAIL);
 	}
 
 	ia = ia_if;
 	/* hold the address while we work on it outside the list lock */
 	ifa_ref(&ia->ia_ifa);
 	IN_IFADDR_RUNLOCK();
 
 	ifp = ia->ia_ifp;
 
 	/* refuse a parent that differs from the one already joined on */
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
 	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
 		ifa_free(&ia->ia_ifa);
 		return (EADDRNOTAVAIL);
 	}
 
 	/* first address: join the CARP multicast group on the parent */
 	if (imo->imo_num_memberships == 0) {
 		addr.s_addr = htonl(INADDR_CARP_GROUP);
 		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
 		    NULL) {
 			ifa_free(&ia->ia_ifa);
 			return (ENOBUFS);
 		}
 		imo->imo_num_memberships++;
 		imo->imo_multicast_ifp = ifp;
 		imo->imo_multicast_ttl = CARP_DFLTTL;
 		imo->imo_multicast_loop = 0;
 	}
 
 	if (!ifp->if_carp) {
 
 		/* first virtual host on this parent: create its carp_if */
 		cif = malloc(sizeof(*cif), M_CARP,
 		    M_WAITOK|M_ZERO);
 		if (!cif) {
 			error = ENOBUFS;
 			goto cleanup;
 		}
 		if ((error = ifpromisc(ifp, 1))) {
 			free(cif, M_CARP);
 			goto cleanup;
 		}
 		
 		CARP_LOCK_INIT(cif);
 		CARP_LOCK(cif);
 		cif->vhif_ifp = ifp;
 		TAILQ_INIT(&cif->vhif_vrs);
 		ifp->if_carp = cif;
 
 	} else {
 		struct carp_softc *vr;
 
 		/* the vhid must be unique among the parent's virtual hosts */
 		cif = (struct carp_if *)ifp->if_carp;
 		CARP_LOCK(cif);
 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
 				CARP_UNLOCK(cif);
 				error = EEXIST;
 				goto cleanup;
 			}
 	}
 	sc->sc_ia = ia;
 	sc->sc_carpdev = ifp;
 
 	{ /* XXX prevent endless loop if already in queue */
 	struct carp_softc *vr, *after = NULL;
 	int myself = 0;
 	cif = (struct carp_if *)ifp->if_carp;
 
 	/* XXX: cif should not change, right? So we still hold the lock */
 	CARP_LOCK_ASSERT(cif);
 
 	/* find the last entry with a smaller vhid and note if we are listed */
 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
 		if (vr == sc)
 			myself = 1;
 		if (vr->sc_vhid < sc->sc_vhid)
 			after = vr;
 	}
 
 	if (!myself) {
 		/* We're trying to keep things in order */
 		if (after == NULL) {
 			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
 		} else {
 			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
 		}
 		cif->vhif_nvrs++;
 	}
 	}
 
 	sc->sc_naddrs++;
 	SC2IFP(sc)->if_flags |= IFF_UP;
 	/* an address we also own natively gets zero skew */
 	if (own)
 		sc->sc_advskew = 0;
 	carp_sc_state_locked(sc);
 	carp_setrun(sc, 0);
 
 	CARP_UNLOCK(cif);
 	ifa_free(&ia->ia_ifa);	/* XXXRW: should hold reference for softc. */
 
 	return (0);
 
 cleanup:
 	/*
 	 * NOTE(review): this drops the last membership unconditionally, even
 	 * when it was joined by an earlier call, and leaves
 	 * imo_multicast_ifp set -- confirm this is intended.
 	 */
 	in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
 	ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 /*
  * Account for the removal of one IPv4 address from the virtual host.
  * When the last one goes away the host is shut down: the advertisement
  * callout is stopped, the interface is marked down, the vhid is
  * invalidated, the multicast membership is dropped and the softc is
  * unlinked from the parent's carp_if, which is destroyed once it has no
  * virtual hosts left.  "sin" is not used.  Always returns 0.
  */
 static int
 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 {
 	struct carp_if *cif;
 	struct ip_moptions *imo;
 
 	/* other IPv4 addresses remain: nothing more to do */
 	if (--sc->sc_naddrs != 0)
 		return (0);
 
 	cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 	imo = &sc->sc_imo;
 
 	CARP_LOCK(cif);
 	callout_stop(&sc->sc_ad_tmo);
 	SC2IFP(sc)->if_flags &= ~IFF_UP;
 	SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 	sc->sc_vhid = -1;
 
 	/* leave the most recently joined group */
 	imo->imo_num_memberships--;
 	in_delmulti(imo->imo_membership[imo->imo_num_memberships]);
 	imo->imo_multicast_ifp = NULL;
 
 	TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 	if (--cif->vhif_nvrs != 0) {
 		CARP_UNLOCK(cif);
 		return (0);
 	}
 
 	/* that was the last virtual host on the parent */
 	sc->sc_carpdev->if_carp = NULL;
 	CARP_LOCK_DESTROY(cif);
 	free(cif, M_CARP);
 
 	return (0);
 }
 
 #ifdef INET6
 static int
 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 {
 	struct ifnet *ifp;
 	struct carp_if *cif;
 	struct in6_ifaddr *ia, *ia_if;
 	struct ip6_moptions *im6o = &sc->sc_im6o;
 	struct in6_addr in6;
 	int own, error;
 
 	error = 0;
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 		if (!(SC2IFP(sc)->if_flags & IFF_UP))
 			carp_set_state(sc, INIT);
 		if (sc->sc_naddrs6)
 			SC2IFP(sc)->if_flags |= IFF_UP;
 		if (sc->sc_carpdev)
 			CARP_SCLOCK(sc);
 		carp_setrun(sc, 0);
 		if (sc->sc_carpdev)
 			CARP_SCUNLOCK(sc);
 		return (0);
 	}
 
 	/* we have to do it by hands to check we won't match on us */
 	ia_if = NULL; own = 0;
 	IN6_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		int i;
 
 		for (i = 0; i < 4; i++) {
 			if ((sin6->sin6_addr.s6_addr32[i] &
 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
 				break;
 		}
 		/* and, yeah, we need a multicast-capable iface too */
 		if (ia->ia_ifp != SC2IFP(sc) &&
 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 		    (i == 4)) {
 			if (!ia_if)
 				ia_if = ia;
 			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
 			    &ia->ia_addr.sin6_addr))
 				own++;
 		}
 	}
 
 	if (!ia_if) {
 		IN6_IFADDR_RUNLOCK();
 		return (EADDRNOTAVAIL);
 	}
 	ia = ia_if;
 	ifa_ref(&ia->ia_ifa);
 	IN6_IFADDR_RUNLOCK();
 	ifp = ia->ia_ifp;
 
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
 	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
 		ifa_free(&ia->ia_ifa);
 		return (EADDRNOTAVAIL);
 	}
 
 	if (!sc->sc_naddrs6) {
 		struct in6_multi *in6m;
 
 		im6o->im6o_multicast_ifp = ifp;
 
 		/* join CARP multicast address */
 		bzero(&in6, sizeof(in6));
 		in6.s6_addr16[0] = htons(0xff02);
 		in6.s6_addr8[15] = 0x12;
 		if (in6_setscope(&in6, ifp, NULL) != 0)
 			goto cleanup;
 		in6m = NULL;
 		error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
 		if (error)
 			goto cleanup;
 		im6o->im6o_membership[0] = in6m;
 		im6o->im6o_num_memberships++;
 
 		/* join solicited multicast address */
 		bzero(&in6, sizeof(in6));
 		in6.s6_addr16[0] = htons(0xff02);
 		in6.s6_addr32[1] = 0;
 		in6.s6_addr32[2] = htonl(1);
 		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
 		in6.s6_addr8[12] = 0xff;
 		if (in6_setscope(&in6, ifp, NULL) != 0)
 			goto cleanup;
 		in6m = NULL;
 		error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
 		if (error)
 			goto cleanup;
 		im6o->im6o_membership[1] = in6m;
 		im6o->im6o_num_memberships++;
 	}
 
 	if (!ifp->if_carp) {
 		cif = malloc(sizeof(*cif), M_CARP,
 		    M_WAITOK|M_ZERO);
 		if (!cif) {
 			error = ENOBUFS;
 			goto cleanup;
 		}
 		if ((error = ifpromisc(ifp, 1))) {
 			free(cif, M_CARP);
 			goto cleanup;
 		}
 
 		CARP_LOCK_INIT(cif);
 		CARP_LOCK(cif);
 		cif->vhif_ifp = ifp;
 		TAILQ_INIT(&cif->vhif_vrs);
 		ifp->if_carp = cif;
 
 	} else {
 		struct carp_softc *vr;
 
 		cif = (struct carp_if *)ifp->if_carp;
 		CARP_LOCK(cif);
 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
 				CARP_UNLOCK(cif);
 				error = EINVAL;
 				goto cleanup;
 			}
 	}
 	sc->sc_ia6 = ia;
 	sc->sc_carpdev = ifp;
 
 	{ /* XXX prevent endless loop if already in queue */
 	struct carp_softc *vr, *after = NULL;
 	int myself = 0;
 	cif = (struct carp_if *)ifp->if_carp;
 	CARP_LOCK_ASSERT(cif);
 
 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
 		if (vr == sc)
 			myself = 1;
 		if (vr->sc_vhid < sc->sc_vhid)
 			after = vr;
 	}
 
 	if (!myself) {
 		/* We're trying to keep things in order */
 		if (after == NULL) {
 			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
 		} else {
 			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
 		}
 		cif->vhif_nvrs++;
 	}
 	}
 
 	sc->sc_naddrs6++;
 	SC2IFP(sc)->if_flags |= IFF_UP;
 	if (own)
 		sc->sc_advskew = 0;
 	carp_sc_state_locked(sc);
 	carp_setrun(sc, 0);
 
 	CARP_UNLOCK(cif);
 	ifa_free(&ia->ia_ifa);	/* XXXRW: should hold reference for softc. */
 
 	return (0);
 
 cleanup:
 	if (!sc->sc_naddrs6)
-		carp_multicast6_cleanup(sc);
+		carp_multicast6_cleanup(sc, 1);
 	ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 /*
  * Account for the removal of one IPv6 address from the virtual host.
  * When the last one goes away the host is shut down: the advertisement
  * callout is stopped, the interface is marked down, the vhid is
  * invalidated, the IPv6 multicast memberships are cleaned up and the
  * softc is unlinked from the parent's carp_if, which is destroyed once it
  * has no virtual hosts left.  "sin6" is not used.  Always returns 0.
  */
 static int
 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 {
 	int error = 0;
 
 	/* act only when the last IPv6 address is being removed */
 	if (!--sc->sc_naddrs6) {
 		struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 
 		CARP_LOCK(cif);
 		callout_stop(&sc->sc_ad_tmo);
 		SC2IFP(sc)->if_flags &= ~IFF_UP;
 		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 		sc->sc_vhid = -1;
-		carp_multicast6_cleanup(sc);
+		carp_multicast6_cleanup(sc, 1);
 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 		/* last virtual host on the parent: tear down the carp_if */
 		if (!--cif->vhif_nvrs) {
 			CARP_LOCK_DESTROY(cif);
 			sc->sc_carpdev->if_carp = NULL;
 			free(cif, M_CARP);
 		} else
 			CARP_UNLOCK(cif);
 	}
 
 	return (error);
 }
 #endif /* INET6 */
 
 static int
 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
 {
 	struct carp_softc *sc = ifp->if_softc, *vr;
 	struct carpreq carpr;
 	struct ifaddr *ifa;
 	struct ifreq *ifr;
 	struct ifaliasreq *ifra;
 	int locked = 0, error = 0;
 
 	ifa = (struct ifaddr *)addr;
 	ifra = (struct ifaliasreq *)addr;
 	ifr = (struct ifreq *)addr;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			SC2IFP(sc)->if_flags |= IFF_UP;
 			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
 			    sizeof(struct sockaddr));
 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			SC2IFP(sc)->if_flags |= IFF_UP;
 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
 			break;
 #endif /* INET6 */
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCAIFADDR:
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			SC2IFP(sc)->if_flags |= IFF_UP;
 			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
 			    sizeof(struct sockaddr));
 			error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			SC2IFP(sc)->if_flags |= IFF_UP;
 			error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
 			break;
 #endif /* INET6 */
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCDIFADDR:
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
 			break;
 #endif /* INET6 */
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCSIFFLAGS:
 		if (sc->sc_carpdev) {
 			locked = 1;
 			CARP_SCLOCK(sc);
 		}
 		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
  			callout_stop(&sc->sc_ad_tmo);
  			callout_stop(&sc->sc_md_tmo);
  			callout_stop(&sc->sc_md6_tmo);
 			if (sc->sc_state == MASTER)
 				carp_send_ad_locked(sc);
 			carp_set_state(sc, INIT);
 			carp_setrun(sc, 0);
 		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
 			SC2IFP(sc)->if_flags |= IFF_UP;
 			carp_setrun(sc, 0);
 		}
 		break;
 
 	case SIOCSVH:
 		error = priv_check(curthread, PRIV_NETINET_CARP);
 		if (error)
 			break;
 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
 			break;
 		error = 1;
 		if (sc->sc_carpdev) {
 			locked = 1;
 			CARP_SCLOCK(sc);
 		}
 		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
 			switch (carpr.carpr_state) {
 			case BACKUP:
 				callout_stop(&sc->sc_ad_tmo);
 				carp_set_state(sc, BACKUP);
 				carp_setrun(sc, 0);
 				carp_setroute(sc, RTM_DELETE);
 				break;
 			case MASTER:
 				carp_master_down_locked(sc);
 				break;
 			default:
 				break;
 			}
 		}
 		if (carpr.carpr_vhid > 0) {
 			if (carpr.carpr_vhid > 255) {
 				error = EINVAL;
 				break;
 			}
 			if (sc->sc_carpdev) {
 				struct carp_if *cif;
 				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 					if (vr != sc &&
 					    vr->sc_vhid == carpr.carpr_vhid) {
 						error = EEXIST;
 						break;
 					}
 				if (error == EEXIST)
 					break;
 			}
 			sc->sc_vhid = carpr.carpr_vhid;
 			IF_LLADDR(sc->sc_ifp)[0] = 0;
 			IF_LLADDR(sc->sc_ifp)[1] = 0;
 			IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
 			IF_LLADDR(sc->sc_ifp)[3] = 0;
 			IF_LLADDR(sc->sc_ifp)[4] = 1;
 			IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
 			error--;
 		}
 		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
 			if (carpr.carpr_advskew >= 255) {
 				error = EINVAL;
 				break;
 			}
 			if (carpr.carpr_advbase > 255) {
 				error = EINVAL;
 				break;
 			}
 			sc->sc_advbase = carpr.carpr_advbase;
 			sc->sc_advskew = carpr.carpr_advskew;
 			error--;
 		}
 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 		if (error > 0)
 			error = EINVAL;
 		else {
 			error = 0;
 			carp_setrun(sc, 0);
 		}
 		break;
 
 	case SIOCGVH:
 		/* XXX: lockless read */
 		bzero(&carpr, sizeof(carpr));
 		carpr.carpr_state = sc->sc_state;
 		carpr.carpr_vhid = sc->sc_vhid;
 		carpr.carpr_advbase = sc->sc_advbase;
 		carpr.carpr_advskew = sc->sc_advskew;
 		error = priv_check(curthread, PRIV_NETINET_CARP);
 		if (error == 0)
 			bcopy(sc->sc_key, carpr.carpr_key,
 			    sizeof(carpr.carpr_key));
 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
 		break;
 
 	default:
 		error = EINVAL;
 	}
 
 	if (locked)
 		CARP_SCUNLOCK(sc);
 
 	carp_hmac_prepare(sc);
 
 	return (error);
 }
 
 /*
  * XXX: this is looutput. We should eventually use it from there.
  */
 static int
 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct route *ro)
 {
 	struct rtentry *route_entry;
 	u_int32_t family;
 
 	/* The mbuf handed to us must carry a packet header. */
 	M_ASSERTPKTHDR(m);
 
 	route_entry = (ro != NULL) ? ro->ro_rt : NULL;
 	if (route_entry != NULL &&
 	    (route_entry->rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) != 0) {
 		/*
 		 * Reject and blackhole routes both drop the packet; only a
 		 * blackhole does so silently (return value 0).
 		 */
 		m_freem(m);
 		if (route_entry->rt_flags & RTF_BLACKHOLE)
 			return (0);
 		if (route_entry->rt_flags & RTF_HOST)
 			return (EHOSTUNREACH);
 		return (ENETUNREACH);
 	}
 
 	/* Output accounting for the interface. */
 	ifp->if_obytes += m->m_pkthdr.len;
 	ifp->if_opackets++;
 
 	/*
 	 * BPF writes arrive as AF_UNSPEC with the real address family
 	 * stored at the start of sa_data; unpack it into sa_family.
 	 */
 	if (dst->sa_family == AF_UNSPEC) {
 		bcopy(dst->sa_data, &family, sizeof(family));
 		dst->sa_family = family;
 	}
 
 #if 1	/* XXX */
 	/* Only these address families are passed on to if_simloop(). */
 	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6 &&
 	    dst->sa_family != AF_IPX && dst->sa_family != AF_APPLETALK) {
 		printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 #endif
 	return (if_simloop(ifp, m, dst->sa_family, 0));
 }
 
 /*
  * Start output on carp interface. This function should never be called.
  */
 static void
 carp_start(struct ifnet *ifp)
 {
 #ifndef DEBUG
 	/* Nothing to do; the argument is only used for the debug trace. */
 	(void)ifp;
 #else
 	printf("%s: start called\n", ifp->if_xname);
 #endif
 }
 
 /*
  * Rewrite the link-layer source address of an outgoing frame that carries a
  * PACKET_TAG_CARP mbuf tag.
  *
  * Returns 0 without touching the frame when sa is NULL, when the address
  * family is neither AF_INET nor AF_INET6 (subject to the INET/INET6 build
  * options), or when the mbuf has no CARP tag.  Returns EOPNOTSUPP when the
  * outgoing interface type has no case below.  The rt argument is not used.
  */
 int
 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
     struct rtentry *rt)
 {
 	struct m_tag *mtag;
 	struct carp_softc *sc;
 	struct ifnet *carp_ifp;
 
 	if (!sa)
 		return (0);
 
 	/* Only IPv4/IPv6 traffic is of interest; everything else passes. */
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		break;
 #endif /* INET6 */
 	default:
 		return (0);
 	}
 
 	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 	if (mtag == NULL)
 		return (0);
 
 	/* The tag payload (right after the m_tag header) is an ifnet pointer. */
 	bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
 	sc = carp_ifp->if_softc;
 
 	/* Set the source MAC address to Virtual Router MAC Address */
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_L2VLAN: {
 			struct ether_header *eh;
 
 			/* Source becomes 00:00:5e:00:01:<vhid>. */
 			eh = mtod(m, struct ether_header *);
 			eh->ether_shost[0] = 0;
 			eh->ether_shost[1] = 0;
 			eh->ether_shost[2] = 0x5e;
 			eh->ether_shost[3] = 0;
 			eh->ether_shost[4] = 1;
 			eh->ether_shost[5] = sc->sc_vhid;
 		}
 		break;
 	case IFT_FDDI: {
 			struct fddi_header *fh;
 
 			/* Same 00:00:5e:00:01:<vhid> pattern as Ethernet. */
 			fh = mtod(m, struct fddi_header *);
 			fh->fddi_shost[0] = 0;
 			fh->fddi_shost[1] = 0;
 			fh->fddi_shost[2] = 0x5e;
 			fh->fddi_shost[3] = 0;
 			fh->fddi_shost[4] = 1;
 			fh->fddi_shost[5] = sc->sc_vhid;
 		}
 		break;
 	case IFT_ISO88025: {
  			struct iso88025_header *th;
  			th = mtod(m, struct iso88025_header *);
 			/*
 			 * NOTE(review): both shifts below use (sc_vhid - 1) as
 			 * the shift count.  A vhid of 0, or one large enough
 			 * that the count reaches the width of int, makes the
 			 * shift undefined in C; the second result is also
 			 * stored into a single address byte.  Confirm the
 			 * intended vhid range for this interface type.
 			 */
 			th->iso88025_shost[0] = 3;
 			th->iso88025_shost[1] = 0;
 			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
 			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
 			th->iso88025_shost[4] = 0;
 			th->iso88025_shost[5] = 0;
 		}
 		break;
 	default:
 		printf("%s: carp is not supported for this interface type\n",
 		    ifp->if_xname);
 		return (EOPNOTSUPP);
 	}
 
 	return (0);
 }
 
 static void
 carp_set_state(struct carp_softc *sc, int state)
 {
 	int link_state;
 
 	if (sc->sc_carpdev)
 		CARP_SCLOCK_ASSERT(sc);
 
 	if (sc->sc_state == state)
 		return;
 
 	sc->sc_state = state;
 	switch (state) {
 	case BACKUP:
 		link_state = LINK_STATE_DOWN;
 		break;
 	case MASTER:
 		link_state = LINK_STATE_UP;
 		break;
 	default:
 		link_state = LINK_STATE_UNKNOWN;
 		break;
 	}
 	if_link_state_change(SC2IFP(sc), link_state);
 }
 
 /*
  * Unlocked entry point: take the carp_if lock of the given interface and
  * re-evaluate every virtual host attached to it.
  */
 void
 carp_carpdev_state(struct ifnet *ifp)
 {
 	struct carp_if *cif = ifp->if_carp;
 
 	CARP_LOCK(cif);
 	carp_carpdev_state_locked(cif);
 	CARP_UNLOCK(cif);
 }
 
 static void
 carp_carpdev_state_locked(struct carp_if *cif)
 {
 	struct carp_softc *sc;
 
 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
 		carp_sc_state_locked(sc);
 }
 
 /*
  * Bring one virtual host in line with the state of its parent device.
  *
  * Parent up (link up and IFF_UP): restore the saved interface flags,
  * restart from INIT and release any preemption suppression held by this
  * host.  Otherwise: save the flags, mark the carp interface down, stop
  * all timers, restart from INIT and take a suppression reference.
  */
 static void
 carp_sc_state_locked(struct carp_softc *sc)
 {
 	CARP_SCLOCK_ASSERT(sc);
 
 	if (sc->sc_carpdev->if_link_state == LINK_STATE_UP &&
 	    (sc->sc_carpdev->if_flags & IFF_UP) != 0) {
 		/* Parent is usable again. */
 		SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
 		carp_set_state(sc, INIT);
 		carp_setrun(sc, 0);
 		if (sc->sc_suppress)
 			carp_suppress_preempt--;
 		sc->sc_suppress = 0;
 		return;
 	}
 
 	/* Parent is down or administratively disabled. */
 	sc->sc_flags_backup = SC2IFP(sc)->if_flags;
 	SC2IFP(sc)->if_flags &= ~IFF_UP;
 	SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 	callout_stop(&sc->sc_ad_tmo);
 	callout_stop(&sc->sc_md_tmo);
 	callout_stop(&sc->sc_md6_tmo);
 	carp_set_state(sc, INIT);
 	carp_setrun(sc, 0);
 	if (!sc->sc_suppress) {
 		/*
 		 * First suppressor: advertise on all hosts, with the softc
 		 * lock dropped around the call.
 		 */
 		if (++carp_suppress_preempt == 1) {
 			CARP_SCUNLOCK(sc);
 			carp_send_ad_all();
 			CARP_SCLOCK(sc);
 		}
 	}
 	sc->sc_suppress = 1;
 }
 
 #ifdef INET
 extern struct domain inetdomain;
 
 /* Protocol switch entry used to register IPPROTO_CARP with PF_INET. */
 static struct protosw in_carp_protosw = {
 	.pr_domain = &inetdomain,
 	.pr_type = SOCK_RAW,
 	.pr_protocol = IPPROTO_CARP,
 	.pr_flags = PR_ATOMIC | PR_ADDR,
 	.pr_input = carp_input,
 	.pr_output = (pr_output_t *)rip_output,
 	.pr_ctloutput = rip_ctloutput,
 	.pr_usrreqs = &rip_usrreqs,
 };
 #endif
 
 #ifdef INET6
 extern struct domain inet6domain;
 
 /* Protocol switch entry used to register IPPROTO_CARP with PF_INET6. */
 static struct ip6protosw in6_carp_protosw = {
 	.pr_domain = &inet6domain,
 	.pr_type = SOCK_RAW,
 	.pr_protocol = IPPROTO_CARP,
 	.pr_flags = PR_ATOMIC | PR_ADDR,
 	.pr_input = carp6_input,
 	.pr_output = rip6_output,
 	.pr_ctloutput = rip6_ctloutput,
 	.pr_usrreqs = &rip6_usrreqs,
 };
 #endif
 
 /*
  * Undo what carp_mod_load() set up: remove the interface-departure event
  * handler and the cloner, unregister the protocol for each address family
  * whose registration succeeded, clear the exported hook pointers and
  * destroy the global mutex.
  *
  * Does nothing if the event handler tag was never set.
  * NOTE(review): the tag is not reset to NULL afterwards, so a second call
  * would repeat the whole teardown -- confirm callers never do that.
  */
 static void
 carp_mod_cleanup(void)
 {
 
 	if (if_detach_event_tag == NULL)
 		return;
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 	if_clone_detach(&carp_cloner);
 #ifdef INET
 	/* proto_reg[] == 0 means pf_proto_register() succeeded for INET. */
 	if (proto_reg[CARP_INET] == 0) {
 		(void)ipproto_unregister(IPPROTO_CARP);
 		pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
 		proto_reg[CARP_INET] = -1;
 	}
 	carp_iamatch_p = NULL;
 #endif
 #ifdef INET6
 	/* Same convention for INET6. */
 	if (proto_reg[CARP_INET6] == 0) {
 		(void)ip6proto_unregister(IPPROTO_CARP);
 		pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
 		proto_reg[CARP_INET6] = -1;
 	}
 	carp_iamatch6_p = NULL;
 	carp_macmatch6_p = NULL;
 #endif
 	/* Detach the remaining hooks before the mutex goes away. */
 	carp_linkstate_p = NULL;
 	carp_forus_p = NULL;
 	carp_output_p = NULL;
 	mtx_destroy(&carp_mtx);
 }
 
 /*
  * Module load: register the interface-departure handler, initialise the
  * global mutex and carp_if list, attach the cloner, publish the hook
  * pointers and register IPPROTO_CARP with INET6 and INET (as configured).
  *
  * Returns 0 on success.  Returns ENOMEM if the event handler cannot be
  * registered; on any protocol registration failure, calls
  * carp_mod_cleanup() and returns that failure's error code.
  */
 static int
 carp_mod_load(void)
 {
 	int err;
 
 	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 		carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
 	if (if_detach_event_tag == NULL)
 		return (ENOMEM);
 	mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 	LIST_INIT(&carpif_list);
 	if_clone_attach(&carp_cloner);
 	carp_linkstate_p = carp_carpdev_state;
 	carp_forus_p = carp_forus;
 	carp_output_p = carp_output;
 #ifdef INET6
 	carp_iamatch6_p = carp_iamatch6;
 	carp_macmatch6_p = carp_macmatch6;
 	proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
 	    (struct protosw *)&in6_carp_protosw);
 	if (proto_reg[CARP_INET6] != 0) {
 		printf("carp: error %d attaching to PF_INET6\n",
 		    proto_reg[CARP_INET6]);
 		/*
 		 * carp_mod_cleanup() only rewrites proto_reg[] entries that
 		 * are 0, so the non-zero error stored here is still intact
 		 * when it is returned below.
 		 */
 		carp_mod_cleanup();
 		return (proto_reg[CARP_INET6]);
 	}
 	err = ip6proto_register(IPPROTO_CARP);
 	if (err) {
 		printf("carp: error %d registering with INET6\n", err);
 		carp_mod_cleanup();
 		return (err);
 	}
 #endif
 #ifdef INET
 	carp_iamatch_p = carp_iamatch;
 	proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
 	if (proto_reg[CARP_INET] != 0) {
 		printf("carp: error %d attaching to PF_INET\n",
 		    proto_reg[CARP_INET]);
 		/* As above: the error value survives the cleanup call. */
 		carp_mod_cleanup();
 		return (proto_reg[CARP_INET]);
 	}
 	err = ipproto_register(IPPROTO_CARP);
 	if (err) {
 		printf("carp: error %d registering with INET\n", err);
 		carp_mod_cleanup();
 		return (err);
 	}
 #endif
 	return 0;
 }
 
 /*
  * Module event handler.  MOD_LOAD runs carp_mod_load(); MOD_UNLOAD is
  * refused with EBUSY unless CARPMOD_CAN_UNLOAD is defined; every other
  * event type yields EINVAL.
  */
 static int
 carp_modevent(module_t mod, int type, void *data)
 {
 	if (type == MOD_LOAD)
 		return carp_mod_load();
 	if (type != MOD_UNLOAD)
 		return (EINVAL);
 
 	/*
 	 * XXX: Unloading is disabled by default.  A thread that is still
 	 * processing a packet may call through one of the function pointer
 	 * hooks after the module is gone, which would panic.
 	 */
 #ifdef CARPMOD_CAN_UNLOAD
 	carp_mod_cleanup();
 	return (0);
 #else
 	return (EBUSY);
 #endif
 }
 
 /*
  * Module description handed to DECLARE_MODULE below: the module is named
  * "carp" and its events are dispatched to carp_modevent().
  */
 static moduledata_t carp_mod = {
 	"carp",
 	carp_modevent,
 	0
 };
 
 /* Register the module at SI_SUB_PROTO_DOMAIN with no ordering preference. */
 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
Index: projects/binutils-2.17/sys/netinet/sctp_cc_functions.c
===================================================================
--- projects/binutils-2.17/sys/netinet/sctp_cc_functions.c	(revision 215829)
+++ projects/binutils-2.17/sys/netinet/sctp_cc_functions.c	(revision 215830)
@@ -1,1563 +1,1617 @@
 /*-
  * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *   this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *   the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_input.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_cc_functions.h>
+#include <netinet/sctp_dtrace_declare.h>
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Initialise the congestion control state of one destination.
  *
  * cwnd is set to (mtu - sizeof(struct sctphdr)) multiplied by the
  * sctp_initial_cwnd sysctl, with that multiplier capped at the
  * association's max_burst when max_burst is non-zero.  ssthresh is set to
  * the peer's advertised receive window.
  */
 void
 sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *assoc;
 	uint32_t cwnd_in_mtu;
 
 	assoc = &stcb->asoc;
 	/*
 	 * We take the minimum of the burst limit and the initial congestion
 	 * window. The initial congestion window is at least two times the
 	 * MTU.
 	 */
 	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
 	if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
 		cwnd_in_mtu = assoc->max_burst;
 	net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
 	net->ssthresh = assoc->peers_rwnd;
 
 	/* Trace point: old cwnd is reported as 0 for the initial value. */
+	SDT_PROBE(sctp, cwnd, net, init,
+	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
+	    0, net->cwnd);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
 	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
 		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
 	}
 }
 
 /*
  * Congestion window reduction after a fast retransmit.
  *
  * For every destination with newly acked data (net_ack > 0), and provided
  * the association is not already in fast recovery (or CMT is on), halve
  * cwnd into ssthresh (minimum two MTUs), set cwnd to ssthresh, open a
  * fast-recovery window at both association and destination level, and
  * restart the send timer.  When fast recovery blocks the reduction, only a
  * statistics counter is bumped.
  */
 void
 sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_nets *net;
 
 	/*-
 	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
 	 * (net->fast_retran_loss_recovery == 0)))
 	 */
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		if ((asoc->fast_retran_loss_recovery == 0) ||
 		    (asoc->sctp_cmt_on_off == 1)) {
 			/* out of a RFC2582 Fast recovery window? */
 			if (net->net_ack > 0) {
 				/*
 				 * per section 7.2.3, are there any
 				 * destinations that had a fast retransmit
 				 * to them. If so what we need to do is
 				 * adjust ssthresh and cwnd.
 				 */
 				struct sctp_tmit_chunk *lchk;
 				int old_cwnd = net->cwnd;
 
 				net->ssthresh = net->cwnd / 2;
 				if (net->ssthresh < (net->mtu * 2)) {
 					net->ssthresh = 2 * net->mtu;
 				}
 				net->cwnd = net->ssthresh;
+				SDT_PROBE(sctp, cwnd, net, fr,
+				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
+				    old_cwnd, net->cwnd);
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
 					    SCTP_CWND_LOG_FROM_FR);
 				}
 				/* First chunk still waiting to be sent, if any. */
 				lchk = TAILQ_FIRST(&asoc->send_queue);
 
 				net->partial_bytes_acked = 0;
 				/* Turn on fast recovery window */
 				asoc->fast_retran_loss_recovery = 1;
 				if (lchk == NULL) {
 					/* Mark end of the window */
 					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
 				} else {
 					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
 				}
 
 				/*
 				 * CMT fast recovery -- per destination
 				 * recovery variable.
 				 */
 				net->fast_retran_loss_recovery = 1;
 
 				if (lchk == NULL) {
 					/* Mark end of the window */
 					net->fast_recovery_tsn = asoc->sending_seq - 1;
 				} else {
 					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
 				}
 
 				/*
 				 * Disable Nonce Sum Checking and store the
 				 * resync tsn
 				 */
 				asoc->nonce_sum_check = 0;
 				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
 
 				/* Restart the retransmission timer for this destination. */
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net);
 			}
 		} else if (net->net_ack > 0) {
 			/*
 			 * Mark a peg that we WOULD have done a cwnd
 			 * reduction but RFC2582 prevented this action.
 			 */
 			SCTP_STAT_INCR(sctps_fastretransinrtt);
 		}
 	}
 }
 
 /*
  * Per-destination congestion window update after a SACK has been processed.
  *
  * For each destination of the association this:
  *   - manages the Early FR timer when the sctp_early_fr sysctl is set;
  *   - skips destinations on which nothing was acked (net_ack == 0);
  *   - when net_ack2 is set, clears the error count, marks an unreachable
  *     address reachable again and handles the CMT-PF exit;
  *   - grows cwnd in slow start (cwnd <= ssthresh) or congestion avoidance,
  *     but only if the cumulative ack moved (or, with CMT on, the
  *     destination saw a new pseudo-cumack) and the association is not in
  *     fast recovery;
  *   - finally recomputes RTO from lastsa/lastsv when net_ack2 is set,
  *     clamped to [minrto, maxrto].
  *
  * accum_moved  non-zero if the cumulative ack advanced.
  * reneged_all  non-zero forces the Early FR timer to be (re)started.
  * will_exit    non-zero if the association is about to leave fast recovery.
  */
 void
 sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all, int will_exit)
 {
 	struct sctp_nets *net;
+	int old_cwnd;
 
 	/******************************/
 	/* update cwnd and Early FR   */
 	/******************************/
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code. Need to debug.
 		 */
 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
 			if (compare_with_wrap(asoc->last_acked_seq,
 			    net->fast_recovery_tsn, MAX_TSN) ||
 			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
 			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
 			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
 				net->will_exit_fast_recovery = 1;
 			}
 		}
 #endif
 		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
 			/*
 			 * So, first of all do we need to have a Early FR
 			 * timer running?
 			 */
 			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
 			    (net->ref_count > 1) &&
 			    (net->flight_size < net->cwnd)) ||
 			    (reneged_all)) {
 				/*
 				 * yes, so in this case stop it if its
 				 * running, and then restart it. Reneging
 				 * all is a special case where we want to
 				 * run the Early FR timer and then force the
 				 * last few unacked to be sent, causing us
 				 * to elicit a sack with gaps to force out
 				 * the others.
 				 */
 				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
 					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
 					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
 					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
 				}
 				SCTP_STAT_INCR(sctps_earlyfrstrid);
 				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
 			} else {
 				/* No, stop it if its running */
 				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
 					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
 					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
 					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
 				}
 			}
 		}
 		/* if nothing was acked on this destination skip it */
 		if (net->net_ack == 0) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
 			}
 			continue;
 		}
 		if (net->net_ack2 > 0) {
 			/*
 			 * Karn's rule applies to clearing error count, this
 			 * is optional.
 			 */
 			net->error_count = 0;
 			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
 			    SCTP_ADDR_NOT_REACHABLE) {
 				/* addr came good */
 				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
 				net->dest_state |= SCTP_ADDR_REACHABLE;
 				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
 				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
 				/* now was it the primary? if so restore */
 				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
 					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
 				}
 			}
 			/*
 			 * JRS 5/14/07 - If CMT PF is on and the destination
 			 * is in PF state, set the destination to active
 			 * state and set the cwnd to one or two MTU's based
 			 * on whether PF1 or PF2 is being used.
 			 *
 			 * Should we stop any running T3 timer here?
 			 */
 			if ((asoc->sctp_cmt_on_off == 1) &&
 			    (asoc->sctp_cmt_pf > 0) &&
 			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
 				net->dest_state &= ~SCTP_ADDR_PF;
+				old_cwnd = net->cwnd;
 				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
+				SDT_PROBE(sctp, cwnd, net, ack,
+				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
+				    old_cwnd, net->cwnd);
 				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
 				    net, net->cwnd);
 				/*
 				 * Since the cwnd value is explicitly set,
 				 * skip the code that updates the cwnd
 				 * value.
 				 */
 				goto skip_cwnd_update;
 			}
 		}
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code
 		 */
 		/*
 		 * if (sctp_cmt_on_off == 1 &&
 		 * net->fast_retran_loss_recovery &&
 		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
 		 * else if (sctp_cmt_on_off == 0 &&
 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
 		 */
 #endif
 
 		if (asoc->fast_retran_loss_recovery &&
 		    (will_exit == 0) &&
 		    (asoc->sctp_cmt_on_off == 0)) {
 			/*
 			 * If we are in loss recovery we skip any cwnd
 			 * update
 			 */
 			goto skip_cwnd_update;
 		}
 		/*
 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
 		 * moved.
 		 */
 		if (accum_moved ||
 		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
 			/* If the cumulative ack moved we can proceed */
 			if (net->cwnd <= net->ssthresh) {
 				/* We are in slow start */
 				if (net->flight_size + net->net_ack >= net->cwnd) {
 					/*
 					 * The window was in use: grow by the
 					 * bytes acked, capped at mtu times
 					 * the sctp_L2_abc_variable sysctl.
 					 */
 					if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
+						old_cwnd = net->cwnd;
 						net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
+						SDT_PROBE(sctp, cwnd, net, ack,
+						    stcb->asoc.my_vtag,
+						    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+						    net,
+						    old_cwnd, net->cwnd);
 						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 							sctp_log_cwnd(stcb, net, net->mtu,
 							    SCTP_CWND_LOG_FROM_SS);
 						}
 					} else {
+						old_cwnd = net->cwnd;
 						net->cwnd += net->net_ack;
+						SDT_PROBE(sctp, cwnd, net, ack,
+						    stcb->asoc.my_vtag,
+						    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+						    net,
+						    old_cwnd, net->cwnd);
+
 						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 							sctp_log_cwnd(stcb, net, net->net_ack,
 							    SCTP_CWND_LOG_FROM_SS);
 						}
 					}
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_SS);
 					}
 				}
 			} else {
 				/* We are in congestion avoidance */
 				/*
 				 * Add to pba
 				 */
 				net->partial_bytes_acked += net->net_ack;
 
 				/* One MTU of growth per full cwnd of acked bytes. */
 				if ((net->flight_size + net->net_ack >= net->cwnd) &&
 				    (net->partial_bytes_acked >= net->cwnd)) {
 					net->partial_bytes_acked -= net->cwnd;
+					old_cwnd = net->cwnd;
 					net->cwnd += net->mtu;
+					SDT_PROBE(sctp, cwnd, net, ack,
+					    stcb->asoc.my_vtag,
+					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+					    net,
+					    old_cwnd, net->cwnd);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->mtu,
 						    SCTP_CWND_LOG_FROM_CA);
 					}
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_CA);
 					}
 				}
 			}
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_NO_CUMACK);
 			}
 		}
 skip_cwnd_update:
 		/*
 		 * NOW, according to Karn's rule do we need to restore the
 		 * RTO timer back? Check our net_ack2. If not set then we
 		 * have a ambiguity.. i.e. all data ack'd was sent to more
 		 * than one place.
 		 */
 		if (net->net_ack2) {
 			/* restore any doubled timers */
 			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
 			if (net->RTO < stcb->asoc.minrto) {
 				net->RTO = stcb->asoc.minrto;
 			}
 			if (net->RTO > stcb->asoc.maxrto) {
 				net->RTO = stcb->asoc.maxrto;
 			}
 		}
 	}
 }
 
 /*
  * Congestion response to a retransmission timeout on one destination:
  * ssthresh becomes max(cwnd / 2, 4 * mtu), cwnd collapses to one MTU and
  * the partial-bytes-acked counter is cleared.
  */
 void
 sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int old_cwnd = net->cwnd;
 
 	net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
 	net->cwnd = net->mtu;
 	net->partial_bytes_acked = 0;
-
+	SDT_PROBE(sctp, cwnd, net, to,
+	    stcb->asoc.my_vtag,
+	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+	    net,
+	    old_cwnd, net->cwnd);
 	/* The logged value is the signed change in cwnd. */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
 	}
 }
 
 /*
  * Congestion response to an ECN echo for one destination: halve cwnd into
  * ssthresh and set cwnd to that value.  If the halved value would drop
  * below one MTU it is raised to one MTU and RTO is doubled as well.
  */
 void
 sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int old_cwnd = net->cwnd;
 
 	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
 	net->ssthresh = net->cwnd / 2;
 	if (net->ssthresh < net->mtu) {
 		net->ssthresh = net->mtu;
 		/* here back off the timer as well, to slow us down */
 		net->RTO <<= 1;
 	}
 	net->cwnd = net->ssthresh;
+	SDT_PROBE(sctp, cwnd, net, ecn,
+	    stcb->asoc.my_vtag,
+	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+	    net,
+	    old_cwnd, net->cwnd);
 	/* The logged value is the signed change in cwnd. */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
 	}
 }
 
 /*
  * Adjust cwnd for one destination from a packet-dropped report.
  *
  * stcb       association control block.
  * net        destination whose cwnd is adjusted.
  * cp         received packet-drop chunk; bottle_bw and current_onq are read
  *            from it in network byte order.
  * bottle_bw  out: reported bottleneck bandwidth (host order).
  * on_queue   out: reported queue occupancy (host order), raised to the
  *            current flight size if that is larger.
  *
  * The available space is bottle_bw * rtt / 1000, capped at bottle_bw.  If
  * the queue already exceeds it, cwnd is reduced by this destination's share
  * of the overage (never below one MTU) and ssthresh is set just under cwnd
  * to force congestion avoidance.  Otherwise cwnd grows by a quarter of the
  * free space, limited to max_burst MTUs.  The result is clamped to
  * [mtu, bw_avail], with the lower bound applied last.
  */
 void
 sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
     struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
     uint32_t * bottle_bw, uint32_t * on_queue)
 {
 	uint32_t bw_avail;
 	int rtt, incr;
 	int old_cwnd = net->cwnd;
 
 	/* need real RTT for this calc */
 	rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
 	/* get bottle neck bw */
 	*bottle_bw = ntohl(cp->bottle_bw);
 	/* and whats on queue */
 	*on_queue = ntohl(cp->current_onq);
 	/*
 	 * adjust the on-queue if our flight is more it could be that the
 	 * router has not yet gotten data "in-flight" to it
 	 */
 	if (*on_queue < net->flight_size)
 		*on_queue = net->flight_size;
 	/* calculate the available space */
 	bw_avail = (*bottle_bw * rtt) / 1000;
 	if (bw_avail > *bottle_bw) {
 		/*
 		 * Cap the growth to no more than the bottle neck. This can
 		 * happen as RTT slides up due to queues. It also means if
 		 * you have more than a 1 second RTT with a empty queue you
 		 * will be limited to the bottle_bw per second no matter if
 		 * other points have 1/2 the RTT and you could get more
 		 * out...
 		 */
 		bw_avail = *bottle_bw;
 	}
 	if (*on_queue > bw_avail) {
 		/*
 		 * No room for anything else don't allow anything else to be
 		 * "added to the fire".
 		 */
 		int seg_inflight, seg_onqueue, my_portion;
 
 		net->partial_bytes_acked = 0;
 
 		/* how much are we over queue size? */
 		incr = *on_queue - bw_avail;
 		if (stcb->asoc.seen_a_sack_this_pkt) {
 			/*
 			 * undo any cwnd adjustment that the sack might have
 			 * made
 			 */
 			net->cwnd = net->prev_cwnd;
 		}
 		/* Now how much of that is mine? */
 		seg_inflight = net->flight_size / net->mtu;
 		seg_onqueue = *on_queue / net->mtu;
 		/*
 		 * The queue size comes from the received chunk and may be
 		 * smaller than one MTU, which makes seg_onqueue zero.  Since
 		 * on_queue >= flight_size, seg_inflight is zero too in that
 		 * case, so our share of the overage is zero; do not divide.
 		 */
 		if (seg_onqueue > 0)
 			my_portion = (incr * seg_inflight) / seg_onqueue;
 		else
 			my_portion = 0;
 
 		/* Have I made an adjustment already */
 		if (net->cwnd > net->flight_size) {
 			/*
 			 * for this flight I made an adjustment we need to
 			 * decrease the portion by a share our previous
 			 * adjustment.
 			 */
 			int diff_adj;
 
 			diff_adj = net->cwnd - net->flight_size;
 			if (diff_adj > my_portion)
 				my_portion = 0;
 			else
 				my_portion -= diff_adj;
 		}
 		/*
 		 * back down to the previous cwnd (assume we have had a sack
 		 * before this packet). minus what ever portion of the
 		 * overage is my fault.
 		 */
 		net->cwnd -= my_portion;
 
 		/* we will NOT back down more than 1 MTU */
 		if (net->cwnd <= net->mtu) {
 			net->cwnd = net->mtu;
 		}
 		/* force into CA */
 		net->ssthresh = net->cwnd - 1;
 	} else {
 		/*
 		 * Take 1/4 of the space left or max burst up .. whichever
 		 * is less.
 		 */
 		incr = min((bw_avail - *on_queue) >> 2,
 		    stcb->asoc.max_burst * net->mtu);
 		net->cwnd += incr;
 	}
 	if (net->cwnd > bw_avail) {
 		/* We can't exceed the pipe size */
 		net->cwnd = bw_avail;
 	}
 	if (net->cwnd < net->mtu) {
 		/* We always have 1 MTU */
 		net->cwnd = net->mtu;
 	}
 	if (net->cwnd - old_cwnd != 0) {
 		/* log only changes */
+		SDT_PROBE(sctp, cwnd, net, pd,
+		    stcb->asoc.my_vtag,
+		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+		    net,
+		    old_cwnd, net->cwnd);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
 			    SCTP_CWND_LOG_FROM_SAT);
 		}
 	}
 }
 
 /*
  * Apply a burst limit after output: raise ssthresh to the current cwnd if
  * it was lower, then set cwnd to the bytes in flight plus burst_limit
  * MTUs.
  */
 void
 sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
     struct sctp_nets *net, int burst_limit)
 {
 	int old_cwnd = net->cwnd;
 
 	if (net->ssthresh < net->cwnd)
 		net->ssthresh = net->cwnd;
 	net->cwnd = (net->flight_size + (burst_limit * net->mtu));
-
+	SDT_PROBE(sctp, cwnd, net, bl,
+	    stcb->asoc.my_vtag,
+	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+	    net,
+	    old_cwnd, net->cwnd);
 	/* The logged value is the signed change in cwnd. */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
 	}
 }
 
 /*
  * Early-FR timer handler for one destination: trigger chunk output, then
  * shrink cwnd by one MTU (if it is larger than one MTU) and, if cwnd is
  * still below ssthresh, pull ssthresh just under cwnd so the destination
  * is in congestion avoidance.
  */
 void
 sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int old_cwnd = net->cwnd;
 
 	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
 	/*
 	 * make a small adjustment to cwnd and force to CA.
 	 */
 	if (net->cwnd > net->mtu)
 		/* drop down one MTU after sending */
 		net->cwnd -= net->mtu;
 	if (net->cwnd < net->ssthresh)
 		/* still in SS move to CA */
 		net->ssthresh = net->cwnd - 1;
+	SDT_PROBE(sctp, cwnd, net, fr,
+	    stcb->asoc.my_vtag,
+	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
+	    net,
+	    old_cwnd, net->cwnd);
 	/*
 	 * NOTE(review): the delta logged here is (old - new), the opposite
 	 * sign of the (new - old) logged by the other cwnd update routines
 	 * in this file -- confirm whether that is intentional.
 	 */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
 	}
 }
 
 /*
  * One row of the high-speed congestion control table.  The HS routines in
  * this file compare `cwnd' against (net->cwnd >> 10), add (`increase' <<
  * 10) to cwnd when growing, and subtract (cwnd / 100) * `drop_percent'
  * when shrinking.
  */
 struct sctp_hs_raise_drop {
 	int32_t cwnd;		/* threshold, in units of 1024 bytes */
 	int32_t increase;	/* growth step, in units of 1024 bytes */
 	int32_t drop_percent;	/* reduction on loss, in percent */
 };
 
 /* Number of rows in sctp_cwnd_adjust[]. */
 #define SCTP_HS_TABLE_SIZE 73
 
 /*
  * High-speed congestion control lookup table: { cwnd threshold, increase,
  * drop percent }.  Thresholds are strictly increasing; the increase grows
  * by one per row while the drop percentage falls from 50 to 9.
  */
 struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
 	{38, 1, 50},		/* 0   */
 	{118, 2, 44},		/* 1   */
 	{221, 3, 41},		/* 2   */
 	{347, 4, 38},		/* 3   */
 	{495, 5, 37},		/* 4   */
 	{663, 6, 35},		/* 5   */
 	{851, 7, 34},		/* 6   */
 	{1058, 8, 33},		/* 7   */
 	{1284, 9, 32},		/* 8   */
 	{1529, 10, 31},		/* 9   */
 	{1793, 11, 30},		/* 10  */
 	{2076, 12, 29},		/* 11  */
 	{2378, 13, 28},		/* 12  */
 	{2699, 14, 28},		/* 13  */
 	{3039, 15, 27},		/* 14  */
 	{3399, 16, 27},		/* 15  */
 	{3778, 17, 26},		/* 16  */
 	{4177, 18, 26},		/* 17  */
 	{4596, 19, 25},		/* 18  */
 	{5036, 20, 25},		/* 19  */
 	{5497, 21, 24},		/* 20  */
 	{5979, 22, 24},		/* 21  */
 	{6483, 23, 23},		/* 22  */
 	{7009, 24, 23},		/* 23  */
 	{7558, 25, 22},		/* 24  */
 	{8130, 26, 22},		/* 25  */
 	{8726, 27, 22},		/* 26  */
 	{9346, 28, 21},		/* 27  */
 	{9991, 29, 21},		/* 28  */
 	{10661, 30, 21},	/* 29  */
 	{11358, 31, 20},	/* 30  */
 	{12082, 32, 20},	/* 31  */
 	{12834, 33, 20},	/* 32  */
 	{13614, 34, 19},	/* 33  */
 	{14424, 35, 19},	/* 34  */
 	{15265, 36, 19},	/* 35  */
 	{16137, 37, 19},	/* 36  */
 	{17042, 38, 18},	/* 37  */
 	{17981, 39, 18},	/* 38  */
 	{18955, 40, 18},	/* 39  */
 	{19965, 41, 17},	/* 40  */
 	{21013, 42, 17},	/* 41  */
 	{22101, 43, 17},	/* 42  */
 	{23230, 44, 17},	/* 43  */
 	{24402, 45, 16},	/* 44  */
 	{25618, 46, 16},	/* 45  */
 	{26881, 47, 16},	/* 46  */
 	{28193, 48, 16},	/* 47  */
 	{29557, 49, 15},	/* 48  */
 	{30975, 50, 15},	/* 49  */
 	{32450, 51, 15},	/* 50  */
 	{33986, 52, 15},	/* 51  */
 	{35586, 53, 14},	/* 52  */
 	{37253, 54, 14},	/* 53  */
 	{38992, 55, 14},	/* 54  */
 	{40808, 56, 14},	/* 55  */
 	{42707, 57, 13},	/* 56  */
 	{44694, 58, 13},	/* 57  */
 	{46776, 59, 13},	/* 58  */
 	{48961, 60, 13},	/* 59  */
 	{51258, 61, 13},	/* 60  */
 	{53677, 62, 12},	/* 61  */
 	{56230, 63, 12},	/* 62  */
 	{58932, 64, 12},	/* 63  */
 	{61799, 65, 12},	/* 64  */
 	{64851, 66, 11},	/* 65  */
 	{68113, 67, 11},	/* 66  */
 	{71617, 68, 11},	/* 67  */
 	{75401, 69, 10},	/* 68  */
 	{79517, 70, 10},	/* 69  */
 	{84035, 71, 10},	/* 70  */
 	{89053, 72, 10},	/* 71  */
 	{94717, 73, 9}		/* 72  */
 };
 
 /*
  * Grow cwnd for one destination using the high-speed table.
  *
  * While cwnd (in 1024-byte units) is below the first table threshold the
  * window grows by the bytes acked, capped at one MTU.  Above that, the
  * table is searched from the last used row for the first threshold larger
  * than the current window (falling back to the last row), and cwnd grows
  * by that row's increase.
  */
 static void
 sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int scaled_cwnd, slot, pick, step;
 
 	scaled_cwnd = net->cwnd >> 10;
 #ifdef SCTP_DEBUG
 	printf("HS CC CAlled.\n");
 #endif
 	if (scaled_cwnd >= sctp_cwnd_adjust[0].cwnd) {
 		/* High-speed mode: find the matching table row. */
 		pick = SCTP_HS_TABLE_SIZE - 1;
 		slot = net->last_hs_used;
 		while (slot < SCTP_HS_TABLE_SIZE) {
 			if (scaled_cwnd < sctp_cwnd_adjust[slot].cwnd) {
 				pick = slot;
 				break;
 			}
 			slot++;
 		}
 		net->last_hs_used = pick;
 		step = sctp_cwnd_adjust[pick].increase << 10;
 		net->cwnd += step;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, step, SCTP_CWND_LOG_FROM_SS);
 		}
 		return;
 	}
 
 	/* Normal mode: grow by what was acked, at most one MTU. */
 	if (net->net_ack > net->mtu) {
 		net->cwnd += net->mtu;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
 		}
 	} else {
 		net->cwnd += net->net_ack;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
 		}
 	}
 }
 
 /*
  * HighSpeed cwnd reduction for one destination.
  *
  * Below the first row of sctp_cwnd_adjust[] (cwnd measured in units of
  * 1024 bytes) the window is halved, with a floor of two MTUs.  Otherwise
  * cwnd is cut by the drop_percent of the row in net->last_hs_used, and
  * net->last_hs_used is moved back down to the row that matches the
  * reduced window so the next increase starts its search from there.
  * The change in cwnd is logged when cwnd monitoring is enabled.
  */
 static void
 sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int cur_val, i, indx;
 	int old_cwnd = net->cwnd;
 
 	cur_val = net->cwnd >> 10;
 	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
 		/* normal mode */
 		net->ssthresh = net->cwnd / 2;
 		if (net->ssthresh < (net->mtu * 2)) {
 			net->ssthresh = 2 * net->mtu;
 		}
 		net->cwnd = net->ssthresh;
 	} else {
 		/* drop by the proper amount */
 		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
 		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
 		net->cwnd = net->ssthresh;
 		/* now where are we */
 		indx = net->last_hs_used;
 		cur_val = net->cwnd >> 10;
 		/* reset where we are in the table */
 		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
 			/* fell out of hs */
 			net->last_hs_used = 0;
 		} else {
 			/*
 			 * Walk down from the old row until the row below is
 			 * smaller than the new cwnd; i ends at 0 if none is.
 			 */
 			for (i = indx; i >= 1; i--) {
 				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
 					break;
 				}
 			}
 			/*
 			 * Store the row that was found.  Storing indx here
 			 * would discard the search and leave the position
 			 * unchanged.
 			 */
 			net->last_hs_used = i;
 		}
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
 	}
 }
 
 /*
  * HighSpeed reaction to a fast retransmit.
  *
  * For every destination that had data acked (net_ack > 0):
  *  - if the association is already in fast recovery and CMT is off, only
  *    the sctps_fastretransinrtt counter is bumped;
  *  - otherwise cwnd/ssthresh are reduced through sctp_hs_cwnd_decrease(),
  *    fast recovery is flagged on both the association and the destination,
  *    the recovery window end is recorded, nonce sum checking is switched
  *    off and the send timer is restarted.
  */
 void
 sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_nets *net;
 	struct sctp_tmit_chunk *first_unsent;
 
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		if (!(net->net_ack > 0)) {
 			/* nothing was acked on this destination */
 			continue;
 		}
 		if ((asoc->fast_retran_loss_recovery != 0) &&
 		    (asoc->sctp_cmt_on_off != 1)) {
 			/*
 			 * Still inside a RFC2582 recovery window: count the
 			 * reduction we would have made and move on.
 			 */
 			SCTP_STAT_INCR(sctps_fastretransinrtt);
 			continue;
 		}
 		/* per section 7.2.3: adjust ssthresh and cwnd */
 		sctp_hs_cwnd_decrease(stcb, net);
 
 		first_unsent = TAILQ_FIRST(&asoc->send_queue);
 		net->partial_bytes_acked = 0;
 
 		/* open the recovery window, association-wide and per net */
 		asoc->fast_retran_loss_recovery = 1;
 		net->fast_retran_loss_recovery = 1;
 		if (first_unsent == NULL) {
 			/* nothing queued: window ends at the last TSN sent */
 			asoc->fast_recovery_tsn = asoc->sending_seq - 1;
 			net->fast_recovery_tsn = asoc->sending_seq - 1;
 		} else {
 			asoc->fast_recovery_tsn = first_unsent->rec.data.TSN_seq - 1;
 			net->fast_recovery_tsn = first_unsent->rec.data.TSN_seq - 1;
 		}
 
 		/* stop nonce sum checking and remember where to resync */
 		asoc->nonce_sum_check = 0;
 		asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
 
 		sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
 		sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net);
 	}
 }
 
 /*
  * HighSpeed per-destination processing after a SACK has been handled.
  *
  * For each destination of the association this:
  *  - starts, restarts or stops the Early FR timer when sctp_early_fr is set;
  *  - skips destinations on which nothing was acked (net_ack == 0);
  *  - when net_ack2 is set, clears the error count, marks an unreachable
  *    address reachable again and takes a destination out of the CMT PF
  *    state;
  *  - grows cwnd: through sctp_hs_cwnd_increase() in slow start, by one MTU
  *    per cwnd of acked bytes in congestion avoidance;
  *  - when net_ack2 is set, recomputes RTO from lastsa/lastsv and clamps it
  *    to [minrto, maxrto].
  *
  * accum_moved: non-zero if the cumulative ack advanced.
  * reneged_all: non-zero forces the Early FR timer to be (re)started.
  * will_exit:   zero while fast recovery continues; together with CMT being
  *              off this suppresses the cwnd update.
  */
 void
 sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all, int will_exit)
 {
 	struct sctp_nets *net;
 
 	/******************************/
 	/* update cwnd and Early FR   */
 	/******************************/
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code. Need to debug.
 		 */
 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
 			if (compare_with_wrap(asoc->last_acked_seq,
 			    net->fast_recovery_tsn, MAX_TSN) ||
 			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
 			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
 			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
 				net->will_exit_fast_recovery = 1;
 			}
 		}
 #endif
 		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
 			/*
 			 * So, first of all do we need to have an Early FR
 			 * timer running?
 			 */
 			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
 			    (net->ref_count > 1) &&
 			    (net->flight_size < net->cwnd)) ||
 			    (reneged_all)) {
 				/*
 				 * Yes, so in this case stop it if it's
 				 * running, and then restart it. Reneging
 				 * all is a special case where we want to
 				 * run the Early FR timer and then force the
 				 * last few unacked to be sent, causing us
 				 * to elicit a sack with gaps to force out
 				 * the others.
 				 */
 				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
 					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
 					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
 					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
 				}
 				SCTP_STAT_INCR(sctps_earlyfrstrid);
 				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
 			} else {
 				/* No, stop it if it's running */
 				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
 					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
 					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
 					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
 				}
 			}
 		}
 		/* if nothing was acked on this destination skip it */
 		if (net->net_ack == 0) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
 			}
 			continue;
 		}
 		if (net->net_ack2 > 0) {
 			/*
 			 * Karn's rule applies to clearing error count, this
 			 * is optional.
 			 */
 			net->error_count = 0;
 			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
 			    SCTP_ADDR_NOT_REACHABLE) {
 				/* addr came good */
 				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
 				net->dest_state |= SCTP_ADDR_REACHABLE;
 				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
 				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
 				/* now was it the primary? if so restore */
 				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
 					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
 				}
 			}
 			/*
 			 * JRS 5/14/07 - If CMT PF is on and the destination
 			 * is in PF state, set the destination to active
 			 * state and set the cwnd to one or two MTU's based
 			 * on whether PF1 or PF2 is being used.
 			 *
 			 * Should we stop any running T3 timer here?
 			 */
 			if ((asoc->sctp_cmt_on_off == 1) &&
 			    (asoc->sctp_cmt_pf > 0) &&
 			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
 				net->dest_state &= ~SCTP_ADDR_PF;
 				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
 				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
 				    net, net->cwnd);
 				/*
 				 * Since the cwnd value is explicitly set,
 				 * skip the code that updates the cwnd
 				 * value.
 				 */
 				goto skip_cwnd_update;
 			}
 		}
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code
 		 */
 		/*
 		 * if (sctp_cmt_on_off == 1 &&
 		 * net->fast_retran_loss_recovery &&
 		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
 		 * else if (sctp_cmt_on_off == 0 &&
 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
 		 */
 #endif
 
 		if (asoc->fast_retran_loss_recovery &&
 		    (will_exit == 0) &&
 		    (asoc->sctp_cmt_on_off == 0)) {
 			/*
 			 * If we are in loss recovery we skip any cwnd
 			 * update
 			 */
 			goto skip_cwnd_update;
 		}
 		/*
 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
 		 * moved.
 		 */
 		if (accum_moved ||
 		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
 			/* If the cumulative ack moved we can proceed */
 			if (net->cwnd <= net->ssthresh) {
 				/* We are in slow start */
 				if (net->flight_size + net->net_ack >= net->cwnd) {
 
 					/* window was fully used: HighSpeed growth */
 					sctp_hs_cwnd_increase(stcb, net);
 
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_SS);
 					}
 				}
 			} else {
 				/* We are in congestion avoidance */
 				net->partial_bytes_acked += net->net_ack;
 				/*
 				 * Grow by one MTU once a full cwnd of bytes
 				 * has been acked while the window was in use.
 				 */
 				if ((net->flight_size + net->net_ack >= net->cwnd) &&
 				    (net->partial_bytes_acked >= net->cwnd)) {
 					net->partial_bytes_acked -= net->cwnd;
 					net->cwnd += net->mtu;
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->mtu,
 						    SCTP_CWND_LOG_FROM_CA);
 					}
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_CA);
 					}
 				}
 			}
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_NO_CUMACK);
 			}
 		}
 skip_cwnd_update:
 		/*
 		 * NOW, according to Karn's rule do we need to restore the
 		 * RTO timer back? Check our net_ack2. If not set then we
 		 * have an ambiguity, i.e. all data ack'd was sent to more
 		 * than one place.
 		 */
 		if (net->net_ack2) {
 			/* restore any doubled timers */
 			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
 			if (net->RTO < stcb->asoc.minrto) {
 				net->RTO = stcb->asoc.minrto;
 			}
 			if (net->RTO > stcb->asoc.maxrto) {
 				net->RTO = stcb->asoc.maxrto;
 			}
 		}
 	}
 }
 
 
 /*
  * H-TCP congestion control. The algorithm is detailed in:
  * R.N.Shorten, D.J.Leith:
  *   "H-TCP: TCP for high-speed and long-distance networks"
  *   Proc. PFLDnet, Argonne, 2004.
  * http://www.hamilton.ie/net/htcp3.pdf
  */
 
 
 /* Non-zero: htcp_alpha_update() scales its increase factor by minRTT. */
 static int use_rtt_scaling = 1;
 /*
  * Non-zero: measure_achieved_throughput() keeps its throughput estimates
  * up to date and htcp_beta_update() consults them.
  */
 static int use_bandwidth_switch = 1;
 
 /*
  * Return non-zero when seq1 lies in the closed interval [seq2, seq3],
  * evaluated with wrapping 32-bit arithmetic relative to seq2.
  */
 static inline int
 between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
 {
 	uint32_t span = seq3 - seq2;	/* width of the interval */
 	uint32_t offset = seq1 - seq2;	/* distance of seq1 from its start */
 
 	return (offset <= span);
 }
 
 /* Ticks elapsed since the congestion event recorded in ca->last_cong. */
 static inline uint32_t
 htcp_cong_time(struct htcp *ca)
 {
 	uint32_t elapsed;
 
 	elapsed = sctp_get_tick_count() - ca->last_cong;
 	return (elapsed);
 }
 
 /*
  * Time since the last congestion event expressed in units of minRTT.
  *
  * minRTT is zero until an RTT sample has been recorded (htcp_init() clears
  * the whole structure) and measure_rtt() may store a zero sample in it, so
  * the division is guarded: with no usable minRTT the raw tick count is
  * returned instead of dividing by zero.
  */
 static inline uint32_t
 htcp_ccount(struct htcp *ca)
 {
 	uint32_t elapsed = htcp_cong_time(ca);
 
 	if (ca->minRTT == 0) {
 		return (elapsed);
 	}
 	return (elapsed / ca->minRTT);
 }
 
 /*
  * Record a new congestion event: save last_cong, maxRTT and old_maxB in
  * their undo_* counterparts, then stamp last_cong with the current tick.
  */
 static inline void
 htcp_reset(struct htcp *ca)
 {
 	/* snapshot first; last_cong is overwritten below */
 	ca->undo_old_maxB = ca->old_maxB;
 	ca->undo_maxRTT = ca->maxRTT;
 	ca->undo_last_cong = ca->last_cong;
 
 	ca->last_cong = sctp_get_tick_count();
 }
 
 #ifdef SCTP_NOT_USED
 
 static uint32_t
 htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
 	net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
 	net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
 	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
 }
 
 #endif
 
 static inline void
 measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	uint32_t srtt = net->lastsa >> 3;
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
 	if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
 		net->htcp_ca.minRTT = srtt;
 
 	/* max RTT */
 	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
 		if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
 			net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
 		if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
 			net->htcp_ca.maxRTT = srtt;
 	}
 }
 
 /*
  * Maintain the H-TCP achieved-throughput estimates (Bi, minB, maxB).
  *
  * bytes_acked is refreshed from net_ack unless a fast retransmit is in
  * progress.  When use_bandwidth_switch is set, acked bytes are summed in
  * bytecount; once roughly a cwnd of data has been acked over at least one
  * minRTT a throughput sample (MTUs per second) is taken.  Within three
  * minRTT periods of a congestion event the sample replaces all three
  * estimates, later it is blended into Bi with weight 1/4.
  */
 static void
 measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	uint32_t now = sctp_get_tick_count();
 
 	if (net->fast_retran_ip == 0)
 		net->htcp_ca.bytes_acked = net->net_ack;
 
 	if (!use_bandwidth_switch)
 		return;
 
 	/* achieved throughput calculations */
 	/* JRS - not 100% sure of this statement */
 	if (net->fast_retran_ip == 1) {
 		net->htcp_ca.bytecount = 0;
 		net->htcp_ca.lasttime = now;
 		return;
 	}
 	net->htcp_ca.bytecount += net->net_ack;
 
 	/*
 	 * The multiplier is alpha >> 7, or 1 when that is zero.  It is
 	 * spelled out as a full conditional because the two-operand "a ? : b"
 	 * form is a GNU extension, not ISO C.
 	 */
 	if (net->htcp_ca.bytecount >= net->cwnd -
 	    (((net->htcp_ca.alpha >> 7) ? (net->htcp_ca.alpha >> 7) : 1) * net->mtu)
 	    && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
 	    && net->htcp_ca.minRTT > 0) {
 		/* the elapsed time is >= minRTT > 0, so the divisor is non-zero */
 		uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);
 
 		if (htcp_ccount(&net->htcp_ca) <= 3) {
 			/* just after backoff */
 			net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
 		} else {
 			net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
 			if (net->htcp_ca.Bi > net->htcp_ca.maxB)
 				net->htcp_ca.maxB = net->htcp_ca.Bi;
 			if (net->htcp_ca.minB > net->htcp_ca.maxB)
 				net->htcp_ca.minB = net->htcp_ca.maxB;
 		}
 		net->htcp_ca.bytecount = 0;
 		net->htcp_ca.lasttime = now;
 	}
 }
 
 /*
  * Recompute the backoff factor beta (fixed point, scaled by 128).
  *
  * With use_bandwidth_switch set, old_maxB is replaced by maxB and, if
  * 5 * maxB falls outside [4 * old_maxB, 6 * old_maxB] (old value), beta
  * drops to BETA_MIN and modeswitch is cleared.  Otherwise, when
  * modeswitch is set, minRTT exceeds 10ms and maxRTT is non-zero, beta
  * becomes minRTT / maxRTT clamped to [BETA_MIN, BETA_MAX]; in every other
  * case beta is BETA_MIN and modeswitch is set for the next round.
  */
 static inline void
 htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
 {
 	if (use_bandwidth_switch) {
 		uint32_t prev_maxB = ca->old_maxB;
 		uint32_t cur_maxB = ca->maxB;
 
 		ca->old_maxB = ca->maxB;
 
 		if (!between(5 * cur_maxB, 4 * prev_maxB, 6 * prev_maxB)) {
 			ca->modeswitch = 0;
 			ca->beta = BETA_MIN;
 			return;
 		}
 	}
 
 	if (!ca->modeswitch || minRTT <= (uint32_t) MSEC_TO_TICKS(10) || !maxRTT) {
 		ca->beta = BETA_MIN;
 		ca->modeswitch = 1;
 		return;
 	}
 
 	ca->beta = (minRTT << 7) / maxRTT;
 	if (ca->beta < BETA_MIN) {
 		ca->beta = BETA_MIN;
 	} else if (ca->beta > BETA_MAX) {
 		ca->beta = BETA_MAX;
 	}
 }
 
 /*
  * Recompute the increase factor alpha.
  *
  * The base factor is 1 during the first second (hz ticks) after a
  * congestion event and grows with the elapsed time afterwards.  With
  * use_rtt_scaling set and a known minRTT the factor is divided by a
  * ratio derived from hz and minRTT, clamped to [0.5, 10] in <<3 fixed
  * point.  alpha is 2 * factor * (128 - beta), or ALPHA_BASE if that
  * product is zero.
  */
 static inline void
 htcp_alpha_update(struct htcp *ca)
 {
 	uint32_t elapsed = htcp_cong_time(ca);
 	uint32_t rtt_floor = ca->minRTT;
 	uint32_t factor;
 
 	if (elapsed > (uint32_t) hz) {
 		elapsed -= hz;
 		factor = 1 + (10 * elapsed + ((elapsed / 2) * (elapsed / 2) / hz)) / hz;
 	} else {
 		factor = 1;
 	}
 
 	if (use_rtt_scaling && rtt_floor) {
 		uint32_t scale = (hz << 3) / (10 * rtt_floor);
 
 		/* clamp the ratio to the interval [0.5,10]<<3 */
 		if (scale < (1U << 2)) {
 			scale = 1U << 2;
 		} else if (scale > (10U << 3)) {
 			scale = 10U << 3;
 		}
 		factor = (factor << 3) / scale;
 		if (!factor) {
 			factor = 1;
 		}
 	}
 
 	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
 	if (!ca->alpha) {
 		ca->alpha = ALPHA_BASE;
 	}
 }
 
 /* After we have the rtt data to calculate beta, we'd still prefer to wait one
  * rtt before we adjust our beta to ensure we are working from a consistent
  * data.
  *
  * This function should be called when we hit a congestion event since only at
  * that point do we really have a real sense of maxRTT (the queues en route
  * were getting just too full now).
  */
 static void
 htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	uint32_t minRTT = net->htcp_ca.minRTT;
 	uint32_t maxRTT = net->htcp_ca.maxRTT;
 
 	htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
 	htcp_alpha_update(&net->htcp_ca);
 
 	/*
 	 * add slowly fading memory for maxRTT to accommodate routing
 	 * changes etc
 	 */
 	if (minRTT > 0 && maxRTT > minRTT)
 		net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
 }
 
 /*
  * Update the H-TCP parameters and return the new ssthresh: cwnd (in whole
  * MTUs) scaled by beta / 128, but never less than two MTUs.
  */
 static uint32_t
 htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/* beta must be current before it is applied below */
 	htcp_param_update(stcb, net);
 
 	return (max(
 	    ((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu,
 	    2U * net->mtu));
 }
 
 /*
  * H-TCP cwnd growth for one destination.
  *
  * Slow start (cwnd <= ssthresh): when the window was fully used, cwnd
  * grows by net_ack, capped at mtu * sctp_L2_abc_variable.
  * Congestion avoidance: the RTT estimates are refreshed, and once
  * partial_bytes_acked (in MTUs) scaled by alpha / 128 reaches cwnd, cwnd
  * grows by one MTU and alpha is recomputed; otherwise the acked bytes are
  * accumulated.
  *
  * Every change is reported through sctp_log_cwnd() with the amount that
  * was actually added to cwnd.
  */
 static void
 htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/*-
 	 * How to handle these functions?
 	 *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
 	 *		return;
 	 */
 	if (net->cwnd <= net->ssthresh) {
 		/* We are in slow start */
 		if (net->flight_size + net->net_ack >= net->cwnd) {
 			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
 				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					/*
 					 * Log the real increment; a bare
 					 * net->mtu would under-report it
 					 * whenever the ABC limit is not 1.
 					 */
 					sctp_log_cwnd(stcb, net,
 					    (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)),
 					    SCTP_CWND_LOG_FROM_SS);
 				}
 			} else {
 				net->cwnd += net->net_ack;
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					sctp_log_cwnd(stcb, net, net->net_ack,
 					    SCTP_CWND_LOG_FROM_SS);
 				}
 			}
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->net_ack,
 				    SCTP_CWND_LOG_NOADV_SS);
 			}
 		}
 	} else {
 		measure_rtt(stcb, net);
 
 		/*
 		 * In dangerous area, increase slowly. In theory this is
 		 * net->cwnd += alpha / net->cwnd
 		 */
 		/* What is snd_cwnd_cnt?? */
 		if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
 			/*-
 			 * Does SCTP have a cwnd clamp?
 			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
 			 */
 			net->cwnd += net->mtu;
 			net->partial_bytes_acked = 0;
 			htcp_alpha_update(&net->htcp_ca);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_FROM_CA);
 			}
 		} else {
 			net->partial_bytes_acked += net->net_ack;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->net_ack,
 				    SCTP_CWND_LOG_NOADV_CA);
 			}
 		}
 
 		net->htcp_ca.bytes_acked = net->mtu;
 	}
 }
 
 #ifdef SCTP_NOT_USED
 /* Lower bound on the congestion window: the destination's ssthresh. */
 static uint32_t
 htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	uint32_t floor_cwnd = net->ssthresh;
 
 	return (floor_cwnd);
 }
 
 #endif
 
 static void
 htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	memset(&net->htcp_ca, 0, sizeof(struct htcp));
 	net->htcp_ca.alpha = ALPHA_BASE;
 	net->htcp_ca.beta = BETA_MIN;
 	net->htcp_ca.bytes_acked = net->mtu;
 	net->htcp_ca.last_cong = sctp_get_tick_count();
 }
 
 /*
  * Set the initial congestion control state of a destination for H-TCP.
  *
  * cwnd is the larger of two MTUs and SCTP_INITIAL_CWND, limited to four
  * MTUs; ssthresh starts at the peer's advertised rwnd; the H-TCP state is
  * reset through htcp_init().
  */
 void
 sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	htcp_init(stcb, net);
 
 	net->ssthresh = stcb->asoc.peers_rwnd;
 	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
 
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
 	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
 		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
 	}
 }
 
 /*
  * H-TCP per-destination processing after a SACK has been handled.
  *
  * For each destination of the association this:
  *  - starts, restarts or stops the Early FR timer when sctp_early_fr is set;
  *  - skips destinations on which nothing was acked (net_ack == 0);
  *  - when net_ack2 is set, clears the error count, marks an unreachable
  *    address reachable again and takes a destination out of the CMT PF
  *    state;
  *  - updates cwnd through htcp_cong_avoid() and then samples throughput
  *    with measure_achieved_throughput();
  *  - when net_ack2 is set, recomputes RTO from lastsa/lastsv and clamps it
  *    to [minrto, maxrto].
  *
  * accum_moved: non-zero if the cumulative ack advanced.
  * reneged_all: non-zero forces the Early FR timer to be (re)started.
  * will_exit:   zero while fast recovery continues; together with CMT being
  *              off this suppresses the cwnd update.
  */
 void
 sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all, int will_exit)
 {
 	struct sctp_nets *net;
 
 	/******************************/
 	/* update cwnd and Early FR   */
 	/******************************/
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code. Need to debug.
 		 */
 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
 			if (compare_with_wrap(asoc->last_acked_seq,
 			    net->fast_recovery_tsn, MAX_TSN) ||
 			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
 			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
 			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
 				net->will_exit_fast_recovery = 1;
 			}
 		}
 #endif
 		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
 			/*
 			 * So, first of all do we need to have an Early FR
 			 * timer running?
 			 */
 			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
 			    (net->ref_count > 1) &&
 			    (net->flight_size < net->cwnd)) ||
 			    (reneged_all)) {
 				/*
 				 * Yes, so in this case stop it if it's
 				 * running, and then restart it. Reneging
 				 * all is a special case where we want to
 				 * run the Early FR timer and then force the
 				 * last few unacked to be sent, causing us
 				 * to elicit a sack with gaps to force out
 				 * the others.
 				 */
 				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
 					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
 					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
 					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
 				}
 				SCTP_STAT_INCR(sctps_earlyfrstrid);
 				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
 			} else {
 				/* No, stop it if it's running */
 				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
 					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
 					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
 					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
 				}
 			}
 		}
 		/* if nothing was acked on this destination skip it */
 		if (net->net_ack == 0) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
 			}
 			continue;
 		}
 		if (net->net_ack2 > 0) {
 			/*
 			 * Karn's rule applies to clearing error count, this
 			 * is optional.
 			 */
 			net->error_count = 0;
 			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
 			    SCTP_ADDR_NOT_REACHABLE) {
 				/* addr came good */
 				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
 				net->dest_state |= SCTP_ADDR_REACHABLE;
 				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
 				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
 				/* now was it the primary? if so restore */
 				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
 					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
 				}
 			}
 			/*
 			 * JRS 5/14/07 - If CMT PF is on and the destination
 			 * is in PF state, set the destination to active
 			 * state and set the cwnd to one or two MTU's based
 			 * on whether PF1 or PF2 is being used.
 			 *
 			 * Should we stop any running T3 timer here?
 			 */
 			if ((asoc->sctp_cmt_on_off == 1) &&
 			    (asoc->sctp_cmt_pf > 0) &&
 			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
 				net->dest_state &= ~SCTP_ADDR_PF;
 				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
 				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
 				    net, net->cwnd);
 				/*
 				 * Since the cwnd value is explicitly set,
 				 * skip the code that updates the cwnd
 				 * value.
 				 */
 				goto skip_cwnd_update;
 			}
 		}
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code
 		 */
 		/*
 		 * if (sctp_cmt_on_off == 1 &&
 		 * net->fast_retran_loss_recovery &&
 		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
 		 * else if (sctp_cmt_on_off == 0 &&
 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
 		 */
 #endif
 
 		if (asoc->fast_retran_loss_recovery &&
 		    will_exit == 0 &&
 		    (asoc->sctp_cmt_on_off == 0)) {
 			/*
 			 * If we are in loss recovery we skip any cwnd
 			 * update
 			 */
 			goto skip_cwnd_update;
 		}
 		/*
 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
 		 * moved.
 		 */
 		if (accum_moved ||
 		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
 			/* grow cwnd first, then sample throughput */
 			htcp_cong_avoid(stcb, net);
 			measure_achieved_throughput(stcb, net);
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_NO_CUMACK);
 			}
 		}
 skip_cwnd_update:
 		/*
 		 * NOW, according to Karn's rule do we need to restore the
 		 * RTO timer back? Check our net_ack2. If not set then we
 		 * have an ambiguity, i.e. all data ack'd was sent to more
 		 * than one place.
 		 */
 		if (net->net_ack2) {
 			/* restore any doubled timers */
 			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
 			if (net->RTO < stcb->asoc.minrto) {
 				net->RTO = stcb->asoc.minrto;
 			}
 			if (net->RTO > stcb->asoc.maxrto) {
 				net->RTO = stcb->asoc.maxrto;
 			}
 		}
 	}
 }
 
 /*
  * H-TCP reaction to a fast retransmit.
  *
  * For every destination that had data acked (net_ack > 0):
  *  - if the association is already in fast recovery and CMT is off, only
  *    the sctps_fastretransinrtt counter is bumped;
  *  - otherwise the H-TCP state is reset, ssthresh is recomputed and cwnd
  *    set to it, fast recovery is flagged on both the association and the
  *    destination, the recovery window end is recorded, nonce sum checking
  *    is switched off and the send timer is restarted.
  */
 void
 sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_nets *net;
 	struct sctp_tmit_chunk *first_unsent;
 	int cwnd_before;
 
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		if (!(net->net_ack > 0)) {
 			/* nothing was acked on this destination */
 			continue;
 		}
 		if ((asoc->fast_retran_loss_recovery != 0) &&
 		    (asoc->sctp_cmt_on_off != 1)) {
 			/*
 			 * Still inside a RFC2582 recovery window: count the
 			 * reduction we would have made and move on.
 			 */
 			SCTP_STAT_INCR(sctps_fastretransinrtt);
 			continue;
 		}
 		cwnd_before = net->cwnd;
 
 		/* JRS - reset as if state were changed */
 		htcp_reset(&net->htcp_ca);
 		net->ssthresh = htcp_recalc_ssthresh(stcb, net);
 		net->cwnd = net->ssthresh;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, (net->cwnd - cwnd_before),
 			    SCTP_CWND_LOG_FROM_FR);
 		}
 
 		first_unsent = TAILQ_FIRST(&asoc->send_queue);
 		net->partial_bytes_acked = 0;
 
 		/* open the recovery window, association-wide and per net */
 		asoc->fast_retran_loss_recovery = 1;
 		net->fast_retran_loss_recovery = 1;
 		if (first_unsent == NULL) {
 			/* nothing queued: window ends at the last TSN sent */
 			asoc->fast_recovery_tsn = asoc->sending_seq - 1;
 			net->fast_recovery_tsn = asoc->sending_seq - 1;
 		} else {
 			asoc->fast_recovery_tsn = first_unsent->rec.data.TSN_seq - 1;
 			net->fast_recovery_tsn = first_unsent->rec.data.TSN_seq - 1;
 		}
 
 		/* stop nonce sum checking and remember where to resync */
 		asoc->nonce_sum_check = 0;
 		asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
 
 		sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
 		sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net);
 	}
 }
 
 /*
  * H-TCP reaction to a retransmission timeout: reset the H-TCP state,
  * recompute ssthresh, collapse cwnd to one MTU and clear
  * partial_bytes_acked.
  */
 void
 sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	int cwnd_before;
 
 	cwnd_before = net->cwnd;
 
 	/* JRS - reset as if the state were being changed to timeout */
 	htcp_reset(&net->htcp_ca);
 	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
 
 	net->partial_bytes_acked = 0;
 	net->cwnd = net->mtu;
 
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, net->cwnd - cwnd_before, SCTP_CWND_LOG_FROM_RTX);
 	}
 }
 
 /*
  * H-TCP handling of an Early FR timer expiry: push out pending chunks,
  * stamp the congestion time, shrink cwnd by one MTU (if it is larger than
  * one) and, if still below ssthresh, lower ssthresh to just under cwnd.
  */
 void
 sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int cwnd_before = net->cwnd;
 
 	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
 	net->htcp_ca.last_cong = sctp_get_tick_count();
 
 	/*
 	 * make a small adjustment to cwnd and force to CA.
 	 */
 	if (net->cwnd > net->mtu) {
 		/* drop down one MTU after sending */
 		net->cwnd -= net->mtu;
 	}
 	if (net->cwnd < net->ssthresh) {
 		/* still in SS move to CA */
 		net->ssthresh = net->cwnd - 1;
 	}
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (cwnd_before - net->cwnd), SCTP_CWND_LOG_FROM_FR);
 	}
 }
 
 /*
  * H-TCP reaction to an ECN echo: count the event, reset the H-TCP state,
  * recompute ssthresh (raised to one MTU, with RTO doubled, should it come
  * out smaller) and set cwnd to ssthresh.
  */
 void
 sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	int cwnd_before = net->cwnd;
 
 	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
 
 	/* JRS - reset htcp as if state changed */
 	htcp_reset(&net->htcp_ca);
 	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
 	if (net->ssthresh < net->mtu) {
 		/* here back off the timer as well, to slow us down */
 		net->RTO <<= 1;
 		net->ssthresh = net->mtu;
 	}
 	net->cwnd = net->ssthresh;
 
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - cwnd_before), SCTP_CWND_LOG_FROM_SAT);
 	}
 }
Index: projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h
===================================================================
--- projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h	(nonexistent)
+++ projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h	(revision 215830)
@@ -0,0 +1,73 @@
+/*-
+ * Copyright (c) 2010, by Randall Stewart & Michael Tuexen,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#ifndef __sctp_dtrace_declare_h__
+#define __sctp_dtrace_declare_h__	/* without this the guard never takes effect */
+#include "opt_kdtrace.h"
+#include <sys/kernel.h>
+#include <sys/sdt.h>
+
+/* Declare the SCTP provider */
+SDT_PROVIDER_DECLARE(sctp);
+
+/* The probes we have so far: */
+
+/* One to track a net's cwnd */
+/* initial */
+SDT_PROBE_DECLARE(sctp, cwnd, net, init);
+/* update at an ack -- increase */
+SDT_PROBE_DECLARE(sctp, cwnd, net, ack);
+/* update at a fast retransmit -- decrease */
+SDT_PROBE_DECLARE(sctp, cwnd, net, fr);
+/* update at a time-out -- decrease */
+SDT_PROBE_DECLARE(sctp, cwnd, net, to);
+/* update at a burst-limit -- decrease */
+SDT_PROBE_DECLARE(sctp, cwnd, net, bl);
+/* update at an ECN -- decrease */
+SDT_PROBE_DECLARE(sctp, cwnd, net, ecn);
+/* update at a Packet-Drop -- decrease */
+SDT_PROBE_DECLARE(sctp, cwnd, net, pd);
+
+/* One to track an association's rwnd */
+SDT_PROBE_DECLARE(sctp, rwnd, assoc, val);
+
+/* One to track a net's flight size */
+SDT_PROBE_DECLARE(sctp, flightsize, net, val);
+
+/* One to track an association's flight size */
+SDT_PROBE_DECLARE(sctp, flightsize, assoc, val);
+
+
+
+
+
+#endif /* __sctp_dtrace_declare_h__ */

Property changes on: projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h
===================================================================
--- projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h	(nonexistent)
+++ projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h	(revision 215830)
@@ -0,0 +1,201 @@
+/*-
+ * Copyright (c) 2010, by Randall Stewart & Michael Tuexen,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+/*
+ * Definitions of the "sctp" SDT provider, its probes and their argument
+ * types.  The include guard is now actually defined; previously it was only
+ * tested, so a second inclusion would have repeated every definition below.
+ * NOTE(review): as a header that defines objects it is presumably meant to
+ * be included from a single source file only -- confirm.
+ */
+#ifndef __sctp_dtrace_define_h__
+#define __sctp_dtrace_define_h__
+#include "opt_kdtrace.h"
+#include <sys/kernel.h>
+#include <sys/sdt.h>
+
+/* Define the SCTP provider */
+SDT_PROVIDER_DEFINE(sctp);
+
+/********************************************************/
+/* Cwnd probe - tracks changes in the congestion window on a netp */
+/********************************************************/
+/*
+ * Seven probes are defined below, one per cause of a cwnd change
+ * (init, ack, fr, to, bl, ecn, pd).  All of them take the same five
+ * arguments:
+ *   0  uint32_t   the Vtag for this end
+ *   1  uint32_t   (local port << 16) | remote port, network byte order
+ *   2  uintptr_t  pointer to the struct sctp_nets that is changing
+ *   3  int        old cwnd value
+ *   4  int        new cwnd value
+ */
+/* Initial */
+SDT_PROBE_DEFINE(sctp, cwnd, net, init, init);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 4, "int");
+
+
+/* ACK-INCREASE */
+SDT_PROBE_DEFINE(sctp, cwnd, net, ack, ack);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 4, "int");
+
+/* FastRetransmit-DECREASE */
+SDT_PROBE_DEFINE(sctp, cwnd, net, fr, fr);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 4, "int");
+
+
+/* TimeOut-DECREASE */
+SDT_PROBE_DEFINE(sctp, cwnd, net, to, to);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 4, "int");
+
+
+/* BurstLimit-DECREASE */
+SDT_PROBE_DEFINE(sctp, cwnd, net, bl, bl);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 4, "int");
+
+
+/* ECN-DECREASE */
+SDT_PROBE_DEFINE(sctp, cwnd, net, ecn, ecn);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 4, "int");
+
+
+/* PacketDrop-DECREASE */
+SDT_PROBE_DEFINE(sctp, cwnd, net, pd, pd);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 2, "uintptr_t");
+/* The old value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 3, "int");
+/* The new value of the cwnd */
+SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 4, "int");
+
+
+
+/********************************************************/
+/* Rwnd probe - tracks changes in the receiver window for an assoc */
+/********************************************************/
+/* Four arguments: Vtag, port pair, up/down amount, resulting value. */
+SDT_PROBE_DEFINE(sctp, rwnd, assoc, val, val);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 1, "uint32_t");
+/* The up/down amount */
+SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 2, "int");
+/*
+ * The new value of the rwnd.
+ * NOTE(review): this comment used to say "cwnd", apparently copied from
+ * the cwnd probes -- confirm against the probe's call sites.
+ */
+SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 3, "int");
+
+/********************************************************/
+/* flight probe - tracks changes in the flight size on a net or assoc */
+/********************************************************/
+/* Per-net version: five arguments, including the struct sctp_nets pointer. */
+SDT_PROBE_DEFINE(sctp, flightsize, net, val, val);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 1, "uint32_t");
+/* The pointer to the struct sctp_nets * changing */
+SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 2, "uintptr_t");
+/* The up/down amount */
+SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 3, "int");
+/*
+ * The new value of the net's flight size.
+ * NOTE(review): this comment used to say "cwnd" -- confirm against the
+ * probe's call sites.
+ */
+SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 4, "int");
+/********************************************************/
+/* The total flight version */
+/********************************************************/
+/* Per-association version: four arguments, no net pointer. */
+SDT_PROBE_DEFINE(sctp, flightsize, assoc, val, val);
+/* The Vtag for this end */
+SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 0, "uint32_t");
+/* The port number of the local side << 16 | port number of remote
+ * in network byte order.
+ */
+SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 1, "uint32_t");
+/* The up/down amount */
+SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 2, "int");
+/*
+ * The new value of the association's total flight size.
+ * NOTE(review): this comment used to say "cwnd" -- confirm against the
+ * probe's call sites.
+ */
+SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 3, "int");
+
+#endif

Property changes on: projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/binutils-2.17/sys/netinet/sctp_pcb.c
===================================================================
--- projects/binutils-2.17/sys/netinet/sctp_pcb.c	(revision 215829)
+++ projects/binutils-2.17/sys/netinet/sctp_pcb.c	(revision 215830)
@@ -1,6806 +1,6807 @@
 /*-
  * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *   this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *   the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $	 */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_dtrace_define.h>
 #include <netinet/udp.h>
 
 
 /*
  * Storage for the SCTP-wide struct sctp_base_info, named system_base_info.
  * NOTE(review): presumably this is the object the SCTP_BASE_INFO() accessor
  * used throughout this file resolves to -- confirm against its definition.
  */
 VNET_DEFINE(struct sctp_base_info, system_base_info);
 
 /* FIX: we don't handle multiple link local scopes */
 /* "scopeless" replacement IN6_ARE_ADDR_EQUAL */
 #ifdef INET6
 /*
  * Compare two IPv6 socket addresses after embedding their scope.
  *
  * Both arguments are copied first, so the caller's structures are never
  * modified.  Returns 0 if sa6_embedscope() fails for either copy, otherwise
  * the result of IN6_ARE_ADDR_EQUAL() on the two scoped addresses.
  */
 int
 SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b)
 {
 	struct sockaddr_in6 scoped_a, scoped_b;
 
 	memcpy(&scoped_a, a, sizeof(scoped_a));
 	memcpy(&scoped_b, b, sizeof(scoped_b));
 
 	/* Short-circuit keeps the original order: b is only tried if a worked. */
 	if (sa6_embedscope(&scoped_a, MODULE_GLOBAL(ip6_use_defzone)) != 0 ||
 	    sa6_embedscope(&scoped_b, MODULE_GLOBAL(ip6_use_defzone)) != 0)
 		return 0;
 
 	return (IN6_ARE_ADDR_EQUAL(&scoped_a.sin6_addr, &scoped_b.sin6_addr));
 }
 
 #endif
 
 /*
  * Copy the global SCTP object counters into the caller's sctp_pcbinfo.
  *
  * spcb: destination structure; each field assigned below is overwritten
  *       with the matching SCTP_BASE_INFO() counter.
  */
 void
 sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb)
 {
 	/*
 	 * We really don't need to lock this, but I will just because it
 	 * does not hurt.
 	 */
 	SCTP_INP_INFO_RLOCK();
 	spcb->ep_count = SCTP_BASE_INFO(ipi_count_ep);
 	spcb->asoc_count = SCTP_BASE_INFO(ipi_count_asoc);
 	spcb->laddr_count = SCTP_BASE_INFO(ipi_count_laddr);
 	spcb->raddr_count = SCTP_BASE_INFO(ipi_count_raddr);
 	spcb->chk_count = SCTP_BASE_INFO(ipi_count_chunk);
 	spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq);
 	spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq);
 	spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks);
 
 	SCTP_INP_INFO_RUNLOCK();
 }
 
 /*
  * Addresses are added to VRF's (Virtual Router's). For BSD we
  * have only the default VRF 0. We maintain a hash list of
  * VRF's. Each VRF has its own list of sctp_ifn's. Each of
  * these has a list of addresses. When we add a new address
  * to a VRF we lookup the ifn/ifn_index, if the ifn does
  * not exist we create it and add it to the list of IFN's
  * within the VRF. Once we have the sctp_ifn, we add the
  * address to the list. So we look something like:
  *
  * hash-vrf-table
  *   vrf-> ifn-> ifn -> ifn
  *   vrf    |
  *    ...   +--ifa-> ifa -> ifa
  *   vrf
  *
  * We keep these separate lists since the SCTP subsystem will
  * point to these from its source address selection nets structure.
  * When an address is deleted it does not happen right away on
  * the SCTP side, it gets scheduled. What we do when a
  * delete happens is immediately remove the address from
  * the master list and decrement the refcount. As our
  * addip iterator works through and frees the src address
  * selection pointing to the sctp_ifa, eventually the refcount
  * will reach 0 and we will delete it. Note that it is assumed
  * that any locking on system level ifn/ifa is done at the
  * caller of these functions and these routines will only
  * lock the SCTP structures as they add or delete things.
  *
  * Other notes on VRF concepts.
  *  - An endpoint can be in multiple VRF's
  *  - An association lives within a VRF and only one VRF.
  *  - Any incoming packet we can deduce the VRF for by
  *    looking at the mbuf/pak inbound (for BSD its VRF=0 :D)
  *  - Any downward send call or connect call must supply the
  *    VRF via ancillary data or via some sort of set default
  *    VRF socket option call (again for BSD no brainer since
  *    the VRF is always 0).
  *  - An endpoint may add multiple VRF's to it.
  *  - Listening sockets can accept associations in any
  *    of the VRF's they are in but the assoc will end up
  *    in only one VRF (gotten from the packet or connect/send).
  *
  */
 
 /*
  * Return the VRF with the given id, creating and hashing a new one when
  * none exists yet.
  *
  * Returns NULL if either the VRF or its address hash cannot be allocated
  * (kernels built with INVARIANTS panic instead).  A newly created VRF
  * starts with a refcount of 0.
  */
 struct sctp_vrf *
 sctp_allocate_vrf(int vrf_id)
 {
 	struct sctp_vrflist *hash_head;
 	struct sctp_vrf *vrf;
 
 	/* An already existing VRF is simply handed back. */
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf != NULL)
 		return (vrf);
 
 	SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf),
 	    SCTP_M_VRF);
 	if (vrf == NULL) {
 #ifdef INVARIANTS
 		panic("No memory for VRF:%d", vrf_id);
 #endif
 		return (NULL);
 	}
 
 	/* Start from an all-zero structure, then fill in the identity. */
 	memset(vrf, 0, sizeof(struct sctp_vrf));
 	vrf->vrf_id = vrf_id;
 	LIST_INIT(&vrf->ifnlist);
 	vrf->total_ifa_count = 0;
 	vrf->refcount = 0;
 	SCTP_INIT_VRF_TABLEID(vrf);
 
 	/* The per-VRF address hash; on failure the VRF itself is released. */
 	vrf->vrf_addr_hash = SCTP_HASH_INIT(SCTP_VRF_ADDR_HASH_SIZE,
 	    &vrf->vrf_addr_hashmark);
 	if (vrf->vrf_addr_hash == NULL) {
 #ifdef INVARIANTS
 		panic("No memory for VRF:%d", vrf_id);
 #endif
 		SCTP_FREE(vrf, SCTP_M_VRF);
 		return (NULL);
 	}
 
 	/* Publish the VRF in the global hash and account for it. */
 	hash_head = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))];
 	LIST_INSERT_HEAD(hash_head, vrf, next_vrf);
 	atomic_add_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1);
 	return (vrf);
 }
 
 
 /*
  * Look up an SCTP interface entry.
  *
  * Only the hash chain selected by ifn_index is searched.  An entry on that
  * chain matches if its index equals ifn_index, or if ifn is non-NULL and
  * equals the entry's ifn_p pointer.  Returns the first match or NULL.
  * No locking is done here; the caller is assumed to hold the address lock.
  */
 struct sctp_ifn *
 sctp_find_ifn(void *ifn, uint32_t ifn_index)
 {
 	struct sctp_ifnlist *chain;
 	struct sctp_ifn *cur;
 
 	chain = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))];
 	LIST_FOREACH(cur, chain, next_bucket) {
 		if (cur->ifn_index == ifn_index)
 			return (cur);
 		/* A NULL ifn never matches, so cur->ifn_p is non-NULL on a hit. */
 		if (ifn != NULL && cur->ifn_p == ifn)
 			return (cur);
 	}
 	return (NULL);
 }
 
 
 
 /*
  * Find the VRF with the given id in the global VRF hash.
  * Returns the matching entry, or NULL if there is none.
  */
 struct sctp_vrf *
 sctp_find_vrf(uint32_t vrf_id)
 {
 	struct sctp_vrflist *chain;
 	struct sctp_vrf *cur;
 
 	chain = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))];
 	LIST_FOREACH(cur, chain, next_vrf) {
 		if (cur->vrf_id == vrf_id)
 			return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Drop one reference on a VRF.  When SCTP_DECREMENT_AND_CHECK_REFCOUNT()
  * reports that this was the last one, the address hash is freed, the VRF is
  * unlinked and freed, and the global VRF counter is decremented.
  */
 void
 sctp_free_vrf(struct sctp_vrf *vrf)
 {
 	int last_ref;
 
 	last_ref = SCTP_DECREMENT_AND_CHECK_REFCOUNT(&vrf->refcount);
 	if (!last_ref)
 		return;
 
 	if (vrf->vrf_addr_hash != NULL) {
 		SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark);
 		vrf->vrf_addr_hash = NULL;
 	}
 	LIST_REMOVE(vrf, next_vrf);
 	SCTP_FREE(vrf, SCTP_M_VRF);
 	atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1);
 }
 
 void
 sctp_free_ifn(struct sctp_ifn *sctp_ifnp)
 {
 	if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifnp->refcount)) {
 		/* We zero'd the count */
 		if (sctp_ifnp->vrf) {
 			sctp_free_vrf(sctp_ifnp->vrf);
 		}
 		SCTP_FREE(sctp_ifnp, SCTP_M_IFN);
 		atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifns), 1);
 	}
 }
 
 /*
  * Record a new MTU for the interface entry with the given index.
  * Does nothing if no such entry is found.
  */
 void
 sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu)
 {
 	struct sctp_ifn *target;
 
 	/* No interface pointer is available here, so the lookup is by index. */
 	target = sctp_find_ifn((void *)NULL, ifn_index);
 	if (target == NULL)
 		return;
 	target->ifn_mtu = mtu;
 }
 
 
 void
 sctp_free_ifa(struct sctp_ifa *sctp_ifap)
 {
 	if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifap->refcount)) {
 		/* We zero'd the count */
 		if (sctp_ifap->ifn_p) {
 			sctp_free_ifn(sctp_ifap->ifn_p);
 		}
 		SCTP_FREE(sctp_ifap, SCTP_M_IFA);
 		atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifas), 1);
 	}
 }
 
 /*
  * Unlink an interface entry from the ifn hash and from its VRF's list,
  * deregister the interface, and drop one reference on the entry.
  *
  * hold_addr_lock: 0 means the address write lock is taken here around the
  *                 unlinking; any other value means the caller holds it.
  *
  * Nothing is done when sctp_find_ifn() finds no entry for this entry's
  * pointer/index.  Note that this lookup runs before the lock is taken.
  */
 static void
 sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock)
 {
 	const int take_lock = (hold_addr_lock == 0);
 
 	if (sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index) == NULL) {
 		/* Not hashed, so there is nothing to unlink. */
 		return;
 	}
 
 	if (take_lock)
 		SCTP_IPI_ADDR_WLOCK();
 	LIST_REMOVE(sctp_ifnp, next_bucket);
 	LIST_REMOVE(sctp_ifnp, next_ifn);
 	SCTP_DEREGISTER_INTERFACE(sctp_ifnp->ifn_index,
 	    sctp_ifnp->registered_af);
 	if (take_lock)
 		SCTP_IPI_ADDR_WUNLOCK();
 
 	/* This may free the entry if it was the last reference. */
 	sctp_free_ifn(sctp_ifnp);
 }
 
 /*
  * Mark a local address as down: clear SCTP_ADDR_VALID and set
  * SCTP_ADDR_IFA_UNUSEABLE in its localifa_flags.
  *
  * vrf_id:    VRF to search.
  * addr:      address to look up.
  * if_name:   if non-NULL, must equal the owning interface's name.
  * ifn_index: consulted only when if_name is NULL; must equal the owning
  *            interface's index.
  *
  * The request is ignored (with a debug message) when the VRF or address is
  * not found, the address has no interface, or the interface does not match.
  * The address read lock is held for the whole operation.
  */
 void
 sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
     const char *if_name, uint32_t ifn_index)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifa *sctp_ifap = NULL;
 	struct sctp_ifn *owner;
 
 	SCTP_IPI_ADDR_RLOCK();
 
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
 		goto out;
 	}
 
 	sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
 	if (sctp_ifap == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
 		goto out;
 	}
 
 	owner = sctp_ifap->ifn_p;
 	if (owner == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
 		goto out;
 	}
 
 	if (if_name != NULL) {
 		/* Names must have equal length and equal contents. */
 		int len1, len2;
 
 		len1 = strlen(if_name);
 		len2 = strlen(owner->ifn_name);
 		if (len1 != len2) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n",
 			    len1, len2);
 			goto out;
 		}
 		if (strncmp(if_name, owner->ifn_name, len1) != 0) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
 			    owner->ifn_name,
 			    if_name);
 			goto out;
 		}
 	} else if (owner->ifn_index != ifn_index) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
 		    owner->ifn_index, ifn_index);
 		goto out;
 	}
 
 	sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID);
 	sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
 out:
 	SCTP_IPI_ADDR_RUNLOCK();
 }
 
 /*
  * Mark a local address as up: clear SCTP_ADDR_IFA_UNUSEABLE and set
  * SCTP_ADDR_VALID in its localifa_flags.
  *
  * vrf_id:    VRF to search.
  * addr:      address to look up.
  * if_name:   if non-NULL, must equal the owning interface's name.
  * ifn_index: consulted only when if_name is NULL; must equal the owning
  *            interface's index.
  *
  * The request is ignored (with a debug message) when the VRF or address is
  * not found, the address has no interface, or the interface does not match.
  * The address read lock is held for the whole operation.
  *
  * The debug messages below used to be verbatim copies of the ones in
  * sctp_mark_ifa_addr_down() ("can't mark unuseable", "down command"); they
  * now describe the "up" operation that is actually being performed.
  */
 void
 sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
     const char *if_name, uint32_t ifn_index)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifa *sctp_ifap = NULL;
 
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
 		goto out;
 
 	}
 	sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
 	if (sctp_ifap == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
 		goto out;
 	}
 	if (sctp_ifap->ifn_p == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark useable\n");
 		goto out;
 	}
 	if (if_name) {
 		/* Names must have equal length and equal contents. */
 		int len1, len2;
 
 		len1 = strlen(if_name);
 		len2 = strlen(sctp_ifap->ifn_p->ifn_name);
 		if (len1 != len2) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n",
 			    len1, len2);
 			goto out;
 		}
 		if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
 			    sctp_ifap->ifn_p->ifn_name,
 			    if_name);
 			goto out;
 		}
 	} else {
 		/* No name supplied: fall back to comparing interface indexes. */
 		if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d up command for ifn_index:%d - ignored\n",
 			    sctp_ifap->ifn_p->ifn_index, ifn_index);
 			goto out;
 		}
 	}
 
 	sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE);
 	sctp_ifap->localifa_flags |= SCTP_ADDR_VALID;
 out:
 	SCTP_IPI_ADDR_RUNLOCK();
 }
 
 /*-
  * Attach an address entry to an interface entry.
  * The address is linked at the head of the interface's address list, takes
  * a reference on the interface, and the per-family counters are updated
  * (anything that is not AF_INET is counted as v6).  When this is the
  * interface's first address the interface is registered for that family.
  * NOTE: ADDR write lock MUST be held.
  */
 static void
 sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap)
 {
 	const int family = sctp_ifap->address.sa.sa_family;
 
 	LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
 	sctp_ifap->ifn_p = sctp_ifnp;
 	atomic_add_int(&sctp_ifnp->refcount, 1);
 
 	/* update address counts */
 	sctp_ifnp->ifa_count++;
 	if (family != AF_INET)
 		sctp_ifnp->num_v6++;
 	else
 		sctp_ifnp->num_v4++;
 
 	/* Only the first address triggers registration. */
 	if (sctp_ifnp->ifa_count != 1)
 		return;
 	SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, family);
 	sctp_ifnp->registered_af = family;
 }
 
 /*-
  * Remove an ifa from its ifn.
  * If no more addresses exist, remove the ifn too. Otherwise, re-register
  * the interface based on the remaining address families left.
  * NOTE: ADDR write lock MUST be held.
  */
 static void
 sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap)
 {
 	uint32_t ifn_index;
 
 	LIST_REMOVE(sctp_ifap, next_ifa);
 	if (sctp_ifap->ifn_p) {
 		/* update address counts */
 		sctp_ifap->ifn_p->ifa_count--;
 		if (sctp_ifap->address.sa.sa_family == AF_INET6)
 			sctp_ifap->ifn_p->num_v6--;
 		else if (sctp_ifap->address.sa.sa_family == AF_INET)
 			sctp_ifap->ifn_p->num_v4--;
 
 		ifn_index = sctp_ifap->ifn_p->ifn_index;
 		if (LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) {
 			/* remove the ifn, possibly freeing it */
 			sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED);
 		} else {
 			/* re-register address family type, if needed */
 			if ((sctp_ifap->ifn_p->num_v6 == 0) &&
 			    (sctp_ifap->ifn_p->registered_af == AF_INET6)) {
 				SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET6);
 				SCTP_REGISTER_INTERFACE(ifn_index, AF_INET);
 				sctp_ifap->ifn_p->registered_af = AF_INET;
 			} else if ((sctp_ifap->ifn_p->num_v4 == 0) &&
 			    (sctp_ifap->ifn_p->registered_af == AF_INET)) {
 				SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET);
 				SCTP_REGISTER_INTERFACE(ifn_index, AF_INET6);
 				sctp_ifap->ifn_p->registered_af = AF_INET6;
 			}
 			/* free the ifn refcount */
 			sctp_free_ifn(sctp_ifap->ifn_p);
 		}
 		sctp_ifap->ifn_p = NULL;
 	}
 }
 
 /*
  * Add a local address to a VRF, creating the VRF and the interface entry
  * on demand.
  *
  * vrf_id:      VRF the address belongs to (used only when the interface is
  *              not already known; a known interface supplies its own VRF).
  * ifn:         opaque interface pointer, stored in the new entry's ifn_p.
  * ifn_index:   interface index used for lookup and hashing.
  * ifn_type:    stored in the new interface entry.
  * if_name:     interface name, or NULL (the entry is then named "unknown").
  * ifa:         opaque address pointer, stored in the new address entry.
  * addr:        the address; addr->sa_len bytes are copied.
  * ifa_flags:   stored in the new address entry's flags.
  * dynamic_add: non-zero queues an SCTP_ADD_IP_ADDRESS work item and starts
  *              the address work-queue timer, leaving SCTP_ADDR_DEFER_USE
  *              set; zero clears SCTP_ADDR_DEFER_USE immediately.
  *
  * Returns the (new or already existing) address entry, or NULL when an
  * allocation fails.  The address write lock is taken and released here and
  * is dropped around each allocation.
  */
 struct sctp_ifa *
 sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
     uint32_t ifn_type, const char *if_name, void *ifa,
     struct sockaddr *addr, uint32_t ifa_flags,
     int dynamic_add)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *sctp_ifnp = NULL;
 	struct sctp_ifa *sctp_ifap = NULL;
 	struct sctp_ifalist *hash_addr_head;
 	struct sctp_ifnlist *hash_ifn_head;
 	uint32_t hash_of_addr;
 	/* 0: ifn already existed; 1: ifn created here; later AF_* to register. */
 	int new_ifn_af = 0;
 
 #ifdef SCTP_DEBUG
 	SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id);
 	SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
 #endif
 	SCTP_IPI_ADDR_WLOCK();
 	sctp_ifnp = sctp_find_ifn(ifn, ifn_index);
 	if (sctp_ifnp) {
 		vrf = sctp_ifnp->vrf;
 	} else {
 		vrf = sctp_find_vrf(vrf_id);
 		if (vrf == NULL) {
 			vrf = sctp_allocate_vrf(vrf_id);
 			if (vrf == NULL) {
 				SCTP_IPI_ADDR_WUNLOCK();
 				return (NULL);
 			}
 		}
 	}
 	if (sctp_ifnp == NULL) {
 		/*
 		 * build one and add it, can't hold lock until after malloc
 		 * done though.
 		 */
 		SCTP_IPI_ADDR_WUNLOCK();
 		SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *,
 		    sizeof(struct sctp_ifn), SCTP_M_IFN);
 		if (sctp_ifnp == NULL) {
 #ifdef INVARIANTS
 			panic("No memory for IFN");
 #endif
 			return (NULL);
 		}
 		memset(sctp_ifnp, 0, sizeof(struct sctp_ifn));
 		sctp_ifnp->ifn_index = ifn_index;
 		sctp_ifnp->ifn_p = ifn;
 		sctp_ifnp->ifn_type = ifn_type;
 		sctp_ifnp->refcount = 0;
 		sctp_ifnp->vrf = vrf;
 		atomic_add_int(&vrf->refcount, 1);
 		sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family);
 		if (if_name != NULL) {
 			/*
 			 * Copy only the bytes the caller's string really
 			 * has, and at most SCTP_IFNAMSIZ - 1 of them: the
 			 * old fixed-size copy read SCTP_IFNAMSIZ bytes no
 			 * matter how short if_name was and could leave
 			 * ifn_name unterminated.  The memset() above
 			 * supplies the terminating NUL.
 			 */
 			memcpy(sctp_ifnp->ifn_name, if_name,
 			    min(strlen(if_name), SCTP_IFNAMSIZ - 1));
 		} else {
 			memcpy(sctp_ifnp->ifn_name, "unknown", min(7, SCTP_IFNAMSIZ));
 		}
 		hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))];
 		LIST_INIT(&sctp_ifnp->ifalist);
 		SCTP_IPI_ADDR_WLOCK();
 		LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket);
 		LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn);
 		atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifns), 1);
 		new_ifn_af = 1;
 	}
 	sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
 	if (sctp_ifap) {
 		/* Hmm, it already exists? */
 		if ((sctp_ifap->ifn_p) &&
 		    (sctp_ifap->ifn_p->ifn_index == ifn_index)) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n",
 			    sctp_ifap->ifn_p->ifn_name, ifn_index,
 			    sctp_ifap);
 			if (new_ifn_af) {
 				/* Remove the created one that we don't want */
 				sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED);
 			}
 			if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) {
 				/* easy to solve, just switch back to active */
 				SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n");
 				sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
 				sctp_ifap->ifn_p = sctp_ifnp;
 				atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
 			}
 	exit_stage_left:
 			/* Common exit for every "address already known" path. */
 			SCTP_IPI_ADDR_WUNLOCK();
 			return (sctp_ifap);
 		} else {
 			if (sctp_ifap->ifn_p) {
 				/*
 				 * The last IFN gets the address, remove the
 				 * old one
 				 */
 				SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n",
 				    sctp_ifap, sctp_ifap->ifn_p->ifn_name,
 				    sctp_ifap->ifn_p->ifn_index, if_name,
 				    ifn_index);
 				/* remove the address from the old ifn */
 				sctp_remove_ifa_from_ifn(sctp_ifap);
 				/* move the address over to the new ifn */
 				sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
 				goto exit_stage_left;
 			} else {
 				/* repair ifnp which was NULL ? */
 				sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
 				SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n",
 				    sctp_ifnp, sctp_ifap);
 				sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
 			}
 			goto exit_stage_left;
 		}
 	}
 	/* Unknown address: build a new entry, again allocating unlocked. */
 	SCTP_IPI_ADDR_WUNLOCK();
 	SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), SCTP_M_IFA);
 	if (sctp_ifap == NULL) {
 #ifdef INVARIANTS
 		panic("No memory for IFA");
 #endif
 		return (NULL);
 	}
 	memset(sctp_ifap, 0, sizeof(struct sctp_ifa));
 	sctp_ifap->ifn_p = sctp_ifnp;
 	atomic_add_int(&sctp_ifnp->refcount, 1);
 	sctp_ifap->vrf_id = vrf_id;
 	sctp_ifap->ifa = ifa;
 	memcpy(&sctp_ifap->address, addr, addr->sa_len);
 	sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE;
 	sctp_ifap->flags = ifa_flags;
 	/* Set scope */
 	switch (sctp_ifap->address.sa.sa_family) {
 	case AF_INET:
 		{
 			struct sockaddr_in *sin;
 
 			sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
 			if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
 			    (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
 				sctp_ifap->src_is_loop = 1;
 			}
 			if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 				sctp_ifap->src_is_priv = 1;
 			}
 			sctp_ifnp->num_v4++;
 			if (new_ifn_af)
 				new_ifn_af = AF_INET;
 			break;
 		}
 #ifdef INET6
 	case AF_INET6:
 		{
 			/* ok to use deprecated addresses? */
 			struct sockaddr_in6 *sin6;
 
 			sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
 			if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
 			    (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
 				sctp_ifap->src_is_loop = 1;
 			}
 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 				sctp_ifap->src_is_priv = 1;
 			}
 			sctp_ifnp->num_v6++;
 			if (new_ifn_af)
 				new_ifn_af = AF_INET6;
 			break;
 		}
 #endif
 	default:
 		/* Unknown family: do not register the interface below. */
 		new_ifn_af = 0;
 		break;
 	}
 	hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa);
 
 	/* Neither private nor loopback means global scope. */
 	if ((sctp_ifap->src_is_priv == 0) &&
 	    (sctp_ifap->src_is_loop == 0)) {
 		sctp_ifap->src_is_glob = 1;
 	}
 	SCTP_IPI_ADDR_WLOCK();
 	hash_addr_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
 	LIST_INSERT_HEAD(hash_addr_head, sctp_ifap, next_bucket);
 	sctp_ifap->refcount = 1;
 	LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
 	sctp_ifnp->ifa_count++;
 	vrf->total_ifa_count++;
 	atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifas), 1);
 	if (new_ifn_af) {
 		SCTP_REGISTER_INTERFACE(ifn_index, new_ifn_af);
 		sctp_ifnp->registered_af = new_ifn_af;
 	}
 	SCTP_IPI_ADDR_WUNLOCK();
 	if (dynamic_add) {
 		/*
 		 * Bump up the refcount so that when the timer completes it
 		 * will drop back down.
 		 */
 		struct sctp_laddr *wi;
 
 		atomic_add_int(&sctp_ifap->refcount, 1);
 		wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
 		if (wi == NULL) {
 			/*
 			 * Gak, what can we do? We have lost an address
 			 * change can you say HOSED?
 			 */
 			SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
 			/* Oops, must decrement the count */
 			sctp_del_addr_from_vrf(vrf_id, addr, ifn_index,
 			    if_name);
 			return (NULL);
 		}
 		SCTP_INCR_LADDR_COUNT();
 		bzero(wi, sizeof(*wi));
 		(void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
 		wi->ifa = sctp_ifap;
 		wi->action = SCTP_ADD_IP_ADDRESS;
 
 		SCTP_WQ_ADDR_LOCK();
 		LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
 		SCTP_WQ_ADDR_UNLOCK();
 
 		sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
 		    (struct sctp_inpcb *)NULL,
 		    (struct sctp_tcb *)NULL,
 		    (struct sctp_nets *)NULL);
 	} else {
 		/* it's ready for use */
 		sctp_ifap->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
 	}
 	return (sctp_ifap);
 }
 
 /*
  * Remove a local address from a VRF and queue an SCTP_DEL_IP_ADDRESS work
  * item for it.
  *
  * vrf_id:    VRF to search.
  * addr:      address to delete.
  * ifn_index: index of the interface the caller believes owns the address.
  * if_name:   name of that interface, or NULL.
  *
  * When the address has an interface, the delete is honoured only if
  * if_name matches the interface's name or, failing that, ifn_index matches
  * its index; otherwise the request is ignored.  On a valid delete the
  * address is unhashed and detached from its interface under the address
  * write lock, then (with the lock released) a work item is queued and the
  * address work-queue timer is started.  If no work item can be obtained,
  * one reference on the address is dropped instead.
  */
 void
 sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr,
     uint32_t ifn_index, const char *if_name)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifa *sctp_ifap = NULL;
 
 	SCTP_IPI_ADDR_WLOCK();
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
 		goto out_now;
 	}
 #ifdef SCTP_DEBUG
 	SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id);
 	SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
 #endif
 	sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
 	if (sctp_ifap) {
 		/* Validate the delete */
 		if (sctp_ifap->ifn_p) {
 			int valid = 0;
 
 			/*-
 			 * The name has priority over the ifn_index
 			 * if its given. We do this especially for
 			 * panda who might recycle indexes fast.
 			 */
 			if (if_name) {
 				int len1, len2;
 
 				/* Lengths are capped at SCTP_IFNAMSIZ before comparing. */
 				len1 = min(SCTP_IFNAMSIZ, strlen(if_name));
 				len2 = min(SCTP_IFNAMSIZ, strlen(sctp_ifap->ifn_p->ifn_name));
 				if (len1 && len2 && (len1 == len2)) {
 					/* we can compare them */
 					if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) == 0) {
 						/*
 						 * They match its a correct
 						 * delete
 						 */
 						valid = 1;
 					}
 				}
 			}
 			if (!valid) {
 				/* last ditch check ifn_index */
 				if (ifn_index == sctp_ifap->ifn_p->ifn_index) {
 					valid = 1;
 				}
 			}
 			if (!valid) {
 				/* First message: what was requested; second: the actual owner. */
 				SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n",
 				    ifn_index, ((if_name == NULL) ? "NULL" : if_name));
 				SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n",
 				    sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name);
 				/*
 				 * Return directly rather than via out_now:
 				 * sctp_ifap is non-NULL there and a delete
 				 * work item would be queued.
 				 */
 				SCTP_IPI_ADDR_WUNLOCK();
 				return;
 			}
 		}
 		SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap);
 		/*
 		 * This keeps only the SCTP_ADDR_VALID bit (it is an AND with
 		 * the flag itself, not with its complement) and then adds
 		 * SCTP_BEING_DELETED.
 		 * NOTE(review): confirm that retaining VALID is intended.
 		 */
 		sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
 		sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
 		vrf->total_ifa_count--;
 		LIST_REMOVE(sctp_ifap, next_bucket);
 		sctp_remove_ifa_from_ifn(sctp_ifap);
 	}
 #ifdef SCTP_DEBUG
 	else {
 		SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:",
 		    ifn_index);
 		SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
 	}
 #endif
 
 out_now:
 	SCTP_IPI_ADDR_WUNLOCK();
 	/* sctp_ifap is non-NULL only if the address was found and unlinked. */
 	if (sctp_ifap) {
 		struct sctp_laddr *wi;
 
 		wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
 		if (wi == NULL) {
 			/*
 			 * Gak, what can we do? We have lost an address
 			 * change can you say HOSED?
 			 */
 			SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
 
 			/* Oops, must decrement the count */
 			sctp_free_ifa(sctp_ifap);
 			return;
 		}
 		SCTP_INCR_LADDR_COUNT();
 		bzero(wi, sizeof(*wi));
 		(void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
 		wi->ifa = sctp_ifap;
 		wi->action = SCTP_DEL_IP_ADDRESS;
 		SCTP_WQ_ADDR_LOCK();
 		/*
 		 * Should this really be a tailq? As it is we will process
 		 * the newest first :-0
 		 */
 		LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
 		SCTP_WQ_ADDR_UNLOCK();
 
 		sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
 		    (struct sctp_inpcb *)NULL,
 		    (struct sctp_tcb *)NULL,
 		    (struct sctp_nets *)NULL);
 	}
 	return;
 }
 
 
 /*
  * TCP-model lookup: search the TCP endpoint hash for an endpoint bound to
  * the local port of 'to' whose first association has the remote port and
  * remote address of 'from'.
  *
  * Returns NULL when either address is NULL, when the two address families
  * differ or are neither AF_INET nor AF_INET6, or when nothing matches.
  * On a match *inp_p is set to the owning endpoint, *netp (if netp is
  * non-NULL) to the matching net, and the stcb is returned with its TCB lock
  * still held; the endpoint read lock taken during the search is released.
  */
 static struct sctp_tcb *
 sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
     struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id)
 {
 	/**** ASSUMES THE CALLER holds the INP_INFO_RLOCK */
 	/*
 	 * If we support the TCP model, then we must now dig through to see
 	 * if we can find our endpoint in the list of tcp ep's.
 	 */
 	uint16_t lport, rport;
 	struct sctppcbhead *ephead;
 	struct sctp_inpcb *inp;
 	struct sctp_laddr *laddr;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 
 	if ((to == NULL) || (from == NULL)) {
 		return (NULL);
 	}
 	/* Both addresses must be of the same, supported family. */
 	if (to->sa_family == AF_INET && from->sa_family == AF_INET) {
 		lport = ((struct sockaddr_in *)to)->sin_port;
 		rport = ((struct sockaddr_in *)from)->sin_port;
 	} else if (to->sa_family == AF_INET6 && from->sa_family == AF_INET6) {
 		lport = ((struct sockaddr_in6 *)to)->sin6_port;
 		rport = ((struct sockaddr_in6 *)from)->sin6_port;
 	} else {
 		return NULL;
 	}
 	/* The TCP-pool bucket is selected from the OR of both ports. */
 	ephead = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))];
 	/*
 	 * Ok now for each of the guys in this bucket we must look and see:
 	 * - Does the remote port match. - Do its single association's
 	 * addresses match this address (to). If so we update *inp_p to point
 	 * to this ep and return the tcb from it.
 	 */
 	LIST_FOREACH(inp, ephead, sctp_hash) {
 		SCTP_INP_RLOCK(inp);
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		if (lport != inp->sctp_lport) {
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		if (inp->def_vrf_id != vrf_id) {
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		/* check to see if the ep has one of the addresses */
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 			/* We are NOT bound all, so look further */
 			int match = 0;
 
 			LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 
 				if (laddr->ifa == NULL) {
 					SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
 					continue;
 				}
 				if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
 					SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n");
 					continue;
 				}
 				if (laddr->ifa->address.sa.sa_family ==
 				    to->sa_family) {
 					/* see if it matches */
 					struct sockaddr_in *intf_addr, *sin;
 
 					intf_addr = &laddr->ifa->address.sin;
 					sin = (struct sockaddr_in *)to;
 					/*
 					 * 'from' and 'to' were verified above
 					 * to share a family, so testing
 					 * from->sa_family selects the right
 					 * comparison for 'to'.
 					 */
 					if (from->sa_family == AF_INET) {
 						if (sin->sin_addr.s_addr ==
 						    intf_addr->sin_addr.s_addr) {
 							match = 1;
 							break;
 						}
 					}
 #ifdef INET6
 					if (from->sa_family == AF_INET6) {
 						struct sockaddr_in6 *intf_addr6;
 						struct sockaddr_in6 *sin6;
 
 						sin6 = (struct sockaddr_in6 *)
 						    to;
 						intf_addr6 = &laddr->ifa->address.sin6;
 
 						if (SCTP6_ARE_ADDR_EQUAL(sin6,
 						    intf_addr6)) {
 							match = 1;
 							break;
 						}
 					}
 #endif
 				}
 			}
 			if (match == 0) {
 				/* This endpoint does not have this address */
 				SCTP_INP_RUNLOCK(inp);
 				continue;
 			}
 		}
 		/*
 		 * Ok if we hit here the ep has the address, does it hold
 		 * the tcb?
 		 */
 
 		/* Only the first association on the endpoint is examined. */
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb == NULL) {
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		SCTP_TCB_LOCK(stcb);
 		if (stcb->rport != rport) {
 			/* remote port does not match. */
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		/* Does this TCB have a matching address? */
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 
 			if (net->ro._l_addr.sa.sa_family != from->sa_family) {
 				/* not the same family, can't be a match */
 				continue;
 			}
 			switch (from->sa_family) {
 			case AF_INET:
 				{
 					struct sockaddr_in *sin, *rsin;
 
 					sin = (struct sockaddr_in *)&net->ro._l_addr;
 					rsin = (struct sockaddr_in *)from;
 					if (sin->sin_addr.s_addr ==
 					    rsin->sin_addr.s_addr) {
 						/* found it */
 						if (netp != NULL) {
 							*netp = net;
 						}
 						/*
 						 * Update the endpoint
 						 * pointer
 						 */
 						*inp_p = inp;
 						/*
 						 * The TCB lock is kept; only
 						 * the INP read lock is
 						 * dropped.
 						 */
 						SCTP_INP_RUNLOCK(inp);
 						return (stcb);
 					}
 					break;
 				}
 #ifdef INET6
 			case AF_INET6:
 				{
 					struct sockaddr_in6 *sin6, *rsin6;
 
 					sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 					rsin6 = (struct sockaddr_in6 *)from;
 					if (SCTP6_ARE_ADDR_EQUAL(sin6,
 					    rsin6)) {
 						/* found it */
 						if (netp != NULL) {
 							*netp = net;
 						}
 						/*
 						 * Update the endpoint
 						 * pointer
 						 */
 						*inp_p = inp;
 						/*
 						 * The TCB lock is kept; only
 						 * the INP read lock is
 						 * dropped.
 						 */
 						SCTP_INP_RUNLOCK(inp);
 						return (stcb);
 					}
 					break;
 				}
 #endif
 			default:
 				/* TSNH */
 				break;
 			}
 		}
 		/* No net matched 'from' on this association; try the next ep. */
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_INP_RUNLOCK(inp);
 	}
 	return (NULL);
 }
 
 /*
  * Decide whether the local address 'to' is usable by association 'stcb'.
  *
  * For a bound-all endpoint every address of every interface in the
  * association's VRF is considered, subject to the association's scoping
  * flags (loopback, IPv4 private, IPv6 link-local and site-local) and to
  * sctp_is_addr_restricted().  For a bound-specific endpoint only the
  * endpoint's own address list is consulted.
  *
  * Only addresses of the same family as 'to' are compared, so 'to' is never
  * reinterpreted as a sockaddr of a different family.
  *
  * Returns 1 when a matching address is found, 0 otherwise (including when
  * the VRF cannot be found).  The IPI address read lock is taken and released
  * internally.
  */
 static int
 sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
 {
 	int loopback_scope, ipv4_local_scope, local_scope, site_scope;
 	int ipv4_addr_legal, ipv6_addr_legal;
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
 
 	loopback_scope = stcb->asoc.loopback_scope;
 	ipv4_local_scope = stcb->asoc.ipv4_local_scope;
 	local_scope = stcb->asoc.local_scope;
 	site_scope = stcb->asoc.site_scope;
 	ipv4_addr_legal = ipv6_addr_legal = 0;
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 		ipv6_addr_legal = 1;
 		if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
 			ipv4_addr_legal = 1;
 		}
 	} else {
 		ipv4_addr_legal = 1;
 	}
 
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(stcb->asoc.vrf_id);
 	if (vrf == NULL) {
 		/* no vrf, no addresses */
 		SCTP_IPI_ADDR_RUNLOCK();
 		return (0);
 	}
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 			if ((loopback_scope == 0) &&
 			    SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 				continue;
 			}
 			LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 				if (sctp_is_addr_restricted(stcb, sctp_ifa))
 					continue;
 				/*
 				 * Different family can never match, and
 				 * casting 'to' to the wrong sockaddr type
 				 * below would compare unrelated bytes.
 				 */
 				if (sctp_ifa->address.sa.sa_family != to->sa_family) {
 					continue;
 				}
 				switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 				case AF_INET:
 					if (ipv4_addr_legal) {
 						struct sockaddr_in *sin,
 						           *rsin;
 
 						sin = &sctp_ifa->address.sin;
 						rsin = (struct sockaddr_in *)to;
 						if ((ipv4_local_scope == 0) &&
 						    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
 							continue;
 						}
 						if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
 							SCTP_IPI_ADDR_RUNLOCK();
 							return (1);
 						}
 					}
 					break;
 #endif
 #ifdef INET6
 				case AF_INET6:
 					if (ipv6_addr_legal) {
 						struct sockaddr_in6 *sin6,
 						            *rsin6;
 
 						sin6 = &sctp_ifa->address.sin6;
 						rsin6 = (struct sockaddr_in6 *)to;
 						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 							if (local_scope == 0)
 								continue;
 							if (sin6->sin6_scope_id == 0) {
 								if (sa6_recoverscope(sin6) != 0)
 									continue;
 							}
 						}
 						if ((site_scope == 0) &&
 						    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
 							continue;
 						}
 						if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
 							SCTP_IPI_ADDR_RUNLOCK();
 							return (1);
 						}
 					}
 					break;
 #endif
 				default:
 					/* TSNH */
 					break;
 				}
 			}
 		}
 	} else {
 		struct sctp_laddr *laddr;
 
 		LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
 			/*
 			 * The other walkers of sctp_addr_list tolerate an
 			 * entry without an ifa; do the same here instead of
 			 * dereferencing a NULL pointer.
 			 */
 			if (laddr->ifa == NULL) {
 				SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
 				continue;
 			}
 			if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
 				continue;
 			}
 			if (laddr->ifa->address.sa.sa_family != to->sa_family) {
 				continue;
 			}
 			switch (to->sa_family) {
 #ifdef INET
 			case AF_INET:
 				{
 					struct sockaddr_in *sin, *rsin;
 
 					sin = (struct sockaddr_in *)&laddr->ifa->address.sin;
 					rsin = (struct sockaddr_in *)to;
 					if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
 						SCTP_IPI_ADDR_RUNLOCK();
 						return (1);
 					}
 					break;
 				}
 #endif
 #ifdef INET6
 			case AF_INET6:
 				{
 					struct sockaddr_in6 *sin6, *rsin6;
 
 					sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
 					rsin6 = (struct sockaddr_in6 *)to;
 					if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
 						SCTP_IPI_ADDR_RUNLOCK();
 						return (1);
 					}
 					break;
 				}
 
 #endif
 			default:
 				/* TSNH */
 				break;
 			}
 
 		}
 	}
 	SCTP_IPI_ADDR_RUNLOCK();
 	return (0);
 }
 
 /*
  * Look up the association on endpoint *inp_p whose peer is 'remote' and,
  * when 'local' is non-NULL, that owns the local address 'local'.
  *
  * Rules for use:
  *
  * 1) If NULL is returned the caller must decrement any INP ref cnt it
  *    holds; the not-found path does not do so.
  * 2) If an stcb is found it is returned locked.  A supplied locked_tcb is
  *    unlocked (with its asoc.refcnt raised) for the duration of the search
  *    and is locked again before returning, so both are locked on return.
  * 3) The INP ref cnt decrement on a successful lookup happens ONLY if
  *    locked_tcb == NULL.
  *
  * An address family other than AF_INET/AF_INET6 yields NULL before any
  * lock is touched.
  */
 
 struct sctp_tcb *
 sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
     struct sctp_nets **netp, struct sockaddr *local, struct sctp_tcb *locked_tcb)
 {
 	struct sctpasochead *head;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb = NULL;
 	struct sctp_nets *net;
 	uint16_t rport;
 
 	inp = *inp_p;
 	if (remote->sa_family == AF_INET) {
 		rport = (((struct sockaddr_in *)remote)->sin_port);
 	} else if (remote->sa_family == AF_INET6) {
 		rport = (((struct sockaddr_in6 *)remote)->sin6_port);
 	} else {
 		return (NULL);
 	}
 	if (locked_tcb) {
 		/*
 		 * UN-lock so we can do proper locking here this occurs when
 		 * called from load_addresses_from_init.  The refcnt taken
 		 * here keeps a claim on locked_tcb while it is unlocked.
 		 */
 		atomic_add_int(&locked_tcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(locked_tcb);
 	}
 	SCTP_INP_INFO_RLOCK();
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
 		/*-
 		 * Now either this guy is our listener or it's the
 		 * connector. If it is the one that issued the connect, then
 		 * it's only chance is to be the first TCB in the list. If
 		 * it is the acceptor, then do the special_lookup to hash
 		 * and find the real inp.
 		 */
 		if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) {
 			/* to is peer addr, from is my addr */
 			stcb = sctp_tcb_special_locate(inp_p, remote, local,
 			    netp, inp->def_vrf_id);
 			if ((stcb != NULL) && (locked_tcb == NULL)) {
 				/* we have a locked tcb, lower refcount */
 				SCTP_INP_DECR_REF(inp);
 			}
 			/*
 			 * NOTE(review): when locked_tcb == stcb the refcnt
 			 * raised above is not lowered on this path, unlike
 			 * the match paths further down - confirm intended.
 			 */
 			if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
 				SCTP_INP_RLOCK(locked_tcb->sctp_ep);
 				SCTP_TCB_LOCK(locked_tcb);
 				atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
 				SCTP_INP_RUNLOCK(locked_tcb->sctp_ep);
 			}
 			SCTP_INP_INFO_RUNLOCK();
 			return (stcb);
 		} else {
 			/* Connector side: only the first TCB is a candidate. */
 			SCTP_INP_WLOCK(inp);
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 				goto null_return;
 			}
 			stcb = LIST_FIRST(&inp->sctp_asoc_list);
 			if (stcb == NULL) {
 				goto null_return;
 			}
 			SCTP_TCB_LOCK(stcb);
 
 			if (stcb->rport != rport) {
 				/* remote port does not match. */
 				SCTP_TCB_UNLOCK(stcb);
 				goto null_return;
 			}
 			if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				SCTP_TCB_UNLOCK(stcb);
 				goto null_return;
 			}
 			if (local && !sctp_does_stcb_own_this_addr(stcb, local)) {
 				SCTP_TCB_UNLOCK(stcb);
 				goto null_return;
 			}
 			/* now look at the list of remote addresses */
 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 #ifdef INVARIANTS
 				if (net == (TAILQ_NEXT(net, sctp_next))) {
 					panic("Corrupt net list");
 				}
 #endif
 				if (net->ro._l_addr.sa.sa_family !=
 				    remote->sa_family) {
 					/* not the same family */
 					continue;
 				}
 				switch (remote->sa_family) {
 				case AF_INET:
 					{
 						struct sockaddr_in *sin,
 						           *rsin;
 
 						sin = (struct sockaddr_in *)
 						    &net->ro._l_addr;
 						rsin = (struct sockaddr_in *)remote;
 						if (sin->sin_addr.s_addr ==
 						    rsin->sin_addr.s_addr) {
 							/* found it */
 							if (netp != NULL) {
 								*netp = net;
 							}
 							/*
 							 * Rule 3, or re-lock
 							 * the caller's tcb
 							 * unless it is the
 							 * one just found
 							 * (already locked).
 							 */
 							if (locked_tcb == NULL) {
 								SCTP_INP_DECR_REF(inp);
 							} else if (locked_tcb != stcb) {
 								SCTP_TCB_LOCK(locked_tcb);
 							}
 							if (locked_tcb) {
 								atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
 							}
 							SCTP_INP_WUNLOCK(inp);
 							SCTP_INP_INFO_RUNLOCK();
 							return (stcb);
 						}
 						break;
 					}
 #ifdef INET6
 				case AF_INET6:
 					{
 						struct sockaddr_in6 *sin6,
 						            *rsin6;
 
 						sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 						rsin6 = (struct sockaddr_in6 *)remote;
 						if (SCTP6_ARE_ADDR_EQUAL(sin6,
 						    rsin6)) {
 							/* found it */
 							if (netp != NULL) {
 								*netp = net;
 							}
 							if (locked_tcb == NULL) {
 								SCTP_INP_DECR_REF(inp);
 							} else if (locked_tcb != stcb) {
 								SCTP_TCB_LOCK(locked_tcb);
 							}
 							if (locked_tcb) {
 								atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
 							}
 							SCTP_INP_WUNLOCK(inp);
 							SCTP_INP_INFO_RUNLOCK();
 							return (stcb);
 						}
 						break;
 					}
 #endif
 				default:
 					/* TSNH */
 					break;
 				}
 			}
 			/* No remote address matched; fall into null_return. */
 			SCTP_TCB_UNLOCK(stcb);
 		}
 	} else {
 		/* UDP model: walk the per-endpoint TCB hash bucket for rport. */
 		SCTP_INP_WLOCK(inp);
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 			goto null_return;
 		}
 		head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
 		    inp->sctp_hashmark)];
 		if (head == NULL) {
 			goto null_return;
 		}
 		LIST_FOREACH(stcb, head, sctp_tcbhash) {
 			if (stcb->rport != rport) {
 				/* remote port does not match */
 				continue;
 			}
 			SCTP_TCB_LOCK(stcb);
 			if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				SCTP_TCB_UNLOCK(stcb);
 				continue;
 			}
 			if (local && !sctp_does_stcb_own_this_addr(stcb, local)) {
 				SCTP_TCB_UNLOCK(stcb);
 				continue;
 			}
 			/* now look at the list of remote addresses */
 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 #ifdef INVARIANTS
 				if (net == (TAILQ_NEXT(net, sctp_next))) {
 					panic("Corrupt net list");
 				}
 #endif
 				if (net->ro._l_addr.sa.sa_family !=
 				    remote->sa_family) {
 					/* not the same family */
 					continue;
 				}
 				switch (remote->sa_family) {
 				case AF_INET:
 					{
 						struct sockaddr_in *sin,
 						           *rsin;
 
 						sin = (struct sockaddr_in *)
 						    &net->ro._l_addr;
 						rsin = (struct sockaddr_in *)remote;
 						if (sin->sin_addr.s_addr ==
 						    rsin->sin_addr.s_addr) {
 							/* found it */
 							if (netp != NULL) {
 								*netp = net;
 							}
 							if (locked_tcb == NULL) {
 								SCTP_INP_DECR_REF(inp);
 							} else if (locked_tcb != stcb) {
 								SCTP_TCB_LOCK(locked_tcb);
 							}
 							if (locked_tcb) {
 								atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
 							}
 							SCTP_INP_WUNLOCK(inp);
 							SCTP_INP_INFO_RUNLOCK();
 							return (stcb);
 						}
 						break;
 					}
 #ifdef INET6
 				case AF_INET6:
 					{
 						struct sockaddr_in6 *sin6,
 						            *rsin6;
 
 						sin6 = (struct sockaddr_in6 *)
 						    &net->ro._l_addr;
 						rsin6 = (struct sockaddr_in6 *)remote;
 						if (SCTP6_ARE_ADDR_EQUAL(sin6,
 						    rsin6)) {
 							/* found it */
 							if (netp != NULL) {
 								*netp = net;
 							}
 							if (locked_tcb == NULL) {
 								SCTP_INP_DECR_REF(inp);
 							} else if (locked_tcb != stcb) {
 								SCTP_TCB_LOCK(locked_tcb);
 							}
 							if (locked_tcb) {
 								atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
 							}
 							SCTP_INP_WUNLOCK(inp);
 							SCTP_INP_INFO_RUNLOCK();
 							return (stcb);
 						}
 						break;
 					}
 #endif
 				default:
 					/* TSNH */
 					break;
 				}
 			}
 			SCTP_TCB_UNLOCK(stcb);
 		}
 	}
 null_return:
 	/*
 	 * clean up for returning null: give the caller back its locked tcb
 	 * and release the INP write lock and the INP_INFO read lock.
 	 */
 	if (locked_tcb) {
 		SCTP_TCB_LOCK(locked_tcb);
 		atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
 	}
 	SCTP_INP_WUNLOCK(inp);
 	SCTP_INP_INFO_RUNLOCK();
 	/* not found */
 	return (NULL);
 }
 
 /*
  * Find the association of endpoint 'inp' that carries the association id
  * handed out in the COMM_UP notification.  This variant takes no endpoint
  * lock itself.
  *
  * Returns NULL if inp is NULL, if the endpoint is flagged
  * SCTP_PCB_FLAGS_SOCKET_ALLGONE, or if no live association with that id
  * belongs to inp.  When want_lock is non-zero the returned stcb has its TCB
  * lock held.
  */
 
 struct sctp_tcb *
 sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
 {
 	struct sctpasochead *bucket;
 	struct sctp_tcb *cand;
 	uint32_t wanted;
 
 	if (inp == NULL) {
 		SCTP_PRINTF("TSNH ep_associd\n");
 		return (NULL);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 		SCTP_PRINTF("TSNH ep_associd0\n");
 		return (NULL);
 	}
 	/* Hash the id into the endpoint's association-id table. */
 	wanted = (uint32_t) asoc_id;
 	bucket = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(wanted, inp->hashasocidmark)];
 	if (bucket == NULL) {
 		/* invalid id TSNH */
 		SCTP_PRINTF("TSNH ep_associd1\n");
 		return (NULL);
 	}
 	LIST_FOREACH(cand, bucket, sctp_tcbasocidhash) {
 		if (cand->asoc.assoc_id != wanted) {
 			continue;
 		}
 		if (cand->sctp_ep != inp) {
 			/*
 			 * Same id is active on another endpoint (id
 			 * collision ??).
 			 */
 			SCTP_PRINTF("TSNH ep_associd2\n");
 			continue;
 		}
 		if (cand->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			/* on its way out, not a usable answer */
 			continue;
 		}
 		if (want_lock) {
 			SCTP_TCB_LOCK(cand);
 		}
 		return (cand);
 	}
 	return (NULL);
 }
 
 
 /*
  * Locking wrapper around sctp_findasoc_ep_asocid_locked(): performs the
  * lookup with the endpoint read lock held and releases it before returning.
  *
  * A NULL inp is rejected here, before the lock macro would dereference it,
  * with the same diagnostic the unlocked helper prints for that case.
  * When want_lock is non-zero a returned stcb has its TCB lock held.
  */
 struct sctp_tcb *
 sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
 {
 	struct sctp_tcb *stcb;
 
 	if (inp == NULL) {
 		SCTP_PRINTF("TSNH ep_associd\n");
 		return (NULL);
 	}
 	SCTP_INP_RLOCK(inp);
 	stcb = sctp_findasoc_ep_asocid_locked(inp, asoc_id, want_lock);
 	SCTP_INP_RUNLOCK(inp);
 	return (stcb);
 }
 
 
 /*
  * Search one endpoint hash bucket for an endpoint that could receive
  * traffic addressed to 'nam' on port 'lport' in VRF 'vrf_id'.
  *
  * Pass 1 accepts a bound-all endpoint on that port (rejecting IPv4 on a
  * v6-only socket and IPv6 on a socket not bound for v6).  Pass 2 is skipped
  * for an unspecified address; otherwise it accepts a bound-specific
  * endpoint on that port that lists 'nam' among its local addresses.
  *
  * Each candidate is examined under its INP read lock, which is released
  * again before moving on or returning.  Returns the endpoint or NULL.
  */
 static struct sctp_inpcb *
 sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
     uint16_t lport, uint32_t vrf_id)
 {
 	struct sctp_inpcb *inp;
 	struct sctp_laddr *laddr;
 	struct sockaddr_in *sin = NULL;
 
 #ifdef INET6
 	struct sockaddr_in6 *sin6 = NULL;
 	struct sockaddr_in6 *intf_addr6;
 
 #endif
 	int hit;
 
 	/*
 	 * Endpoint probe expects that the INP_INFO is locked.
 	 */
 	switch (nam->sa_family) {
 	case AF_INET:
 		sin = (struct sockaddr_in *)nam;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)nam;
 		break;
 #endif
 	default:
 		/* unsupported family */
 		return (NULL);
 	}
 
 	if (head == NULL) {
 		return (NULL);
 	}
 
 	/* Pass 1: endpoints bound to all addresses. */
 	LIST_FOREACH(inp, head, sctp_hash) {
 		hit = 0;
 		SCTP_INP_RLOCK(inp);
 		if (((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) &&
 		    (inp->sctp_lport == lport)) {
 			if ((nam->sa_family == AF_INET) &&
 			    (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 			    SCTP_IPV6_V6ONLY(inp)) {
 				/* IPv4 on a IPv6 socket with ONLY IPv6 set */
 			} else if (nam->sa_family == AF_INET6 &&
 			    (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
 				/* A V6 address and the endpoint is NOT bound V6 */
 			} else if (inp->def_vrf_id == vrf_id) {
 				/* port, family and VRF all agree */
 				hit = 1;
 			}
 		}
 		SCTP_INP_RUNLOCK(inp);
 		if (hit) {
 			return (inp);
 		}
 	}
 
 	/* Can't hunt for one that has no address specified */
 	if ((nam->sa_family == AF_INET) &&
 	    (sin->sin_addr.s_addr == INADDR_ANY)) {
 		return (NULL);
 	}
 #ifdef INET6
 	if ((nam->sa_family == AF_INET6) &&
 	    (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
 		return (NULL);
 	}
 #endif
 
 	/*
 	 * Pass 2: not bound to all, so see if we can find an EP bound to
 	 * this specific address.
 	 */
 	LIST_FOREACH(inp, head, sctp_hash) {
 		SCTP_INP_RLOCK(inp);
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) ||
 		    (inp->sctp_lport != lport) ||
 		    (inp->def_vrf_id != vrf_id)) {
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
 		/* A likely candidate: look at all of its addresses. */
 		hit = 0;
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (laddr->ifa == NULL) {
 				SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
 				    __FUNCTION__);
 				continue;
 			}
 			SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
 			    laddr->ifa);
 			if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
 				SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n");
 				continue;
 			}
 			if (laddr->ifa->address.sa.sa_family != nam->sa_family) {
 				continue;
 			}
 			/* same family, see if the address itself matches */
 			if (nam->sa_family == AF_INET) {
 				if (sin->sin_addr.s_addr ==
 				    laddr->ifa->address.sin.sin_addr.s_addr) {
 					hit = 1;
 				}
 			}
 #ifdef INET6
 			else if (nam->sa_family == AF_INET6) {
 				intf_addr6 = &laddr->ifa->address.sin6;
 				if (SCTP6_ARE_ADDR_EQUAL(sin6, intf_addr6)) {
 					hit = 1;
 				}
 			}
 #endif
 			if (hit) {
 				break;
 			}
 		}
 		SCTP_INP_RUNLOCK(inp);
 		if (hit) {
 			return (inp);
 		}
 	}
 	return (NULL);
 }
 
 
 /*
  * Check whether local port 'lport' in VRF 'vrf_id' is already held by an
  * endpoint in the main endpoint hash that conflicts with 'inp'.
  *
  * Two endpoints on the same port and VRF do not conflict only when exactly
  * one of them is a v6-only IPv6 endpoint and the other is IPv4-only.
  * Returns the first conflicting endpoint, or NULL if there is none.
  */
 static struct sctp_inpcb *
 sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id)
 {
 	struct sctppcbhead *bucket;
 	struct sctp_inpcb *cand;
 
 	bucket = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport,
 	    SCTP_BASE_INFO(hashmark))];
 	LIST_FOREACH(cand, bucket, sctp_hash) {
 		/* only endpoints on this port, in the VRF in question */
 		if ((cand->sctp_lport != lport) ||
 		    (cand->def_vrf_id != vrf_id)) {
 			continue;
 		}
 		/* This one is in use; check the v6/v4 binding issue. */
 		if (cand->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 			if (!SCTP_IPV6_V6ONLY(cand)) {
 				/* cand is bound v4 and v6, conflict always */
 				return (cand);
 			}
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 				/* collision in V6 space */
 				return (cand);
 			}
 			/* inp is BOUND_V4, no conflict */
 			continue;
 		}
 		/* cand is bound only V4 */
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 		    SCTP_IPV6_V6ONLY(inp)) {
 			/* no conflict */
 			continue;
 		}
 		return (cand);
 	}
 	return (NULL);
 }
 
 
 int
 sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp)
 {
 	/* For 1-2-1 with port reuse */
 	struct sctppcbhead *head;
 	struct sctp_inpcb *tinp;
 
 	if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
 		/* only works with port reuse on */
 		return (-1);
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) == 0) {
 		return (0);
 	}
 	SCTP_INP_RUNLOCK(inp);
 	head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport,
 	    SCTP_BASE_INFO(hashmark))];
 	/* Kick out all non-listeners to the TCP hash */
 	LIST_FOREACH(tinp, head, sctp_hash) {
 		if (tinp->sctp_lport != inp->sctp_lport) {
 			continue;
 		}
 		if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 			continue;
 		}
 		if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 			continue;
 		}
 		if (tinp->sctp_socket->so_qlimit) {
 			continue;
 		}
 		SCTP_INP_WLOCK(tinp);
 		LIST_REMOVE(tinp, sctp_hash);
 		head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(tinp->sctp_lport, SCTP_BASE_INFO(hashtcpmark))];
 		tinp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL;
 		LIST_INSERT_HEAD(head, tinp, sctp_hash);
 		SCTP_INP_WUNLOCK(tinp);
 	}
 	SCTP_INP_WLOCK(inp);
 	/* Pull from where he was */
 	LIST_REMOVE(inp, sctp_hash);
 	inp->sctp_flags &= ~SCTP_PCB_FLAGS_IN_TCPPOOL;
 	head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport, SCTP_BASE_INFO(hashmark))];
 	LIST_INSERT_HEAD(head, inp, sctp_hash);
 	SCTP_INP_WUNLOCK(inp);
 	SCTP_INP_RLOCK(inp);
 	return (0);
 }
 
 
 /*
  * Find the endpoint that is bound to the address/port in 'nam' within VRF
  * 'vrf_id'.
  *
  * The main endpoint hash bucket for the port is probed first.  If nothing
  * is found there and find_tcp_pool is non-zero, every bucket of the TCP
  * endpoint hash is probed as well.
  *
  * have_lock == 0 makes this function take and release the INP_INFO read
  * lock itself; otherwise the lock is left alone.  A returned endpoint has
  * had SCTP_INP_INCR_REF() applied to it.  Returns NULL for an address
  * family other than AF_INET/AF_INET6 or when no endpoint matches.
  */
 struct sctp_inpcb *
 sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock,
     uint32_t vrf_id)
 {
 	/*
 	 * First we check the hash table to see if someone has this port
 	 * bound with just the port.
 	 */
 	struct sctp_inpcb *inp;
 	struct sctppcbhead *head;
 	uint16_t lport;
 	unsigned int i;
 
 	/*
 	 * I could cheat here and just cast to one of the types but we will
 	 * do it right. It also provides the check against an Unsupported
 	 * type too.
 	 */
 	switch (nam->sa_family) {
 	case AF_INET:
 		lport = ((struct sockaddr_in *)nam)->sin_port;
 		break;
 	case AF_INET6:
 		lport = ((struct sockaddr_in6 *)nam)->sin6_port;
 		break;
 	default:
 		/* unsupported family */
 		return (NULL);
 	}
 	/* Find the head of the ALLADDR chain */
 	if (have_lock == 0) {
 		SCTP_INP_INFO_RLOCK();
 	}
 	head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport,
 	    SCTP_BASE_INFO(hashmark))];
 	inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
 
 	/*
 	 * If the TCP model exists it could be that the main listening
 	 * endpoint is gone but there still exists a connected socket for
 	 * this guy. If so we can return the first one that we find. This
 	 * may NOT be the correct one so the caller should be wary on the
 	 * returned INP. Currently the only caller that sets find_tcp_pool
 	 * is in bindx where we are verifying that a user CAN bind the
 	 * address. He either has bound it already, or someone else has, or
 	 * its open to bind, so this is good enough.
 	 */
 	if (inp == NULL && find_tcp_pool) {
 		for (i = 0; i < SCTP_BASE_INFO(hashtcpmark) + 1; i++) {
 			head = &SCTP_BASE_INFO(sctp_tcpephash)[i];
 			inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
 			if (inp) {
 				break;
 			}
 		}
 	}
 	if (inp) {
 		SCTP_INP_INCR_REF(inp);
 	}
 	if (have_lock == 0) {
 		SCTP_INP_INFO_RUNLOCK();
 	}
 	return (inp);
 }
 
 /*
  * Find the association for a packet travelling from 'from' to the local
  * address 'to' (either IPv4 or IPv6) in VRF 'vrf_id'.
  *
  * With find_tcp_pool set the TCP-model endpoints are tried first.  Failing
  * that, the endpoint owning 'to' is located and searched for an association
  * whose peer is 'from'.  When inp_p is non-NULL it receives the endpoint
  * that was found (possibly NULL); netp is passed through to the lookup
  * helpers.  Returns the association or NULL.
  */
 struct sctp_tcb *
 sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
     struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
     uint32_t vrf_id)
 {
 	struct sctp_inpcb *inp = NULL;
 	struct sctp_inpcb **ep_slot;
 	struct sctp_tcb *stcb;
 
 	/* The helpers need somewhere to store the endpoint; use ours if the
 	 * caller did not supply one. */
 	ep_slot = (inp_p != NULL) ? inp_p : &inp;
 
 	SCTP_INP_INFO_RLOCK();
 	if (find_tcp_pool) {
 		stcb = sctp_tcb_special_locate(ep_slot, from, to, netp,
 		    vrf_id);
 		if (stcb != NULL) {
 			SCTP_INP_INFO_RUNLOCK();
 			return (stcb);
 		}
 	}
 	inp = sctp_pcb_findep(to, 0, 1, vrf_id);
 	if (inp_p != NULL) {
 		*inp_p = inp;
 	}
 	SCTP_INP_INFO_RUNLOCK();
 
 	if (inp == NULL) {
 		return (NULL);
 	}
 	/*
 	 * We have an endpoint, now find the assoc for it (if any).  This
 	 * chain is used from the inbound packet side, so the packet's source
 	 * ('from') is the remote address of the lookup and 'to' the local
 	 * one.
 	 */
 	return (sctp_findassociation_ep_addr(ep_slot, from, netp, to, NULL));
 }
 
 
 /*
  * This routine will grub through the mbuf that is a INIT or INIT-ACK and
  * find all addresses that the sender has specified in any address list. Each
  * address will be used to lookup the TCB and see if one exits.
  */
 static struct sctp_tcb *
 sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset,
     struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
     struct sockaddr *dest)
 {
 	struct sockaddr_in sin4;
 	struct sockaddr_in6 sin6;
 	struct sctp_paramhdr *phdr, parm_buf;
 	struct sctp_tcb *retval;
 	uint32_t ptype, plen;
 
 	memset(&sin4, 0, sizeof(sin4));
 	memset(&sin6, 0, sizeof(sin6));
 	sin4.sin_len = sizeof(sin4);
 	sin4.sin_family = AF_INET;
 	sin4.sin_port = sh->src_port;
 	sin6.sin6_len = sizeof(sin6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_port = sh->src_port;
 
 	retval = NULL;
 	offset += sizeof(struct sctp_init_chunk);
 
 	phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
 	while (phdr != NULL) {
 		/* now we must see if we want the parameter */
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		if (plen == 0) {
 			break;
 		}
 		if (ptype == SCTP_IPV4_ADDRESS &&
 		    plen == sizeof(struct sctp_ipv4addr_param)) {
 			/* Get the rest of the address */
 			struct sctp_ipv4addr_param ip4_parm, *p4;
 
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)&ip4_parm, min(plen, sizeof(ip4_parm)));
 			if (phdr == NULL) {
 				return (NULL);
 			}
 			p4 = (struct sctp_ipv4addr_param *)phdr;
 			memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
 			/* look it up */
 			retval = sctp_findassociation_ep_addr(inp_p,
 			    (struct sockaddr *)&sin4, netp, dest, NULL);
 			if (retval != NULL) {
 				return (retval);
 			}
 		} else if (ptype == SCTP_IPV6_ADDRESS &&
 		    plen == sizeof(struct sctp_ipv6addr_param)) {
 			/* Get the rest of the address */
 			struct sctp_ipv6addr_param ip6_parm, *p6;
 
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)&ip6_parm, min(plen, sizeof(ip6_parm)));
 			if (phdr == NULL) {
 				return (NULL);
 			}
 			p6 = (struct sctp_ipv6addr_param *)phdr;
 			memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
 			/* look it up */
 			retval = sctp_findassociation_ep_addr(inp_p,
 			    (struct sockaddr *)&sin6, netp, dest, NULL);
 			if (retval != NULL) {
 				return (retval);
 			}
 		}
 		offset += SCTP_SIZE32(plen);
 		phdr = sctp_get_next_param(m, offset, &parm_buf,
 		    sizeof(parm_buf));
 	}
 	return (NULL);
 }
 
 /*
  * Look up an association by our own verification tag (vtag).
  *
  * The vtag selects a bucket in the global association hash.  Each TCB in
  * that bucket is locked and filtered on: my_vtag, remote port, local port,
  * the ABOUT_TO_BE_FREED state and sctp_does_stcb_own_this_addr(stcb, to).
  * A surviving candidate is accepted when
  *   - remote_tag is non-zero and equals the peer's vtag, or
  *   - skip_src_check is non-zero, or
  *   - sctp_findnet(stcb, from) finds a net for the source address.
  *
  * On success the TCB is returned with its TCB lock still held, *netp holds
  * the result of sctp_findnet() (NULL when "from" is NULL) and, when inp_p is
  * non-NULL, *inp_p is the owning endpoint.  On failure NULL is returned and
  * both outputs are NULL.  The INP-INFO read lock is taken and released
  * internally on every path.
  *
  * netp must not be NULL; inp_p may be NULL.  vrf_id is not used here.
  */
 static struct sctp_tcb *
 sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag,
     struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint16_t rport,
     uint16_t lport, int skip_src_check, uint32_t vrf_id, uint32_t remote_tag)
 {
 	struct sctpasochead *head;
 	struct sctp_nets *net;
 	struct sctp_tcb *stcb;
 
 	*netp = NULL;
 	if (inp_p != NULL) {
 		*inp_p = NULL;
 	}
 	SCTP_INP_INFO_RLOCK();
 	head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag,
 	    SCTP_BASE_INFO(hashasocmark))];
 	if (head == NULL) {
 		/* invalid vtag */
 		SCTP_INP_INFO_RUNLOCK();
 		return (NULL);
 	}
 	LIST_FOREACH(stcb, head, sctp_asocs) {
 		/* Skip associations whose endpoint is already torn down. */
 		SCTP_INP_RLOCK(stcb->sctp_ep);
 		if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 			SCTP_INP_RUNLOCK(stcb->sctp_ep);
 			continue;
 		}
 		SCTP_TCB_LOCK(stcb);
 		SCTP_INP_RUNLOCK(stcb->sctp_ep);
 		if (stcb->asoc.my_vtag == vtag) {
 			/*
 			 * Candidate: ports must match, the assoc must not be
 			 * on its way out and it must own the "to" address.
 			 * The tests short-circuit in this order.
 			 */
 			if ((stcb->rport != rport) ||
 			    (stcb->sctp_ep->sctp_lport != lport) ||
 			    (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
 			    (sctp_does_stcb_own_this_addr(stcb, to) == 0)) {
 				SCTP_TCB_UNLOCK(stcb);
 				continue;
 			}
 			/*
 			 * If both vtags match we consider it conclusive and
 			 * check NO source/destination addresses.  The same
 			 * applies when the caller asked to skip the source
 			 * check.
 			 */
 			if (((remote_tag != 0) &&
 			    (stcb->asoc.peer_vtag == remote_tag)) ||
 			    skip_src_check) {
 				if (from != NULL) {
 					/* report the net to the caller */
 					*netp = sctp_findnet(stcb, from);
 				} else {
 					*netp = NULL;	/* unknown */
 				}
 				if (inp_p != NULL) {
 					*inp_p = stcb->sctp_ep;
 				}
 				SCTP_INP_INFO_RUNLOCK();
 				return (stcb);
 			}
 			net = sctp_findnet(stcb, from);
 			if (net) {
 				/* yep its him. */
 				*netp = net;
 				SCTP_STAT_INCR(sctps_vtagexpress);
 				if (inp_p != NULL) {
 					*inp_p = stcb->sctp_ep;
 				}
 				SCTP_INP_INFO_RUNLOCK();
 				return (stcb);
 			} else {
 				/*
 				 * not him, this should only happen in rare
 				 * cases so I peg it.
 				 */
 				SCTP_STAT_INCR(sctps_vtagbogus);
 			}
 		}
 		SCTP_TCB_UNLOCK(stcb);
 	}
 	SCTP_INP_INFO_RUNLOCK();
 	return (NULL);
 }
 
 /*
  * Find the association an inbound IP packet (IPv4 or IPv6) belongs to.
  *
  * The source and destination socket addresses are built from the IP header
  * at the front of mbuf "m" and the ports in the SCTP common header "sh".
  * Lookup order:
  *   1. if the packet carries a non-zero vtag, sctp_findassoc_by_vtag();
  *   2. otherwise (or on a miss) sctp_findassociation_addr_sa(); the TCP
  *      pool is searched for every chunk type except INIT, INIT-ACK,
  *      COOKIE-ECHO and COOKIE-ACK;
  *   3. if an endpoint but no association was found and the chunk is an
  *      INIT or INIT-ACK, sctp_findassociation_special_addr() is tried,
  *      unless the endpoint is in the TCP pool, in which case NULL is
  *      returned and *inp_p (if supplied) is cleared.
  *
  * inp_p may be NULL.  When it is non-NULL it receives the endpoint found.
  * Returns the association found, or NULL (also for unsupported IP versions).
  */
 struct sctp_tcb *
 sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset,
     struct sctphdr *sh, struct sctp_chunkhdr *ch,
     struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
 {
 	int find_tcp_pool;
 	struct ip *iph;
 	struct sctp_tcb *retval;
 	struct sockaddr_storage to_store, from_store;
 	struct sockaddr *to = (struct sockaddr *)&to_store;
 	struct sockaddr *from = (struct sockaddr *)&from_store;
 	struct sctp_inpcb *inp = NULL;
 	struct sctp_inpcb **ep_out;
 
 	/*
 	 * The lookup helpers store the endpoint through the pointer they
 	 * are given, so never hand them a NULL one: fall back to the local
 	 * "inp" when the caller is not interested in the endpoint.
 	 */
 	ep_out = (inp_p != NULL) ? inp_p : &inp;
 
 	iph = mtod(m, struct ip *);
 	switch (iph->ip_v) {
 	case IPVERSION:
 		{
 			/* its IPv4 */
 			struct sockaddr_in *from4, *to4;
 
 			from4 = (struct sockaddr_in *)&from_store;
 			bzero(from4, sizeof(*from4));
 			from4->sin_family = AF_INET;
 			from4->sin_len = sizeof(struct sockaddr_in);
 			from4->sin_addr.s_addr = iph->ip_src.s_addr;
 			from4->sin_port = sh->src_port;
 
 			to4 = (struct sockaddr_in *)&to_store;
 			bzero(to4, sizeof(*to4));
 			to4->sin_family = AF_INET;
 			to4->sin_len = sizeof(struct sockaddr_in);
 			to4->sin_addr.s_addr = iph->ip_dst.s_addr;
 			to4->sin_port = sh->dest_port;
 			break;
 		}
 #ifdef INET6
 	case IPV6_VERSION >> 4:
 		{
 			/* its IPv6 */
 			struct ip6_hdr *ip6;
 			struct sockaddr_in6 *from6, *to6;
 
 			ip6 = mtod(m, struct ip6_hdr *);
 
 			from6 = (struct sockaddr_in6 *)&from_store;
 			bzero(from6, sizeof(*from6));
 			from6->sin6_family = AF_INET6;
 			from6->sin6_len = sizeof(struct sockaddr_in6);
 			from6->sin6_addr = ip6->ip6_src;
 			from6->sin6_port = sh->src_port;
 			/* Get the scopes in properly to the sin6 addr's */
 			/* we probably don't need these operations */
 			(void)sa6_recoverscope(from6);
 			sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone));
 
 			to6 = (struct sockaddr_in6 *)&to_store;
 			bzero(to6, sizeof(*to6));
 			to6->sin6_family = AF_INET6;
 			to6->sin6_len = sizeof(struct sockaddr_in6);
 			to6->sin6_addr = ip6->ip6_dst;
 			to6->sin6_port = sh->dest_port;
 			/* Get the scopes in properly to the sin6 addr's */
 			/* we probably don't need these operations */
 			(void)sa6_recoverscope(to6);
 			sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone));
 			break;
 		}
 #endif
 	default:
 		/* Currently not supported. */
 		return (NULL);
 	}
 
 	if (sh->v_tag) {
 		/* we only go down this path if vtag is non-zero */
 		retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag),
 		    ep_out, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0);
 		if (retval) {
 			return (retval);
 		}
 	}
 	find_tcp_pool = 0;
 	if ((ch->chunk_type != SCTP_INITIATION) &&
 	    (ch->chunk_type != SCTP_INITIATION_ACK) &&
 	    (ch->chunk_type != SCTP_COOKIE_ACK) &&
 	    (ch->chunk_type != SCTP_COOKIE_ECHO)) {
 		/* Other chunk types go to the tcp pool. */
 		find_tcp_pool = 1;
 	}
 	retval = sctp_findassociation_addr_sa(to, from, ep_out, netp,
 	    find_tcp_pool, vrf_id);
 	inp = *ep_out;
 	SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
 	if (retval == NULL && inp) {
 		/* Found a EP but not this address */
 		if ((ch->chunk_type == SCTP_INITIATION) ||
 		    (ch->chunk_type == SCTP_INITIATION_ACK)) {
 			/*-
 			 * special hook, we do NOT return linp or an
 			 * association that is linked to an existing
 			 * association that is under the TCP pool (i.e. no
 			 * listener exists). The endpoint finding routine
 			 * will always find a listener before examining the
 			 * TCP pool.
 			 */
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
 				if (inp_p) {
 					*inp_p = NULL;
 				}
 				return (NULL);
 			}
 			retval = sctp_findassociation_special_addr(m, iphlen,
 			    offset, sh, &inp, netp, to);
 			if (inp_p != NULL) {
 				*inp_p = inp;
 			}
 		}
 	}
 	SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval);
 	return (retval);
 }
 
 /*
  * Find the association an ASCONF chunk is addressed to, using the lookup
  * address parameter that follows the ASCONF chunk header.
  *
  * The local address comes from the IP destination and the SCTP destination
  * port; the remote address comes from the lookup parameter and the SCTP
  * source port.  When the lookup address is all zeros (0.0.0.0 or ::0) the
  * vtag in the common header is used for the lookup instead.
  *
  * Returns NULL for an unsupported IP version, an unreadable, wrongly sized
  * or unknown lookup parameter, or when no association matches.
  */
 struct sctp_tcb *
 sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset,
     struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
 {
 	struct sockaddr_storage local_store, remote_store;
 	struct sctp_paramhdr hdr_buf, *hdr;
 	struct sockaddr *local_sa;
 	struct sockaddr_in *sin;
 
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 	struct ip6_hdr *ip6;
 
 #endif
 	struct ip *iph;
 	/* where the lookup address parameter starts inside the mbuf chain */
 	const size_t lookup_off = offset + sizeof(struct sctp_asconf_chunk);
 	int ptype;
 	int wildcard_addr = 0;
 
 	memset(&local_store, 0, sizeof(local_store));
 	memset(&remote_store, 0, sizeof(remote_store));
 	local_sa = (struct sockaddr *)&local_store;
 
 	/* Build the local (destination) address from the IP header. */
 	iph = mtod(m, struct ip *);
 	switch (iph->ip_v) {
 	case IPVERSION:
 		sin = (struct sockaddr_in *)&local_store;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
 		sin->sin_port = sh->dest_port;
 		sin->sin_addr.s_addr = iph->ip_dst.s_addr;
 		break;
 #ifdef INET6
 	case IPV6_VERSION >> 4:
 		ip6 = mtod(m, struct ip6_hdr *);
 		sin6 = (struct sockaddr_in6 *)&local_store;
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_len = sizeof(*sin6);
 		sin6->sin6_port = sh->dest_port;
 		sin6->sin6_addr = ip6->ip6_dst;
 		break;
 #endif
 	default:
 		return (NULL);
 	}
 
 	/* Peek at the parameter header to learn the address family. */
 	hdr = sctp_get_next_param(m, lookup_off, &hdr_buf,
 	    sizeof(struct sctp_paramhdr));
 	if (hdr == NULL) {
 		SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
 		    __FUNCTION__);
 		return (NULL);
 	}
 	ptype = (int)((uint32_t) ntohs(hdr->param_type));
 
 	/* Build the remote address from the full lookup parameter. */
 	switch (ptype) {
 #ifdef INET6
 	case SCTP_IPV6_ADDRESS:
 		{
 			struct sctp_ipv6addr_param v6_buf, *v6;
 
 			if (ntohs(hdr->param_length) != sizeof(struct sctp_ipv6addr_param)) {
 				return (NULL);
 			}
 			v6 = (struct sctp_ipv6addr_param *)sctp_get_next_param(m,
 			    lookup_off, &v6_buf.ph, sizeof(*v6));
 			if (v6 == NULL) {
 				SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
 				    __FUNCTION__);
 				return (NULL);
 			}
 			sin6 = (struct sockaddr_in6 *)&remote_store;
 			sin6->sin6_family = AF_INET6;
 			sin6->sin6_len = sizeof(*sin6);
 			sin6->sin6_port = sh->src_port;
 			memcpy(&sin6->sin6_addr, &v6->addr, sizeof(struct in6_addr));
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				wildcard_addr = 1;
 			}
 			break;
 		}
 #endif
 	case SCTP_IPV4_ADDRESS:
 		{
 			struct sctp_ipv4addr_param v4_buf, *v4;
 
 			if (ntohs(hdr->param_length) != sizeof(struct sctp_ipv4addr_param)) {
 				return (NULL);
 			}
 			v4 = (struct sctp_ipv4addr_param *)sctp_get_next_param(m,
 			    lookup_off, &v4_buf.ph, sizeof(*v4));
 			if (v4 == NULL) {
 				SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
 				    __FUNCTION__);
 				return (NULL);
 			}
 			sin = (struct sockaddr_in *)&remote_store;
 			sin->sin_family = AF_INET;
 			sin->sin_len = sizeof(*sin);
 			sin->sin_port = sh->src_port;
 			memcpy(&sin->sin_addr, &v4->addr, sizeof(struct in_addr));
 			if (sin->sin_addr.s_addr == INADDR_ANY) {
 				wildcard_addr = 1;
 			}
 			break;
 		}
 	default:
 		/* invalid address param type */
 		return (NULL);
 	}
 
 	if (wildcard_addr) {
 		/* Zero lookup address: fall back to the vtag, no source check. */
 		return (sctp_findassoc_by_vtag(NULL, local_sa, ntohl(sh->v_tag),
 		    inp_p, netp, sh->src_port, sh->dest_port, 1, vrf_id, 0));
 	}
 	return (sctp_findassociation_ep_addr(inp_p,
 	    (struct sockaddr *)&remote_store, netp, local_sa, NULL));
 }
 
 
 /*
  * Allocate a sctp_inpcb for socket "so" and set it up as an unbound
  * endpoint with default parameters.
  *
  * The endpoint is taken from the endpoint zone, zeroed, attached to the
  * socket (so->so_pcb), given its association-id and TCB hash tables,
  * linked onto the global endpoint list and filled with defaults taken from
  * the SCTP sysctl values (timers, RTO limits, retransmission limits,
  * congestion control module, cookie secrets, authentication lists).
  *
  * vrf_id becomes the endpoint's default VRF.
  *
  * Returns 0 on success, ENOBUFS when the endpoint or one of its hash
  * tables cannot be allocated, EOPNOTSUPP for a socket type other than
  * SOCK_DGRAM, SOCK_SEQPACKET or SOCK_STREAM, or (with IPSEC) the error
  * from ipsec_init_policy().  On every failure after the association-id
  * hash has been created, that hash is released again before the endpoint
  * is returned to the zone.
  */
 int
 sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
 {
 	/*
 	 * we get called when a new endpoint starts up. We need to allocate
 	 * the sctp_inpcb structure from the zone and init it. Mark it as
 	 * unbound and find a port that we can use as an ephemeral with
 	 * INADDR_ANY. If the user binds later no problem we can then add in
 	 * the specific addresses. And setup the default parameters for the
 	 * EP.
 	 */
 	int i, error;
 	struct sctp_inpcb *inp;
 	struct sctp_pcb *m;
 	struct timeval time;
 	sctp_sharedkey_t *null_key;
 
 	error = 0;
 
 	SCTP_INP_INFO_WLOCK();
 	inp = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_ep), struct sctp_inpcb);
 	if (inp == NULL) {
 		SCTP_PRINTF("Out of SCTP-INPCB structures - no resources\n");
 		SCTP_INP_INFO_WUNLOCK();
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
 		return (ENOBUFS);
 	}
 	/* zap it */
 	bzero(inp, sizeof(*inp));
 
 	/* bump generations */
 	/* setup socket pointers */
 	inp->sctp_socket = so;
 	inp->ip_inp.inp.inp_socket = so;
 	inp->sctp_associd_counter = 1;
 	inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
 	inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
 	inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off);
 	/* init the small hash table we use to track asocid <-> tcb */
 	inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark);
 	if (inp->sctp_asocidhash == NULL) {
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		SCTP_INP_INFO_WUNLOCK();
 		return (ENOBUFS);
 	}
 #ifdef IPSEC
 	{
 		struct inpcbpolicy *pcb_sp = NULL;
 
 		error = ipsec_init_policy(so, &pcb_sp);
 		/* Arrange to share the policy */
 		inp->ip_inp.inp.inp_sp = pcb_sp;
 		((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
 	}
 	if (error != 0) {
 		/* don't leak the asocid hash allocated above */
 		SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		SCTP_INP_INFO_WUNLOCK();
 		return error;
 	}
 #endif				/* IPSEC */
 	SCTP_INCR_EP_COUNT();
 	inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
 	SCTP_INP_INFO_WUNLOCK();
 
 	so->so_pcb = (caddr_t)inp;
 
 	if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) ||
 	    (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) {
 		/* UDP style socket */
 		inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
 		    SCTP_PCB_FLAGS_UNBOUND);
 		/* Be sure it is NON-BLOCKING IO for UDP */
 		/* SCTP_SET_SO_NBIO(so); */
 	} else if (SCTP_SO_TYPE(so) == SOCK_STREAM) {
 		/* TCP style socket */
 		inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
 		    SCTP_PCB_FLAGS_UNBOUND);
 		/* Be sure we have blocking IO by default */
 		SCTP_CLEAR_SO_NBIO(so);
 	} else {
 		/*
 		 * unsupported socket type (RAW, etc)- in case we missed it
 		 * in protosw
 		 */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
 		so->so_pcb = NULL;
 		/* don't leak the asocid hash allocated above */
 		SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		return (EOPNOTSUPP);
 	}
 	if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_1) {
 		sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
 		sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
 	} else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_2) {
 		sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
 		sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
 	} else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_0) {
 		sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
 		sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
 	}
 	inp->sctp_tcbhash = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_pcbtblsize),
 	    &inp->sctp_hashmark);
 	if (inp->sctp_tcbhash == NULL) {
 		SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n");
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
 		so->so_pcb = NULL;
 		/* don't leak the asocid hash allocated above */
 		SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		return (ENOBUFS);
 	}
 	inp->def_vrf_id = vrf_id;
 
 	SCTP_INP_INFO_WLOCK();
 	SCTP_INP_LOCK_INIT(inp);
 	INP_LOCK_INIT(&inp->ip_inp.inp, "inp", "sctpinp");
 	SCTP_INP_READ_INIT(inp);
 	SCTP_ASOC_CREATE_LOCK_INIT(inp);
 	/* lock the new ep */
 	SCTP_INP_WLOCK(inp);
 
 	/* add it to the info area */
 	LIST_INSERT_HEAD(&SCTP_BASE_INFO(listhead), inp, sctp_list);
 	SCTP_INP_INFO_WUNLOCK();
 
 	TAILQ_INIT(&inp->read_queue);
 	LIST_INIT(&inp->sctp_addr_list);
 
 	LIST_INIT(&inp->sctp_asoc_list);
 
 #ifdef SCTP_TRACK_FREED_ASOCS
 	/* TEMP CODE */
 	LIST_INIT(&inp->sctp_asoc_free_list);
 #endif
 	/* Init the timer structure for signature change */
 	SCTP_OS_TIMER_INIT(&inp->sctp_ep.signature_change.timer);
 	inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NEWCOOKIE;
 
 	/* now init the actual endpoint default data */
 	m = &inp->sctp_ep;
 
 	/* setup the base timeout information */
 	m->sctp_timeoutticks[SCTP_TIMER_SEND] = SEC_TO_TICKS(SCTP_SEND_SEC);	/* needed ? */
 	m->sctp_timeoutticks[SCTP_TIMER_INIT] = SEC_TO_TICKS(SCTP_INIT_SEC);	/* needed ? */
 	m->sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default));
 	m->sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default));
 	m->sctp_timeoutticks[SCTP_TIMER_PMTU] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default));
 	m->sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default));
 	m->sctp_timeoutticks[SCTP_TIMER_SIGNATURE] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default));
 	/* all max/min max are in ms */
 	m->sctp_maxrto = SCTP_BASE_SYSCTL(sctp_rto_max_default);
 	m->sctp_minrto = SCTP_BASE_SYSCTL(sctp_rto_min_default);
 	m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default);
 	m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default);
 	m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default);
 
 	m->max_open_streams_intome = MAX_SCTP_STREAMS;
 
 	m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default);
 	m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default);
 	m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default);
 	m->sctp_sws_sender = SCTP_SWS_SENDER_DEF;
 	m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF;
 	m->max_burst = SCTP_BASE_SYSCTL(sctp_max_burst_default);
 	if ((SCTP_BASE_SYSCTL(sctp_default_cc_module) >= SCTP_CC_RFC2581) &&
 	    (SCTP_BASE_SYSCTL(sctp_default_cc_module) <= SCTP_CC_HTCP)) {
 		m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module);
 	} else {
 		/* sysctl done with invalid value, set to 2581 */
 		m->sctp_default_cc_module = SCTP_CC_RFC2581;
 	}
 	/* number of streams to pre-open on a association */
 	m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default);
 
 	/* Add adaptation cookie */
 	m->adaptation_layer_indicator = 0x504C5253;
 
 	/* seed random number generator */
 	m->random_counter = 1;
 	m->store_at = SCTP_SIGNATURE_SIZE;
 	SCTP_READ_RANDOM(m->random_numbers, sizeof(m->random_numbers));
 	sctp_fill_random_store(m);
 
 	/* Minimum cookie size */
 	m->size_of_a_cookie = (sizeof(struct sctp_init_msg) * 2) +
 	    sizeof(struct sctp_state_cookie);
 	m->size_of_a_cookie += SCTP_SIGNATURE_SIZE;
 
 	/* Setup the initial secret */
 	(void)SCTP_GETTIME_TIMEVAL(&time);
 	m->time_of_secret_change = time.tv_sec;
 
 	for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
 		m->secret_key[0][i] = sctp_select_initial_TSN(m);
 	}
 	sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL);
 
 	/* How long is a cookie good for ? */
 	m->def_cookie_life = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default));
 	/*
 	 * Initialize authentication parameters
 	 */
 	m->local_hmacs = sctp_default_supported_hmaclist();
 	m->local_auth_chunks = sctp_alloc_chunklist();
 	sctp_auth_set_default_chunks(m->local_auth_chunks);
 	LIST_INIT(&m->shared_keys);
 	/* add default NULL key as key id 0 */
 	null_key = sctp_alloc_sharedkey();
 	sctp_insert_sharedkey(&m->shared_keys, null_key);
 	SCTP_INP_WUNLOCK(inp);
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, NULL, 12);
 #endif
 	return (error);
 }
 
 
 /*
  * Move association "stcb" from endpoint "old_inp" to endpoint "new_inp".
  *
  * The cookie secret state and the local port of old_inp are copied to
  * new_inp, the TCB is unlinked from old_inp's lists and linked onto
  * new_inp's association list (and asocid hash, if it was in one), new_inp
  * is inserted into the global TCP endpoint hash, and every timer attached
  * to the association and its nets is re-pointed at new_inp.  For a
  * subset-bound endpoint the local address list of old_inp is duplicated
  * onto new_inp.
  *
  * Locking: the TCB lock is dropped on entry and re-taken after the INP-INFO
  * and both endpoint write locks, so the caller presumably holds the TCB
  * lock on entry -- confirm against callers.  On return the TCB lock is
  * held and the INP-INFO and endpoint locks taken here have been released.
  */
 void
 sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp,
     struct sctp_tcb *stcb)
 {
 	struct sctp_nets *net;
 	uint16_t lport, rport;
 	struct sctppcbhead *head;
 	struct sctp_laddr *laddr, *oladdr;
 
 	/*
 	 * Hold a reference on the association while its lock is dropped so
 	 * the locks can be re-acquired in the order INFO -> old_inp ->
 	 * new_inp -> TCB.
 	 */
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
 	SCTP_INP_INFO_WLOCK();
 	SCTP_INP_WLOCK(old_inp);
 	SCTP_INP_WLOCK(new_inp);
 	SCTP_TCB_LOCK(stcb);
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 
 	/* Carry the cookie secret state over to the new endpoint. */
 	new_inp->sctp_ep.time_of_secret_change =
 	    old_inp->sctp_ep.time_of_secret_change;
 	memcpy(new_inp->sctp_ep.secret_key, old_inp->sctp_ep.secret_key,
 	    sizeof(old_inp->sctp_ep.secret_key));
 	new_inp->sctp_ep.current_secret_number =
 	    old_inp->sctp_ep.current_secret_number;
 	new_inp->sctp_ep.last_secret_number =
 	    old_inp->sctp_ep.last_secret_number;
 	new_inp->sctp_ep.size_of_a_cookie = old_inp->sctp_ep.size_of_a_cookie;
 
 	/* make it so new data pours into the new socket */
 	stcb->sctp_socket = new_inp->sctp_socket;
 	stcb->sctp_ep = new_inp;
 
 	/* Copy the port across */
 	lport = new_inp->sctp_lport = old_inp->sctp_lport;
 	rport = stcb->rport;
 	/* Pull the tcb from the old association */
 	LIST_REMOVE(stcb, sctp_tcbhash);
 	LIST_REMOVE(stcb, sctp_tcblist);
 	if (stcb->asoc.in_asocid_hash) {
 		LIST_REMOVE(stcb, sctp_tcbasocidhash);
 	}
 	/* Now insert the new_inp into the TCP connected hash */
 	head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))];
 
 	LIST_INSERT_HEAD(head, new_inp, sctp_hash);
 	/* Its safe to access */
 	new_inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
 
 	/* Now move the tcb into the endpoint list */
 	LIST_INSERT_HEAD(&new_inp->sctp_asoc_list, stcb, sctp_tcblist);
 	/*
 	 * Question, do we even need to worry about the ep-hash since we
 	 * only have one connection? Probably not :> so lets get rid of it
 	 * and not suck up any kernel memory in that.
 	 */
 	if (stcb->asoc.in_asocid_hash) {
 		struct sctpasochead *lhd;
 
 		/* Re-hash the association id into new_inp's asocid table. */
 		lhd = &new_inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(stcb->asoc.assoc_id,
 		    new_inp->hashasocidmark)];
 		LIST_INSERT_HEAD(lhd, stcb, sctp_tcbasocidhash);
 	}
 	/* Ok. Let's restart timer. */
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, new_inp,
 		    stcb, net);
 	}
 
 	SCTP_INP_INFO_WUNLOCK();
 	/* The stcb was not inserted into new_inp's TCB hash; release it. */
 	if (new_inp->sctp_tcbhash != NULL) {
 		SCTP_HASH_FREE(new_inp->sctp_tcbhash, new_inp->sctp_hashmark);
 		new_inp->sctp_tcbhash = NULL;
 	}
 	if ((new_inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 		/* Subset bound, so copy in the laddr list from the old_inp */
 		LIST_FOREACH(oladdr, &old_inp->sctp_addr_list, sctp_nxt_addr) {
 			laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
 			if (laddr == NULL) {
 				/*
 				 * Gak, what can we do? This assoc is really
 				 * HOSED. We probably should send an abort
 				 * here.
 				 */
 				SCTPDBG(SCTP_DEBUG_PCB1, "Association hosed in TCP model, out of laddr memory\n");
 				/* This address is skipped; the copy continues. */
 				continue;
 			}
 			SCTP_INCR_LADDR_COUNT();
 			bzero(laddr, sizeof(*laddr));
 			(void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
 			laddr->ifa = oladdr->ifa;
 			/* the new list entry holds its own reference on the ifa */
 			atomic_add_int(&laddr->ifa->refcount, 1);
 			LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr,
 			    sctp_nxt_addr);
 			new_inp->laddr_count++;
 		}
 	}
 	/*
 	 * Now any running timers need to be adjusted since we really don't
 	 * care if they are running or not just blast in the new_inp into
 	 * all of them.
 	 */
 
 	stcb->asoc.hb_timer.ep = (void *)new_inp;
 	stcb->asoc.dack_timer.ep = (void *)new_inp;
 	stcb->asoc.asconf_timer.ep = (void *)new_inp;
 	stcb->asoc.strreset_timer.ep = (void *)new_inp;
 	stcb->asoc.shut_guard_timer.ep = (void *)new_inp;
 	stcb->asoc.autoclose_timer.ep = (void *)new_inp;
 	stcb->asoc.delayed_event_timer.ep = (void *)new_inp;
 	stcb->asoc.delete_prim_timer.ep = (void *)new_inp;
 	/* now what about the nets? */
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		net->pmtu_timer.ep = (void *)new_inp;
 		net->rxt_timer.ep = (void *)new_inp;
 		net->fr_timer.ep = (void *)new_inp;
 	}
 	SCTP_INP_WUNLOCK(new_inp);
 	SCTP_INP_WUNLOCK(old_inp);
 }
 
 
 
 
 /* sctp_ifap is used to bypass normal local address validation checks */
 int
 sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
     struct sctp_ifa *sctp_ifap, struct thread *p)
 {
 	/* bind a ep to a socket address */
 	struct sctppcbhead *head;
 	struct sctp_inpcb *inp, *inp_tmp;
 	struct inpcb *ip_inp;
 	int port_reuse_active = 0;
 	int bindall;
 	uint16_t lport;
 	int error;
 	uint32_t vrf_id;
 
 	lport = 0;
 	error = 0;
 	bindall = 1;
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	ip_inp = (struct inpcb *)so->so_pcb;
 #ifdef SCTP_DEBUG
 	if (addr) {
 		SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n",
 		    ntohs(((struct sockaddr_in *)addr)->sin_port));
 		SCTPDBG(SCTP_DEBUG_PCB1, "Addr :");
 		SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
 	}
 #endif
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) {
 		/* already did a bind, subsequent binds NOT allowed ! */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 		return (EINVAL);
 	}
 #ifdef INVARIANTS
 	if (p == NULL)
 		panic("null proc/thread");
 #endif
 	if (addr != NULL) {
 		switch (addr->sa_family) {
 		case AF_INET:
 			{
 				struct sockaddr_in *sin;
 
 				/* IPV6_V6ONLY socket? */
 				if (SCTP_IPV6_V6ONLY(ip_inp)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 					return (EINVAL);
 				}
 				if (addr->sa_len != sizeof(*sin)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 					return (EINVAL);
 				}
 				sin = (struct sockaddr_in *)addr;
 				lport = sin->sin_port;
 				/*
 				 * For LOOPBACK the prison_local_ip4() call
 				 * will transmute the ip address to the
 				 * proper value.
 				 */
 				if (p && (error = prison_local_ip4(p->td_ucred, &sin->sin_addr)) != 0) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
 					return (error);
 				}
 				if (sin->sin_addr.s_addr != INADDR_ANY) {
 					bindall = 0;
 				}
 				break;
 			}
 #ifdef INET6
 		case AF_INET6:
 			{
 				/*
 				 * Only for pure IPv6 Address. (No IPv4
 				 * Mapped!)
 				 */
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)addr;
 
 				if (addr->sa_len != sizeof(*sin6)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 					return (EINVAL);
 				}
 				lport = sin6->sin6_port;
 
 				/*
 				 * For LOOPBACK the prison_local_ip6() call
 				 * will transmute the ipv6 address to the
 				 * proper value.
 				 */
 				if (p && (error = prison_local_ip6(p->td_ucred, &sin6->sin6_addr,
 				    (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
 					return (error);
 				}
 				if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 					bindall = 0;
 					/* KAME hack: embed scopeid */
 					if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 						return (EINVAL);
 					}
 				}
 				/* this must be cleared for ifa_ifwithaddr() */
 				sin6->sin6_scope_id = 0;
 				break;
 			}
 #endif
 		default:
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EAFNOSUPPORT);
 			return (EAFNOSUPPORT);
 		}
 	}
 	SCTP_INP_INFO_WLOCK();
 	SCTP_INP_WLOCK(inp);
 	/* Setup a vrf_id to be the default for the non-bind-all case. */
 	vrf_id = inp->def_vrf_id;
 
 	/* increase our count due to the unlock we do */
 	SCTP_INP_INCR_REF(inp);
 	if (lport) {
 		/*
 		 * Did the caller specify a port? if so we must see if a ep
 		 * already has this one bound.
 		 */
 		/* got to be root to get at low ports */
 		if (ntohs(lport) < IPPORT_RESERVED) {
 			if (p && (error =
 			    priv_check(p, PRIV_NETINET_RESERVEDPORT)
 			    )) {
 				SCTP_INP_DECR_REF(inp);
 				SCTP_INP_WUNLOCK(inp);
 				SCTP_INP_INFO_WUNLOCK();
 				return (error);
 			}
 		}
 		if (p == NULL) {
 			SCTP_INP_DECR_REF(inp);
 			SCTP_INP_WUNLOCK(inp);
 			SCTP_INP_INFO_WUNLOCK();
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
 			return (error);
 		}
 		SCTP_INP_WUNLOCK(inp);
 		if (bindall) {
 			vrf_id = inp->def_vrf_id;
 			inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
 			if (inp_tmp != NULL) {
 				/*
 				 * lock guy returned and lower count note
 				 * that we are not bound so inp_tmp should
 				 * NEVER be inp. And it is this inp
 				 * (inp_tmp) that gets the reference bump,
 				 * so we must lower it.
 				 */
 				SCTP_INP_DECR_REF(inp_tmp);
 				/* unlock info */
 				if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
 				    (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
 					/*
 					 * Ok, must be one-2-one and
 					 * allowing port re-use
 					 */
 					port_reuse_active = 1;
 					goto continue_anyway;
 				}
 				SCTP_INP_DECR_REF(inp);
 				SCTP_INP_INFO_WUNLOCK();
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
 				return (EADDRINUSE);
 			}
 		} else {
 			inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
 			if (inp_tmp != NULL) {
 				/*
 				 * lock guy returned and lower count note
 				 * that we are not bound so inp_tmp should
 				 * NEVER be inp. And it is this inp
 				 * (inp_tmp) that gets the reference bump,
 				 * so we must lower it.
 				 */
 				SCTP_INP_DECR_REF(inp_tmp);
 				/* unlock info */
 				if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
 				    (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
 					/*
 					 * Ok, must be one-2-one and
 					 * allowing port re-use
 					 */
 					port_reuse_active = 1;
 					goto continue_anyway;
 				}
 				SCTP_INP_DECR_REF(inp);
 				SCTP_INP_INFO_WUNLOCK();
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
 				return (EADDRINUSE);
 			}
 		}
 continue_anyway:
 		SCTP_INP_WLOCK(inp);
 		if (bindall) {
 			/* verify that no lport is not used by a singleton */
 			if ((port_reuse_active == 0) &&
 			    (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))
 			    ) {
 				/* Sorry someone already has this one bound */
 				if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
 				    (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
 					port_reuse_active = 1;
 				} else {
 					SCTP_INP_DECR_REF(inp);
 					SCTP_INP_WUNLOCK(inp);
 					SCTP_INP_INFO_WUNLOCK();
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
 					return (EADDRINUSE);
 				}
 			}
 		}
 	} else {
 		uint16_t first, last, candidate;
 		uint16_t count;
 		int done;
 
 		if (ip_inp->inp_flags & INP_HIGHPORT) {
 			first = MODULE_GLOBAL(ipport_hifirstauto);
 			last = MODULE_GLOBAL(ipport_hilastauto);
 		} else if (ip_inp->inp_flags & INP_LOWPORT) {
 			if (p && (error =
 			    priv_check(p, PRIV_NETINET_RESERVEDPORT)
 			    )) {
 				SCTP_INP_DECR_REF(inp);
 				SCTP_INP_WUNLOCK(inp);
 				SCTP_INP_INFO_WUNLOCK();
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
 				return (error);
 			}
 			first = MODULE_GLOBAL(ipport_lowfirstauto);
 			last = MODULE_GLOBAL(ipport_lowlastauto);
 		} else {
 			first = MODULE_GLOBAL(ipport_firstauto);
 			last = MODULE_GLOBAL(ipport_lastauto);
 		}
 		if (first > last) {
 			uint16_t temp;
 
 			temp = first;
 			first = last;
 			last = temp;
 		}
 		count = last - first + 1;	/* number of candidates */
 		candidate = first + sctp_select_initial_TSN(&inp->sctp_ep) % (count);
 
 		done = 0;
 		while (!done) {
 			if (sctp_isport_inuse(inp, htons(candidate), inp->def_vrf_id) == NULL) {
 				done = 1;
 			}
 			if (!done) {
 				if (--count == 0) {
 					SCTP_INP_DECR_REF(inp);
 					SCTP_INP_WUNLOCK(inp);
 					SCTP_INP_INFO_WUNLOCK();
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
 					return (EADDRINUSE);
 				}
 				if (candidate == last)
 					candidate = first;
 				else
 					candidate = candidate + 1;
 			}
 		}
 		lport = htons(candidate);
 	}
 	SCTP_INP_DECR_REF(inp);
 	if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE |
 	    SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 		/*
 		 * this really should not happen. The guy did a non-blocking
 		 * bind and then did a close at the same time.
 		 */
 		SCTP_INP_WUNLOCK(inp);
 		SCTP_INP_INFO_WUNLOCK();
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 		return (EINVAL);
 	}
 	/* ok we look clear to give out this port, so lets setup the binding */
 	if (bindall) {
 		/* binding to all addresses, so just set in the proper flags */
 		inp->sctp_flags |= SCTP_PCB_FLAGS_BOUNDALL;
 		/* set the automatic addr changes from kernel flag */
 		if (SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) {
 			sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF);
 			sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
 		} else {
 			sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
 			sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
 		}
 		if (SCTP_BASE_SYSCTL(sctp_multiple_asconfs) == 0) {
 			sctp_feature_off(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS);
 		} else {
 			sctp_feature_on(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS);
 		}
 		/*
 		 * set the automatic mobility_base from kernel flag (by
 		 * micchie)
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_mobility_base) == 0) {
 			sctp_mobility_feature_off(inp, SCTP_MOBILITY_BASE);
 			sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
 		} else {
 			sctp_mobility_feature_on(inp, SCTP_MOBILITY_BASE);
 			sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
 		}
 		/*
 		 * set the automatic mobility_fasthandoff from kernel flag
 		 * (by micchie)
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) == 0) {
 			sctp_mobility_feature_off(inp, SCTP_MOBILITY_FASTHANDOFF);
 			sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
 		} else {
 			sctp_mobility_feature_on(inp, SCTP_MOBILITY_FASTHANDOFF);
 			sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
 		}
 	} else {
 		/*
 		 * bind specific, make sure flags is off and add a new
 		 * address structure to the sctp_addr_list inside the ep
 		 * structure.
 		 * 
 		 * We will need to allocate one and insert it at the head. The
 		 * socketopt call can just insert new addresses in there as
 		 * well. It will also have to do the embed scope kame hack
 		 * too (before adding).
 		 */
 		struct sctp_ifa *ifa;
 		struct sockaddr_storage store_sa;
 
 		memset(&store_sa, 0, sizeof(store_sa));
 		if (addr->sa_family == AF_INET) {
 			struct sockaddr_in *sin;
 
 			sin = (struct sockaddr_in *)&store_sa;
 			memcpy(sin, addr, sizeof(struct sockaddr_in));
 			sin->sin_port = 0;
 		} else if (addr->sa_family == AF_INET6) {
 			struct sockaddr_in6 *sin6;
 
 			sin6 = (struct sockaddr_in6 *)&store_sa;
 			memcpy(sin6, addr, sizeof(struct sockaddr_in6));
 			sin6->sin6_port = 0;
 		}
 		/*
 		 * first find the interface with the bound address need to
 		 * zero out the port to find the address! yuck! can't do
 		 * this earlier since need port for sctp_pcb_findep()
 		 */
 		if (sctp_ifap != NULL)
 			ifa = sctp_ifap;
 		else {
 			/*
 			 * Note for BSD we hit here always other O/S's will
 			 * pass things in via the sctp_ifap argument
 			 * (Panda).
 			 */
 			ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
 			    vrf_id, SCTP_ADDR_NOT_LOCKED);
 		}
 		if (ifa == NULL) {
 			/* Can't find an interface with that address */
 			SCTP_INP_WUNLOCK(inp);
 			SCTP_INP_INFO_WUNLOCK();
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL);
 			return (EADDRNOTAVAIL);
 		}
 		if (addr->sa_family == AF_INET6) {
 			/* GAK, more FIXME IFA lock? */
 			if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 				/* Can't bind a non-existent addr. */
 				SCTP_INP_WUNLOCK(inp);
 				SCTP_INP_INFO_WUNLOCK();
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 				return (EINVAL);
 			}
 		}
 		/* we're not bound all */
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL;
 		/* allow bindx() to send ASCONF's for binding changes */
 		sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
 		/* clear automatic addr changes from kernel flag */
 		sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
 
 		/* add this address to the endpoint list */
 		error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0);
 		if (error != 0) {
 			SCTP_INP_WUNLOCK(inp);
 			SCTP_INP_INFO_WUNLOCK();
 			return (error);
 		}
 		inp->laddr_count++;
 	}
 	/* find the bucket */
 	if (port_reuse_active) {
 		/* Put it into tcp 1-2-1 hash */
 		head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashtcpmark))];
 		inp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL;
 	} else {
 		head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))];
 	}
 	/* put it in the bucket */
 	LIST_INSERT_HEAD(head, inp, sctp_hash);
 	SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n",
 	    head, ntohs(lport), port_reuse_active);
 	/* set in the port */
 	inp->sctp_lport = lport;
 
 	/* turn off just the unbound flag */
 	inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
 	SCTP_INP_WUNLOCK(inp);
 	SCTP_INP_INFO_WUNLOCK();
 	return (0);
 }
 
 
 static void
 sctp_iterator_inp_being_freed(struct sctp_inpcb *inp)
 {
 	struct sctp_iterator *running, *queued, *following;
 
 	/*
 	 * Detach every iterator from an endpoint that is about to be freed.
 	 * Per the original note, the caller is expected to hold the
 	 * ITERATOR_LOCK (and a write lock on the inp_info) on entry.
 	 */
 	running = sctp_it_ctl.cur_it;
 	if (running != NULL) {
 		if (running->vn != curvnet) {
 			/* The active iterator belongs to another VNET. */
 			return;
 		}
 		if (running->inp == inp) {
 			/*
 			 * The active iterator will resume on inp once we
 			 * drop the iterator lock. Flag it so it stops
 			 * instead: a single-endpoint iterator is ended
 			 * outright, a multi-endpoint iterator is told to
 			 * move past the current endpoint.
 			 */
 			if (running->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
 				sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
 			} else {
 				sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_INP;
 			}
 		}
 	}
 	/*
 	 * Sweep the queue of iterators that have not started yet and drop
 	 * any reference they hold on inp.
 	 */
 	SCTP_IPI_ITERATOR_WQ_LOCK();
 	for (queued = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); queued != NULL;
 	    queued = following) {
 		/* Fetch the successor first: queued may be freed below. */
 		following = TAILQ_NEXT(queued, sctp_nxt_itr);
 		if ((queued->vn != curvnet) || (queued->inp != inp)) {
 			/* Other VNET, or not pointing at this endpoint. */
 			continue;
 		}
 		if (queued->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
 			/* Dedicated to this endpoint: unlink, notify, free. */
 			TAILQ_REMOVE(&sctp_it_ctl.iteratorhead,
 			    queued, sctp_nxt_itr);
 			if (queued->function_atend != NULL) {
 				(*queued->function_atend) (queued->pointer, queued->val);
 			}
 			SCTP_FREE(queued, SCTP_M_ITER);
 		} else {
 			/* Advance it to the next endpoint on the list. */
 			queued->inp = LIST_NEXT(queued->inp, sctp_list);
 			if (queued->inp) {
 				SCTP_INP_INCR_REF(queued->inp);
 			}
 		}
 		/* Queueing took a reference on inp; give it back. */
 		SCTP_INP_DECR_REF(inp);
 	}
 	SCTP_IPI_ITERATOR_WQ_UNLOCK();
 }
 
 /*
  * Release an sctp_inpcb (endpoint) and unbind its port.
  *
  * inp       - the endpoint to tear down.
  * immediate - when equal to SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE every
  *             association is first given the chance to shut down (or is
  *             aborted when unread data or a partial message is left);
  *             any other value goes straight to the forced pass.
  * from      - identifies the caller. SCTP_CALLED_AFTER_CMPSET_OFCLOSE
  *             turns off further socket wakeups and
  *             SCTP_CALLED_FROM_INPKILL_TIMER selects a non-draining stop
  *             of the signature_change timer.
  *
  * The function returns early, without freeing the endpoint, when:
  *   - SCTP_PCB_FLAGS_SOCKET_ALLGONE is already set (free already ran),
  *   - the graceful pass left associations in shutdown (cnt_in_sd != 0),
  *   - the forced pass left associations still pending (cnt != 0), or
  *   - the endpoint is still referenced or one of its locks is contended,
  *     in which case an INPKILL timer is started.
  * Otherwise all endpoint-owned resources are released and the memory is
  * handed back to the endpoint zone.
  */
 void
 sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
 {
 	/*
 	 * Here we free a endpoint. We must find it (if it is in the Hash
 	 * table) and remove it from there. Then we must also find it in the
 	 * overall list and remove it from there. After all removals are
 	 * complete then any timer has to be stopped. Then start the actual
 	 * freeing. a) Any local lists. b) Any associations. c) The hash of
 	 * all associations. d) finally the ep itself.
 	 */
 	struct sctp_pcb *m;
 	struct sctp_tcb *asoc, *nasoc;
 	struct sctp_laddr *laddr, *nladdr;
 	struct inpcb *ip_pcb;
 	struct socket *so;
 	int being_refed = 0;
 	struct sctp_queued_to_read *sq;
 
 
 	int cnt;
 	sctp_sharedkey_t *shared_key;
 
 
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, NULL, 0);
 #endif
 	SCTP_ITERATOR_LOCK();
 	/* mark any iterators on the list or being processed */
 	sctp_iterator_inp_being_freed(inp);
 	SCTP_ITERATOR_UNLOCK();
 	/* Remember the socket now; inp->sctp_socket is cleared further down. */
 	so = inp->sctp_socket;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 		/* been here before.. eeks.. get out of here */
 		SCTP_PRINTF("This conflict in free SHOULD not be happening! from %d, imm %d\n", from, immediate);
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 1);
 #endif
 		return;
 	}
 	/*
 	 * Lock acquisition order used here: association-create lock, then
 	 * the global info write lock, then the endpoint write lock.
 	 */
 	SCTP_ASOC_CREATE_LOCK(inp);
 	SCTP_INP_INFO_WLOCK();
 
 	SCTP_INP_WLOCK(inp);
 	if (from == SCTP_CALLED_AFTER_CMPSET_OFCLOSE) {
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_CLOSE_IP;
 		/* socket is gone, so no more wakeups allowed */
 		inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE;
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
 
 	}
 	/* First time through we have the socket lock, after that no more. */
 	sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL,
 	    SCTP_FROM_SCTP_PCB + SCTP_LOC_1);
 
 	/* Drop any mbuf chains still attached to the endpoint. */
 	if (inp->control) {
 		sctp_m_freem(inp->control);
 		inp->control = NULL;
 	}
 	if (inp->pkt) {
 		sctp_m_freem(inp->pkt);
 		inp->pkt = NULL;
 	}
 	/* NOTE(review): m is assigned here but not read again in this function. */
 	m = &inp->sctp_ep;
 	ip_pcb = &inp->ip_inp.inp;	/* we could just cast the main pointer
 					 * here but I will be nice :> (i.e.
 					 * ip_pcb = ep;) */
 	if (immediate == SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE) {
 		/*
 		 * Graceful pass: cnt_in_sd counts the associations that are
 		 * still around after this pass (being freed elsewhere, in
 		 * shutdown, or whose sctp_free_assoc() call returned 0).
 		 */
 		int cnt_in_sd;
 
 		cnt_in_sd = 0;
 		/* nasoc is fetched up front because asoc may be freed in the body. */
 		for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
 		    asoc = nasoc) {
 			SCTP_TCB_LOCK(asoc);
 			nasoc = LIST_NEXT(asoc, sctp_tcblist);
 			if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				/* Skip guys being freed */
 				cnt_in_sd++;
 				if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) {
 					/*
 					 * Special case - we did not start a
 					 * kill timer on the asoc due to it
 					 * was not closed. So go ahead and
 					 * start it now.
 					 */
 					asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
 					sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
 				}
 				SCTP_TCB_UNLOCK(asoc);
 				continue;
 			}
 			if (((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) ||
 			    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) &&
 			    (asoc->asoc.total_output_queue_size == 0)) {
 				/*
 				 * If we have data in queue, we don't want
 				 * to just free since the app may have done,
 				 * send()/close or connect/send/close. And
 				 * it wants the data to get across first.
 				 */
 				/* Just abandon things in the front states */
 				if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE,
 				    SCTP_FROM_SCTP_PCB + SCTP_LOC_2) == 0) {
 					cnt_in_sd++;
 				}
 				/*
 				 * No SCTP_TCB_UNLOCK here; presumably
 				 * sctp_free_assoc() disposes of the TCB lock
 				 * - TODO confirm.
 				 */
 				continue;
 			}
 			/* Disconnect the socket please */
 			asoc->sctp_socket = NULL;
 			asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET;
 			if ((asoc->asoc.size_on_reasm_queue > 0) ||
 			    (asoc->asoc.control_pdapi) ||
 			    (asoc->asoc.size_on_all_streams > 0) ||
 			    (so && (so->so_rcv.sb_cc > 0))
 			    ) {
 				/* Left with Data unread */
 				struct mbuf *op_err;
 
 				op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
 				    0, M_DONTWAIT, 1, MT_DATA);
 				if (op_err) {
 					/* Fill in the user initiated abort */
 					struct sctp_paramhdr *ph;
 					uint32_t *ippp;
 
 					SCTP_BUF_LEN(op_err) =
 					    sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
 					ph = mtod(op_err,
 					    struct sctp_paramhdr *);
 					ph->param_type = htons(
 					    SCTP_CAUSE_USER_INITIATED_ABT);
 					ph->param_length = htons(SCTP_BUF_LEN(op_err));
 					/* the location code follows the parameter header */
 					ippp = (uint32_t *) (ph + 1);
 					*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
 				}
 				asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
 #if defined(SCTP_PANIC_ON_ABORT)
 				panic("inpcb_free does an abort");
 #endif
 				/* op_err may be NULL here if the mbuf allocation failed */
 				sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
 				SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 				if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
 				    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 				}
 				if (sctp_free_assoc(inp, asoc,
 				    SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_4) == 0) {
 					cnt_in_sd++;
 				}
 				continue;
 			} else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
 				    TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
 				    (asoc->asoc.stream_queue_cnt == 0)
 			    ) {
 				/* Nothing queued at all. */
 				if (asoc->asoc.locked_on_sending) {
 					/* jumps into the abort code in the else branch below */
 					goto abort_anyway;
 				}
 				if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 				    (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					/*
 					 * there is nothing queued to send,
 					 * so I send shutdown
 					 */
 					sctp_send_shutdown(asoc, asoc->asoc.primary_destination);
 					if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
 					    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
 					SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc,
 					    asoc->asoc.primary_destination);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
 					    asoc->asoc.primary_destination);
 					sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED);
 				}
 			} else {
 				/* mark into shutdown pending */
 				struct sctp_stream_queue_pending *sp;
 
 				asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
 				    asoc->asoc.primary_destination);
 				if (asoc->asoc.locked_on_sending) {
 					/* look at the last pending entry of the locked stream */
 					sp = TAILQ_LAST(&((asoc->asoc.locked_on_sending)->outqueue),
 					    sctp_streamhead);
 					if (sp == NULL) {
 						SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
 						    asoc->asoc.locked_on_sending,
 						    asoc->asoc.locked_on_sending->stream_no);
 					} else {
 						/* an empty, incomplete entry is flagged as a partial message */
 						if ((sp->length == 0) && (sp->msg_is_complete == 0))
 							asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
 					}
 				}
 				if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
 				    TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
 				    (asoc->asoc.state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 					struct mbuf *op_err;
 
 					/*
 					 * Also entered via the goto above when
 					 * nothing is queued but a stream is
 					 * still locked on sending.
 					 */
 			abort_anyway:
 					op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
 					    0, M_DONTWAIT, 1, MT_DATA);
 					if (op_err) {
 						/*
 						 * Fill in the user
 						 * initiated abort
 						 */
 						struct sctp_paramhdr *ph;
 						uint32_t *ippp;
 
 						SCTP_BUF_LEN(op_err) =
 						    (sizeof(struct sctp_paramhdr) +
 						    sizeof(uint32_t));
 						ph = mtod(op_err,
 						    struct sctp_paramhdr *);
 						ph->param_type = htons(
 						    SCTP_CAUSE_USER_INITIATED_ABT);
 						ph->param_length = htons(SCTP_BUF_LEN(op_err));
 						ippp = (uint32_t *) (ph + 1);
 						*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
 					}
 					asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
 #if defined(SCTP_PANIC_ON_ABORT)
 					panic("inpcb_free does an abort");
 #endif
 
 					sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
 					SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 					if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
 					    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					if (sctp_free_assoc(inp, asoc,
 					    SCTP_PCBFREE_NOFORCE,
 					    SCTP_FROM_SCTP_PCB + SCTP_LOC_6) == 0) {
 						cnt_in_sd++;
 					}
 					continue;
 				} else {
 					sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
 				}
 			}
 			/* Association stays alive in some shutdown state. */
 			cnt_in_sd++;
 			SCTP_TCB_UNLOCK(asoc);
 		}
 		/* now is there some left in our SHUTDOWN state? */
 		if (cnt_in_sd) {
 #ifdef SCTP_LOG_CLOSING
 			sctp_log_closing(inp, NULL, 2);
 #endif
 			/* Detach the socket and return; the endpoint is not freed on this call. */
 			inp->sctp_socket = NULL;
 			SCTP_INP_WUNLOCK(inp);
 			SCTP_ASOC_CREATE_UNLOCK(inp);
 			SCTP_INP_INFO_WUNLOCK();
 			return;
 		}
 	}
 	inp->sctp_socket = NULL;
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) !=
 	    SCTP_PCB_FLAGS_UNBOUND) {
 		/*
 		 * ok, this guy has been bound. It's port is somewhere in
 		 * the SCTP_BASE_INFO(hash table). Remove it!
 		 */
 		LIST_REMOVE(inp, sctp_hash);
 		inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND;
 	}
 	/*
 	 * If there is a timer running to kill us, forget it, since it may
 	 * have a contest on the INP lock.. which would cause us to die ...
 	 */
 	/*
 	 * Forced pass: cnt counts associations that remain after it (already
 	 * being freed elsewhere, or sctp_free_assoc() returned 0).
 	 */
 	cnt = 0;
 	for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
 	    asoc = nasoc) {
 		SCTP_TCB_LOCK(asoc);
 		nasoc = LIST_NEXT(asoc, sctp_tcblist);
 		if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) {
 				asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
 				sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
 			}
 			cnt++;
 			SCTP_TCB_UNLOCK(asoc);
 			continue;
 		}
 		/* Free associations that are NOT killing us */
 		if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
 		    ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
 			struct mbuf *op_err;
 			uint32_t *ippp;
 
 			op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
 			    0, M_DONTWAIT, 1, MT_DATA);
 			if (op_err) {
 				/* Fill in the user initiated abort */
 				struct sctp_paramhdr *ph;
 
 				SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) +
 				    sizeof(uint32_t));
 				ph = mtod(op_err, struct sctp_paramhdr *);
 				ph->param_type = htons(
 				    SCTP_CAUSE_USER_INITIATED_ABT);
 				ph->param_length = htons(SCTP_BUF_LEN(op_err));
 				ippp = (uint32_t *) (ph + 1);
 				*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7);
 
 			}
 			asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
 #if defined(SCTP_PANIC_ON_ABORT)
 			panic("inpcb_free does an abort");
 #endif
 			sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
 			SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 		} else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			/*
 			 * NOTE(review): ABOUT_TO_BE_FREED was already handled
 			 * at the top of this loop body, so this branch looks
 			 * unreachable - confirm.
 			 */
 			cnt++;
 			SCTP_TCB_UNLOCK(asoc);
 			continue;
 		}
 		if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
 		    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 		}
 		if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
 			cnt++;
 		}
 	}
 	if (cnt) {
 		/* Ok we have someone out there that will kill us */
 		(void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 3);
 #endif
 		SCTP_INP_WUNLOCK(inp);
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 		SCTP_INP_INFO_WUNLOCK();
 		return;
 	}
 	/* Count contended endpoint locks; any contention defers the free. */
 	if (SCTP_INP_LOCK_CONTENDED(inp))
 		being_refed++;
 	if (SCTP_INP_READ_CONTENDED(inp))
 		being_refed++;
 	if (SCTP_ASOC_CREATE_LOCK_CONTENDED(inp))
 		being_refed++;
 
 	if ((inp->refcount) ||
 	    (being_refed) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CLOSE_IP)) {
 		/*
 		 * Still referenced, contended, or a close is in progress:
 		 * start the INPKILL timer and return without freeing.
 		 */
 		(void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 4);
 #endif
 		sctp_timer_start(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL);
 		SCTP_INP_WUNLOCK(inp);
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 		SCTP_INP_INFO_WUNLOCK();
 		return;
 	}
 	/* Point of no return: mark the endpoint as fully gone. */
 	inp->sctp_ep.signature_change.type = 0;
 	inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE;
 	/*
 	 * Remove it from the list .. last thing we need a lock for.
 	 */
 	LIST_REMOVE(inp, sctp_list);
 	SCTP_INP_WUNLOCK(inp);
 	SCTP_ASOC_CREATE_UNLOCK(inp);
 	SCTP_INP_INFO_WUNLOCK();
 	/*
 	 * Now we release all locks. Since this INP cannot be found anymore
 	 * except possibly by the kill timer that might be running. We call
 	 * the drain function here. It should hit the case were it sees the
 	 * ACTIVE flag cleared and exit out freeing us to proceed and
 	 * destroy everything.
 	 */
 	if (from != SCTP_CALLED_FROM_INPKILL_TIMER) {
 		(void)SCTP_OS_TIMER_STOP_DRAIN(&inp->sctp_ep.signature_change.timer);
 	} else {
 		/* Probably un-needed */
 		(void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
 	}
 
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, NULL, 5);
 #endif
 
 
 	if ((inp->sctp_asocidhash) != NULL) {
 		SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark);
 		inp->sctp_asocidhash = NULL;
 	}
 	/* Drain the endpoint read queue, freeing every queued entry. */
 	/* sa_ignore FREED_MEMORY */
 	while ((sq = TAILQ_FIRST(&inp->read_queue)) != NULL) {
 		/* Its only abandoned if it had data left */
 		if (sq->length)
 			SCTP_STAT_INCR(sctps_left_abandon);
 
 		TAILQ_REMOVE(&inp->read_queue, sq, next);
 		sctp_free_remote_addr(sq->whoFrom);
 		/* uses the socket pointer saved at entry */
 		if (so)
 			so->so_rcv.sb_cc -= sq->length;
 		if (sq->data) {
 			sctp_m_freem(sq->data);
 			sq->data = NULL;
 		}
 		/*
 		 * no need to free the net count, since at this point all
 		 * assoc's are gone.
 		 */
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq);
 		SCTP_DECR_READQ_COUNT();
 	}
 	/* Now the sctp_pcb things */
 	/*
 	 * free each asoc if it is not already closed/free. we can't use the
 	 * macro here since le_next will get freed as part of the
 	 * sctp_free_assoc() call.
 	 */
 	/* NOTE(review): cnt is reset here but not read again in this function. */
 	cnt = 0;
 	if (so) {
 #ifdef IPSEC
 		ipsec_delete_pcbpolicy(ip_pcb);
 #endif				/* IPSEC */
 
 		/* Unlocks not needed since the socket is gone now */
 	}
 	/* Release IP-level options held by the embedded inpcb. */
 	if (ip_pcb->inp_options) {
 		(void)sctp_m_free(ip_pcb->inp_options);
 		ip_pcb->inp_options = 0;
 	}
 	if (ip_pcb->inp_moptions) {
 		inp_freemoptions(ip_pcb->inp_moptions);
 		ip_pcb->inp_moptions = 0;
 	}
 #ifdef INET6
 	if (ip_pcb->inp_vflag & INP_IPV6) {
 		struct in6pcb *in6p;
 
 		in6p = (struct in6pcb *)inp;
 		ip6_freepcbopts(in6p->in6p_outputopts);
 	}
 #endif				/* INET6 */
 	ip_pcb->inp_vflag = 0;
 	/* free up authentication fields */
 	if (inp->sctp_ep.local_auth_chunks != NULL)
 		sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
 	if (inp->sctp_ep.local_hmacs != NULL)
 		sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
 
 	/* Pop and free shared keys until the list is empty. */
 	shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
 	while (shared_key) {
 		LIST_REMOVE(shared_key, next);
 		sctp_free_sharedkey(shared_key);
 		/* sa_ignore FREED_MEMORY */
 		shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
 	}
 
 	/*
 	 * if we have an address list the following will free the list of
 	 * ifaddr's that are set into this ep. Again macro limitations here,
 	 * since the LIST_FOREACH could be a bad idea.
 	 */
 	for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL;
 	    laddr = nladdr) {
 		nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
 		sctp_remove_laddr(laddr);
 	}
 
 #ifdef SCTP_TRACK_FREED_ASOCS
 	/* TEMP CODE */
 	for ((asoc = LIST_FIRST(&inp->sctp_asoc_free_list)); asoc != NULL;
 	    asoc = nasoc) {
 		nasoc = LIST_NEXT(asoc, sctp_tcblist);
 		LIST_REMOVE(asoc, sctp_tcblist);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), asoc);
 		SCTP_DECR_ASOC_COUNT();
 	}
 	/* *** END TEMP CODE *** */
 #endif
 	/* Now lets see about freeing the EP hash table. */
 	if (inp->sctp_tcbhash != NULL) {
 		SCTP_HASH_FREE(inp->sctp_tcbhash, inp->sctp_hashmark);
 		inp->sctp_tcbhash = NULL;
 	}
 	/* Now we must put the ep memory back into the zone pool */
 	INP_LOCK_DESTROY(&inp->ip_inp.inp);
 	SCTP_INP_LOCK_DESTROY(inp);
 	SCTP_INP_READ_DESTROY(inp);
 	SCTP_ASOC_CREATE_LOCK_DESTROY(inp);
 	SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 	SCTP_DECR_EP_COUNT();
 }
 
 
 struct sctp_nets *
 sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr)
 {
 	struct sctp_nets *net;
 
 	/* locate the address */
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		if (sctp_cmpaddr(addr, (struct sockaddr *)&net->ro._l_addr))
 			return (net);
 	}
 	return (NULL);
 }
 
 
 /*
  * Report whether `addr` is known as a local interface address in the VRF
  * `vrf_id`: returns 1 when sctp_find_ifa_by_addr() finds an entry for it,
  * 0 otherwise.
  */
 int
 sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id)
 {
 	return (sctp_find_ifa_by_addr(addr, vrf_id, SCTP_ADDR_NOT_LOCKED) != NULL);
 }
 
 /*
  * Add a remote endpoint address to an association. This is done for the
  * addresses carried in INIT/INIT-ACK as well as when an ASCONF arrives
  * that adds one. It also initializes the per-destination state (scope,
  * MTU, timers, congestion control parameters).
  *
  * stcb      - association receiving the new destination.
  * newaddr   - address to add (AF_INET, or AF_INET6 when INET6 is built
  *             in). Note that it is modified in place: sin_zero is cleared
  *             and the length field is set.
  * set_scope - non-zero: derive the association scope flags from this
  *             address; zero: only validate the address against the
  *             scope already recorded on the association.
  * from      - origin of the call. SCTP_ADDR_IS_CONFIRMED creates the
  *             destination as confirmed; SCTP_ALLOC_ASOC also seeds the
  *             association's smallest_mtu from this destination.
  *
  * Returns 0 on success (including when the address is already present),
  * or -1 when the address is unspecified, the family is not supported, or
  * no sctp_nets structure could be allocated.
  */
 int
 sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
     int set_scope, int from)
 {
 	/*
 	 * The following is redundant to the same lines in the
 	 * sctp_aloc_assoc() but is needed since others call the add address
 	 * function
 	 */
 	struct sctp_nets *net, *netfirst;
 	int addr_inscope;
 
 	SCTPDBG(SCTP_DEBUG_PCB1, "Adding an address (from:%d) to the peer: ",
 	    from);
 	SCTPDBG_ADDR(SCTP_DEBUG_PCB1, newaddr);
 
 	netfirst = sctp_findnet(stcb, newaddr);
 	if (netfirst) {
 		/*
 		 * Lie and return ok, we don't want to make the association
 		 * go away for this behavior. It will happen in the TCP
 		 * model in a connected socket. It does not reach the hash
 		 * table until after the association is built so it can't be
 		 * found. Mark as reachable, since the initial creation will
 		 * have been cleared and the NOT_IN_ASSOC flag will have
 		 * been added... and we don't want to end up removing it
 		 * back out.
 		 */
 		if (netfirst->dest_state & SCTP_ADDR_UNCONFIRMED) {
 			netfirst->dest_state = (SCTP_ADDR_REACHABLE |
 			    SCTP_ADDR_UNCONFIRMED);
 		} else {
 			netfirst->dest_state = SCTP_ADDR_REACHABLE;
 		}
 
 		return (0);
 	}
 	addr_inscope = 1;
 	if (newaddr->sa_family == AF_INET) {
 		struct sockaddr_in *sin;
 
 		sin = (struct sockaddr_in *)newaddr;
 		if (sin->sin_addr.s_addr == 0) {
 			/* Invalid address */
 			return (-1);
 		}
 		/* zero out the bzero area */
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 
 		/* assure len is set */
 		sin->sin_len = sizeof(struct sockaddr_in);
 		if (set_scope) {
 #ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
 			/*
 			 * The scope flags live in the association (asoc),
 			 * exactly as in the #else branch below.
 			 */
 			stcb->asoc.ipv4_local_scope = 1;
 #else
 			if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
 				stcb->asoc.ipv4_local_scope = 1;
 			}
 #endif				/* SCTP_DONT_DO_PRIVADDR_SCOPE */
 		} else {
 			/* Validate the address is in scope */
 			if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
 			    (stcb->asoc.ipv4_local_scope == 0)) {
 				addr_inscope = 0;
 			}
 		}
 #ifdef INET6
 	} else if (newaddr->sa_family == AF_INET6) {
 		struct sockaddr_in6 *sin6;
 
 		sin6 = (struct sockaddr_in6 *)newaddr;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/* Invalid address */
 			return (-1);
 		}
 		/* assure len is set */
 		sin6->sin6_len = sizeof(struct sockaddr_in6);
 		if (set_scope) {
 			if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) {
 				stcb->asoc.loopback_scope = 1;
 				stcb->asoc.local_scope = 0;
 				stcb->asoc.ipv4_local_scope = 1;
 				stcb->asoc.site_scope = 1;
 			} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 				/*
 				 * If the new destination is a LINK_LOCAL we
 				 * must have common site scope. Don't set
 				 * the local scope since we may not share
 				 * all links, only loopback can do this.
 				 * Links on the local network would also be
 				 * on our private network for v4 too.
 				 */
 				stcb->asoc.ipv4_local_scope = 1;
 				stcb->asoc.site_scope = 1;
 			} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
 				/*
 				 * If the new destination is SITE_LOCAL then
 				 * we must have site scope in common.
 				 */
 				stcb->asoc.site_scope = 1;
 			}
 		} else {
 			/* Validate the address is in scope */
 			if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) &&
 			    (stcb->asoc.loopback_scope == 0)) {
 				addr_inscope = 0;
 			} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
 			    (stcb->asoc.local_scope == 0)) {
 				addr_inscope = 0;
 			} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
 			    (stcb->asoc.site_scope == 0)) {
 				addr_inscope = 0;
 			}
 		}
 #endif
 	} else {
 		/* not supported family type */
 		return (-1);
 	}
 	net = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_net), struct sctp_nets);
 	if (net == NULL) {
 		return (-1);
 	}
 	SCTP_INCR_RADDR_COUNT();
 	bzero(net, sizeof(*net));
 	(void)SCTP_GETTIME_TIMEVAL(&net->start_time);
 	/* sa_len was set above for both supported families */
 	memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len);
 	/* the stored copy always carries the association's remote port */
 	if (newaddr->sa_family == AF_INET) {
 		((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport;
 	} else if (newaddr->sa_family == AF_INET6) {
 		((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport;
 	}
 	net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id);
 	if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) {
 		/* a peer address on this host forces loopback-style scope */
 		stcb->asoc.loopback_scope = 1;
 		stcb->asoc.ipv4_local_scope = 1;
 		stcb->asoc.local_scope = 0;
 		stcb->asoc.site_scope = 1;
 		addr_inscope = 1;
 	}
 	net->failure_threshold = stcb->asoc.def_net_failure;
 	if (addr_inscope == 0) {
 		net->dest_state = (SCTP_ADDR_REACHABLE |
 		    SCTP_ADDR_OUT_OF_SCOPE);
 	} else {
 		if (from == SCTP_ADDR_IS_CONFIRMED)
 			/* SCTP_ADDR_IS_CONFIRMED is passed by connect_x */
 			net->dest_state = SCTP_ADDR_REACHABLE;
 		else
 			net->dest_state = SCTP_ADDR_REACHABLE |
 			    SCTP_ADDR_UNCONFIRMED;
 	}
 	/*
 	 * We set this to 0, the timer code knows that this means its an
 	 * initial value
 	 */
 	net->RTO = 0;
 	net->RTO_measured = 0;
 	stcb->asoc.numnets++;
 	*(&net->ref_count) = 1;
 	net->tos_flowlabel = 0;
 	/* a non-zero net->port later shrinks the MTU by the UDP header size */
 	if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable)) {
 		net->port = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
 	} else {
 		net->port = 0;
 	}
 #ifdef INET
 	if (newaddr->sa_family == AF_INET)
 		net->tos_flowlabel = stcb->asoc.default_tos;
 #endif
 #ifdef INET6
 	if (newaddr->sa_family == AF_INET6)
 		net->tos_flowlabel = stcb->asoc.default_flowlabel;
 #endif
 	/* Init the timer structure */
 	SCTP_OS_TIMER_INIT(&net->rxt_timer.timer);
 	SCTP_OS_TIMER_INIT(&net->fr_timer.timer);
 	SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer);
 
 	/* Now generate a route for this guy */
 #ifdef INET6
 	/* KAME hack: embed scopeid */
 	if (newaddr->sa_family == AF_INET6) {
 		struct sockaddr_in6 *sin6;
 
 		sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 		(void)sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
 		sin6->sin6_scope_id = 0;
 	}
 #endif
 	SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
 
 	if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
 		/* Get source address */
 		net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep,
 		    stcb,
 		    (sctp_route_t *) & net->ro,
 		    net,
 		    0,
 		    stcb->asoc.vrf_id);
 		/* Now get the interface MTU */
 		if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
 			net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
 		} else {
 			net->mtu = 0;
 		}
 		if (net->mtu == 0) {
 			/* Huh ?? */
 			net->mtu = SCTP_DEFAULT_MTU;
 		} else {
 			uint32_t rmtu;
 
 			rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
 			if (rmtu == 0) {
 				/*
 				 * Start things off to match mtu of
 				 * interface please.
 				 */
 				SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa,
 				    net->ro.ro_rt, net->mtu);
 			} else {
 				/*
 				 * we take the route mtu over the interface,
 				 * since the route may be leading out the
 				 * loopback, or a different interface.
 				 */
 				net->mtu = rmtu;
 			}
 		}
 		if (from == SCTP_ALLOC_ASOC) {
 			stcb->asoc.smallest_mtu = net->mtu;
 		}
 	} else {
 		/* no usable route: fall back to the association's smallest MTU */
 		net->mtu = stcb->asoc.smallest_mtu;
 	}
 #ifdef INET6
 	/* undo the scope embedding done before the route lookup */
 	if (newaddr->sa_family == AF_INET6) {
 		struct sockaddr_in6 *sin6;
 
 		sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
 		(void)sa6_recoverscope(sin6);
 	}
 #endif
 	if (net->port) {
 		net->mtu -= sizeof(struct udphdr);
 	}
 	if (stcb->asoc.smallest_mtu > net->mtu) {
 		stcb->asoc.smallest_mtu = net->mtu;
 	}
 	/* JRS - Use the congestion control given in the CC module */
 	stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
 
 	/*
 	 * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning
 	 * of assoc (2005/06/27, iyengar@cis.udel.edu)
 	 */
 	net->find_pseudo_cumack = 1;
 	net->find_rtx_pseudo_cumack = 1;
 	net->src_addr_selected = 0;
 	/* Pick the insertion point in the destination list. */
 	netfirst = TAILQ_FIRST(&stcb->asoc.nets);
 	if (net->ro.ro_rt == NULL) {
 		/* Since we have no route put it at the back */
 		TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
 	} else if (netfirst == NULL) {
 		/* We are the first one in the pool. */
 		TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
 	} else if (netfirst->ro.ro_rt == NULL) {
 		/*
 		 * First one has NO route. Place this one ahead of the first
 		 * one.
 		 */
 		TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
 	} else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) {
 		/*
 		 * This one has a different interface than the one at the
 		 * top of the list. Place it ahead.
 		 */
 		TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
 	} else {
 		/*
 		 * Ok we have the same interface as the first one. Move
 		 * forward until we find either a) one with a NULL route...
 		 * insert ahead of that b) one with a different ifp.. insert
 		 * after that. c) end of the list.. insert at the tail.
 		 */
 		struct sctp_nets *netlook;
 
 		do {
 			netlook = TAILQ_NEXT(netfirst, sctp_next);
 			if (netlook == NULL) {
 				/* End of the list */
 				TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
 				break;
 			} else if (netlook->ro.ro_rt == NULL) {
 				/* next one has NO route */
 				TAILQ_INSERT_BEFORE(netfirst, net, sctp_next);
 				break;
 			} else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) {
 				TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook,
 				    net, sctp_next);
 				break;
 			}
 			/* Shift forward */
 			netfirst = netlook;
 		} while (netlook != NULL);
 	}
 
 	/* got to have a primary set */
 	if (stcb->asoc.primary_destination == NULL) {
 		stcb->asoc.primary_destination = net;
 	} else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) &&
 		    (net->ro.ro_rt) &&
 	    ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) {
 		/* No route to current primary adopt new primary */
 		stcb->asoc.primary_destination = net;
 	}
 	sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb,
 	    net);
 	/* Validate primary is first */
 	net = TAILQ_FIRST(&stcb->asoc.nets);
 	if ((net != stcb->asoc.primary_destination) &&
 	    (stcb->asoc.primary_destination)) {
 		/*
 		 * first one on the list is NOT the primary sctp_cmpaddr()
 		 * is much more efficient if the primary is the first on the
 		 * list, make it so.
 		 */
 		TAILQ_REMOVE(&stcb->asoc.nets,
 		    stcb->asoc.primary_destination, sctp_next);
 		TAILQ_INSERT_HEAD(&stcb->asoc.nets,
 		    stcb->asoc.primary_destination, sctp_next);
 	}
 	return (0);
 }
 
 
 static uint32_t
 sctp_aloc_a_assoc_id(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
 {
 	uint32_t id;
 	struct sctpasochead *head;
 	struct sctp_tcb *lstcb;
 
 	SCTP_INP_WLOCK(inp);
 try_again:
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 		/* TSNH */
 		SCTP_INP_WUNLOCK(inp);
 		return (0);
 	}
 	/*
 	 * We don't allow assoc id to be 0, this is needed otherwise if the
 	 * id were to wrap we would have issues with some socket options.
 	 */
 	if (inp->sctp_associd_counter == 0) {
 		inp->sctp_associd_counter++;
 	}
 	id = inp->sctp_associd_counter;
 	inp->sctp_associd_counter++;
 	lstcb = sctp_findasoc_ep_asocid_locked(inp, (sctp_assoc_t) id, 0);
 	if (lstcb) {
 		goto try_again;
 	}
 	head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)];
 	LIST_INSERT_HEAD(head, stcb, sctp_tcbasocidhash);
 	stcb->asoc.in_asocid_hash = 1;
 	SCTP_INP_WUNLOCK(inp);
 	return id;
 }
 
 /*
  * Allocate an association and add it to the endpoint.  The caller must be
  * careful to add all additional addresses right away once they are known,
  * or else the association may experience a blackout scenario.
  *
  * inp          - endpoint that will own the association.
  * firstaddr    - first peer address (AF_INET or AF_INET6, with non-zero
  *                port and a specified address); must not be NULL.
  * error        - out: set to an errno value whenever NULL is returned.
  * override_tag - passed through to sctp_init_asoc().
  * vrf_id       - passed through to sctp_init_asoc().
  * p            - thread used for the implicit bind; only meaningful when
  *                the endpoint is still unbound.
  *
  * Returns the new association, or NULL on failure.  On success the TCB
  * lock taken below is still held when this function returns.
  */
 struct sctp_tcb *
 sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
     int *error, uint32_t override_tag, uint32_t vrf_id,
     struct thread *p
 )
 {
 	/* note the p argument is only valid in unbound sockets */
 
 	struct sctp_tcb *stcb;
 	struct sctp_association *asoc;
 	struct sctpasochead *head;
 	uint16_t rport;
 	int err;
 
 	/*
 	 * Assumption made here: Caller has done a
 	 * sctp_findassociation_ep_addr(ep, addr's); to make sure the
 	 * address does not exist already.
 	 */
 	if (SCTP_BASE_INFO(ipi_count_asoc) >= SCTP_MAX_NUM_OF_ASOC) {
 		/* Hit max assoc, sorry no more */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
 		*error = ENOBUFS;
 		return (NULL);
 	}
 	if (firstaddr == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 		*error = EINVAL;
 		return (NULL);
 	}
 	SCTP_INP_RLOCK(inp);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
 	    ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) {
 		/*
 		 * If its in the TCP pool, its NOT allowed to create an
 		 * association. The parent listener needs to call
 		 * sctp_aloc_assoc.. or the one-2-many socket. If a peeled
 		 * off, or connected one does this.. its an error.
 		 */
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 		*error = EINVAL;
 		return (NULL);
 	}
 	SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:");
 #ifdef SCTP_DEBUG
 	if (firstaddr) {
 		SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr);
 		SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n",
 		    ntohs(((struct sockaddr_in *)firstaddr)->sin_port));
 	} else {
 		SCTPDBG(SCTP_DEBUG_PCB3, "None\n");
 	}
 #endif				/* SCTP_DEBUG */
 	/* Validate the peer address and pull out the remote port. */
 	if (firstaddr->sa_family == AF_INET) {
 		struct sockaddr_in *sin;
 
 		sin = (struct sockaddr_in *)firstaddr;
 		if ((sin->sin_port == 0) || (sin->sin_addr.s_addr == 0)) {
 			/* Invalid address */
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 			*error = EINVAL;
 			return (NULL);
 		}
 		rport = sin->sin_port;
 	} else if (firstaddr->sa_family == AF_INET6) {
 		struct sockaddr_in6 *sin6;
 
 		sin6 = (struct sockaddr_in6 *)firstaddr;
 		if ((sin6->sin6_port == 0) ||
 		    (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
 			/* Invalid address */
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 			*error = EINVAL;
 			return (NULL);
 		}
 		rport = sin6->sin6_port;
 	} else {
 		/* not supported family type */
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 		*error = EINVAL;
 		return (NULL);
 	}
 	SCTP_INP_RUNLOCK(inp);
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
 		/*
 		 * If you have not performed a bind, then we need to do the
 		 * ephemeral bind for you.
 		 */
 		if ((err = sctp_inpcb_bind(inp->sctp_socket,
 		    (struct sockaddr *)NULL,
 		    (struct sctp_ifa *)NULL,
 		    p
 		    ))) {
 			/* bind error, probably perm */
 			*error = err;
 			return (NULL);
 		}
 	}
 	stcb = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_asoc), struct sctp_tcb);
 	if (stcb == NULL) {
 		/* out of memory? */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
 		*error = ENOMEM;
 		return (NULL);
 	}
 	SCTP_INCR_ASOC_COUNT();
 
 	bzero(stcb, sizeof(*stcb));
 	asoc = &stcb->asoc;
 
 	/*
 	 * This may return 0 without linking the stcb into the assoc-id hash
 	 * (endpoint flagged SOCKET_ALLGONE); asoc->in_asocid_hash tells the
 	 * error paths below whether there is anything to unlink.
 	 */
 	asoc->assoc_id = sctp_aloc_a_assoc_id(inp, stcb);
 	SCTP_TCB_LOCK_INIT(stcb);
 	SCTP_TCB_SEND_LOCK_INIT(stcb);
 	stcb->rport = rport;
 	/* setup back pointer's */
 	stcb->sctp_ep = inp;
 	stcb->sctp_socket = inp->sctp_socket;
 	if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id))) {
 		/* failed */
 		SCTP_TCB_LOCK_DESTROY(stcb);
 		SCTP_TCB_SEND_LOCK_DESTROY(stcb);
 		/*
 		 * The assoc-id hash is modified under the endpoint write
 		 * lock everywhere else, so take it for the unlink, and only
 		 * unlink if the stcb was actually inserted.
 		 */
 		SCTP_INP_WLOCK(inp);
 		if (stcb->asoc.in_asocid_hash) {
 			LIST_REMOVE(stcb, sctp_tcbasocidhash);
 		}
 		SCTP_INP_WUNLOCK(inp);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
 		SCTP_DECR_ASOC_COUNT();
 		*error = err;
 		return (NULL);
 	}
 	/* and the port */
 	SCTP_INP_INFO_WLOCK();
 	SCTP_INP_WLOCK(inp);
 	if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 		/* inpcb freed while alloc going on */
 		SCTP_TCB_LOCK_DESTROY(stcb);
 		SCTP_TCB_SEND_LOCK_DESTROY(stcb);
 		/* May never have been hashed if ALLGONE was already set. */
 		if (stcb->asoc.in_asocid_hash) {
 			LIST_REMOVE(stcb, sctp_tcbasocidhash);
 		}
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
 		SCTP_INP_WUNLOCK(inp);
 		SCTP_INP_INFO_WUNLOCK();
 		SCTP_DECR_ASOC_COUNT();
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 		*error = EINVAL;
 		return (NULL);
 	}
 	SCTP_TCB_LOCK(stcb);
 
 	/* now that my_vtag is set, add it to the hash */
 	head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
 	/* put it in the bucket in the vtag hash of assoc's for the system */
 	LIST_INSERT_HEAD(head, stcb, sctp_asocs);
 	SCTP_INP_INFO_WUNLOCK();
 
 	if ((err = sctp_add_remote_addr(stcb, firstaddr, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) {
 		/* failure.. memory error? */
 		if (asoc->strmout) {
 			SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
 			asoc->strmout = NULL;
 		}
 		if (asoc->mapping_array) {
 			SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
 			asoc->mapping_array = NULL;
 		}
 		if (asoc->nr_mapping_array) {
 			SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP);
 			asoc->nr_mapping_array = NULL;
 		}
 		SCTP_DECR_ASOC_COUNT();
 		SCTP_TCB_LOCK_DESTROY(stcb);
 		SCTP_TCB_SEND_LOCK_DESTROY(stcb);
 		if (stcb->asoc.in_asocid_hash) {
 			LIST_REMOVE(stcb, sctp_tcbasocidhash);
 		}
 		/*
 		 * NOTE(review): stcb was linked into the global
 		 * sctp_asochash bucket above (sctp_asocs) and is not
 		 * unlinked on this path before being freed.  Unlinking
 		 * needs the INP_INFO write lock, which has already been
 		 * dropped; confirm the lock order and fix separately.
 		 */
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
 		SCTP_INP_WUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
 		*error = ENOBUFS;
 		return (NULL);
 	}
 	/* Init all the timers */
 	SCTP_OS_TIMER_INIT(&asoc->hb_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->shut_guard_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->autoclose_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->delayed_event_timer.timer);
 	SCTP_OS_TIMER_INIT(&asoc->delete_prim_timer.timer);
 
 	LIST_INSERT_HEAD(&inp->sctp_asoc_list, stcb, sctp_tcblist);
 	/* now file the port under the hash as well */
 	if (inp->sctp_tcbhash != NULL) {
 		head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(stcb->rport,
 		    inp->sctp_hashmark)];
 		LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
 	}
 	SCTP_INP_WUNLOCK(inp);
 	SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb);
 	return (stcb);
 }
 
 
 /*
  * Unlink a destination (net) from the association's net list, repair any
  * association pointers that referred to it, and drop the list's reference
  * via sctp_free_remote_addr().
  */
 void
 sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 
 	asoc->numnets--;
 	TAILQ_REMOVE(&asoc->nets, net, sctp_next);
 
 	if (net == asoc->primary_destination) {
 		/* The primary is going away: a replacement is needed. */
 		struct sctp_nets *first_left = TAILQ_FIRST(&asoc->nets);
 
 		/*
 		 * Mobility adaptation: ideally, if the deleted destination
 		 * is the primary, it becomes a fast retransmission trigger
 		 * by the subsequent SET PRIMARY. (by micchie)
 		 */
 		if (sctp_is_mobility_feature_on(stcb->sctp_ep,
 		    SCTP_MOBILITY_BASE) ||
 		    sctp_is_mobility_feature_on(stcb->sctp_ep,
 		    SCTP_MOBILITY_FASTHANDOFF)) {
 			SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: primary dst is deleting\n");
 			if (asoc->deleted_primary == NULL) {
 				/*
 				 * Remember the old primary (holding an extra
 				 * reference on it), clear its lastsa/lastsv
 				 * fields and arm the PRIM_DELETED timer.
 				 */
 				asoc->deleted_primary = net;
 				atomic_add_int(&net->ref_count, 1);
 				memset(&net->lastsa, 0, sizeof(net->lastsa));
 				memset(&net->lastsv, 0, sizeof(net->lastsv));
 				sctp_mobility_feature_on(stcb->sctp_ep,
 				    SCTP_MOBILITY_PRIM_DELETED);
 				sctp_timer_start(SCTP_TIMER_TYPE_PRIM_DELETED,
 				    stcb->sctp_ep, stcb, NULL);
 			} else {
 				/* A previous deleted primary is still recorded. */
 				SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: deleted primary may be already stored\n");
 			}
 		}
 		/* Try to find a confirmed primary */
 		asoc->primary_destination = sctp_find_alternate_net(stcb, first_left, 0);
 	}
 	if (net == asoc->last_data_chunk_from) {
 		/* Re-point at whatever now heads the list (may be NULL). */
 		asoc->last_data_chunk_from = TAILQ_FIRST(&asoc->nets);
 	}
 	if (net == asoc->last_control_chunk_from) {
 		/* No substitute for the control-chunk source: just clear it. */
 		asoc->last_control_chunk_from = NULL;
 	}
 	sctp_free_remote_addr(net);
 }
 
 /*
  * remove a remote endpoint address from an association, it will fail if the
  * address does not exist.
  */
 int
 sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr)
 {
 	/*
 	 * Here we need to remove a remote address. This is quite simple, we
 	 * first find it in the list of address for the association
 	 * (tasoc->asoc.nets) and then if it is there, we do a LIST_REMOVE
 	 * on that item. Note we do not allow it to be removed if there are
 	 * no other addresses.
 	 */
 	struct sctp_association *asoc;
 	struct sctp_nets *net, *net_tmp;
 
 	asoc = &stcb->asoc;
 
 	/* locate the address */
 	for (net = TAILQ_FIRST(&asoc->nets); net != NULL; net = net_tmp) {
 		net_tmp = TAILQ_NEXT(net, sctp_next);
 		if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) {
 			continue;
 		}
 		if (sctp_cmpaddr((struct sockaddr *)&net->ro._l_addr,
 		    remaddr)) {
 			/* we found the guy */
 			if (asoc->numnets < 2) {
 				/* Must have at LEAST two remote addresses */
 				return (-1);
 			} else {
 				sctp_remove_net(stcb, net);
 				return (0);
 			}
 		}
 	}
 	/* not found. */
 	return (-2);
 }
 
 /*
  * Clear the first time-wait entry matching (tag, lport, rport), if any.
  * This function takes no lock itself.
  * NOTE(review): presumably the caller holds the INP_INFO lock - confirm.
  */
 void
 sctp_delete_from_timewait(uint32_t tag, uint16_t lport, uint16_t rport)
 {
 	struct sctpvtaghead *bucket;
 	struct sctp_tagblock *blk;
 	int slot;
 
 	bucket = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
 	LIST_FOREACH(blk, bucket, sctp_nxt_tagblock) {
 		for (slot = 0; slot < SCTP_NUMBER_IN_VTAG_BLOCK; slot++) {
 			if ((blk->vtag_block[slot].v_tag != tag) ||
 			    (blk->vtag_block[slot].lport != lport) ||
 			    (blk->vtag_block[slot].rport != rport)) {
 				continue;
 			}
 			/* Match: wipe the slot and stop searching. */
 			blk->vtag_block[slot].tv_sec_at_expire = 0;
 			blk->vtag_block[slot].v_tag = 0;
 			blk->vtag_block[slot].lport = 0;
 			blk->vtag_block[slot].rport = 0;
 			return;
 		}
 	}
 }
 
 /*
  * Report whether (tag, lport, rport) is currently recorded in the vtag
  * time-wait table.  Returns 1 if an entry matches, 0 otherwise.  The
  * INP_INFO write lock is held for the duration of the lookup.  Expiry
  * times are not consulted here.
  */
 int
 sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport)
 {
 	struct sctpvtaghead *bucket;
 	struct sctp_tagblock *blk;
 	int hit = 0;
 	int slot;
 
 	SCTP_INP_INFO_WLOCK();
 	bucket = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
 	LIST_FOREACH(blk, bucket, sctp_nxt_tagblock) {
 		for (slot = 0; slot < SCTP_NUMBER_IN_VTAG_BLOCK; slot++) {
 			if ((blk->vtag_block[slot].v_tag == tag) &&
 			    (blk->vtag_block[slot].lport == lport) &&
 			    (blk->vtag_block[slot].rport == rport)) {
 				hit = 1;
 				goto done;
 			}
 		}
 	}
 done:
 	SCTP_INP_INFO_WUNLOCK();
 	return (hit);
 }
 
 
 /*
  * Record (tag, lport, rport) in the vtag time-wait table so that it
  * expires "time" seconds from now.  A "time" of 0 disables the feature
  * and nothing is recorded.
  *
  * While searching for a free slot, entries whose expiry time has passed
  * are cleared.  That audit only covers blocks up to and including the one
  * in which the new tag is placed.  If no slot is free a new block is
  * allocated and put at the head of the chain; on allocation failure the
  * tag is silently dropped (or the kernel panics under INVARIANTS).
  */
 void
 sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t rport)
 {
 	struct sctpvtaghead *bucket;
 	struct sctp_tagblock *blk;
 	struct timeval now;
 	int placed = 0;
 	int slot;
 
 	if (time == 0) {
 		/* Its disabled */
 		return;
 	}
 	(void)SCTP_GETTIME_TIMEVAL(&now);
 	bucket = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
 
 	/* Look for space in the existing blocks, expiring on the fly. */
 	LIST_FOREACH(blk, bucket, sctp_nxt_tagblock) {
 		for (slot = 0; slot < SCTP_NUMBER_IN_VTAG_BLOCK; slot++) {
 			/* Step 1: audit - free the slot if it has expired. */
 			if ((blk->vtag_block[slot].v_tag) &&
 			    ((long)blk->vtag_block[slot].tv_sec_at_expire < now.tv_sec)) {
 				blk->vtag_block[slot].tv_sec_at_expire = 0;
 				blk->vtag_block[slot].v_tag = 0;
 				blk->vtag_block[slot].lport = 0;
 				blk->vtag_block[slot].rport = 0;
 			}
 			/*
 			 * Step 2: claim the first free (or just freed) slot
 			 * for the new tag; later slots are only audited.
 			 */
 			if ((blk->vtag_block[slot].v_tag == 0) && !placed) {
 				blk->vtag_block[slot].tv_sec_at_expire =
 				    now.tv_sec + time;
 				blk->vtag_block[slot].v_tag = tag;
 				blk->vtag_block[slot].lport = lport;
 				blk->vtag_block[slot].rport = rport;
 				placed = 1;
 			}
 		}
 		if (placed) {
 			/*
 			 * We only do up to the block where we can place our
 			 * tag for audits
 			 */
 			break;
 		}
 	}
 	if (placed) {
 		return;
 	}
 	/* Need to add a new block to chain */
 	SCTP_MALLOC(blk, struct sctp_tagblock *,
 	    sizeof(struct sctp_tagblock), SCTP_M_TIMW);
 	if (blk == NULL) {
 #ifdef INVARIANTS
 		panic("Can not alloc tagblock");
 #endif
 		return;
 	}
 	memset(blk, 0, sizeof(struct sctp_tagblock));
 	LIST_INSERT_HEAD(bucket, blk, sctp_nxt_tagblock);
 	blk->vtag_block[0].tv_sec_at_expire = now.tv_sec + time;
 	blk->vtag_block[0].v_tag = tag;
 	blk->vtag_block[0].lport = lport;
 	blk->vtag_block[0].rport = rport;
 }
 
 
 
 /*-
  * Free the association after un-hashing the remote port. This
  * function ALWAYS returns holding NO LOCK on the stcb. It DOES
  * expect that the input to this function IS a locked TCB.
  * It will return 0 if it did NOT destroy the association (instead
  * it unlocks it). It will return NON-zero if it either destroyed the
  * association OR the association is already destroyed.
  */
 int
 sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfree, int from_location)
 {
 	int i;
 	struct sctp_association *asoc;
 	struct sctp_nets *net, *prev;
 	struct sctp_laddr *laddr;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_asconf_addr *aparam;
 	struct sctp_asconf_ack *aack;
 	struct sctp_stream_reset_list *liste;
 	struct sctp_queued_to_read *sq;
 	struct sctp_stream_queue_pending *sp;
 	sctp_sharedkey_t *shared_key;
 	struct socket *so;
 	int ccnt = 0;
 	int cnt = 0;
 
 	/* first, lets purge the entry from the hash table. */
 
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, stcb, 6);
 #endif
 	if (stcb->asoc.state == 0) {
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 7);
 #endif
 		/* there is no asoc, really TSNH :-0 */
 		return (1);
 	}
 	/* TEMP CODE */
 	if (stcb->freed_from_where == 0) {
 		/* Only record the first place free happened from */
 		stcb->freed_from_where = from_location;
 	}
 	/* TEMP CODE */
 
 	asoc = &stcb->asoc;
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
 		/* nothing around */
 		so = NULL;
 	else
 		so = inp->sctp_socket;
 
 	/*
 	 * We used timer based freeing if a reader or writer is in the way.
 	 * So we first check if we are actually being called from a timer,
 	 * if so we abort early if a reader or writer is still in the way.
 	 */
 	if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) &&
 	    (from_inpcbfree == SCTP_NORMAL_PROC)) {
 		/*
 		 * is it the timer driving us? if so are the reader/writers
 		 * gone?
 		 */
 		if (stcb->asoc.refcnt) {
 			/* nope, reader or writer in the way */
 			sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
 			/* no asoc destroyed */
 			SCTP_TCB_UNLOCK(stcb);
 #ifdef SCTP_LOG_CLOSING
 			sctp_log_closing(inp, stcb, 8);
 #endif
 			return (0);
 		}
 	}
 	/* now clean up any other timers */
 	(void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
 	asoc->hb_timer.self = NULL;
 	(void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
 	asoc->dack_timer.self = NULL;
 	(void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
 	/*-
 	 * For stream reset we don't blast this unless
 	 * it is a str-reset timer, it might be the
 	 * free-asoc timer which we DON'T want to
 	 * disturb.
 	 */
 	if (asoc->strreset_timer.type == SCTP_TIMER_TYPE_STRRESET)
 		asoc->strreset_timer.self = NULL;
 	(void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
 	asoc->asconf_timer.self = NULL;
 	(void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
 	asoc->autoclose_timer.self = NULL;
 	(void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
 	asoc->shut_guard_timer.self = NULL;
 	(void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
 	asoc->delayed_event_timer.self = NULL;
 	/* Mobility adaptation */
 	(void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer);
 	asoc->delete_prim_timer.self = NULL;
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		(void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
 		net->fr_timer.self = NULL;
 		(void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
 		net->rxt_timer.self = NULL;
 		(void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
 		net->pmtu_timer.self = NULL;
 	}
 	/* Now the read queue needs to be cleaned up (only once) */
 	cnt = 0;
 	if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) {
 		stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED;
 		SCTP_INP_READ_LOCK(inp);
 		TAILQ_FOREACH(sq, &inp->read_queue, next) {
 			if (sq->stcb == stcb) {
 				sq->do_not_ref_stcb = 1;
 				sq->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
 				/*
 				 * If there is no end, there never will be
 				 * now.
 				 */
 				if (sq->end_added == 0) {
 					/* Held for PD-API clear that. */
 					sq->pdapi_aborted = 1;
 					sq->held_length = 0;
 					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) {
 						/*
 						 * Need to add a PD-API
 						 * aborted indication.
 						 * Setting the control_pdapi
 						 * assures that it will be
 						 * added right after this
 						 * msg.
 						 */
 						uint32_t strseq;
 
 						stcb->asoc.control_pdapi = sq;
 						strseq = (sq->sinfo_stream << 16) | sq->sinfo_ssn;
 						sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
 						    stcb,
 						    SCTP_PARTIAL_DELIVERY_ABORTED,
 						    (void *)&strseq,
 						    SCTP_SO_LOCKED);
 						stcb->asoc.control_pdapi = NULL;
 					}
 				}
 				/* Add an end to wake them */
 				sq->end_added = 1;
 				cnt++;
 			}
 		}
 		SCTP_INP_READ_UNLOCK(inp);
 		if (stcb->block_entry) {
 			cnt++;
 			SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET);
 			stcb->block_entry->error = ECONNRESET;
 			stcb->block_entry = NULL;
 		}
 	}
 	if ((stcb->asoc.refcnt) || (stcb->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE)) {
 		/*
 		 * Someone holds a reference OR the socket is unaccepted
 		 * yet.
 		 */
 		if ((stcb->asoc.refcnt) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
 			stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
 			sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
 		}
 		SCTP_TCB_UNLOCK(stcb);
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
 			/* nothing around */
 			so = NULL;
 		if (so) {
 			/* Wake any reader/writers */
 			sctp_sorwakeup(inp, so);
 			sctp_sowwakeup(inp, so);
 		}
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, stcb, 9);
 #endif
 		/* no asoc destroyed */
 		return (0);
 	}
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, stcb, 10);
 #endif
 	/*
 	 * When I reach here, no others want to kill the assoc yet.. and I
 	 * own the lock. Now its possible an abort comes in when I do the
 	 * lock exchange below to grab all the locks to do the final take
 	 * out. to prevent this we increment the count, which will start a
 	 * timer and blow out above thus assuring us that we hold exclusive
 	 * killing of the asoc. Note that after getting back the TCB lock we
 	 * will go ahead and increment the counter back up and stop any
 	 * timer a passing stranger may have started :-S
 	 */
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_INP_INFO_WLOCK();
 		SCTP_INP_WLOCK(inp);
 		SCTP_TCB_LOCK(stcb);
 	}
 	/* Double check the GONE flag */
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
 		/* nothing around */
 		so = NULL;
 
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		/*
 		 * For TCP type we need special handling when we are
 		 * connected. We also include the peel'ed off ones to.
 		 */
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
 			inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED;
 			inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED;
 			if (so) {
 				SOCK_LOCK(so);
 				if (so->so_rcv.sb_cc == 0) {
 					so->so_state &= ~(SS_ISCONNECTING |
 					    SS_ISDISCONNECTING |
 					    SS_ISCONFIRMING |
 					    SS_ISCONNECTED);
 				}
 				socantrcvmore_locked(so);
 				sctp_sowwakeup(inp, so);
 				sctp_sorwakeup(inp, so);
 				SCTP_SOWAKEUP(so);
 			}
 		}
 	}
 	/*
 	 * Make it invalid too, that way if its about to run it will abort
 	 * and return.
 	 */
 	/* re-increment the lock */
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
 		atomic_add_int(&stcb->asoc.refcnt, -1);
 	}
 	if (stcb->asoc.refcnt) {
 		stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
 		sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
 		if (from_inpcbfree == SCTP_NORMAL_PROC) {
 			SCTP_INP_INFO_WUNLOCK();
 			SCTP_INP_WUNLOCK(inp);
 		}
 		SCTP_TCB_UNLOCK(stcb);
 		return (0);
 	}
 	asoc->state = 0;
 	if (inp->sctp_tcbhash) {
 		LIST_REMOVE(stcb, sctp_tcbhash);
 	}
 	if (stcb->asoc.in_asocid_hash) {
 		LIST_REMOVE(stcb, sctp_tcbasocidhash);
 	}
 	/* Now lets remove it from the list of ALL associations in the EP */
 	LIST_REMOVE(stcb, sctp_tcblist);
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
 		SCTP_INP_INCR_REF(inp);
 		SCTP_INP_WUNLOCK(inp);
 	}
 	/* pull from vtag hash */
 	LIST_REMOVE(stcb, sctp_asocs);
 	sctp_add_vtag_to_timewait(asoc->my_vtag, SCTP_BASE_SYSCTL(sctp_vtag_time_wait),
 	    inp->sctp_lport, stcb->rport);
 
 	/*
 	 * Now restop the timers to be sure this is paranoia at is finest!
 	 */
 	(void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
 	(void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		(void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
 		(void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
 		(void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
 	}
 
 	asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE;
 	prev = NULL;
 	/*
 	 * The chunk lists and such SHOULD be empty but we check them just
 	 * in case.
 	 */
 	/* anything on the wheel needs to be removed */
 	for (i = 0; i < asoc->streamoutcnt; i++) {
 		struct sctp_stream_out *outs;
 
 		outs = &asoc->strmout[i];
 		/* now clean up any chunks here */
 		sp = TAILQ_FIRST(&outs->outqueue);
 		while (sp) {
 			TAILQ_REMOVE(&outs->outqueue, sp, next);
 			if (sp->data) {
 				if (so) {
 					/* Still an open socket - report */
 					sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
 					    SCTP_NOTIFY_DATAGRAM_UNSENT,
 					    (void *)sp, SCTP_SO_LOCKED);
 				}
 				if (sp->data) {
 					sctp_m_freem(sp->data);
 					sp->data = NULL;
 					sp->tail_mbuf = NULL;
 				}
 			}
 			if (sp->net) {
 				sctp_free_remote_addr(sp->net);
 				sp->net = NULL;
 			}
 			sctp_free_spbufspace(stcb, asoc, sp);
 			if (sp->holds_key_ref)
 				sctp_auth_key_release(stcb, sp->auth_keyid);
 			/* Free the zone stuff  */
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), sp);
 			SCTP_DECR_STRMOQ_COUNT();
 			/* sa_ignore FREED_MEMORY */
 			sp = TAILQ_FIRST(&outs->outqueue);
 		}
 	}
 
 	/* sa_ignore FREED_MEMORY */
 	while ((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) {
 		TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
 		SCTP_FREE(liste, SCTP_M_STRESET);
 	}
 
 	sq = TAILQ_FIRST(&asoc->pending_reply_queue);
 	while (sq) {
 		TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next);
 		if (sq->data) {
 			sctp_m_freem(sq->data);
 			sq->data = NULL;
 		}
 		sctp_free_remote_addr(sq->whoFrom);
 		sq->whoFrom = NULL;
 		sq->stcb = NULL;
 		/* Free the ctl entry */
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq);
 		SCTP_DECR_READQ_COUNT();
 		/* sa_ignore FREED_MEMORY */
 		sq = TAILQ_FIRST(&asoc->pending_reply_queue);
 	}
 
 	chk = TAILQ_FIRST(&asoc->free_chunks);
 	while (chk) {
 		TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next);
 		if (chk->data) {
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 		}
 		if (chk->holds_key_ref)
 			sctp_auth_key_release(stcb, chk->auth_keyid);
 		ccnt++;
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
 		SCTP_DECR_CHK_COUNT();
 		atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1);
 		asoc->free_chunk_cnt--;
 		/* sa_ignore FREED_MEMORY */
 		chk = TAILQ_FIRST(&asoc->free_chunks);
 	}
 	/* pending send queue SHOULD be empty */
 	if (!TAILQ_EMPTY(&asoc->send_queue)) {
 		chk = TAILQ_FIRST(&asoc->send_queue);
 		while (chk) {
 			TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 			if (chk->data) {
 				if (so) {
 					/* Still a socket? */
 					sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
 					    SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_LOCKED);
 				}
 				if (chk->data) {
 					sctp_m_freem(chk->data);
 					chk->data = NULL;
 				}
 			}
 			if (chk->holds_key_ref)
 				sctp_auth_key_release(stcb, chk->auth_keyid);
 			ccnt++;
 			if (chk->whoTo) {
 				sctp_free_remote_addr(chk->whoTo);
 				chk->whoTo = NULL;
 			}
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
 			SCTP_DECR_CHK_COUNT();
 			/* sa_ignore FREED_MEMORY */
 			chk = TAILQ_FIRST(&asoc->send_queue);
 		}
 	}
 /*
   if (ccnt) {
   printf("Freed %d from send_queue\n", ccnt);
   ccnt = 0;
   }
 */
 	/* sent queue SHOULD be empty */
 	if (!TAILQ_EMPTY(&asoc->sent_queue)) {
 		chk = TAILQ_FIRST(&asoc->sent_queue);
 		while (chk) {
 			TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
 			if (chk->data) {
 				if (so) {
 					/* Still a socket? */
 					sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
 					    SCTP_NOTIFY_DATAGRAM_SENT, chk, SCTP_SO_LOCKED);
 				}
 				if (chk->data) {
 					sctp_m_freem(chk->data);
 					chk->data = NULL;
 				}
 			}
 			if (chk->holds_key_ref)
 				sctp_auth_key_release(stcb, chk->auth_keyid);
 			ccnt++;
 			sctp_free_remote_addr(chk->whoTo);
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
 			SCTP_DECR_CHK_COUNT();
 			/* sa_ignore FREED_MEMORY */
 			chk = TAILQ_FIRST(&asoc->sent_queue);
 		}
 	}
 /*
   if (ccnt) {
   printf("Freed %d from sent_queue\n", ccnt);
   ccnt = 0;
   }
 */
 	/* control queue MAY not be empty */
 	if (!TAILQ_EMPTY(&asoc->control_send_queue)) {
 		chk = TAILQ_FIRST(&asoc->control_send_queue);
 		while (chk) {
 			TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
 			if (chk->data) {
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
 			}
 			if (chk->holds_key_ref)
 				sctp_auth_key_release(stcb, chk->auth_keyid);
 			ccnt++;
 			sctp_free_remote_addr(chk->whoTo);
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
 			SCTP_DECR_CHK_COUNT();
 			/* sa_ignore FREED_MEMORY */
 			chk = TAILQ_FIRST(&asoc->control_send_queue);
 		}
 	}
 /*
   if (ccnt) {
   printf("Freed %d from ctrl_queue\n", ccnt);
   ccnt = 0;
   }
 */
 
 	/* ASCONF queue MAY not be empty */
 	if (!TAILQ_EMPTY(&asoc->asconf_send_queue)) {
 		chk = TAILQ_FIRST(&asoc->asconf_send_queue);
 		while (chk) {
 			TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next);
 			if (chk->data) {
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
 			}
 			if (chk->holds_key_ref)
 				sctp_auth_key_release(stcb, chk->auth_keyid);
 			ccnt++;
 			sctp_free_remote_addr(chk->whoTo);
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
 			SCTP_DECR_CHK_COUNT();
 			/* sa_ignore FREED_MEMORY */
 			chk = TAILQ_FIRST(&asoc->asconf_send_queue);
 		}
 	}
 /*
   if (ccnt) {
   printf("Freed %d from asconf_queue\n", ccnt);
   ccnt = 0;
   }
 */
 	if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
 		chk = TAILQ_FIRST(&asoc->reasmqueue);
 		while (chk) {
 			TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
 			if (chk->data) {
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
 			}
 			if (chk->holds_key_ref)
 				sctp_auth_key_release(stcb, chk->auth_keyid);
 			sctp_free_remote_addr(chk->whoTo);
 			ccnt++;
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
 			SCTP_DECR_CHK_COUNT();
 			/* sa_ignore FREED_MEMORY */
 			chk = TAILQ_FIRST(&asoc->reasmqueue);
 		}
 	}
 /*
   if (ccnt) {
   printf("Freed %d from reasm_queue\n", ccnt);
   ccnt = 0;
   }
 */
 	if (asoc->mapping_array) {
 		SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
 		asoc->mapping_array = NULL;
 	}
 	if (asoc->nr_mapping_array) {
 		SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP);
 		asoc->nr_mapping_array = NULL;
 	}
 	/* the stream outs */
 	if (asoc->strmout) {
 		SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
 		asoc->strmout = NULL;
 	}
 	asoc->strm_realoutsize = asoc->streamoutcnt = 0;
 	if (asoc->strmin) {
 		struct sctp_queued_to_read *ctl;
 
 		for (i = 0; i < asoc->streamincnt; i++) {
 			if (!TAILQ_EMPTY(&asoc->strmin[i].inqueue)) {
 				/* We have somethings on the streamin queue */
 				ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
 				while (ctl) {
 					TAILQ_REMOVE(&asoc->strmin[i].inqueue,
 					    ctl, next);
 					sctp_free_remote_addr(ctl->whoFrom);
 					if (ctl->data) {
 						sctp_m_freem(ctl->data);
 						ctl->data = NULL;
 					}
 					/*
 					 * We don't free the address here
 					 * since all the net's were freed
 					 * above.
 					 */
 					SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl);
 					SCTP_DECR_READQ_COUNT();
 					ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
 				}
 			}
 		}
 		SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
 		asoc->strmin = NULL;
 	}
 	asoc->streamincnt = 0;
 	while (!TAILQ_EMPTY(&asoc->nets)) {
 		/* sa_ignore FREED_MEMORY */
 		net = TAILQ_FIRST(&asoc->nets);
 		/* pull from list */
 		if ((SCTP_BASE_INFO(ipi_count_raddr) == 0) || (prev == net)) {
 #ifdef INVARIANTS
 			panic("no net's left alloc'ed, or list points to itself");
 #endif
 			break;
 		}
 		prev = net;
 		TAILQ_REMOVE(&asoc->nets, net, sctp_next);
 		sctp_free_remote_addr(net);
 	}
 
 	while (!LIST_EMPTY(&asoc->sctp_restricted_addrs)) {
 		/* sa_ignore FREED_MEMORY */
 		laddr = LIST_FIRST(&asoc->sctp_restricted_addrs);
 		sctp_remove_laddr(laddr);
 	}
 
 	/* pending asconf (address) parameters */
 	while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
 		/* sa_ignore FREED_MEMORY */
 		aparam = TAILQ_FIRST(&asoc->asconf_queue);
 		TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
 		SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
 	}
 	while (!TAILQ_EMPTY(&asoc->asconf_ack_sent)) {
 		/* sa_ignore FREED_MEMORY */
 		aack = TAILQ_FIRST(&asoc->asconf_ack_sent);
 		TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next);
 		if (aack->data != NULL) {
 			sctp_m_freem(aack->data);
 		}
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), aack);
 	}
 	/* clean up auth stuff */
 	if (asoc->local_hmacs)
 		sctp_free_hmaclist(asoc->local_hmacs);
 	if (asoc->peer_hmacs)
 		sctp_free_hmaclist(asoc->peer_hmacs);
 
 	if (asoc->local_auth_chunks)
 		sctp_free_chunklist(asoc->local_auth_chunks);
 	if (asoc->peer_auth_chunks)
 		sctp_free_chunklist(asoc->peer_auth_chunks);
 
 	sctp_free_authinfo(&asoc->authinfo);
 
 	shared_key = LIST_FIRST(&asoc->shared_keys);
 	while (shared_key) {
 		LIST_REMOVE(shared_key, next);
 		sctp_free_sharedkey(shared_key);
 		/* sa_ignore FREED_MEMORY */
 		shared_key = LIST_FIRST(&asoc->shared_keys);
 	}
 
 	/* Insert new items here :> */
 
 	/* Get rid of LOCK */
 	SCTP_TCB_LOCK_DESTROY(stcb);
 	SCTP_TCB_SEND_LOCK_DESTROY(stcb);
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
 		SCTP_INP_INFO_WUNLOCK();
 		SCTP_INP_RLOCK(inp);
 	}
 #ifdef SCTP_TRACK_FREED_ASOCS
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 		/* now clean up the tasoc itself */
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
 		SCTP_DECR_ASOC_COUNT();
 	} else {
 		LIST_INSERT_HEAD(&inp->sctp_asoc_free_list, stcb, sctp_tcblist);
 	}
 #else
 	SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
 	SCTP_DECR_ASOC_COUNT();
 #endif
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 			/*
 			 * If its NOT the inp_free calling us AND sctp_close
 			 * as been called, we call back...
 			 */
 			SCTP_INP_RUNLOCK(inp);
 			/*
 			 * This will start the kill timer (if we are the
 			 * last one) since we hold an increment yet. But
 			 * this is the only safe way to do this since
 			 * otherwise if the socket closes at the same time
 			 * we are here we might collide in the cleanup.
 			 */
 			sctp_inpcb_free(inp,
 			    SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
 			    SCTP_CALLED_DIRECTLY_NOCMPSET);
 			SCTP_INP_DECR_REF(inp);
 			goto out_of;
 		} else {
 			/* The socket is still open. */
 			SCTP_INP_DECR_REF(inp);
 		}
 	}
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
 		SCTP_INP_RUNLOCK(inp);
 	}
 out_of:
 	/* destroyed the asoc */
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, NULL, 11);
 #endif
 	return (1);
 }
 
 
 
 /*
  * Decide whether a destination address is "reachable", judged purely by the
  * address families bound on the endpoint that owns the association
  * (e.g. only v4 or only v6 currently bound).
  *
  * Returns 1 when the endpoint is bound to all addresses.  Otherwise the
  * result is the endpoint's INP_IPV6 vflag bit for an AF_INET6 destination
  * or its INP_IPV4 vflag bit for an AF_INET destination, i.e. non-zero when
  * that family is bound and 0 when it is not.  Any other family yields 0.
  */
 /*
  * FIX: if we allow assoc-level bindx(), then this needs to be fixed to use
  * assoc level v4/v6 flags, as the assoc *may* not have the same address
  * types bound as its endpoint
  */
 int
 sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr)
 {
 	/*
 	 * No locking is done here.  The TCB is always locked by the caller
 	 * and an assoc is up.  In the asconf (address delete) case the
 	 * caller also holds the INP lock; in the HB case it does not, but
 	 * the INP reference count is raised so the INP cannot go away (and
 	 * we hold the TCB lock on top of that).  We only read sctp_flags
 	 * and the vflags, and the binding cannot change once an assoc is
 	 * up, so no further protection is needed.
 	 */
 	struct sctp_inpcb *ep = stcb->sctp_ep;
 
 	if (ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		/*
 		 * Bound-all: the destination is not restricted.
 		 *
 		 * RRS: Question during lock work: Is this correct? If you
 		 * are bound-all you still might need to obey the V4--V6
 		 * flags??? IMO this bound-all stuff needs to be removed!
 		 */
 		return (1);
 	}
 	/* NOTE: all "scope" checks are done when local addresses are added */
 	switch (destaddr->sa_family) {
 	case AF_INET6:
 		return (ep->ip_inp.inp.inp_vflag & INP_IPV6);
 	case AF_INET:
 		return (ep->ip_inp.inp.inp_vflag & INP_IPV4);
 	default:
 		/* unknown address family: treat as unreachable */
 		return (0);
 	}
 }
 
 /*
  * Recompute the endpoint's inp_vflag from scratch: clear it, then set
  * INP_IPV6 and/or INP_IPV4 for every address on the endpoint's local
  * address list.  Entries with no ifa (logged via SCTPDBG) and entries whose
  * ifa is flagged SCTP_BEING_DELETED do not contribute.
  */
 static void
 sctp_update_ep_vflag(struct sctp_inpcb *inp)
 {
 	struct sctp_laddr *entry;
 	struct sctp_ifa *cur_ifa;
 
 	/* start from a clean slate */
 	inp->ip_inp.inp.inp_vflag = 0;
 
 	/* fold in the family of each usable address on the ep list */
 	LIST_FOREACH(entry, &inp->sctp_addr_list, sctp_nxt_addr) {
 		cur_ifa = entry->ifa;
 		if (cur_ifa == NULL) {
 			SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
 			    __FUNCTION__);
 			continue;
 		}
 		if (cur_ifa->localifa_flags & SCTP_BEING_DELETED) {
 			/* on its way out, must not influence the flags */
 			continue;
 		}
 		switch (cur_ifa->address.sa.sa_family) {
 		case AF_INET6:
 			inp->ip_inp.inp.inp_vflag |= INP_IPV6;
 			break;
 		case AF_INET:
 			inp->ip_inp.inp.inp_vflag |= INP_IPV4;
 			break;
 		default:
 			/* other families do not affect the vflags */
 			break;
 		}
 	}
 }
 
 /*
  * Add an address to the endpoint's local address list.
  *
  * Nothing is done when the endpoint is bound to all addresses, when the
  * address is an IPv6 address flagged SCTP_ADDR_IFA_UNUSEABLE, or when the
  * address is already on the list.  On a successful insert the endpoint's
  * laddr_count is bumped and the matching INP_IPV6 / INP_IPV4 vflag is set.
  * A failed insert is silently ignored.
  *
  * 'action' is handed through unchanged to sctp_insert_laddr().
  */
 void
 sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action)
 {
 	struct sctp_laddr *entry;
 
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		/* Bound to everything already, so this address is covered. */
 		return;
 	}
 	if ((ifa->address.sa.sa_family == AF_INET6) &&
 	    (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE)) {
 		/* Can't bind a non-useable addr. */
 		return;
 	}
 	/* Already on the endpoint list?  Then there is nothing to add. */
 	LIST_FOREACH(entry, &inp->sctp_addr_list, sctp_nxt_addr) {
 		if (entry->ifa == ifa) {
 			return;
 		}
 	}
 
 	/* Not in the ep list yet: insert it. */
 	if (sctp_insert_laddr(&inp->sctp_addr_list, ifa, action) != 0) {
 		return;
 	}
 	inp->laddr_count++;
 
 	/* update inp_vflag flags for the newly bound family */
 	switch (ifa->address.sa.sa_family) {
 	case AF_INET6:
 		inp->ip_inp.inp.inp_vflag |= INP_IPV6;
 		break;
 	case AF_INET:
 		inp->ip_inp.inp.inp_vflag |= INP_IPV4;
 		break;
 	default:
 		break;
 	}
 }
 
 
 /*
  * select a new (hopefully reachable) destination net (should only be used
  * when we deleted an ep addr that is the only usable source address to reach
  * the destination net)
  *
  * Walks the association's net list in order and makes the first net that is
  * confirmed (SCTP_ADDR_UNCONFIRMED clear) and for which
  * sctp_destination_is_reachable() returns non-zero the primary destination.
  * If no net qualifies, primary_destination is left untouched.
  */
 static void
 sctp_select_primary_destination(struct sctp_tcb *stcb)
 {
 	struct sctp_nets *net;
 
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		/* for now, we'll just pick the first reachable one we find */
 		if (net->dest_state & SCTP_ADDR_UNCONFIRMED)
 			continue;
 		if (sctp_destination_is_reachable(stcb,
 		    (struct sockaddr *)&net->ro._l_addr)) {
 			/*
 			 * Found a reachable destination.  Stop here: without
 			 * the return, later matches would overwrite this one
 			 * and the LAST reachable net would win instead of the
 			 * first.
 			 */
 			stcb->asoc.primary_destination = net;
 			return;
 		}
 	}
 	/* I can't get there from here! ...we're gonna die shortly... */
 }
 
 
 /*
  * Delete the address from the endpoint local address list There is nothing
  * to be done if we are bound to all addresses
  */
 void
 sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
 {
 	struct sctp_laddr *laddr;
 	int fnd;
 
 	fnd = 0;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		/* You are already bound to all. You have it already */
 		return;
 	}
 	LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 		if (laddr->ifa == ifa) {
 			fnd = 1;
 			break;
 		}
 	}
 	if (fnd && (inp->laddr_count < 2)) {
 		/* can't delete unless there are at LEAST 2 addresses */
 		return;
 	}
 	if (fnd) {
 		/*
 		 * clean up any use of this address go through our
 		 * associations and clear any last_used_address that match
 		 * this one for each assoc, see if a new primary_destination
 		 * is needed
 		 */
 		struct sctp_tcb *stcb;
 
 		/* clean up "next_addr_touse" */
 		if (inp->next_addr_touse == laddr)
 			/* delete this address */
 			inp->next_addr_touse = NULL;
 
 		/* clean up "last_used_address" */
 		LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 			struct sctp_nets *net;
 
 			SCTP_TCB_LOCK(stcb);
 			if (stcb->asoc.last_used_address == laddr)
 				/* delete this address */
 				stcb->asoc.last_used_address = NULL;
 			/*
 			 * Now spin through all the nets and purge any ref
 			 * to laddr
 			 */
 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 				if (net->ro._s_addr &&
 				    (net->ro._s_addr->ifa == laddr->ifa)) {
 					/* Yep, purge src address selected */
 					sctp_rtentry_t *rt;
 
 					/* delete this address if cached */
 					rt = net->ro.ro_rt;
 					if (rt != NULL) {
 						RTFREE(rt);
 						net->ro.ro_rt = NULL;
 					}
 					sctp_free_ifa(net->ro._s_addr);
 					net->ro._s_addr = NULL;
 					net->src_addr_selected = 0;
 				}
 			}
 			SCTP_TCB_UNLOCK(stcb);
 		}		/* for each tcb */
 		/* remove it from the ep list */
 		sctp_remove_laddr(laddr);
 		inp->laddr_count--;
 		/* update inp_vflag flags */
 		sctp_update_ep_vflag(inp);
 	}
 	return;
 }
 
 /*
  * Add the address to the TCB local address restricted list.
  * This is a "pending" address list (eg. addresses waiting for an
  * ASCONF-ACK response) and cannot be used as a valid source address.
  *
  * Nothing is added when the address is an IPv6 address flagged
  * SCTP_ADDR_IFA_UNUSEABLE or when it is already on the list.  A failure of
  * sctp_insert_laddr() is deliberately ignored.
  */
 void
 sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
 {
 	struct sctp_laddr *laddr;
 	struct sctpladdr *list;
 
 	/*
 	 * Assumes TCB is locked.. and possibly the INP. May need to
 	 * confirm/fix that if we need it and is not the case.
 	 */
 	list = &stcb->asoc.sctp_restricted_addrs;
 
 	if (ifa->address.sa.sa_family == AF_INET6) {
 		if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 			/* Can't restrict an address flagged as unusable. */
 			return;
 		}
 	}
 	/* does the address already exist? */
 	LIST_FOREACH(laddr, list, sctp_nxt_addr) {
 		if (laddr->ifa == ifa) {
 			return;
 		}
 	}
 
 	/* add to the list; action 0, result intentionally not checked */
 	(void)sctp_insert_laddr(list, ifa, 0);
 	return;
 }
 
 /*
  * Allocate a new laddr entry for 'ifa' and push it onto the head of 'list'.
  *
  * The entry is zeroed, stamped with the current time, records 'act' as its
  * action and takes a reference on 'ifa' (refcount is atomically bumped).
  *
  * Returns 0 on success, or EINVAL when no entry could be obtained from the
  * laddr zone (in which case nothing is modified).
  */
 int
 sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
 {
 	struct sctp_laddr *entry;
 
 	entry = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
 	if (entry != NULL) {
 		SCTP_INCR_LADDR_COUNT();
 
 		/* fill in the fresh entry */
 		bzero(entry, sizeof(*entry));
 		(void)SCTP_GETTIME_TIMEVAL(&entry->start_time);
 		entry->ifa = ifa;
 		entry->action = act;
 
 		/* the list entry now holds a reference on the ifa */
 		atomic_add_int(&ifa->refcount, 1);
 
 		/* and link it in at the front */
 		LIST_INSERT_HEAD(list, entry, sctp_nxt_addr);
 		return (0);
 	}
 	/* out of memory? */
 	SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
 	return (EINVAL);
 }
 
 /*
  * Unlink an laddr entry from whichever local address list it is on, hand
  * its ifa to sctp_free_ifa() (presumably dropping the reference taken by
  * sctp_insert_laddr() -- confirm against sctp_free_ifa), and return the
  * entry to the laddr zone.
  */
 void
 sctp_remove_laddr(struct sctp_laddr *laddr)
 {
 	struct sctp_ifa *held_ifa;
 
 	/* take it off the list first */
 	LIST_REMOVE(laddr, sctp_nxt_addr);
 
 	/* let go of the address this entry was pointing at */
 	held_ifa = laddr->ifa;
 	sctp_free_ifa(held_ifa);
 
 	/* finally give the entry itself back and fix the accounting */
 	SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
 	SCTP_DECR_LADDR_COUNT();
 }
 
 /*
  * Remove a local address from the TCB's restricted address list.
  *
  * When the endpoint is subset-bound, has ASCONF turned off and has fewer
  * than two local addresses, the request is refused so the last address is
  * never deleted.  If the address is not on the restricted list, nothing
  * happens.
  */
 void
 sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
 {
 	struct sctp_inpcb *inp;
 	struct sctp_laddr *entry;
 
 	/*
 	 * Called from the asconf code, which is assumed to hold both the
 	 * TCB lock and the INP lock (that is what tracing through the
 	 * asconf entry points shows).  The ASCONF code often locks the INP
 	 * during its work so that other procs do not look things up while
 	 * they are being changed.
 	 */
 	inp = stcb->sctp_ep;
 
 	/* if subset bound and don't allow ASCONF's, can't delete last */
 	if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) &&
 	    sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
 	    (inp->laddr_count < 2)) {
 		/* can't delete last address */
 		return;
 	}
 	/* remove the address if it exists */
 	LIST_FOREACH(entry, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
 		if ((entry->ifa != NULL) && (entry->ifa == ifa)) {
 			sctp_remove_laddr(entry);
 			return;
 		}
 	}
 
 	/* address not found: nothing to do */
 }
 
 /*
  * Temporarily remove for __APPLE__ until we use the Tiger equivalents
  */
 /*
  * sysctl
  *
  * Sizing knobs read by sctp_pcb_init() when it creates the PCB zones:
  * sctp_max_number_of_assoc is the limit passed for the association zone,
  * and multiplied by sctp_scale_up_for_address it is the limit passed for
  * the local and remote address zones.
  */
 static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC;
 static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR;
 
 /*
  * One-time setup of the global SCTP PCB state: statistics start time,
  * endpoint list, hash tables, memory zones, global locks, counters, the
  * address work-queue timer, the vtag TIMEWAIT chains, the iterator and the
  * default VRF.  A second call is detected via sctp_pcb_initialized and
  * returns without doing anything.
  */
 void
 sctp_pcb_init(void)
 {
 	/*
 	 * SCTP initialization for the PCB structures should be called by
 	 * the sctp_init() function.
 	 */
 	int i;
 	struct timeval tv;
 
 	if (SCTP_BASE_VAR(sctp_pcb_initialized) != 0) {
 		/* error I was called twice */
 		return;
 	}
 	SCTP_BASE_VAR(sctp_pcb_initialized) = 1;
 
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	bzero(&SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log));
 #endif
 	/* record "now" as the statistics discontinuity time */
 	(void)SCTP_GETTIME_TIMEVAL(&tv);
 #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 	SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_sec = (uint32_t) tv.tv_sec;
 	SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_usec = (uint32_t) tv.tv_usec;
 #else
 	SCTP_BASE_STAT(sctps_discontinuitytime).tv_sec = (uint32_t) tv.tv_sec;
 	SCTP_BASE_STAT(sctps_discontinuitytime).tv_usec = (uint32_t) tv.tv_usec;
 #endif
 	/* init the empty list of (All) Endpoints */
 	LIST_INIT(&SCTP_BASE_INFO(listhead));
 
 
 	/* init the hash table of endpoints */
 	TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &SCTP_BASE_SYSCTL(sctp_hashtblsize));
 	TUNABLE_INT_FETCH("net.inet.sctp.pcbhashsize", &SCTP_BASE_SYSCTL(sctp_pcbtblsize));
 	TUNABLE_INT_FETCH("net.inet.sctp.chunkscale", &SCTP_BASE_SYSCTL(sctp_chunkscale));
 	/*
 	 * NOTE(review): sctp_pcbtblsize is fetched above, but all three
 	 * tables below are sized from sctp_hashtblsize -- confirm this is
 	 * intended.
 	 */
 	SCTP_BASE_INFO(sctp_asochash) = SCTP_HASH_INIT((SCTP_BASE_SYSCTL(sctp_hashtblsize) * 31),
 	    &SCTP_BASE_INFO(hashasocmark));
 	SCTP_BASE_INFO(sctp_ephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize),
 	    &SCTP_BASE_INFO(hashmark));
 	SCTP_BASE_INFO(sctp_tcpephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize),
 	    &SCTP_BASE_INFO(hashtcpmark));
 	SCTP_BASE_INFO(hashtblsize) = SCTP_BASE_SYSCTL(sctp_hashtblsize);
 
 
 	SCTP_BASE_INFO(sctp_vrfhash) = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH,
 	    &SCTP_BASE_INFO(hashvrfmark));
 
 	SCTP_BASE_INFO(vrf_ifn_hash) = SCTP_HASH_INIT(SCTP_VRF_IFN_HASH_SIZE,
 	    &SCTP_BASE_INFO(vrf_ifn_hashmark));
 	/* init the zones */
 	/*
 	 * FIX ME: Should check for NULL returns, but if it does fail we are
 	 * doomed to panic anyways... add later maybe.
 	 */
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_ep), "sctp_ep",
 	    sizeof(struct sctp_inpcb), maxsockets);
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asoc), "sctp_asoc",
 	    sizeof(struct sctp_tcb), sctp_max_number_of_assoc);
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_laddr), "sctp_laddr",
 	    sizeof(struct sctp_laddr),
 	    (sctp_max_number_of_assoc * sctp_scale_up_for_address));
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_net), "sctp_raddr",
 	    sizeof(struct sctp_nets),
 	    (sctp_max_number_of_assoc * sctp_scale_up_for_address));
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_chunk), "sctp_chunk",
 	    sizeof(struct sctp_tmit_chunk),
 	    (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_readq), "sctp_readq",
 	    sizeof(struct sctp_queued_to_read),
 	    (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_strmoq), "sctp_stream_msg_out",
 	    sizeof(struct sctp_stream_queue_pending),
 	    (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf), "sctp_asconf",
 	    sizeof(struct sctp_asconf),
 	    (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
 
 	SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf_ack), "sctp_asconf_ack",
 	    sizeof(struct sctp_asconf_ack),
 	    (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale)));
 
 
 	/* Master Lock INIT for info structure */
 	SCTP_INP_INFO_LOCK_INIT();
 	SCTP_STATLOG_INIT_LOCK();
 
 	SCTP_IPI_COUNT_INIT();
 	SCTP_IPI_ADDR_INIT();
 #ifdef SCTP_PACKET_LOGGING
 	SCTP_IP_PKTLOG_INIT();
 #endif
 	LIST_INIT(&SCTP_BASE_INFO(addr_wq));
 
 	SCTP_WQ_ADDR_INIT();
 	/* not sure if we need all the counts */
 	SCTP_BASE_INFO(ipi_count_ep) = 0;
 	/* assoc/tcb zone info */
 	SCTP_BASE_INFO(ipi_count_asoc) = 0;
 	/* local addrlist zone info */
 	SCTP_BASE_INFO(ipi_count_laddr) = 0;
 	/* remote addrlist zone info */
 	SCTP_BASE_INFO(ipi_count_raddr) = 0;
 	/* chunk info */
 	SCTP_BASE_INFO(ipi_count_chunk) = 0;
 
 	/* socket queue zone info */
 	SCTP_BASE_INFO(ipi_count_readq) = 0;
 
 	/* stream out queue count */
 	SCTP_BASE_INFO(ipi_count_strmoq) = 0;
 
 	SCTP_BASE_INFO(ipi_free_strmoq) = 0;
 	SCTP_BASE_INFO(ipi_free_chunks) = 0;
 
 	SCTP_OS_TIMER_INIT(&SCTP_BASE_INFO(addr_wq_timer.timer));
 
 	/* Init the TIMEWAIT list */
 	for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) {
 		LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]);
 	}
 
 	sctp_startup_iterator();
 
 	/*
 	 * INIT the default VRF which for BSD is the only one, other O/S's
 	 * may have more. But initially they must start with one and then
 	 * add the VRF's as addresses are added.
 	 */
 	sctp_init_vrf_list(SCTP_DEFAULT_VRF);
 
 }
 
 /*
  * Tear down the global SCTP PCB state: pending iterators for the current
  * vnet, the address work queue, the default VRF's ifn/ifa lists and hashes,
  * the vtag TIMEWAIT blocks, the global locks, the memory zones and finally
  * the association/endpoint hash tables.
  *
  * Assumes that the SCTP_BASE_INFO() lock is NOT held.
  */
 void
 sctp_pcb_finish(void)
 {
 	struct sctp_vrflist *vrf_bucket;
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *ifn;
 	struct sctp_ifa *ifa;
 	struct sctpvtaghead *chain;
 	struct sctp_tagblock *twait_block, *prev_twait_block;
 	struct sctp_laddr *wi;
 	int i;
 
 	/*
 	 * On FreeBSD the iterator thread never exits, but we still clean up
 	 * its work here. The only way FreeBSD reaches this point is when we
 	 * have VRF's, but the code is kept so it compiles on old versions.
 	 */
 	{
 		struct sctp_iterator *it, *nit;
 
 		/*
 		 * Drain every queued iterator that belongs to the current
 		 * vnet; iterators of other vnets stay on the queue.
 		 */
 		SCTP_IPI_ITERATOR_WQ_LOCK();
 		it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead);
 		while (it) {
 			/* fetch the successor first, 'it' may be freed below */
 			nit = TAILQ_NEXT(it, sctp_nxt_itr);
 			if (it->vn != curvnet) {
 				it = nit;
 				continue;
 			}
 			TAILQ_REMOVE(&sctp_it_ctl.iteratorhead,
 			    it, sctp_nxt_itr);
 			/* give the owner its completion callback, if any */
 			if (it->function_atend != NULL) {
 				(*it->function_atend) (it->pointer, it->val);
 			}
 			SCTP_FREE(it, SCTP_M_ITER);
 			it = nit;
 		}
 		SCTP_IPI_ITERATOR_WQ_UNLOCK();
 		/*
 		 * If an iterator of this vnet is currently marked as the
 		 * running one, flag it to stop.
 		 */
 		SCTP_ITERATOR_LOCK();
 		if ((sctp_it_ctl.cur_it) &&
 		    (sctp_it_ctl.cur_it->vn == curvnet)) {
 			sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
 		}
 		SCTP_ITERATOR_UNLOCK();
 	}
 
 	/* stop the address work-queue timer and free anything still queued */
 	SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer));
 	SCTP_WQ_ADDR_LOCK();
 	while ((wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq))) != NULL) {
 		LIST_REMOVE(wi, sctp_nxt_addr);
 		SCTP_DECR_LADDR_COUNT();
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi);
 	}
 	SCTP_WQ_ADDR_UNLOCK();
 
 	/*
 	 * free the vrf/ifn/ifa lists and hashes (be sure address monitor is
 	 * destroyed first).
 	 */
 	/* only the hash bucket of the default VRF id is walked */
 	vrf_bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(SCTP_DEFAULT_VRFID & SCTP_BASE_INFO(hashvrfmark))];
 	while ((vrf = LIST_FIRST(vrf_bucket)) != NULL) {
 		while ((ifn = LIST_FIRST(&vrf->ifnlist)) != NULL) {
 			while ((ifa = LIST_FIRST(&ifn->ifalist)) != NULL) {
 				/* free the ifa */
 				LIST_REMOVE(ifa, next_bucket);
 				LIST_REMOVE(ifa, next_ifa);
 				SCTP_FREE(ifa, SCTP_M_IFA);
 			}
 			/* free the ifn */
 			LIST_REMOVE(ifn, next_bucket);
 			LIST_REMOVE(ifn, next_ifn);
 			SCTP_FREE(ifn, SCTP_M_IFN);
 		}
 		SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark);
 		/* free the vrf */
 		LIST_REMOVE(vrf, next_vrf);
 		SCTP_FREE(vrf, SCTP_M_VRF);
 	}
 	/* free the vrf hashes */
 	SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_vrfhash), SCTP_BASE_INFO(hashvrfmark));
 	SCTP_HASH_FREE(SCTP_BASE_INFO(vrf_ifn_hash), SCTP_BASE_INFO(vrf_ifn_hashmark));
 
 	/*
 	 * free the TIMEWAIT list elements malloc'd in the function
 	 * sctp_add_vtag_to_timewait()...
 	 */
 	for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) {
 		chain = &SCTP_BASE_INFO(vtag_timewait)[i];
 		if (!LIST_EMPTY(chain)) {
 			/*
 			 * Free one step behind the cursor: the current
 			 * block's link is still needed to advance, so each
 			 * block is released only after the walk has moved
 			 * past it. The blocks are not unlinked from the
 			 * chain.
 			 */
 			prev_twait_block = NULL;
 			LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
 				if (prev_twait_block) {
 					SCTP_FREE(prev_twait_block, SCTP_M_TIMW);
 				}
 				prev_twait_block = twait_block;
 			}
 			/* the last block of the chain */
 			SCTP_FREE(prev_twait_block, SCTP_M_TIMW);
 		}
 	}
 
 	/* free the locks and mutexes */
 #ifdef SCTP_PACKET_LOGGING
 	SCTP_IP_PKTLOG_DESTROY();
 #endif
 	SCTP_IPI_ADDR_DESTROY();
 	SCTP_STATLOG_DESTROY();
 	SCTP_INP_INFO_LOCK_DESTROY();
 
 	SCTP_WQ_ADDR_DESTROY();
 
 	/* destroy every zone that sctp_pcb_init() created */
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_net));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_chunk));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_readq));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack));
 	/* Get rid of the remaining hash tables too */
 	if (SCTP_BASE_INFO(sctp_asochash) != NULL)
 		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
 	if (SCTP_BASE_INFO(sctp_ephash) != NULL)
 		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
 	if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
 		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
 
 }
 
 
 int
 sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
     int iphlen, int offset, int limit, struct sctphdr *sh,
     struct sockaddr *altsa)
 {
 	/*
 	 * grub through the INIT pulling addresses and loading them to the
 	 * nets structure in the asoc. The from address in the mbuf should
 	 * also be loaded (if it is not already). This routine can be called
 	 * with either INIT or INIT-ACK's as long as the m points to the IP
 	 * packet and the offset points to the beginning of the parameters.
 	 */
 	struct sctp_inpcb *inp, *l_inp;
 	struct sctp_nets *net, *net_tmp;
 	struct ip *iph;
 	struct sctp_paramhdr *phdr, parm_buf;
 	struct sctp_tcb *stcb_tmp;
 	uint16_t ptype, plen;
 	struct sockaddr *sa;
 	struct sockaddr_storage dest_store;
 	struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
 	struct sockaddr_in sin;
 	struct sockaddr_in6 sin6;
 	uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
 	struct sctp_auth_random *p_random = NULL;
 	uint16_t random_len = 0;
 	uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
 	struct sctp_auth_hmac_algo *hmacs = NULL;
 	uint16_t hmacs_len = 0;
 	uint8_t saw_asconf = 0;
 	uint8_t saw_asconf_ack = 0;
 	uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
 	struct sctp_auth_chunk_list *chunks = NULL;
 	uint16_t num_chunks = 0;
 	sctp_key_t *new_key;
 	uint32_t keylen;
 	int got_random = 0, got_hmacs = 0, got_chklist = 0;
 
 	/* First get the destination address setup too. */
 	memset(&sin, 0, sizeof(sin));
 	memset(&sin6, 0, sizeof(sin6));
 
 	sin.sin_family = AF_INET;
 	sin.sin_len = sizeof(sin);
 	sin.sin_port = stcb->rport;
 
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_port = stcb->rport;
 	if (altsa == NULL) {
 		iph = mtod(m, struct ip *);
 		switch (iph->ip_v) {
 		case IPVERSION:
 			{
 				/* its IPv4 */
 				struct sockaddr_in *sin_2;
 
 				sin_2 = (struct sockaddr_in *)(local_sa);
 				memset(sin_2, 0, sizeof(sin));
 				sin_2->sin_family = AF_INET;
 				sin_2->sin_len = sizeof(sin);
 				sin_2->sin_port = sh->dest_port;
 				sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
 				sin.sin_addr = iph->ip_src;
 				sa = (struct sockaddr *)&sin;
 				break;
 			}
 #ifdef INET6
 		case IPV6_VERSION >> 4:
 			{
 				/* its IPv6 */
 				struct ip6_hdr *ip6;
 				struct sockaddr_in6 *sin6_2;
 
 				ip6 = mtod(m, struct ip6_hdr *);
 				sin6_2 = (struct sockaddr_in6 *)(local_sa);
 				memset(sin6_2, 0, sizeof(sin6));
 				sin6_2->sin6_family = AF_INET6;
 				sin6_2->sin6_len = sizeof(struct sockaddr_in6);
 				sin6_2->sin6_port = sh->dest_port;
 				sin6.sin6_addr = ip6->ip6_src;
 				sa = (struct sockaddr *)&sin6;
 				break;
 			}
 #endif
 		default:
 			return (-1);
 			break;
 		}
 	} else {
 		/*
 		 * For cookies we use the src address NOT from the packet
 		 * but from the original INIT
 		 */
 		sa = altsa;
 	}
 	/* Turn off ECN until we get through all params */
 	stcb->asoc.ecn_allowed = 0;
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		/* mark all addresses that we have currently on the list */
 		net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
 	}
 	/* does the source address already exist? if so skip it */
 	l_inp = inp = stcb->sctp_ep;
 
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
 	atomic_add_int(&stcb->asoc.refcnt, -1);
 
 	if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
 		/* we must add the source address */
 		/* no scope set here since we have a tcb already. */
 		if ((sa->sa_family == AF_INET) &&
 		    (stcb->asoc.ipv4_addr_legal)) {
 			if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
 				return (-1);
 			}
 		} else if ((sa->sa_family == AF_INET6) &&
 		    (stcb->asoc.ipv6_addr_legal)) {
 			if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
 				return (-2);
 			}
 		}
 	} else {
 		if (net_tmp != NULL && stcb_tmp == stcb) {
 			net_tmp->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC;
 		} else if (stcb_tmp != stcb) {
 			/* It belongs to another association? */
 			if (stcb_tmp)
 				SCTP_TCB_UNLOCK(stcb_tmp);
 			return (-3);
 		}
 	}
 	if (stcb->asoc.state == 0) {
 		/* the assoc was freed? */
 		return (-4);
 	}
 	/*
 	 * peer must explicitly turn this on. This may have been initialized
 	 * to be "on" in order to allow local addr changes while INIT's are
 	 * in flight.
 	 */
 	stcb->asoc.peer_supports_asconf = 0;
 	/* now we must go through each of the params. */
 	phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
 	while (phdr) {
 		ptype = ntohs(phdr->param_type);
 		plen = ntohs(phdr->param_length);
 		/*
 		 * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype,
 		 * (int)plen);
 		 */
 		if (offset + plen > limit) {
 			break;
 		}
 		if (plen == 0) {
 			break;
 		}
 		if (ptype == SCTP_IPV4_ADDRESS) {
 			if (stcb->asoc.ipv4_addr_legal) {
 				struct sctp_ipv4addr_param *p4, p4_buf;
 
 				/* ok get the v4 address and check/add */
 				phdr = sctp_get_next_param(m, offset,
 				    (struct sctp_paramhdr *)&p4_buf,
 				    sizeof(p4_buf));
 				if (plen != sizeof(struct sctp_ipv4addr_param) ||
 				    phdr == NULL) {
 					return (-5);
 				}
 				p4 = (struct sctp_ipv4addr_param *)phdr;
 				sin.sin_addr.s_addr = p4->addr;
 				if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
 					/* Skip multi-cast addresses */
 					goto next_param;
 				}
 				if ((sin.sin_addr.s_addr == INADDR_BROADCAST) ||
 				    (sin.sin_addr.s_addr == INADDR_ANY)) {
 					goto next_param;
 				}
 				sa = (struct sockaddr *)&sin;
 				inp = stcb->sctp_ep;
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
 				    local_sa, stcb);
 				atomic_add_int(&stcb->asoc.refcnt, -1);
 
 				if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
 				    inp == NULL) {
 					/* we must add the source address */
 					/*
 					 * no scope set since we have a tcb
 					 * already
 					 */
 
 					/*
 					 * we must validate the state again
 					 * here
 					 */
 			add_it_now:
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-7);
 					}
 					if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) {
 						return (-8);
 					}
 				} else if (stcb_tmp == stcb) {
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-10);
 					}
 					if (net != NULL) {
 						/* clear flag */
 						net->dest_state &=
 						    ~SCTP_ADDR_NOT_IN_ASSOC;
 					}
 				} else {
 					/*
 					 * strange, address is in another
 					 * assoc? straighten out locks.
 					 */
 					if (stcb_tmp) {
 						if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
 							/*
 							 * in setup state we
 							 * abort this guy
 							 */
 							sctp_abort_an_association(stcb_tmp->sctp_ep,
 							    stcb_tmp, 1, NULL, 0);
 							goto add_it_now;
 						}
 						SCTP_TCB_UNLOCK(stcb_tmp);
 					}
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-12);
 					}
 					return (-13);
 				}
 			}
 		} else if (ptype == SCTP_IPV6_ADDRESS) {
 			if (stcb->asoc.ipv6_addr_legal) {
 				/* ok get the v6 address and check/add */
 				struct sctp_ipv6addr_param *p6, p6_buf;
 
 				phdr = sctp_get_next_param(m, offset,
 				    (struct sctp_paramhdr *)&p6_buf,
 				    sizeof(p6_buf));
 				if (plen != sizeof(struct sctp_ipv6addr_param) ||
 				    phdr == NULL) {
 					return (-14);
 				}
 				p6 = (struct sctp_ipv6addr_param *)phdr;
 				memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
 				    sizeof(p6->addr));
 				if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
 					/* Skip multi-cast addresses */
 					goto next_param;
 				}
 				if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
 					/*
 					 * Link local make no sense without
 					 * scope
 					 */
 					goto next_param;
 				}
 				sa = (struct sockaddr *)&sin6;
 				inp = stcb->sctp_ep;
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
 				    local_sa, stcb);
 				atomic_add_int(&stcb->asoc.refcnt, -1);
 				if (stcb_tmp == NULL &&
 				    (inp == stcb->sctp_ep || inp == NULL)) {
 					/*
 					 * we must validate the state again
 					 * here
 					 */
 			add_it_now6:
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-16);
 					}
 					/*
 					 * we must add the address, no scope
 					 * set
 					 */
 					if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) {
 						return (-17);
 					}
 				} else if (stcb_tmp == stcb) {
 					/*
 					 * we must validate the state again
 					 * here
 					 */
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-19);
 					}
 					if (net != NULL) {
 						/* clear flag */
 						net->dest_state &=
 						    ~SCTP_ADDR_NOT_IN_ASSOC;
 					}
 				} else {
 					/*
 					 * strange, address is in another
 					 * assoc? straighten out locks.
 					 */
 					if (stcb_tmp)
 						if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
 							/*
 							 * in setup state we
 							 * abort this guy
 							 */
 							sctp_abort_an_association(stcb_tmp->sctp_ep,
 							    stcb_tmp, 1, NULL, 0);
 							goto add_it_now6;
 						}
 					SCTP_TCB_UNLOCK(stcb_tmp);
 
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-21);
 					}
 					return (-22);
 				}
 			}
 		} else if (ptype == SCTP_ECN_CAPABLE) {
 			stcb->asoc.ecn_allowed = 1;
 		} else if (ptype == SCTP_ULP_ADAPTATION) {
 			if (stcb->asoc.state != SCTP_STATE_OPEN) {
 				struct sctp_adaptation_layer_indication ai,
 				                                *aip;
 
 				phdr = sctp_get_next_param(m, offset,
 				    (struct sctp_paramhdr *)&ai, sizeof(ai));
 				aip = (struct sctp_adaptation_layer_indication *)phdr;
 				if (aip) {
 					stcb->asoc.peers_adaptation = ntohl(aip->indication);
 					stcb->asoc.adaptation_needed = 1;
 				}
 			}
 		} else if (ptype == SCTP_SET_PRIM_ADDR) {
 			struct sctp_asconf_addr_param lstore, *fee;
 			struct sctp_asconf_addrv4_param *fii;
 			int lptype;
 			struct sockaddr *lsa = NULL;
 
 			stcb->asoc.peer_supports_asconf = 1;
 			if (plen > sizeof(lstore)) {
 				return (-23);
 			}
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)&lstore,
 			    min(plen, sizeof(lstore)));
 			if (phdr == NULL) {
 				return (-24);
 			}
 			fee = (struct sctp_asconf_addr_param *)phdr;
 			lptype = ntohs(fee->addrp.ph.param_type);
 			if (lptype == SCTP_IPV4_ADDRESS) {
 				if (plen !=
 				    sizeof(struct sctp_asconf_addrv4_param)) {
 					SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n",
 					    (int)sizeof(struct sctp_asconf_addrv4_param),
 					    plen);
 				} else {
 					fii = (struct sctp_asconf_addrv4_param *)fee;
 					sin.sin_addr.s_addr = fii->addrp.addr;
 					lsa = (struct sockaddr *)&sin;
 				}
 			} else if (lptype == SCTP_IPV6_ADDRESS) {
 				if (plen !=
 				    sizeof(struct sctp_asconf_addr_param)) {
 					SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n",
 					    (int)sizeof(struct sctp_asconf_addr_param),
 					    plen);
 				} else {
 					memcpy(sin6.sin6_addr.s6_addr,
 					    fee->addrp.addr,
 					    sizeof(fee->addrp.addr));
 					lsa = (struct sockaddr *)&sin6;
 				}
 			}
 			if (lsa) {
 				(void)sctp_set_primary_addr(stcb, sa, NULL);
 			}
 		} else if (ptype == SCTP_HAS_NAT_SUPPORT) {
 			stcb->asoc.peer_supports_nat = 1;
 		} else if (ptype == SCTP_PRSCTP_SUPPORTED) {
 			/* Peer supports pr-sctp */
 			stcb->asoc.peer_supports_prsctp = 1;
 		} else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
 			/* A supported extension chunk */
 			struct sctp_supported_chunk_types_param *pr_supported;
 			uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
 			int num_ent, i;
 
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)&local_store, min(sizeof(local_store), plen));
 			if (phdr == NULL) {
 				return (-25);
 			}
 			stcb->asoc.peer_supports_asconf = 0;
 			stcb->asoc.peer_supports_prsctp = 0;
 			stcb->asoc.peer_supports_pktdrop = 0;
 			stcb->asoc.peer_supports_strreset = 0;
 			stcb->asoc.peer_supports_nr_sack = 0;
 			stcb->asoc.peer_supports_auth = 0;
 			pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
 			num_ent = plen - sizeof(struct sctp_paramhdr);
 			for (i = 0; i < num_ent; i++) {
 				switch (pr_supported->chunk_types[i]) {
 				case SCTP_ASCONF:
 				case SCTP_ASCONF_ACK:
 					stcb->asoc.peer_supports_asconf = 1;
 					break;
 				case SCTP_FORWARD_CUM_TSN:
 					stcb->asoc.peer_supports_prsctp = 1;
 					break;
 				case SCTP_PACKET_DROPPED:
 					stcb->asoc.peer_supports_pktdrop = 1;
 					break;
 				case SCTP_NR_SELECTIVE_ACK:
 					stcb->asoc.peer_supports_nr_sack = 1;
 					break;
 				case SCTP_STREAM_RESET:
 					stcb->asoc.peer_supports_strreset = 1;
 					break;
 				case SCTP_AUTHENTICATION:
 					stcb->asoc.peer_supports_auth = 1;
 					break;
 				default:
 					/* one I have not learned yet */
 					break;
 
 				}
 			}
 		} else if (ptype == SCTP_ECN_NONCE_SUPPORTED) {
 			/* Peer supports ECN-nonce */
 			stcb->asoc.peer_supports_ecn_nonce = 1;
 			stcb->asoc.ecn_nonce_allowed = 1;
 		} else if (ptype == SCTP_RANDOM) {
 			if (plen > sizeof(random_store))
 				break;
 			if (got_random) {
 				/* already processed a RANDOM */
 				goto next_param;
 			}
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)random_store,
 			    min(sizeof(random_store), plen));
 			if (phdr == NULL)
 				return (-26);
 			p_random = (struct sctp_auth_random *)phdr;
 			random_len = plen - sizeof(*p_random);
 			/* enforce the random length */
 			if (random_len != SCTP_AUTH_RANDOM_SIZE_REQUIRED) {
 				SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n");
 				return (-27);
 			}
 			got_random = 1;
 		} else if (ptype == SCTP_HMAC_LIST) {
 			int num_hmacs;
 			int i;
 
 			if (plen > sizeof(hmacs_store))
 				break;
 			if (got_hmacs) {
 				/* already processed a HMAC list */
 				goto next_param;
 			}
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)hmacs_store,
 			    min(plen, sizeof(hmacs_store)));
 			if (phdr == NULL)
 				return (-28);
 			hmacs = (struct sctp_auth_hmac_algo *)phdr;
 			hmacs_len = plen - sizeof(*hmacs);
 			num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
 			/* validate the hmac list */
 			if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
 				return (-29);
 			}
 			if (stcb->asoc.peer_hmacs != NULL)
 				sctp_free_hmaclist(stcb->asoc.peer_hmacs);
 			stcb->asoc.peer_hmacs = sctp_alloc_hmaclist(num_hmacs);
 			if (stcb->asoc.peer_hmacs != NULL) {
 				for (i = 0; i < num_hmacs; i++) {
 					(void)sctp_auth_add_hmacid(stcb->asoc.peer_hmacs,
 					    ntohs(hmacs->hmac_ids[i]));
 				}
 			}
 			got_hmacs = 1;
 		} else if (ptype == SCTP_CHUNK_LIST) {
 			int i;
 
 			if (plen > sizeof(chunks_store))
 				break;
 			if (got_chklist) {
 				/* already processed a Chunks list */
 				goto next_param;
 			}
 			phdr = sctp_get_next_param(m, offset,
 			    (struct sctp_paramhdr *)chunks_store,
 			    min(plen, sizeof(chunks_store)));
 			if (phdr == NULL)
 				return (-30);
 			chunks = (struct sctp_auth_chunk_list *)phdr;
 			num_chunks = plen - sizeof(*chunks);
 			if (stcb->asoc.peer_auth_chunks != NULL)
 				sctp_clear_chunklist(stcb->asoc.peer_auth_chunks);
 			else
 				stcb->asoc.peer_auth_chunks = sctp_alloc_chunklist();
 			for (i = 0; i < num_chunks; i++) {
 				(void)sctp_auth_add_chunk(chunks->chunk_types[i],
 				    stcb->asoc.peer_auth_chunks);
 				/* record asconf/asconf-ack if listed */
 				if (chunks->chunk_types[i] == SCTP_ASCONF)
 					saw_asconf = 1;
 				if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
 					saw_asconf_ack = 1;
 
 			}
 			got_chklist = 1;
 		} else if ((ptype == SCTP_HEARTBEAT_INFO) ||
 			    (ptype == SCTP_STATE_COOKIE) ||
 			    (ptype == SCTP_UNRECOG_PARAM) ||
 			    (ptype == SCTP_COOKIE_PRESERVE) ||
 			    (ptype == SCTP_SUPPORTED_ADDRTYPE) ||
 			    (ptype == SCTP_ADD_IP_ADDRESS) ||
 			    (ptype == SCTP_DEL_IP_ADDRESS) ||
 			    (ptype == SCTP_ERROR_CAUSE_IND) ||
 		    (ptype == SCTP_SUCCESS_REPORT)) {
 			 /* don't care */ ;
 		} else {
 			if ((ptype & 0x8000) == 0x0000) {
 				/*
 				 * must stop processing the rest of the
 				 * param's. Any report bits were handled
 				 * with the call to
 				 * sctp_arethere_unrecognized_parameters()
 				 * when the INIT or INIT-ACK was first seen.
 				 */
 				break;
 			}
 		}
 
 next_param:
 		offset += SCTP_SIZE32(plen);
 		if (offset >= limit) {
 			break;
 		}
 		phdr = sctp_get_next_param(m, offset, &parm_buf,
 		    sizeof(parm_buf));
 	}
 	/* Now check to see if we need to purge any addresses */
 	for (net = TAILQ_FIRST(&stcb->asoc.nets); net != NULL; net = net_tmp) {
 		net_tmp = TAILQ_NEXT(net, sctp_next);
 		if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) ==
 		    SCTP_ADDR_NOT_IN_ASSOC) {
 			/* This address has been removed from the asoc */
 			/* remove and free it */
 			stcb->asoc.numnets--;
 			TAILQ_REMOVE(&stcb->asoc.nets, net, sctp_next);
 			sctp_free_remote_addr(net);
 			if (net == stcb->asoc.primary_destination) {
 				stcb->asoc.primary_destination = NULL;
 				sctp_select_primary_destination(stcb);
 			}
 		}
 	}
 	/* validate authentication required parameters */
 	if (got_random && got_hmacs) {
 		stcb->asoc.peer_supports_auth = 1;
 	} else {
 		stcb->asoc.peer_supports_auth = 0;
 	}
 	if (!stcb->asoc.peer_supports_auth && got_chklist) {
 		/* peer does not support auth but sent a chunks list? */
 		return (-31);
 	}
 	if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && stcb->asoc.peer_supports_asconf &&
 	    !stcb->asoc.peer_supports_auth) {
 		/* peer supports asconf but not auth? */
 		return (-32);
 	} else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
 	    ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
 		return (-33);
 	}
 	/* concatenate the full random key */
 	keylen = sizeof(*p_random) + random_len + sizeof(*hmacs) + hmacs_len;
 	if (chunks != NULL) {
 		keylen += sizeof(*chunks) + num_chunks;
 	}
 	new_key = sctp_alloc_key(keylen);
 	if (new_key != NULL) {
 		/* copy in the RANDOM */
 		if (p_random != NULL) {
 			keylen = sizeof(*p_random) + random_len;
 			bcopy(p_random, new_key->key, keylen);
 		}
 		/* append in the AUTH chunks */
 		if (chunks != NULL) {
 			bcopy(chunks, new_key->key + keylen,
 			    sizeof(*chunks) + num_chunks);
 			keylen += sizeof(*chunks) + num_chunks;
 		}
 		/* append in the HMACs */
 		if (hmacs != NULL) {
 			bcopy(hmacs, new_key->key + keylen,
 			    sizeof(*hmacs) + hmacs_len);
 		}
 	} else {
 		/* failed to get memory for the key */
 		return (-34);
 	}
 	if (stcb->asoc.authinfo.peer_random != NULL)
 		sctp_free_key(stcb->asoc.authinfo.peer_random);
 	stcb->asoc.authinfo.peer_random = new_key;
 	sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
 	sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
 
 	return (0);
 }
 
 /*
  * Make a destination the primary path of an association.
  *
  * The destination is "net" when that is non-NULL; otherwise, when "sa" is
  * non-NULL, it is whatever sctp_findnet() returns for "sa".
  *
  * Returns -1 when no destination could be resolved, 0 otherwise.  A
  * destination still flagged SCTP_ADDR_UNCONFIRMED is not switched to
  * immediately; it is only tagged SCTP_ADDR_REQ_PRIMARY and 0 is returned.
  */
 int
 sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa,
     struct sctp_nets *net)
 {
 	struct sctp_nets *primary = net;
 
 	/* resolve the address to a net if the caller did not hand us one */
 	if ((primary == NULL) && (sa != NULL)) {
 		primary = sctp_findnet(stcb, sa);
 	}
 	if (primary == NULL) {
 		/* the requested primary address is not part of the assoc */
 		return (-1);
 	}
 	if (primary->dest_state & SCTP_ADDR_UNCONFIRMED) {
 		/* not confirmed yet: just remember the request */
 		primary->dest_state |= SCTP_ADDR_REQ_PRIMARY;
 		return (0);
 	}
 	stcb->asoc.primary_destination = primary;
 	primary->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
 	if (TAILQ_FIRST(&stcb->asoc.nets) != primary) {
 		/*
 		 * sctp_cmpaddr() is much more efficient when the primary
 		 * sits at the head of the list, so move it there.
 		 */
 		TAILQ_REMOVE(&stcb->asoc.nets, primary, sctp_next);
 		TAILQ_INSERT_HEAD(&stcb->asoc.nets, primary, sctp_next);
 	}
 	return (0);
 }
 
 /*
  * Tell the caller whether verification tag "tag" may be used for the
  * (lport, rport) pair.
  *
  * Returns 0 when the tag is taken, either by an association on the tag's
  * hash chain with the same my_vtag, rport and local port, or by an
  * unexpired entry in the time-wait table with the same tag and ports.
  * Returns 1 otherwise.
  *
  * Side effect: any time-wait entry met during the scan whose expiry lies
  * before now->tv_sec is zeroed.
  *
  * The whole check runs under SCTP_INP_INFO_RLOCK().  "inp" and
  * "save_in_twait" are not referenced in this function.
  */
 int
 sctp_is_vtag_good(struct sctp_inpcb *inp, uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *now, int save_in_twait)
 {
 	struct sctpasochead *asoc_bucket;
 	struct sctpvtaghead *twait_bucket;
 	struct sctp_tagblock *blk;
 	struct sctp_tcb *cand;
 	int slot;
 	int usable = 1;
 
 	SCTP_INP_INFO_RLOCK();
 	asoc_bucket = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
 	    SCTP_BASE_INFO(hashasocmark))];
 	if (asoc_bucket != NULL) {
 		/*
 		 * No per-TCB or per-INP locking here: the code relies on
 		 * the info read lock to keep TCBs and INPs from being
 		 * removed while the chain is walked.
 		 */
 		LIST_FOREACH(cand, asoc_bucket, sctp_asocs) {
 			if (cand->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 				continue;
 			}
 			if ((cand->asoc.my_vtag == tag) &&
 			    (cand->rport == rport) &&
 			    (cand->sctp_ep->sctp_lport == lport)) {
 				/* tag/port set is in use */
 				usable = 0;
 				goto done;
 			}
 		}
 	}
 	/* Now what about timed wait ? */
 	twait_bucket = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
 	LIST_FOREACH(blk, twait_bucket, sctp_nxt_tagblock) {
 		for (slot = 0; slot < SCTP_NUMBER_IN_VTAG_BLOCK; slot++) {
 			if (blk->vtag_block[slot].v_tag == 0) {
 				/* unused slot */
 				continue;
 			}
 			if ((long)blk->vtag_block[slot].tv_sec_at_expire <
 			    now->tv_sec) {
 				/* expired: audit it out of the table */
 				blk->vtag_block[slot].tv_sec_at_expire = 0;
 				blk->vtag_block[slot].v_tag = 0;
 				blk->vtag_block[slot].lport = 0;
 				blk->vtag_block[slot].rport = 0;
 				continue;
 			}
 			if ((blk->vtag_block[slot].v_tag == tag) &&
 			    (blk->vtag_block[slot].lport == lport) &&
 			    (blk->vtag_block[slot].rport == rport)) {
 				/* still in time-wait */
 				usable = 0;
 				goto done;
 			}
 		}
 	}
 done:
 	SCTP_INP_INFO_RUNLOCK();
 	return (usable);
 }
 
 
 /*
  * Record of the association ids sctp_drain_mbufs() has reneged on.
  * reneged_at is the next slot to write; being a uint8_t it wraps from 255
  * back to 0, which matches the 256-entry array, so the record behaves as a
  * ring.  NOTE(review): nothing in the visible code reads these back;
  * presumably they exist for post-mortem debugging -- confirm.
  */
 static sctp_assoc_t reneged_asoc_ids[256];
 static uint8_t reneged_at = 0;
 
 
 /*
  * Renege on out-of-order data held by one association.
  *
  * Every chunk on the re-assembly queue and every entry on each inbound
  * stream queue for which compare_with_wrap(tsn, cumulative_tsn + 1, MAX_TSN)
  * is true is unlinked, its mbufs are freed and its bit in the mapping array
  * is cleared.  If anything was freed, highest_tsn_inside_map is recomputed,
  * the dack timer is stopped, a SACK is sent and chunk output is run.
  *
  * The only visible caller, sctp_drain(), holds the TCB lock around this
  * call.  "inp" is not referenced in the body.
  */
 static void
 sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
 {
 	/*
 	 * We must hunt this association for MBUF's past the cumack (i.e.
 	 * out of order data that we can renege on).
 	 */
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk, *nchk;
 	uint32_t cumulative_tsn_p1;
 	struct sctp_queued_to_read *ctl, *nctl;
 	int cnt, strmat;
 	uint32_t gap, i;
 	int fnd = 0;
 
 	/* We look for anything larger than the cum-ack + 1 */
 
 	asoc = &stcb->asoc;
 	if (asoc->cumulative_tsn == asoc->highest_tsn_inside_map) {
 		/* none we can reneg on. */
 		return;
 	}
 	SCTP_STAT_INCR(sctps_protocol_drains_done);
 	cumulative_tsn_p1 = asoc->cumulative_tsn + 1;
 	cnt = 0;
 	/* First look in the re-assembly queue */
 	chk = TAILQ_FIRST(&asoc->reasmqueue);
 	while (chk) {
 		/* Get the next one (before chk is possibly freed below) */
 		nchk = TAILQ_NEXT(chk, sctp_next);
 		if (compare_with_wrap(chk->rec.data.TSN_seq,
 		    cumulative_tsn_p1, MAX_TSN)) {
 			/* Yep it is above cum-ack */
 			cnt++;
 			/* gap is written by the macro and used to clear the map bit */
 			SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn);
 			asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
 			sctp_ucount_decr(asoc->cnt_on_reasm_queue);
 			SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
 			TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
 			if (chk->data) {
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
 			}
 			sctp_free_a_chunk(stcb, chk);
 		}
 		chk = nchk;
 	}
 	/* Ok that was fun, now we will drain all the inbound streams? */
 	for (strmat = 0; strmat < asoc->streamincnt; strmat++) {
 		ctl = TAILQ_FIRST(&asoc->strmin[strmat].inqueue);
 		while (ctl) {
 			/* fetch the successor first; ctl may be freed below */
 			nctl = TAILQ_NEXT(ctl, next);
 			if (compare_with_wrap(ctl->sinfo_tsn,
 			    cumulative_tsn_p1, MAX_TSN)) {
 				/* Yep it is above cum-ack */
 				cnt++;
 				SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn);
 				asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length);
 				sctp_ucount_decr(asoc->cnt_on_all_streams);
 				SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
 				TAILQ_REMOVE(&asoc->strmin[strmat].inqueue, ctl, next);
 				if (ctl->data) {
 					sctp_m_freem(ctl->data);
 					ctl->data = NULL;
 				}
 				sctp_free_remote_addr(ctl->whoFrom);
 				SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl);
 				SCTP_DECR_READQ_COUNT();
 			}
 			ctl = nctl;
 		}
 	}
 	if (cnt) {
 		/*
 		 * We must back down to see what the new highest is: walk
 		 * from the old highest TSN down to mapping_array_base_tsn
 		 * (inclusive) and stop at the first TSN still marked
 		 * present.
 		 */
 		for (i = asoc->highest_tsn_inside_map;
 		    (compare_with_wrap(i, asoc->mapping_array_base_tsn, MAX_TSN) || (i == asoc->mapping_array_base_tsn));
 		    i--) {
 			SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn);
 			if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
 				asoc->highest_tsn_inside_map = i;
 				fnd = 1;
 				break;
 			}
 		}
 		if (!fnd) {
 			/* nothing left in the map: park just below the base */
 			asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1;
 		}
 		/*
 		 * Question, should we go through the delivery queue? The
 		 * only reason things are on here is the app not reading OR
 		 * a p-d-api up. An attacker COULD send enough in to
 		 * initiate the PD-API and then send a bunch of stuff to
 		 * other streams... these would wind up on the delivery
 		 * queue.. and then we would not get to them. But in order
 		 * to do this I then have to back-track and un-deliver
 		 * sequence numbers in streams.. el-yucko. I think for now
 		 * we will NOT look at the delivery queue and leave it to be
 		 * something to consider later. An alternative would be to
 		 * abort the P-D-API with a notification and then deliver
 		 * the data.... Or another method might be to keep track of
 		 * how many times the situation occurs and if we see a
 		 * possible attack underway just abort the association.
 		 */
 #ifdef SCTP_DEBUG
 		SCTPDBG(SCTP_DEBUG_PCB1, "Freed %d chunks from reneg harvest\n", cnt);
 #endif
 		/*
 		 * Record how many chunks were revoked, stop the dack timer
 		 * and send a SACK now, then run chunk output.
 		 */
 		asoc->last_revoke_count = cnt;
 		(void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
 		/* sa_ignore NO_NULL_CHK */
 		sctp_send_sack(stcb);
 		sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED);
 		/* log the association id; reneged_at (uint8_t) wraps at 256 */
 		reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb);
 		reneged_at++;
 	}
 	/*
 	 * Another issue, in un-setting the TSN's in the mapping array we
 	 * DID NOT adjust the highest_tsn marker.  This will cause one of
 	 * two things to occur. It may cause us to do extra work in checking
 	 * for our mapping array movement. More importantly it may cause us
 	 * to SACK every datagram. This may not be a bad thing though since
 	 * we will recover once we get our cum-ack above and all this stuff
 	 * we dumped recovered.
 	 *
 	 * NOTE(review): the "if (cnt)" block above does recompute
 	 * highest_tsn_inside_map, so the first sentence of this comment
 	 * looks out of date -- confirm and prune.
 	 */
 }
 
 /*
  * Drain entry point: for every vnet, walk all endpoints and all of their
  * associations and let sctp_drain_mbufs() renege on out-of-order data.
  *
  * Lock nesting per vnet: INP-INFO read lock, then each endpoint's read
  * lock, then each association's TCB lock around the per-association call.
  * A vnet whose sctp_do_drain sysctl is 0 is skipped (the drain-calls
  * statistic is still incremented for it).
  */
 void
 sctp_drain(void)
 {
 	/*
 	 * We must walk the PCB lists for ALL associations here. The system
 	 * is LOW on MBUF's and needs help. This is where reneging will
 	 * occur. We really hope this does NOT happen!
 	 */
 	VNET_ITERATOR_DECL(vnet_iter);
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		struct sctp_inpcb *inp;
 		struct sctp_tcb *stcb;
 
 		SCTP_STAT_INCR(sctps_protocol_drain_calls);
 		if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
 #ifdef VIMAGE
 			/*
 			 * Every CURVNET_SET() needs its CURVNET_RESTORE();
 			 * undo the switch before moving on to the next vnet
 			 * so the saved context is not lost.
 			 */
 			CURVNET_RESTORE();
 			continue;
 #else
 			return;
 #endif
 		}
 		SCTP_INP_INFO_RLOCK();
 		LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
 			/* For each endpoint */
 			SCTP_INP_RLOCK(inp);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				/* For each association */
 				SCTP_TCB_LOCK(stcb);
 				sctp_drain_mbufs(inp, stcb);
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			SCTP_INP_RUNLOCK(inp);
 		}
 		SCTP_INP_INFO_RUNLOCK();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Allocate an iterator, fill it in from the arguments and append it to the
  * iterator work queue, waking the iterator if it is not already running.
  *
  * The iterator walks endpoints and associations selected by pcb_state,
  * pcb_features and asoc_state:
  *   af    (mandatory) is executed for all matching assocs,
  *   inpf  (optional)  is executed for each new endpoint as it is reached,
  *   inpe  (optional)  is called when an endpoint is through all its stcbs,
  *   ef    (optional)  is executed when the iterator completes.
  * With s_inp set only that endpoint is visited (the caller is assumed to
  * hold its lock); otherwise iteration starts at the head of the global
  * endpoint list.  A reference is taken on the starting endpoint.
  *
  * Returns -1 when af is NULL, ENOMEM when the allocation fails, else 0.
  */
 int
 sctp_initiate_iterator(inp_func inpf,
     asoc_func af,
     inp_func inpe,
     uint32_t pcb_state,
     uint32_t pcb_features,
     uint32_t asoc_state,
     void *argp,
     uint32_t argi,
     end_func ef,
     struct sctp_inpcb *s_inp,
     uint8_t chunk_output_off)
 {
 	struct sctp_iterator *iter = NULL;
 
 	if (af == NULL) {
 		return (-1);
 	}
 	SCTP_MALLOC(iter, struct sctp_iterator *, sizeof(struct sctp_iterator),
 	    SCTP_M_ITER);
 	if (iter == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
 		return (ENOMEM);
 	}
 	memset(iter, 0, sizeof(*iter));
 
 	/* callbacks */
 	iter->function_inp = inpf;
 	iter->function_assoc = af;
 	iter->function_inp_end = inpe;
 	iter->function_atend = ef;
 	/* without a per-endpoint callback the endpoint counts as done */
 	iter->done_current_ep = (inpf != NULL) ? 0 : 1;
 
 	/* caller arguments and selection filters */
 	iter->pointer = argp;
 	iter->val = argi;
 	iter->pcb_flags = pcb_state;
 	iter->pcb_features = pcb_features;
 	iter->asoc_state = asoc_state;
 	iter->no_chunk_output = chunk_output_off;
 	iter->vn = curvnet;
 
 	if (s_inp == NULL) {
 		/* start from the first endpoint on the global list */
 		iter->iterator_flags = SCTP_ITERATOR_DO_ALL_INP;
 		SCTP_INP_INFO_RLOCK();
 		iter->inp = LIST_FIRST(&SCTP_BASE_INFO(listhead));
 		if (iter->inp != NULL) {
 			SCTP_INP_INCR_REF(iter->inp);
 		}
 		SCTP_INP_INFO_RUNLOCK();
 	} else {
 		/* Assume lock is held here */
 		iter->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP;
 		iter->inp = s_inp;
 		SCTP_INP_INCR_REF(iter->inp);
 	}
 
 	SCTP_IPI_ITERATOR_WQ_LOCK();
 	TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, iter, sctp_nxt_itr);
 	if (sctp_it_ctl.iterator_running == 0) {
 		sctp_wakeup_iterator();
 	}
 	SCTP_IPI_ITERATOR_WQ_UNLOCK();
 	/* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */
 	return (0);
 }
Index: projects/binutils-2.17/sys/netinet/sctp_uio.h
===================================================================
--- projects/binutils-2.17/sys/netinet/sctp_uio.h	(revision 215829)
+++ projects/binutils-2.17/sys/netinet/sctp_uio.h	(revision 215830)
@@ -1,1166 +1,1166 @@
 /*-
  * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *   this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *   the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $	 */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #ifndef __sctp_uio_h__
 #define __sctp_uio_h__
 
 
 #if ! defined(_KERNEL)
 #include <stdint.h>
 #endif
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 
 typedef uint32_t sctp_assoc_t;
 
 /*
  * Compatibility with the previous spelling: the plural identifier is
  * rewritten by the preprocessor to the singular one, which in this header
  * names both a member of struct sctp_event_subscribe and the stream reset
  * event structure.
  */
 #define sctp_stream_reset_events sctp_stream_reset_event
 
 /*
  * On/Off setup for subscription to events.
  * One uint8_t switch per event class, eleven in total; since every member
  * is a single byte the structure needs no internal padding.
  */
 struct sctp_event_subscribe {
 	uint8_t sctp_data_io_event;
 	uint8_t sctp_association_event;
 	uint8_t sctp_address_event;
 	uint8_t sctp_send_failure_event;
 	uint8_t sctp_peer_error_event;
 	uint8_t sctp_shutdown_event;
 	uint8_t sctp_partial_delivery_event;
 	uint8_t sctp_adaptation_layer_event;
 	uint8_t sctp_authentication_event;
 	uint8_t sctp_sender_dry_event;
 	uint8_t sctp_stream_reset_event;
 };
 
 /* ancillary data types */
 #define SCTP_INIT	0x0001
 #define SCTP_SNDRCV	0x0002
 #define SCTP_EXTRCV	0x0003
 /*
  * ancillary data structures
  */
 /*
  * Four 16-bit values (8 bytes, no padding).
  * NOTE(review): presumably the payload that goes with ancillary type
  * SCTP_INIT above -- confirm against the control-message handling code.
  */
 struct sctp_initmsg {
 	uint16_t sinit_num_ostreams;
 	uint16_t sinit_max_instreams;
 	uint16_t sinit_max_attempts;
 	uint16_t sinit_max_init_timeo;
 };
 
 /* We add 96 bytes to the size of sctp_sndrcvinfo.
  * This makes the current structure 128 bytes long
  * which is nicely 64 bit aligned but also has room
  * for us to add more and keep ABI compatibility.
  * For example, already we have the sctp_extrcvinfo
  * when enabled which is 48 bytes.
  */
 
 /*
  * The assoc up needs a verfid
  * all sendrcvinfo's need a verfid for SENDING only.
  */
 
 
 /* Reserve sizes for sctp_sndrcvinfo (96) and sctp_extrcvinfo (80). */
 #define SCTP_ALIGN_RESV_PAD 96
 #define SCTP_ALIGN_RESV_PAD_SHORT 80
 
 /*
  * Size breakdown behind the 128-byte figure quoted above: three uint16_t
  * (6 bytes), six 32-bit members (24 bytes; sctp_assoc_t is a uint32_t) and
  * the 96-byte reserve.  That adds up to 128 provided the compiler inserts
  * 2 bytes of padding after sinfo_flags to align sinfo_ppid (assumes
  * natural alignment -- not enforced in this header).
  */
 struct sctp_sndrcvinfo {
 	uint16_t sinfo_stream;
 	uint16_t sinfo_ssn;
 	uint16_t sinfo_flags;
 	uint32_t sinfo_ppid;
 	uint32_t sinfo_context;
 	uint32_t sinfo_timetolive;
 	uint32_t sinfo_tsn;
 	uint32_t sinfo_cumtsn;
 	sctp_assoc_t sinfo_assoc_id;
 	uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD];
 };
 
 /*
  * Extended variant of struct sctp_sndrcvinfo.  The members up to and
  * including sinfo_assoc_id carry the same names and order, with
  * sinfo_pr_policy added right after sinfo_flags.  Five sreinfo_next_*
  * members (16 bytes) follow and the reserve shrinks from 96 to 80 bytes,
  * so the total of 8 + 24 + 16 + 80 = 128 bytes is unchanged.
  */
 struct sctp_extrcvinfo {
 	uint16_t sinfo_stream;
 	uint16_t sinfo_ssn;
 	uint16_t sinfo_flags;
 	uint16_t sinfo_pr_policy;
 	uint32_t sinfo_ppid;
 	uint32_t sinfo_context;
 	uint32_t sinfo_timetolive;
 	uint32_t sinfo_tsn;
 	uint32_t sinfo_cumtsn;
 	sctp_assoc_t sinfo_assoc_id;
 	uint16_t sreinfo_next_flags;
 	uint16_t sreinfo_next_stream;
 	uint32_t sreinfo_next_aid;
 	uint32_t sreinfo_next_length;
 	uint32_t sreinfo_next_ppid;
 	uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
 };
 
 /*
  * Single-bit values (0 meaning none).
  * NOTE(review): presumably these are the bits of sreinfo_next_flags --
  * confirm against the receive path.
  */
 #define SCTP_NO_NEXT_MSG           0x0000
 #define SCTP_NEXT_MSG_AVAIL        0x0001
 #define SCTP_NEXT_MSG_ISCOMPLETE   0x0002
 #define SCTP_NEXT_MSG_IS_UNORDERED 0x0004
 #define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008
 
 /*
  * Two uint16_t followed by four uint32_t: 20 bytes with no padding needed.
  * NOTE(review): no user of this structure is visible in this part of the
  * header; presumably a per-send summary for SCTP_SENDALL -- confirm.
  */
 struct sctp_snd_all_completes {
 	uint16_t sall_stream;
 	uint16_t sall_flags;
 	uint32_t sall_ppid;
 	uint32_t sall_context;
 	uint32_t sall_num_sent;
 	uint32_t sall_num_failed;
 };
 
 /*
  * Flags that go into the sinfo->sinfo_flags field.
  * All seven occupy bits 8 through 14; none uses the low byte.
  */
 #define SCTP_EOF              0x0100	/* Start shutdown procedures */
 #define SCTP_ABORT            0x0200	/* Send an ABORT to peer */
 #define SCTP_UNORDERED        0x0400	/* Message is un-ordered */
 #define SCTP_ADDR_OVER        0x0800	/* Override the primary-address */
 #define SCTP_SENDALL          0x1000	/* Send this on all associations */
 #define SCTP_EOR              0x2000	/* end of message signal */
 #define SCTP_SACK_IMMEDIATELY 0x4000	/* Set I-Bit */
 
 /*
  * Evaluates to 1 when x has any bit set in 0xffffff00 that is not one of
  * the seven flags above, 0 otherwise.  The low 8 bits of x are masked out
  * and never checked here.
  */
 #define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \
                                     & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\
 				        SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\
 					SCTP_SACK_IMMEDIATELY)) != 0)
 /* for the endpoint */
 
 /* The lower byte is an enumeration of PR-SCTP policies */
 #define SCTP_PR_SCTP_TTL  0x0001/* Time based PR-SCTP */
 #define SCTP_PR_SCTP_BUF  0x0002/* Buffer based PR-SCTP */
 #define SCTP_PR_SCTP_RTX  0x0003/* Number of retransmissions based PR-SCTP */
 
 /*
  * Helpers over the policy byte:
  *   PR_SCTP_POLICY          extracts the low 8 bits of x,
  *   PR_SCTP_ENABLED         is true when that byte is nonzero,
  *   PR_SCTP_*_ENABLED       compare it against one specific policy,
  *   PR_SCTP_INVALID_POLICY  is true for any value above SCTP_PR_SCTP_RTX.
  * A value above 3 therefore satisfies both PR_SCTP_ENABLED and
  * PR_SCTP_INVALID_POLICY.
  */
 #define PR_SCTP_POLICY(x)         ((x) & 0xff)
 #define PR_SCTP_ENABLED(x)        (PR_SCTP_POLICY(x) != 0)
 #define PR_SCTP_TTL_ENABLED(x)    (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
 #define PR_SCTP_BUF_ENABLED(x)    (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
 #define PR_SCTP_RTX_ENABLED(x)    (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
 #define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
 /*
  * Statistics: eight 32-bit counts.
  * NOTE(review): going by the member names these count endpoints,
  * associations, local and remote addresses, chunks, read-queue entries,
  * free chunks and stream out-queue entries; the code that fills them in is
  * not part of this header -- confirm there.
  */
 struct sctp_pcbinfo {
 	uint32_t ep_count;
 	uint32_t asoc_count;
 	uint32_t laddr_count;
 	uint32_t raddr_count;
 	uint32_t chk_count;
 	uint32_t readq_count;
 	uint32_t free_chunks;
 	uint32_t stream_oque;
 };
 
 /*
  * Per-association buffer totals, keyed by ss_assoc_id.
  * NOTE(review): units and the exact meaning of the two totals are not
  * established in this header -- confirm against the code that fills it.
  */
 struct sctp_sockstat {
 	sctp_assoc_t ss_assoc_id;
 	uint32_t ss_total_sndbuf;
 	uint32_t ss_total_recv_buf;
 };
 
 /*
  * notification event structures
  */
 
 /*
  * association change event
  *
  * Starts with the same uint16_t type / uint16_t flags / uint32_t length
  * triple as struct sctp_tlv, so it can be inspected through the sn_header
  * member of union sctp_notification before the concrete type is known.
  */
 struct sctp_assoc_change {
 	uint16_t sac_type;
 	uint16_t sac_flags;
 	uint32_t sac_length;
 	uint16_t sac_state;
 	uint16_t sac_error;
 	uint16_t sac_outbound_streams;
 	uint16_t sac_inbound_streams;
 	sctp_assoc_t sac_assoc_id;
 };
 
 /* sac_state values */
 #define SCTP_COMM_UP		0x0001
 #define SCTP_COMM_LOST		0x0002
 #define SCTP_RESTART		0x0003
 #define SCTP_SHUTDOWN_COMP	0x0004
 #define SCTP_CANT_STR_ASSOC	0x0005
 
 
 /*
  * Address event
  *
  * Starts with the type/flags/length triple shared with struct sctp_tlv.
  * The affected address is embedded as a full sockaddr_storage, and the
  * structure ends in 4 explicit padding bytes.
  */
 struct sctp_paddr_change {
 	uint16_t spc_type;
 	uint16_t spc_flags;
 	uint32_t spc_length;
 	struct sockaddr_storage spc_aaddr;
 	uint32_t spc_state;
 	uint32_t spc_error;
 	sctp_assoc_t spc_assoc_id;
 	uint8_t spc_padding[4];
 };
 
 /* paddr state values */
 #define SCTP_ADDR_AVAILABLE	0x0001
 #define SCTP_ADDR_UNREACHABLE	0x0002
 #define SCTP_ADDR_REMOVED	0x0003
 #define SCTP_ADDR_ADDED		0x0004
 #define SCTP_ADDR_MADE_PRIM	0x0005
 #define SCTP_ADDR_CONFIRMED	0x0006
 
 /*
  * CAUTION: these are user exposed SCTP addr reachability states must be
  * compatible with SCTP_ADDR states in sctp_constants.h
  *
  * Each name is #undef'd first when already defined, so this header's value
  * replaces any earlier definition instead of triggering a redefinition
  * diagnostic.  The trailing comment on each line names the
  * sctp_constants.h state it has to match.
  */
 #ifdef SCTP_ACTIVE
 #undef SCTP_ACTIVE
 #endif
 #define SCTP_ACTIVE		0x0001	/* SCTP_ADDR_REACHABLE */
 
 #ifdef SCTP_INACTIVE
 #undef SCTP_INACTIVE
 #endif
 #define SCTP_INACTIVE		0x0002	/* SCTP_ADDR_NOT_REACHABLE */
 
 #ifdef SCTP_UNCONFIRMED
 #undef SCTP_UNCONFIRMED
 #endif
 #define SCTP_UNCONFIRMED	0x0200	/* SCTP_ADDR_UNCONFIRMED */
 
 #ifdef SCTP_NOHEARTBEAT
 #undef SCTP_NOHEARTBEAT
 #endif
 #define SCTP_NOHEARTBEAT	0x0040	/* SCTP_ADDR_NOHB */
 
 
 /*
  * remote error events
  *
  * Starts with the type/flags/length triple shared with struct sctp_tlv.
  * sre_data is declared with a fixed size of 4, unlike ssf_data[] in
  * struct sctp_send_failed which is a flexible array member.
  * NOTE(review): presumably the error payload may be longer than 4 bytes,
  * with sre_length giving the real size -- confirm against the code that
  * builds this notification.
  */
 struct sctp_remote_error {
 	uint16_t sre_type;
 	uint16_t sre_flags;
 	uint32_t sre_length;
 	uint16_t sre_error;
 	sctp_assoc_t sre_assoc_id;
 	uint8_t sre_data[4];
 };
 
 /*
  * data send failure event
  *
  * Starts with the type/flags/length triple shared with struct sctp_tlv and
  * embeds a complete struct sctp_sndrcvinfo.  ssf_data is a flexible array
  * member, so sizeof(struct sctp_send_failed) does not include any payload.
  */
 struct sctp_send_failed {
 	uint16_t ssf_type;
 	uint16_t ssf_flags;
 	uint32_t ssf_length;
 	uint32_t ssf_error;
 	struct sctp_sndrcvinfo ssf_info;
 	sctp_assoc_t ssf_assoc_id;
 	uint8_t ssf_data[];
 };
 
 /*
  * flag that indicates state of data
  * NOTE(review): presumably reported in ssf_flags -- confirm.
  */
 #define SCTP_DATA_UNSENT	0x0001	/* inqueue never on wire */
 #define SCTP_DATA_SENT		0x0002	/* on wire at failure */
 
 /*
  * shutdown event
  * Just the type/flags/length triple shared with struct sctp_tlv plus the
  * association id.
  */
 struct sctp_shutdown_event {
 	uint16_t sse_type;
 	uint16_t sse_flags;
 	uint32_t sse_length;
 	sctp_assoc_t sse_assoc_id;
 };
 
 /*
  * Adaptation layer indication stuff
  * The event starts with the type/flags/length triple shared with
  * struct sctp_tlv.
  */
 struct sctp_adaptation_event {
 	uint16_t sai_type;
 	uint16_t sai_flags;
 	uint32_t sai_length;
 	uint32_t sai_adaptation_ind;
 	sctp_assoc_t sai_assoc_id;
 };
 
 struct sctp_setadaptation {
 	uint32_t ssb_adaptation_ind;
 };
 
 /*
  * compatible old spelling ("adaption"): the two structures below have the
  * same member types in the same order as their "adaptation" counterparts
  * above; only the tag and the *_ind member name differ.
  */
 struct sctp_adaption_event {
 	uint16_t sai_type;
 	uint16_t sai_flags;
 	uint32_t sai_length;
 	uint32_t sai_adaption_ind;
 	sctp_assoc_t sai_assoc_id;
 };
 
 struct sctp_setadaption {
 	uint32_t ssb_adaption_ind;
 };
 
 
 /*
  * Partial Delivery API event
  *
  * Starts with the type/flags/length triple shared with struct sctp_tlv.
  * The only indication value defined here is SCTP_PARTIAL_DELIVERY_ABORTED.
  */
 struct sctp_pdapi_event {
 	uint16_t pdapi_type;
 	uint16_t pdapi_flags;
 	uint32_t pdapi_length;
 	uint32_t pdapi_indication;
 	uint16_t pdapi_stream;
 	uint16_t pdapi_seq;
 	sctp_assoc_t pdapi_assoc_id;
 };
 
 /* indication values */
 #define SCTP_PARTIAL_DELIVERY_ABORTED	0x0001
 
 
 /*
  * authentication key event
  *
  * Starts with the type/flags/length triple shared with struct sctp_tlv.
  * Three indication values are defined below.
  */
 struct sctp_authkey_event {
 	uint16_t auth_type;
 	uint16_t auth_flags;
 	uint32_t auth_length;
 	uint16_t auth_keynumber;
 	uint16_t auth_altkeynumber;
 	uint32_t auth_indication;
 	sctp_assoc_t auth_assoc_id;
 };
 
 /* indication values */
 #define SCTP_AUTH_NEWKEY	0x0001
 #define SCTP_AUTH_NO_AUTH	0x0002
 #define SCTP_AUTH_FREE_KEY	0x0003
 
 
 /*
  * Sender dry event: the type/flags/length triple shared with
  * struct sctp_tlv plus the association id, nothing else.
  */
 struct sctp_sender_dry_event {
 	uint16_t sender_dry_type;
 	uint16_t sender_dry_flags;
 	uint32_t sender_dry_length;
 	sctp_assoc_t sender_dry_assoc_id;
 };
 
 
 /*
  * stream reset event
  *
  * Starts with the type/flags/length triple shared with struct sctp_tlv.
  * strreset_list is a flexible array member of uint16_t, so the structure's
  * sizeof covers the fixed header only.
  * NOTE(review): presumably the list holds stream numbers and its length
  * follows from strreset_length -- confirm against the producer.
  */
 struct sctp_stream_reset_event {
 	uint16_t strreset_type;
 	uint16_t strreset_flags;
 	uint32_t strreset_length;
 	sctp_assoc_t strreset_assoc_id;
 	uint16_t strreset_list[];
 };
 
 /* flags in strreset_flags field (each value is a distinct single bit) */
 #define SCTP_STRRESET_INBOUND_STR  0x0001
 #define SCTP_STRRESET_OUTBOUND_STR 0x0002
 #define SCTP_STRRESET_ALL_STREAMS  0x0004
 #define SCTP_STRRESET_STREAM_LIST  0x0008
 #define SCTP_STRRESET_FAILED       0x0010
 #define SCTP_STRRESET_ADD_STREAM   0x0020
 
 /*
  * SCTP notification event
  * Generic header: every notification structure in this header begins with
  * this same uint16_t / uint16_t / uint32_t layout under its own member
  * names.
  */
 struct sctp_tlv {
 	uint16_t sn_type;
 	uint16_t sn_flags;
 	uint32_t sn_length;
 };
 
 /*
  * Overlay of all notification layouts.  Because each member starts with
  * the sctp_tlv triple, sn_header can be read first and the matching
  * member picked afterwards.  Two members (sn_send_failed and
  * sn_strreset_event) end in flexible array members, so
  * sizeof(union sctp_notification) does not bound their payloads.
  */
 union sctp_notification {
 	struct sctp_tlv sn_header;
 	struct sctp_assoc_change sn_assoc_change;
 	struct sctp_paddr_change sn_paddr_change;
 	struct sctp_remote_error sn_remote_error;
 	struct sctp_send_failed sn_send_failed;
 	struct sctp_shutdown_event sn_shutdown_event;
 	struct sctp_adaptation_event sn_adaptation_event;
 	/* compatibility same as above */
 	struct sctp_adaption_event sn_adaption_event;
 	struct sctp_pdapi_event sn_pdapi_event;
 	struct sctp_authkey_event sn_auth_event;
 	struct sctp_sender_dry_event sn_sender_dry_event;
 	struct sctp_stream_reset_event sn_strreset_event;
 };
 
 /*
  * notification types: consecutive codes 0x0001 through 0x000b, not a bit mask
  */
 #define SCTP_ASSOC_CHANGE			0x0001
 #define SCTP_PEER_ADDR_CHANGE			0x0002
 #define SCTP_REMOTE_ERROR			0x0003
 #define SCTP_SEND_FAILED			0x0004
 #define SCTP_SHUTDOWN_EVENT			0x0005
 #define SCTP_ADAPTATION_INDICATION		0x0006
 /* old spelling; same value as SCTP_ADAPTATION_INDICATION */
 #define SCTP_ADAPTION_INDICATION		0x0006
 #define SCTP_PARTIAL_DELIVERY_EVENT		0x0007
 #define SCTP_AUTHENTICATION_EVENT		0x0008
 #define SCTP_STREAM_RESET_EVENT			0x0009
 #define SCTP_SENDER_DRY_EVENT			0x000a
 #define SCTP__NOTIFICATIONS_STOPPED_EVENT	0x000b	/* we don't send this */
 /*
  * socket option structs
  */
 
 /*
  * Per-address parameters: the address, the association id, and heartbeat
  * interval, path MTU, flags, IPv6 flow label, path max retransmit count
  * and IPv4 TOS values.
  */
 struct sctp_paddrparams {
 	struct sockaddr_storage spp_address;
 	sctp_assoc_t spp_assoc_id;
 	uint32_t spp_hbinterval;
 	uint32_t spp_pathmtu;
 	uint32_t spp_flags;	/* presumably the SPP_* bits below */
 	uint32_t spp_ipv6_flowlabel;
 	uint16_t spp_pathmaxrxt;
 	uint8_t spp_ipv4_tos;
 };
 
 /*
  * SPP_* flag bits.  Each value is a distinct single bit; enable and disable
  * are separate bits, and 0x20 and 0x40 are not assigned here.
  */
 #define SPP_HB_ENABLE		0x00000001
 #define SPP_HB_DISABLE		0x00000002
 #define SPP_HB_DEMAND		0x00000004
 #define SPP_PMTUD_ENABLE	0x00000008
 #define SPP_PMTUD_DISABLE	0x00000010
 #define SPP_HB_TIME_IS_ZERO     0x00000080
 #define SPP_IPV6_FLOWLABEL      0x00000100
 #define SPP_IPV4_TOS            0x00000200
 
 /*
  * Per-address information: address, association id, state, cwnd, srtt,
  * rto and mtu.  Also embedded as sstat_primary in struct sctp_status.
  */
 struct sctp_paddrinfo {
 	struct sockaddr_storage spinfo_address;
 	sctp_assoc_t spinfo_assoc_id;
 	int32_t spinfo_state;
 	uint32_t spinfo_cwnd;
 	uint32_t spinfo_srtt;
 	uint32_t spinfo_rto;
 	uint32_t spinfo_mtu;
 };
 
 /*
  * Association id plus initial, maximum and minimum RTO values.
  */
 struct sctp_rtoinfo {
 	sctp_assoc_t srto_assoc_id;
 	uint32_t srto_initial;
 	uint32_t srto_max;
 	uint32_t srto_min;
 };
 
 /*
  * Association-level parameters: peer and local rwnd, cookie life,
  * association max retransmit count and number of peer destinations.
  */
 struct sctp_assocparams {
 	sctp_assoc_t sasoc_assoc_id;
 	uint32_t sasoc_peer_rwnd;
 	uint32_t sasoc_local_rwnd;
 	uint32_t sasoc_cookie_life;
 	uint16_t sasoc_asocmaxrxt;
 	uint16_t sasoc_number_peer_destinations;
 };
 
 /*
  * An address and an association id, followed by 4 bytes of explicit
  * padding.  Same shape as struct sctp_setpeerprim.
  */
 struct sctp_setprim {
 	struct sockaddr_storage ssp_addr;
 	sctp_assoc_t ssp_assoc_id;
 	uint8_t ssp_padding[4];
 };
 
 /*
  * An address and an association id, followed by 4 bytes of explicit
  * padding.  Same shape as struct sctp_setprim.
  */
 struct sctp_setpeerprim {
 	struct sockaddr_storage sspp_addr;
 	sctp_assoc_t sspp_assoc_id;
 	uint8_t sspp_padding[4];
 };
 
 /*
  * Association id followed by a variable-length address area.  The trailing
  * member is declared as a one-element struct sockaddr array, but per the
  * comment below the storage that follows holds N sockaddr_storage-sized
  * entries, so sizeof(struct sctp_getaddresses) does not describe the real
  * buffer size.
  */
 struct sctp_getaddresses {
 	sctp_assoc_t sget_assoc_id;
 	/* addr is filled in for N * sockaddr_storage */
 	struct sockaddr addr[1];
 };
 
 /*
  * A timeout value together with a start and end stream id for one
  * association.
  */
 struct sctp_setstrm_timeout {
 	sctp_assoc_t ssto_assoc_id;
 	uint32_t ssto_timeout;
 	uint32_t ssto_streamid_start;
 	uint32_t ssto_streamid_end;
 };
 
 /*
  * Association status: state, rwnd, unacked/pending data counts, inbound and
  * outbound stream counts, fragmentation point, and a full
  * struct sctp_paddrinfo for the primary address.
  */
 struct sctp_status {
 	sctp_assoc_t sstat_assoc_id;
 	int32_t sstat_state;
 	uint32_t sstat_rwnd;
 	uint16_t sstat_unackdata;
 	uint16_t sstat_penddata;
 	uint16_t sstat_instrms;
 	uint16_t sstat_outstrms;
 	uint32_t sstat_fragmentation_point;
 	struct sctp_paddrinfo sstat_primary;
 };
 
 /*
  * AUTHENTICATION support
  */
 /* SCTP_AUTH_CHUNK: a single 8-bit chunk value */
 struct sctp_authchunk {
 	uint8_t sauth_chunk;
 };
 
 /*
  * SCTP_AUTH_KEY
  *
  * Association id and key number followed by the key bytes as a flexible
  * array.  The key length is not stored in the struct.
  */
 struct sctp_authkey {
 	sctp_assoc_t sca_assoc_id;
 	uint16_t sca_keynumber;
 	uint8_t sca_key[];
 };
 
 /*
  * SCTP_HMAC_IDENT
  *
  * A count followed by a flexible array of 16-bit identifiers (presumably
  * the SCTP_AUTH_HMAC_ID_* values -- confirm).
  */
 struct sctp_hmacalgo {
 	uint32_t shmac_number_of_idents;
 	uint16_t shmac_idents[];
 };
 
 /* AUTH hmac_id; note that 0x0002 is not assigned in this list */
 #define SCTP_AUTH_HMAC_ID_RSVD		0x0000
 #define SCTP_AUTH_HMAC_ID_SHA1		0x0001	/* default, mandatory */
 #define SCTP_AUTH_HMAC_ID_SHA256	0x0003
 #define SCTP_AUTH_HMAC_ID_SHA224	0x0004
 #define SCTP_AUTH_HMAC_ID_SHA384	0x0005
 #define SCTP_AUTH_HMAC_ID_SHA512	0x0006
 
 
 /*
  * SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY
  *
  * Identifies a key by association id and 16-bit key number.
  */
 struct sctp_authkeyid {
 	sctp_assoc_t scact_assoc_id;
 	uint16_t scact_keynumber;
 };
 
 /*
  * SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS
  *
  * Association id followed by a flexible array of 8-bit chunk values.  The
  * struct carries no element count.
  */
 struct sctp_authchunks {
 	sctp_assoc_t gauth_assoc_id;
 	uint8_t gauth_chunks[];
 };
 
 /*
  * Generic pairing of an association id with one 32-bit value.
  */
 struct sctp_assoc_value {
 	sctp_assoc_t assoc_id;
 	uint32_t assoc_value;
 };
 
 /*
  * A count followed by a flexible array of association ids.
  */
 struct sctp_assoc_ids {
 	uint32_t gaids_number_of_ids;
 	sctp_assoc_t gaids_assoc_id[];
 };
 
 /*
  * SACK delay and frequency values for one association.
  */
 struct sctp_sack_info {
 	sctp_assoc_t sack_assoc_id;
 	uint32_t sack_delay;
 	uint32_t sack_freq;
 };
 
 /*
  * One 32-bit value per timer kind (init, data, sack, shutdown, heartbeat,
  * cookie, shutdown-ack) for one association.  NOTE(review): the names
  * suggest these are timeout counts; confirm against the code that fills
  * them in.
  */
 struct sctp_timeouts {
 	sctp_assoc_t stimo_assoc_id;
 	uint32_t stimo_init;
 	uint32_t stimo_data;
 	uint32_t stimo_sack;
 	uint32_t stimo_shutdown;
 	uint32_t stimo_heartbeat;
 	uint32_t stimo_cookie;
 	uint32_t stimo_shutdownack;
 };
 
 /*
  * Congestion-window log record; it is the `cwnd' member of the union in
  * struct sctp_cwnd_log.  Contains a raw pointer, so its size differs
  * between 32-bit and 64-bit builds (the LP64 FIXME below).
  */
 struct sctp_cwnd_args {
 	struct sctp_nets *net;	/* network to *//* FIXME: LP64 issue */
 	uint32_t cwnd_new_value;/* cwnd in k */
 	uint32_t pseudo_cumack;
 	uint16_t inflight;	/* flightsize in k */
 	uint16_t cwnd_augment;	/* increment to it */
 	uint8_t meets_pseudo_cumack;
 	uint8_t need_new_pseudo_cumack;
 	uint8_t cnt_in_send;
 	uint8_t cnt_in_str;
 };
 
 /*
  * Log record that is the `blk' member of the union in
  * struct sctp_cwnd_log.  Fixed-width fields only (no pointers).
  */
 struct sctp_blk_args {
 	uint32_t onsb;		/* in 1k bytes */
 	uint32_t sndlen;	/* len of send being attempted */
 	uint32_t peer_rwnd;	/* rwnd of peer */
 	uint16_t send_sent_qcnt;/* chnk cnt */
 	uint16_t stream_qcnt;	/* chnk cnt */
 	uint16_t chunks_on_oque;/* chunks out */
 	uint16_t flight_size;	/* flight size in k */
 };
 
 /*
  * Maximum number of streams that can be reset in one request.  The real
  * limit is dictated by the size of an mbuf cluster, not by this define, so
  * raising the define does not let you specify more.  To reset more than
  * SCTP_MAX_EXPLICT_STR_RESET streams, issue multiple resets.  (The macro
  * name really is spelled "EXPLICT".)
  */
 #define SCTP_MAX_EXPLICT_STR_RESET   1000
 
 /*
  * SCTP_RESET_* values.  Numerically SCTP_RESET_BOTH equals
  * SCTP_RESET_LOCAL_RECV | SCTP_RESET_LOCAL_SEND, but SCTP_RESET_ADD_STREAMS
  * (0x0005) is not a single bit, so these are codes rather than OR-able
  * flags.
  */
 #define SCTP_RESET_LOCAL_RECV  0x0001
 #define SCTP_RESET_LOCAL_SEND  0x0002
 #define SCTP_RESET_BOTH        0x0003
 #define SCTP_RESET_TSN         0x0004
 #define SCTP_RESET_ADD_STREAMS 0x0005
 
 /*
  * Stream reset request: association id, flags, a stream count and a
  * flexible array of 16-bit stream entries.  A count of 0 means "all
  * streams", in which case the list is not used.
  */
 struct sctp_stream_reset {
 	sctp_assoc_t strrst_assoc_id;
 	uint16_t strrst_flags;
 	uint16_t strrst_num_streams;	/* 0 == ALL */
 	uint16_t strrst_list[];	/* list if strrst_num_streams is not 0 */
 };
 
 
 /*
  * Peer and local tag values for one association.
  */
 struct sctp_get_nonce_values {
 	sctp_assoc_t gn_assoc_id;
 	uint32_t gn_peers_tag;
 	uint32_t gn_local_tag;
 };
 
 /* Debugging logs */
 /* `strlog' member of the union in struct sctp_cwnd_log. */
 struct sctp_str_log {
 	void *stcb;		/* FIXME: LP64 issue */
 	uint32_t n_tsn;
 	uint32_t e_tsn;
 	uint16_t n_sseq;
 	uint16_t e_sseq;
 	uint16_t strm;
 };
 
 /* `sb' member of the union in struct sctp_cwnd_log. */
 struct sctp_sb_log {
 	void *stcb;		/* FIXME: LP64 issue */
 	uint32_t so_sbcc;
 	uint32_t stcb_sbcc;
 	uint32_t incr;
 };
 
 /* `fr' member of the union in struct sctp_cwnd_log: three TSN values. */
 struct sctp_fr_log {
 	uint32_t largest_tsn;
 	uint32_t largest_new_tsn;
 	uint32_t tsn;
 };
 
 /* `map' member of the union in struct sctp_cwnd_log. */
 struct sctp_fr_map {
 	uint32_t base;
 	uint32_t cum;
 	uint32_t high;
 };
 
 /* `rwnd' member of the union in struct sctp_cwnd_log. */
 struct sctp_rwnd_log {
 	uint32_t rwnd;
 	uint32_t send_size;
 	uint32_t overhead;
 	uint32_t new_rwnd;
 };
 
 /* `mbcnt' member of the union in struct sctp_cwnd_log. */
 struct sctp_mbcnt_log {
 	uint32_t total_queue_size;
 	uint32_t size_change;
 	uint32_t total_queue_mb_size;
 	uint32_t mbcnt_change;
 };
 
 /* `sack' member of the union in struct sctp_cwnd_log. */
 struct sctp_sack_log {
 	uint32_t cumack;
 	uint32_t oldcumack;
 	uint32_t tsn;
 	uint16_t numGaps;
 	uint16_t numDups;
 };
 
 /*
  * `lock' member of the union in struct sctp_cwnd_log: two raw pointers
  * followed by one byte per lock and a reserved byte, which brings the byte
  * fields to eight in total.
  */
 struct sctp_lock_log {
 	void *sock;		/* FIXME: LP64 issue */
 	void *inp;		/* FIXME: LP64 issue */
 	uint8_t tcb_lock;
 	uint8_t inp_lock;
 	uint8_t info_lock;
 	uint8_t sock_lock;
 	uint8_t sockrcvbuf_lock;
 	uint8_t socksndbuf_lock;
 	uint8_t create_lock;
 	uint8_t resv;
 };
 
 /* `rto' member of the union in struct sctp_cwnd_log. */
 struct sctp_rto_log {
 	void *net;		/* FIXME: LP64 issue */
 	uint32_t rtt;
 };
 
 /* `nagle' member of the union in struct sctp_cwnd_log. */
 struct sctp_nagle_log {
 	void *stcb;		/* FIXME: LP64 issue */
 	uint32_t total_flight;
 	uint32_t total_in_queue;
 	uint16_t count_in_queue;
 	uint16_t count_in_flight;
 };
 
 /*
  * `wake' member of the union in struct sctp_cwnd_log.  Note that
  * stream_qcnt and chunks_on_oque are 8 bits wide here, whereas the
  * same-named fields in struct sctp_blk_args are 16 bits.
  */
 struct sctp_sbwake_log {
 	void *stcb;		/* FIXME: LP64 issue */
 	uint16_t send_q;
 	uint16_t sent_q;
 	uint16_t flight;
 	uint16_t wake_cnt;
 	uint8_t stream_qcnt;	/* chnk cnt */
 	uint8_t chunks_on_oque;	/* chunks out */
 	uint8_t sbflags;
 	uint8_t sctpflags;
 };
 
 /*
  * `misc' member of the union in struct sctp_cwnd_log: four untyped 32-bit
  * slots.
  */
 struct sctp_misc_info {
 	uint32_t log1;
 	uint32_t log2;
 	uint32_t log3;
 	uint32_t log4;
 };
 
 /* `close' member of the union in struct sctp_cwnd_log. */
 struct sctp_log_closing {
 	void *inp;		/* FIXME: LP64 issue */
 	void *stcb;		/* FIXME: LP64 issue */
 	uint32_t sctp_flags;
 	uint16_t state;
 	int16_t loc;
 };
 
 /*
  * `mb' member of the union in struct sctp_cwnd_log.  Holds three
  * pointer-typed fields (mp, ext, data); only the first carries the LP64
  * FIXME, but caddr_t is presumably pointer-sized as well -- confirm.
  */
 struct sctp_mbuf_log {
 	struct mbuf *mp;	/* FIXME: LP64 issue */
 	caddr_t ext;
 	caddr_t data;
 	uint16_t size;
 	uint8_t refcnt;
 	uint8_t mbuf_flags;
 };
 
 /*
  * One log record: a 64-bit time stamp, two one-byte tags (from,
  * event_type), two reserved bytes, and a union `x' holding the
  * record-specific payload.  Nothing in the type says which union member is
  * valid; presumably event_type/from select it -- confirm against the
  * logging code.
  */
 struct sctp_cwnd_log {
 	uint64_t time_event;
 	uint8_t from;
 	uint8_t event_type;
 	uint8_t resv[2];
 	union {
 		struct sctp_log_closing close;
 		struct sctp_blk_args blk;
 		struct sctp_cwnd_args cwnd;
 		struct sctp_str_log strlog;
 		struct sctp_fr_log fr;
 		struct sctp_fr_map map;
 		struct sctp_rwnd_log rwnd;
 		struct sctp_mbcnt_log mbcnt;
 		struct sctp_sack_log sack;
 		struct sctp_lock_log lock;
 		struct sctp_rto_log rto;
 		struct sctp_sb_log sb;
 		struct sctp_nagle_log nagle;
 		struct sctp_sbwake_log wake;
 		struct sctp_mbuf_log mb;
 		struct sctp_misc_info misc;
 	}     x;
 };
 
 /*
  * Log retrieval request/response header followed by a flexible array of
  * struct sctp_cwnd_log records.
  */
 struct sctp_cwnd_log_req {
 	int32_t num_in_log;	/* Number in log */
 	int32_t num_ret;	/* Number returned */
 	int32_t start_at;	/* start at this one */
 	int32_t end_at;		/* end at this one */
 	struct sctp_cwnd_log log[];
 };
 
 /*
  * Seconds/microseconds pair using fixed 32-bit fields, so its layout does
  * not depend on the platform's native time types.
  */
 struct sctp_timeval {
 	uint32_t tv_sec;
 	uint32_t tv_usec;
 };
 
 struct sctpstat {
 	struct sctp_timeval sctps_discontinuitytime;	/* sctpStats 18
 							 * (TimeStamp) */
 	/* MIB according to RFC 3873 */
 	uint32_t sctps_currestab;	/* sctpStats  1   (Gauge32) */
 	uint32_t sctps_activeestab;	/* sctpStats  2 (Counter32) */
 	uint32_t sctps_restartestab;
 	uint32_t sctps_collisionestab;
 	uint32_t sctps_passiveestab;	/* sctpStats  3 (Counter32) */
 	uint32_t sctps_aborted;	/* sctpStats  4 (Counter32) */
 	uint32_t sctps_shutdown;/* sctpStats  5 (Counter32) */
 	uint32_t sctps_outoftheblue;	/* sctpStats  6 (Counter32) */
 	uint32_t sctps_checksumerrors;	/* sctpStats  7 (Counter32) */
 	uint32_t sctps_outcontrolchunks;	/* sctpStats  8 (Counter64) */
 	uint32_t sctps_outorderchunks;	/* sctpStats  9 (Counter64) */
 	uint32_t sctps_outunorderchunks;	/* sctpStats 10 (Counter64) */
 	uint32_t sctps_incontrolchunks;	/* sctpStats 11 (Counter64) */
 	uint32_t sctps_inorderchunks;	/* sctpStats 12 (Counter64) */
 	uint32_t sctps_inunorderchunks;	/* sctpStats 13 (Counter64) */
 	uint32_t sctps_fragusrmsgs;	/* sctpStats 14 (Counter64) */
 	uint32_t sctps_reasmusrmsgs;	/* sctpStats 15 (Counter64) */
 	uint32_t sctps_outpackets;	/* sctpStats 16 (Counter64) */
 	uint32_t sctps_inpackets;	/* sctpStats 17 (Counter64) */
 
 	/* input statistics: */
 	uint32_t sctps_recvpackets;	/* total input packets        */
 	uint32_t sctps_recvdatagrams;	/* total input datagrams      */
 	uint32_t sctps_recvpktwithdata;	/* total packets that had data */
 	uint32_t sctps_recvsacks;	/* total input SACK chunks    */
 	uint32_t sctps_recvdata;/* total input DATA chunks    */
 	uint32_t sctps_recvdupdata;	/* total input duplicate DATA chunks */
 	uint32_t sctps_recvheartbeat;	/* total input HB chunks      */
 	uint32_t sctps_recvheartbeatack;	/* total input HB-ACK chunks  */
 	uint32_t sctps_recvecne;/* total input ECNE chunks    */
 	uint32_t sctps_recvauth;/* total input AUTH chunks    */
 	uint32_t sctps_recvauthmissing;	/* total input chunks missing AUTH */
 	uint32_t sctps_recvivalhmacid;	/* total number of invalid HMAC ids
 					 * received */
 	uint32_t sctps_recvivalkeyid;	/* total number of invalid secret ids
 					 * received */
 	uint32_t sctps_recvauthfailed;	/* total number of auth failed */
 	uint32_t sctps_recvexpress;	/* total fast path receives all one
 					 * chunk */
 	uint32_t sctps_recvexpressm;	/* total fast path multi-part data */
 	uint32_t sctps_recvnocrc;
 	uint32_t sctps_recvswcrc;
 	uint32_t sctps_recvhwcrc;
 
 	/* output statistics: */
 	uint32_t sctps_sendpackets;	/* total output packets       */
 	uint32_t sctps_sendsacks;	/* total output SACKs         */
 	uint32_t sctps_senddata;/* total output DATA chunks   */
 	uint32_t sctps_sendretransdata;	/* total output retransmitted DATA
 					 * chunks */
 	uint32_t sctps_sendfastretrans;	/* total output fast retransmitted
 					 * DATA chunks */
 	uint32_t sctps_sendmultfastretrans;	/* total FR's that happened
 						 * more than once to same
 						 * chunk (u-del multi-fr
 						 * algo). */
 	uint32_t sctps_sendheartbeat;	/* total output HB chunks     */
 	uint32_t sctps_sendecne;/* total output ECNE chunks    */
 	uint32_t sctps_sendauth;/* total output AUTH chunks FIXME   */
 	uint32_t sctps_senderrors;	/* ip_output error counter */
 	uint32_t sctps_sendnocrc;
 	uint32_t sctps_sendswcrc;
 	uint32_t sctps_sendhwcrc;
 	/* PCKDROPREP statistics: */
 	uint32_t sctps_pdrpfmbox;	/* Packet drop from middle box */
 	uint32_t sctps_pdrpfehos;	/* P-drop from end host */
 	uint32_t sctps_pdrpmbda;/* P-drops with data */
 	uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */
 	uint32_t sctps_pdrpbwrpt;	/* P-drop, non-endhost, bandwidth rep
 					 * only */
 	uint32_t sctps_pdrpcrupt;	/* P-drop, not enough for chunk header */
 	uint32_t sctps_pdrpnedat;	/* P-drop, not enough data to confirm */
 	uint32_t sctps_pdrppdbrk;	/* P-drop, where process_chunk_drop
 					 * said break */
 	uint32_t sctps_pdrptsnnf;	/* P-drop, could not find TSN */
 	uint32_t sctps_pdrpdnfnd;	/* P-drop, attempt reverse TSN lookup */
 	uint32_t sctps_pdrpdiwnp;	/* P-drop, e-host confirms zero-rwnd */
 	uint32_t sctps_pdrpdizrw;	/* P-drop, midbox confirms no space */
 	uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */
 	uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */
 	/* timeouts */
 	uint32_t sctps_timoiterator;	/* Number of iterator timers that
 					 * fired */
 	uint32_t sctps_timodata;/* Number of T3 data time outs */
 	uint32_t sctps_timowindowprobe;	/* Number of window probe (T3) timers
 					 * that fired */
 	uint32_t sctps_timoinit;/* Number of INIT timers that fired */
 	uint32_t sctps_timosack;/* Number of sack timers that fired */
 	uint32_t sctps_timoshutdown;	/* Number of shutdown timers that
 					 * fired */
 	uint32_t sctps_timoheartbeat;	/* Number of heartbeat timers that
 					 * fired */
 	uint32_t sctps_timocookie;	/* Number of times a cookie timeout
 					 * fired */
 	uint32_t sctps_timosecret;	/* Number of times an endpoint changed
 					 * its cookie secret */
 	uint32_t sctps_timopathmtu;	/* Number of PMTU timers that fired */
 	uint32_t sctps_timoshutdownack;	/* Number of shutdown ack timers that
 					 * fired */
 	uint32_t sctps_timoshutdownguard;	/* Number of shutdown guard
 						 * timers that fired */
 	uint32_t sctps_timostrmrst;	/* Number of stream reset timers that
 					 * fired */
 	uint32_t sctps_timoearlyfr;	/* Number of early FR timers that
 					 * fired */
 	uint32_t sctps_timoasconf;	/* Number of times an asconf timer
 					 * fired */
 	uint32_t sctps_timodelprim;	/* Number of times a prim_deleted
 					 * timer fired */
 	uint32_t sctps_timoautoclose;	/* Number of times auto close timer
 					 * fired */
 	uint32_t sctps_timoassockill;	/* Number of asoc free timers expired */
 	uint32_t sctps_timoinpkill;	/* Number of inp free timers expired */
 	/* Early fast retransmission counters */
 	uint32_t sctps_earlyfrstart;
 	uint32_t sctps_earlyfrstop;
 	uint32_t sctps_earlyfrmrkretrans;
 	uint32_t sctps_earlyfrstpout;
 	uint32_t sctps_earlyfrstpidsck1;
 	uint32_t sctps_earlyfrstpidsck2;
 	uint32_t sctps_earlyfrstpidsck3;
 	uint32_t sctps_earlyfrstpidsck4;
 	uint32_t sctps_earlyfrstrid;
 	uint32_t sctps_earlyfrstrout;
 	uint32_t sctps_earlyfrstrtmr;
 	/* others */
 	uint32_t sctps_hdrops;	/* packet shorter than header */
 	uint32_t sctps_badsum;	/* checksum error             */
 	uint32_t sctps_noport;	/* no endpoint for port       */
 	uint32_t sctps_badvtag;	/* bad v-tag                  */
 	uint32_t sctps_badsid;	/* bad SID                    */
 	uint32_t sctps_nomem;	/* no memory                  */
 	uint32_t sctps_fastretransinrtt;	/* number of multiple FR in a
 						 * RTT window */
 	uint32_t sctps_markedretrans;
 	uint32_t sctps_naglesent;	/* nagle allowed sending      */
 	uint32_t sctps_naglequeued;	/* nagle doesn't allow sending */
 	uint32_t sctps_maxburstqueued;	/* max burst doesn't allow sending */
 	uint32_t sctps_ifnomemqueued;	/* look ahead tells us no memory in
 					 * interface ring buffer OR we had a
 					 * send error and are queuing one
 					 * send. */
 	uint32_t sctps_windowprobed;	/* total number of window probes sent */
 	uint32_t sctps_lowlevelerr;	/* total times an output error causes
 					 * us to clamp down on next user send. */
 	uint32_t sctps_lowlevelerrusr;	/* total times sctp_senderrors were
 					 * caused from a user send from a user
 					 * invoked send not a sack response */
 	uint32_t sctps_datadropchklmt;	/* Number of in data drops due to
 					 * chunk limit reached */
 	uint32_t sctps_datadroprwnd;	/* Number of in data drops due to rwnd
 					 * limit reached */
 	uint32_t sctps_ecnereducedcwnd;	/* Number of times a ECN reduced the
 					 * cwnd */
 	uint32_t sctps_vtagexpress;	/* Used express lookup via vtag */
 	uint32_t sctps_vtagbogus;	/* Collision in express lookup. */
 	uint32_t sctps_primary_randry;	/* Number of times the sender ran dry
 					 * of user data on primary */
 	uint32_t sctps_cmt_randry;	/* Same for above */
 	uint32_t sctps_slowpath_sack;	/* Sacks the slow way */
 	uint32_t sctps_wu_sacks_sent;	/* Window Update only sacks sent */
 	uint32_t sctps_sends_with_flags;	/* number of sends with
 						 * sinfo_flags !=0 */
-	uint32_t sctps_sends_with_unord; /* number of unordered sends */
+	uint32_t sctps_sends_with_unord;	/* number of unordered sends */
 	uint32_t sctps_sends_with_eof;	/* number of sends with EOF flag set */
 	uint32_t sctps_sends_with_abort;	/* number of sends with ABORT
 						 * flag set */
 	uint32_t sctps_protocol_drain_calls;	/* number of times protocol
 						 * drain called */
 	uint32_t sctps_protocol_drains_done;	/* number of times we did a
 						 * protocol drain */
 	uint32_t sctps_read_peeks;	/* Number of times recv was called
 					 * with peek */
 	uint32_t sctps_cached_chk;	/* Number of cached chunks used */
 	uint32_t sctps_cached_strmoq;	/* Number of cached stream oq's used */
 	uint32_t sctps_left_abandon;	/* Number of unread messages abandoned
 					 * by close */
 	uint32_t sctps_send_burst_avoid;	/* Unused */
 	uint32_t sctps_send_cwnd_avoid;	/* Send cwnd full  avoidance, already
 					 * max burst inflight to net */
 	uint32_t sctps_fwdtsn_map_over;	/* number of map array over-runs via
 					 * fwd-tsn's */
 
 	uint32_t sctps_reserved[32];	/* Future ABI compat - remove int's
 					 * from here when adding new */
 };
 
 /*
  * Statistics update macros.  _x is a field name of struct sctpstat and _d
  * the amount to add or subtract; SCTP_STAT_INCR/DECR use an amount of 1.
  *
  * With __FreeBSD__, SMP and SCTP_USE_PERCPU_STAT all defined, the update
  * is a plain += / -= on the SCTP_BASE_STATS slot indexed by the current
  * CPU id.  Otherwise it is an atomic_add_int/atomic_subtract_int on
  * SCTP_BASE_STAT(_x).  The per-CPU variants are expressions that yield the
  * new value; the atomic variants yield whatever the atomic_* call does.
  */
 #define SCTP_STAT_INCR(_x) SCTP_STAT_INCR_BY(_x,1)
 #define SCTP_STAT_DECR(_x) SCTP_STAT_DECR_BY(_x,1)
 #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 #define SCTP_STAT_INCR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x += _d)
 #define SCTP_STAT_DECR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x -= _d)
 #else
 #define SCTP_STAT_INCR_BY(_x,_d) atomic_add_int(&SCTP_BASE_STAT(_x), _d)
 #define SCTP_STAT_DECR_BY(_x,_d) atomic_subtract_int(&SCTP_BASE_STAT(_x), _d)
 #endif
 /*
  * The following macros are for handling MIB values.  All of them forward
  * to SCTP_STAT_INCR/SCTP_STAT_DECR unchanged; in particular the COUNTER64
  * variants do nothing different from the 32-bit ones.
  */
 #define SCTP_STAT_INCR_COUNTER32(_x) SCTP_STAT_INCR(_x)
 #define SCTP_STAT_INCR_COUNTER64(_x) SCTP_STAT_INCR(_x)
 #define SCTP_STAT_INCR_GAUGE32(_x) SCTP_STAT_INCR(_x)
 #define SCTP_STAT_DECR_COUNTER32(_x) SCTP_STAT_DECR(_x)
 #define SCTP_STAT_DECR_COUNTER64(_x) SCTP_STAT_DECR(_x)
 #define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x)
 
 /*
  * Socket address overlay.  `sa' is always present.  `sin' is present when
  * INET is defined or when not building the kernel; `sin6' likewise for
  * INET6.  The union's size therefore depends on the build configuration
  * in kernel builds.
  */
 union sctp_sockstore {
 #if defined(INET) || !defined(_KERNEL)
 	struct sockaddr_in sin;
 #endif
 #if defined(INET6) || !defined(_KERNEL)
 	struct sockaddr_in6 sin6;
 #endif
 	struct sockaddr sa;
 };
 
 
 /***********************************/
 /* And something for us old timers */
 /***********************************/
 
 /*
  * Fallback 64-bit byte-order macros, defined only when no macro of the
  * same name already exists.  They map to be64toh()/htobe64() from
  * <sys/endian.h>.
  */
 #ifndef ntohll
 #include <sys/endian.h>
 #define ntohll(x) be64toh(x)
 #endif
 
 #ifndef htonll
 #include <sys/endian.h>
 #define htonll(x) htobe64(x)
 #endif
 /***********************************/
 
 
 /*
  * Flat endpoint record made up of fixed-width fields only: flags,
  * features, send/receive/no-space totals, fragmentation point, local port
  * and queue lengths.  The trailing 32 words are reserved for future fields.
  */
 struct xsctp_inpcb {
 	uint32_t last;
 	uint32_t flags;
 	uint32_t features;
 	uint32_t total_sends;
 	uint32_t total_recvs;
 	uint32_t total_nospaces;
 	uint32_t fragmentation_point;
 	uint16_t local_port;
 	uint16_t qlen;
 	uint16_t maxqlen;
 	uint32_t extra_padding[32];	/* future */
 };
 
 /*
  * Flat association record.  The "sctpAssocEntry N" comments give the MIB
  * table column each field corresponds to; fields without such a comment
  * have no MIB column noted here.  The trailing 32 words are reserved for
  * future fields.
  */
 struct xsctp_tcb {
 	union sctp_sockstore primary_addr;	/* sctpAssocEntry 5/6 */
 	uint32_t last;
 	uint32_t heartbeat_interval;	/* sctpAssocEntry 7   */
 	uint32_t state;		/* sctpAssocEntry 8   */
 	uint32_t in_streams;	/* sctpAssocEntry 9   */
 	uint32_t out_streams;	/* sctpAssocEntry 10  */
 	uint32_t max_nr_retrans;/* sctpAssocEntry 11  */
 	uint32_t primary_process;	/* sctpAssocEntry 12  */
 	uint32_t T1_expireries;	/* sctpAssocEntry 13  */
 	uint32_t T2_expireries;	/* sctpAssocEntry 14  */
 	uint32_t retransmitted_tsns;	/* sctpAssocEntry 15  */
 	uint32_t total_sends;
 	uint32_t total_recvs;
 	uint32_t local_tag;
 	uint32_t remote_tag;
 	uint32_t initial_tsn;
 	uint32_t highest_tsn;
 	uint32_t cumulative_tsn;
 	uint32_t cumulative_tsn_ack;
 	uint32_t mtu;
 	uint32_t refcnt;
 	uint16_t local_port;	/* sctpAssocEntry 3   */
 	uint16_t remote_port;	/* sctpAssocEntry 4   */
 	struct sctp_timeval start_time;	/* sctpAssocEntry 16  */
 	struct sctp_timeval discontinuity_time;	/* sctpAssocEntry 17  */
 	uint32_t peers_rwnd;
 	sctp_assoc_t assoc_id;	/* sctpAssocEntry 1   */
 	uint32_t extra_padding[32];	/* future */
 };
 
 /*
  * Flat local-address record: the address, a start time, and 32 words
  * reserved for future fields.
  */
 struct xsctp_laddr {
 	union sctp_sockstore address;	/* sctpAssocLocalAddrEntry 1/2 */
 	uint32_t last;
 	struct sctp_timeval start_time;	/* sctpAssocLocalAddrEntry 3   */
 	uint32_t extra_padding[32];	/* future */
 };
 
 /*
  * Flat remote-address record.  Fields with a "sctpAssocLocalRemEntry N"
  * comment correspond to that MIB column; fields with an empty comment have
  * no MIB column noted.  The trailing 32 words are reserved for future
  * fields.
  */
 struct xsctp_raddr {
 	union sctp_sockstore address;	/* sctpAssocLocalRemEntry 1/2 */
 	uint32_t last;
 	uint32_t rto;		/* sctpAssocLocalRemEntry 5   */
 	uint32_t max_path_rtx;	/* sctpAssocLocalRemEntry 6   */
 	uint32_t rtx;		/* sctpAssocLocalRemEntry 7   */
 	uint32_t error_counter;	/* */
 	uint32_t cwnd;		/* */
 	uint32_t flight_size;	/* */
 	uint32_t mtu;		/* */
 	uint8_t active;		/* sctpAssocLocalRemEntry 3   */
 	uint8_t confirmed;	/* */
 	uint8_t heartbeat_enabled;	/* sctpAssocLocalRemEntry 4   */
 	struct sctp_timeval start_time;	/* sctpAssocLocalRemEntry 8   */
 	uint32_t rtt;
 	uint32_t extra_padding[32];	/* future */
 };
 
 /* Number of entries in struct sctp_log. */
 #define SCTP_MAX_LOGGING_SIZE 30000
 /*
  * Number of 32-bit parameters per trace entry.  NOTE(review): presumably
  * required to be even so the 32-bit members after the 64-bit timestamp
  * total a multiple of 8 bytes -- confirm.
  */
 #define SCTP_TRACE_PARAMS 6	/* This number MUST be even   */
 
 /*
  * One trace record: timestamp, subsystem word, an explicit padding word,
  * and SCTP_TRACE_PARAMS 32-bit parameters.
  */
 struct sctp_log_entry {
 	uint64_t timestamp;
 	uint32_t subsys;
 	uint32_t padding;
 	uint32_t params[SCTP_TRACE_PARAMS];
 };
 
 /*
  * Fixed-size trace buffer: SCTP_MAX_LOGGING_SIZE entries followed by an
  * index word and an explicit padding word.  With the current constants the
  * entry array alone is 30000 * 40 bytes, so this is not something to place
  * on a stack.
  */
 struct sctp_log {
 	struct sctp_log_entry entry[SCTP_MAX_LOGGING_SIZE];
 	uint32_t index;
 	uint32_t padding;
 };
 
 /*
  * Kernel defined for sctp_send
  *
  * Send and receive entry points, declared only when _KERNEL or
  * __Userspace__ is defined.
  */
 #if defined(_KERNEL) || defined(__Userspace__)
 /*
  * Takes the socket, a destination address, the data as either a uio or an
  * mbuf chain (i_pak), a control mbuf, send flags, the sndrcvinfo and the
  * calling thread.  Returns int.
  */
 int
 sctp_lower_sosend(struct socket *so,
     struct sockaddr *addr,
     struct uio *uio,
     struct mbuf *i_pak,
     struct mbuf *control,
     int flags,
     struct sctp_sndrcvinfo *srcv
     ,struct thread *p
 );
 
 /*
  * Takes the socket, a uio and/or mbuf pointer for the data, a buffer and
  * length for the source address, a pointer for message flags, the
  * sndrcvinfo and a filling_sinfo flag.  Returns int.
  */
 int
 sctp_sorecvmsg(struct socket *so,
     struct uio *uio,
     struct mbuf **mp,
     struct sockaddr *from,
     int fromlen,
     int *msg_flags,
     struct sctp_sndrcvinfo *sinfo,
     int filling_sinfo);
 
 #endif
 
 /*
  * API system calls
  *
  * Userland prototypes, visible only when neither _KERNEL nor __Userspace__
  * is defined.  Each prototype is wrapped in __P(()), and the whole group
  * sits between __BEGIN_DECLS and __END_DECLS.
  */
 #if !(defined(_KERNEL)) && !(defined(__Userspace__))
 
 __BEGIN_DECLS
 int sctp_peeloff __P((int, sctp_assoc_t));
 int sctp_bindx __P((int, struct sockaddr *, int, int));
 int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *));
 int sctp_getaddrlen __P((sa_family_t));
 int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **));
 void sctp_freepaddrs __P((struct sockaddr *));
 int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **));
 void sctp_freeladdrs __P((struct sockaddr *));
 int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *));
 
 ssize_t sctp_sendmsg 
 __P((int, const void *, size_t,
     const struct sockaddr *,
     socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
 
 	ssize_t sctp_send __P((int sd, const void *msg, size_t len,
               const struct sctp_sndrcvinfo *sinfo, int flags));
 
 	ssize_t sctp_sendx __P((int sd, const void *msg, size_t len,
                struct sockaddr *addrs, int addrcnt,
                struct sctp_sndrcvinfo *sinfo, int flags));
 
 	ssize_t sctp_sendmsgx __P((int sd, const void *, size_t,
                   struct sockaddr *, int,
                   uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
 
 	sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa));
 
 	ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *,
                  socklen_t *, struct sctp_sndrcvinfo *, int *));
 
 __END_DECLS
 
 #endif				/* !_KERNEL && !__Userspace__ */
 #endif				/* !__sctp_uio_h__ */
Index: projects/binutils-2.17/sys/sys/elf_common.h
===================================================================
--- projects/binutils-2.17/sys/sys/elf_common.h	(revision 215829)
+++ projects/binutils-2.17/sys/sys/elf_common.h	(revision 215830)
@@ -1,963 +1,964 @@
 /*-
  * Copyright (c) 1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_ELF_COMMON_H_
 #define	_SYS_ELF_COMMON_H_ 1
 
 /*
  * ELF definitions that are independent of architecture or word size.
  */
 
 /*
  * Note header.  The ".note" section contains an array of notes.  Each
  * begins with this header, aligned to a word boundary.  Immediately
  * following the note header is n_namesz bytes of name, padded to the
  * next word boundary.  Then comes n_descsz bytes of descriptor, again
  * padded to a word boundary.  The values of n_namesz and n_descsz do
  * not include the padding.
  */
 
 typedef struct {
 	u_int32_t	n_namesz;	/* Length of name. */
 	u_int32_t	n_descsz;	/* Length of descriptor. */
 	u_int32_t	n_type;		/* Type of this note. */
 } Elf_Note;
 
 /*
  * The header for GNU-style hash sections.
  */
 
 typedef struct {
 	u_int32_t	gh_nbuckets;	/* Number of hash buckets. */
 	u_int32_t	gh_symndx;	/* First visible symbol in .dynsym. */
 	u_int32_t	gh_maskwords;	/* #maskwords used in bloom filter. */
 	u_int32_t	gh_shift2;	/* Bloom filter shift count. */
 } Elf_GNU_Hash_Header;
 
 /* Indexes into the e_ident array.  Keep synced with
    http://www.sco.com/developers/gabi/latest/ch4.eheader.html */
 #define	EI_MAG0		0	/* Magic number, byte 0. */
 #define	EI_MAG1		1	/* Magic number, byte 1. */
 #define	EI_MAG2		2	/* Magic number, byte 2. */
 #define	EI_MAG3		3	/* Magic number, byte 3. */
 #define	EI_CLASS	4	/* Class of machine. */
 #define	EI_DATA		5	/* Data format. */
 #define	EI_VERSION	6	/* ELF format version. */
 #define	EI_OSABI	7	/* Operating system / ABI identification */
 #define	EI_ABIVERSION	8	/* ABI version */
 #define	OLD_EI_BRAND	8	/* Start of architecture identification. */
 #define	EI_PAD		9	/* Start of padding (per SVR4 ABI). */
 #define	EI_NIDENT	16	/* Size of e_ident array. */
 
 /* Values for the magic number bytes. */
 #define	ELFMAG0		0x7f
 #define	ELFMAG1		'E'
 #define	ELFMAG2		'L'
 #define	ELFMAG3		'F'
 #define	ELFMAG		"\177ELF"	/* magic string */
 #define	SELFMAG		4		/* magic string size */
 
 /* Values for e_ident[EI_VERSION] and e_version. */
 #define	EV_NONE		0
 #define	EV_CURRENT	1
 
 /* Values for e_ident[EI_CLASS]. */
 #define	ELFCLASSNONE	0	/* Unknown class. */
 #define	ELFCLASS32	1	/* 32-bit architecture. */
 #define	ELFCLASS64	2	/* 64-bit architecture. */
 
 /* Values for e_ident[EI_DATA]. */
 #define	ELFDATANONE	0	/* Unknown data format. */
 #define	ELFDATA2LSB	1	/* 2's complement little-endian. */
 #define	ELFDATA2MSB	2	/* 2's complement big-endian. */
 
 /* Values for e_ident[EI_OSABI]. */
 #define	ELFOSABI_NONE		0	/* UNIX System V ABI */
 #define	ELFOSABI_HPUX		1	/* HP-UX operating system */
 #define	ELFOSABI_NETBSD		2	/* NetBSD */
 #define	ELFOSABI_LINUX		3	/* GNU/Linux */
 #define	ELFOSABI_HURD		4	/* GNU/Hurd */
 #define	ELFOSABI_86OPEN		5	/* 86Open common IA32 ABI */
 #define	ELFOSABI_SOLARIS	6	/* Solaris */
 #define	ELFOSABI_AIX		7	/* AIX */
 #define	ELFOSABI_IRIX		8	/* IRIX */
 #define	ELFOSABI_FREEBSD	9	/* FreeBSD */
 #define	ELFOSABI_TRU64		10	/* TRU64 UNIX */
 #define	ELFOSABI_MODESTO	11	/* Novell Modesto */
 #define	ELFOSABI_OPENBSD	12	/* OpenBSD */
 #define	ELFOSABI_OPENVMS	13	/* Open VMS */
 #define	ELFOSABI_NSK		14	/* HP Non-Stop Kernel */
 #define	ELFOSABI_AROS		15	/* Amiga Research OS */
 #define	ELFOSABI_ARM		97	/* ARM */
 #define	ELFOSABI_STANDALONE	255	/* Standalone (embedded) application */
 
 #define	ELFOSABI_SYSV		ELFOSABI_NONE	/* symbol used in old spec */
 #define	ELFOSABI_MONTEREY	ELFOSABI_AIX	/* Monterey */
 
 /* e_ident */
 #define	IS_ELF(ehdr)	((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
 			 (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
 			 (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
 			 (ehdr).e_ident[EI_MAG3] == ELFMAG3)
 
 /* Values for e_type. */
 #define	ET_NONE		0	/* Unknown type. */
 #define	ET_REL		1	/* Relocatable. */
 #define	ET_EXEC		2	/* Executable. */
 #define	ET_DYN		3	/* Shared object. */
 #define	ET_CORE		4	/* Core file. */
 #define	ET_LOOS		0xfe00	/* First operating system specific. */
 #define	ET_HIOS		0xfeff	/* Last operating system-specific. */
 #define	ET_LOPROC	0xff00	/* First processor-specific. */
 #define	ET_HIPROC	0xffff	/* Last processor-specific. */
 
 /* Values for e_machine. */
 #define	EM_NONE		0	/* Unknown machine. */
 #define	EM_M32		1	/* AT&T WE32100. */
 #define	EM_SPARC	2	/* Sun SPARC. */
 #define	EM_386		3	/* Intel i386. */
 #define	EM_68K		4	/* Motorola 68000. */
 #define	EM_88K		5	/* Motorola 88000. */
 #define	EM_860		7	/* Intel i860. */
 #define	EM_MIPS		8	/* MIPS R3000 Big-Endian only. */
 #define	EM_S370		9	/* IBM System/370. */
 #define	EM_MIPS_RS3_LE	10	/* MIPS R3000 Little-Endian. */
 #define	EM_PARISC	15	/* HP PA-RISC. */
 #define	EM_VPP500	17	/* Fujitsu VPP500. */
 #define	EM_SPARC32PLUS	18	/* SPARC v8plus. */
 #define	EM_960		19	/* Intel 80960. */
 #define	EM_PPC		20	/* PowerPC 32-bit. */
 #define	EM_PPC64	21	/* PowerPC 64-bit. */
 #define	EM_S390		22	/* IBM System/390. */
 #define	EM_V800		36	/* NEC V800. */
 #define	EM_FR20		37	/* Fujitsu FR20. */
 #define	EM_RH32		38	/* TRW RH-32. */
 #define	EM_RCE		39	/* Motorola RCE. */
 #define	EM_ARM		40	/* ARM. */
 #define	EM_SH		42	/* Hitachi SH. */
 #define	EM_SPARCV9	43	/* SPARC v9 64-bit. */
 #define	EM_TRICORE	44	/* Siemens TriCore embedded processor. */
 #define	EM_ARC		45	/* Argonaut RISC Core. */
 #define	EM_H8_300	46	/* Hitachi H8/300. */
 #define	EM_H8_300H	47	/* Hitachi H8/300H. */
 #define	EM_H8S		48	/* Hitachi H8S. */
 #define	EM_H8_500	49	/* Hitachi H8/500. */
 #define	EM_IA_64	50	/* Intel IA-64 Processor. */
 #define	EM_MIPS_X	51	/* Stanford MIPS-X. */
 #define	EM_COLDFIRE	52	/* Motorola ColdFire. */
 #define	EM_68HC12	53	/* Motorola M68HC12. */
 #define	EM_MMA		54	/* Fujitsu MMA. */
 #define	EM_PCP		55	/* Siemens PCP. */
 #define	EM_NCPU		56	/* Sony nCPU. */
 #define	EM_NDR1		57	/* Denso NDR1 microprocessor. */
 #define	EM_STARCORE	58	/* Motorola Star*Core processor. */
 #define	EM_ME16		59	/* Toyota ME16 processor. */
 #define	EM_ST100	60	/* STMicroelectronics ST100 processor. */
 #define	EM_TINYJ	61	/* Advanced Logic Corp. TinyJ processor. */
 #define	EM_X86_64	62	/* Advanced Micro Devices x86-64 */
 #define	EM_AMD64	EM_X86_64	/* Advanced Micro Devices x86-64 (compat) */
 #define	EM_PDSP		63	/* Sony DSP Processor. */
 #define	EM_FX66		66	/* Siemens FX66 microcontroller. */
 #define	EM_ST9PLUS	67	/* STMicroelectronics ST9+ 8/16
 				   microcontroller. */
 #define	EM_ST7		68	/* STmicroelectronics ST7 8-bit
 				   microcontroller. */
 #define	EM_68HC16	69	/* Motorola MC68HC16 microcontroller. */
 #define	EM_68HC11	70	/* Motorola MC68HC11 microcontroller. */
 #define	EM_68HC08	71	/* Motorola MC68HC08 microcontroller. */
 #define	EM_68HC05	72	/* Motorola MC68HC05 microcontroller. */
 #define	EM_SVX		73	/* Silicon Graphics SVx. */
 #define	EM_ST19		74	/* STMicroelectronics ST19 8-bit mc. */
 #define	EM_VAX		75	/* Digital VAX. */
 #define	EM_CRIS		76	/* Axis Communications 32-bit embedded
 				   processor. */
 #define	EM_JAVELIN	77	/* Infineon Technologies 32-bit embedded
 				   processor. */
 #define	EM_FIREPATH	78	/* Element 14 64-bit DSP Processor. */
 #define	EM_ZSP		79	/* LSI Logic 16-bit DSP Processor. */
 #define	EM_MMIX		80	/* Donald Knuth's educational 64-bit proc. */
 #define	EM_HUANY	81	/* Harvard University machine-independent
 				   object files. */
 #define	EM_PRISM	82	/* SiTera Prism. */
 #define	EM_AVR		83	/* Atmel AVR 8-bit microcontroller. */
 #define	EM_FR30		84	/* Fujitsu FR30. */
 #define	EM_D10V		85	/* Mitsubishi D10V. */
 #define	EM_D30V		86	/* Mitsubishi D30V. */
 #define	EM_V850		87	/* NEC v850. */
 #define	EM_M32R		88	/* Mitsubishi M32R. */
 #define	EM_MN10300	89	/* Matsushita MN10300. */
 #define	EM_MN10200	90	/* Matsushita MN10200. */
 #define	EM_PJ		91	/* picoJava. */
 #define	EM_OPENRISC	92	/* OpenRISC 32-bit embedded processor. */
 #define	EM_ARC_A5	93	/* ARC Cores Tangent-A5. */
 #define	EM_XTENSA	94	/* Tensilica Xtensa Architecture. */
 #define	EM_VIDEOCORE	95	/* Alphamosaic VideoCore processor. */
 #define	EM_TMM_GPP	96	/* Thompson Multimedia General Purpose
 				   Processor. */
 #define	EM_NS32K	97	/* National Semiconductor 32000 series. */
 #define	EM_TPC		98	/* Tenor Network TPC processor. */
 #define	EM_SNP1K	99	/* Trebia SNP 1000 processor. */
 #define	EM_ST200	100	/* STMicroelectronics ST200 microcontroller. */
 #define	EM_IP2K		101	/* Ubicom IP2xxx microcontroller family. */
 #define	EM_MAX		102	/* MAX Processor. */
 #define	EM_CR		103	/* National Semiconductor CompactRISC
 				   microprocessor. */
 #define	EM_F2MC16	104	/* Fujitsu F2MC16. */
 #define	EM_MSP430	105	/* Texas Instruments embedded microcontroller
 				   msp430. */
 #define	EM_BLACKFIN	106	/* Analog Devices Blackfin (DSP) processor. */
 #define	EM_SE_C33	107	/* S1C33 Family of Seiko Epson processors. */
 #define	EM_SEP		108	/* Sharp embedded microprocessor. */
 #define	EM_ARCA		109	/* Arca RISC Microprocessor. */
 #define	EM_UNICORE	110	/* Microprocessor series from PKU-Unity Ltd.
 				   and MPRC of Peking University */
 
 /* Non-standard or deprecated. */
 #define	EM_486		6	/* Intel i486. */
 #define	EM_MIPS_RS4_BE	10	/* MIPS R4000 Big-Endian */
 #define	EM_ALPHA_STD	41	/* Digital Alpha (standard value). */
 #define	EM_ALPHA	0x9026	/* Alpha (written in the absence of an ABI) */
 
 /* Special section indexes. */
 #define	SHN_UNDEF	     0		/* Undefined, missing, irrelevant. */
 #define	SHN_LORESERVE	0xff00		/* First of reserved range. */
 #define	SHN_LOPROC	0xff00		/* First processor-specific. */
 #define	SHN_HIPROC	0xff1f		/* Last processor-specific. */
 #define	SHN_LOOS	0xff20		/* First operating system-specific. */
 #define	SHN_HIOS	0xff3f		/* Last operating system-specific. */
 #define	SHN_ABS		0xfff1		/* Absolute values. */
 #define	SHN_COMMON	0xfff2		/* Common data. */
 #define	SHN_XINDEX	0xffff		/* Escape -- index stored elsewhere. */
 #define	SHN_HIRESERVE	0xffff		/* Last of reserved range. */
 
 /* sh_type */
 #define	SHT_NULL		0	/* inactive */
 #define	SHT_PROGBITS		1	/* program defined information */
 #define	SHT_SYMTAB		2	/* symbol table section */
 #define	SHT_STRTAB		3	/* string table section */
 #define	SHT_RELA		4	/* relocation section with addends */
 #define	SHT_HASH		5	/* symbol hash table section */
 #define	SHT_DYNAMIC		6	/* dynamic section */
 #define	SHT_NOTE		7	/* note section */
 #define	SHT_NOBITS		8	/* no space section */
 #define	SHT_REL			9	/* relocation section - no addends */
 #define	SHT_SHLIB		10	/* reserved - purpose unknown */
 #define	SHT_DYNSYM		11	/* dynamic symbol table section */
 #define	SHT_INIT_ARRAY		14	/* Initialization function pointers. */
 #define	SHT_FINI_ARRAY		15	/* Termination function pointers. */
 #define	SHT_PREINIT_ARRAY	16	/* Pre-initialization function ptrs. */
 #define	SHT_GROUP		17	/* Section group. */
 #define	SHT_SYMTAB_SHNDX	18	/* Section indexes (see SHN_XINDEX). */
 #define	SHT_LOOS		0x60000000	/* First of OS specific semantics */
 #define	SHT_LOSUNW		0x6ffffff4
 #define	SHT_SUNW_dof		0x6ffffff4
 #define	SHT_SUNW_cap		0x6ffffff5
 #define	SHT_SUNW_SIGNATURE	0x6ffffff6
 #define	SHT_GNU_HASH		0x6ffffff6
 #define	SHT_SUNW_ANNOTATE	0x6ffffff7
 #define	SHT_SUNW_DEBUGSTR	0x6ffffff8
 #define	SHT_SUNW_DEBUG		0x6ffffff9
 #define	SHT_SUNW_move		0x6ffffffa
 #define	SHT_SUNW_COMDAT		0x6ffffffb
 #define	SHT_SUNW_syminfo	0x6ffffffc
 #define	SHT_SUNW_verdef		0x6ffffffd
 #define	SHT_GNU_verdef		0x6ffffffd	/* Symbol versions provided */
 #define	SHT_SUNW_verneed	0x6ffffffe
 #define	SHT_GNU_verneed		0x6ffffffe	/* Symbol versions required */
 #define	SHT_SUNW_versym		0x6fffffff
 #define	SHT_GNU_versym		0x6fffffff	/* Symbol version table */
 #define	SHT_HISUNW		0x6fffffff
 #define	SHT_HIOS		0x6fffffff	/* Last of OS specific semantics */
 #define	SHT_LOPROC		0x70000000	/* reserved range for processor */
 #define	SHT_AMD64_UNWIND	0x70000001	/* unwind information */
 #define	SHT_HIPROC		0x7fffffff	/* specific section header types */
 #define	SHT_LOUSER		0x80000000	/* reserved range for application */
 #define	SHT_HIUSER		0xffffffff	/* specific indexes */
 
 /* Flags for sh_flags. */
 #define	SHF_WRITE		0x1	/* Section contains writable data. */
 #define	SHF_ALLOC		0x2	/* Section occupies memory. */
 #define	SHF_EXECINSTR		0x4	/* Section contains instructions. */
 #define	SHF_MERGE		0x10	/* Section may be merged. */
 #define	SHF_STRINGS		0x20	/* Section contains strings. */
 #define	SHF_INFO_LINK		0x40	/* sh_info holds section index. */
 #define	SHF_LINK_ORDER		0x80	/* Special ordering requirements. */
 #define	SHF_OS_NONCONFORMING	0x100	/* OS-specific processing required. */
 #define	SHF_GROUP		0x200	/* Member of section group. */
 #define	SHF_TLS			0x400	/* Section contains TLS data. */
 #define	SHF_MASKOS	0x0ff00000	/* OS-specific semantics. */
 #define	SHF_MASKPROC	0xf0000000	/* Processor-specific semantics. */
 
 /* Values for p_type. */
 #define	PT_NULL		0	/* Unused entry. */
 #define	PT_LOAD		1	/* Loadable segment. */
 #define	PT_DYNAMIC	2	/* Dynamic linking information segment. */
 #define	PT_INTERP	3	/* Pathname of interpreter. */
 #define	PT_NOTE		4	/* Auxiliary information. */
 #define	PT_SHLIB	5	/* Reserved (not used). */
 #define	PT_PHDR		6	/* Location of program header itself. */
 #define	PT_TLS		7	/* Thread local storage segment */
 #define	PT_LOOS		0x60000000	/* First OS-specific. */
 #define	PT_SUNW_UNWIND	0x6464e550	/* amd64 UNWIND program header */
 #define	PT_GNU_EH_FRAME	0x6474e550
+#define	PT_GNU_STACK	0x6474e551
 #define	PT_LOSUNW	0x6ffffffa
 #define	PT_SUNWBSS	0x6ffffffa	/* Sun Specific segment */
 #define	PT_SUNWSTACK	0x6ffffffb	/* describes the stack segment */
 #define	PT_SUNWDTRACE	0x6ffffffc	/* private */
 #define	PT_SUNWCAP	0x6ffffffd	/* hard/soft capabilities segment */
 #define	PT_HISUNW	0x6fffffff
 #define	PT_HIOS		0x6fffffff	/* Last OS-specific. */
 #define	PT_LOPROC	0x70000000	/* First processor-specific type. */
 #define	PT_HIPROC	0x7fffffff	/* Last processor-specific type. */
 
 /* Values for p_flags. */
 #define	PF_X		0x1		/* Executable. */
 #define	PF_W		0x2		/* Writable. */
 #define	PF_R		0x4		/* Readable. */
 #define	PF_MASKOS	0x0ff00000	/* Operating system-specific. */
 #define	PF_MASKPROC	0xf0000000	/* Processor-specific. */
 
 /* Extended program header index. */
 #define	PN_XNUM		0xffff
 
 /* Values for d_tag. */
 #define	DT_NULL		0	/* Terminating entry. */
 #define	DT_NEEDED	1	/* String table offset of a needed shared
 				   library. */
 #define	DT_PLTRELSZ	2	/* Total size in bytes of PLT relocations. */
 #define	DT_PLTGOT	3	/* Processor-dependent address. */
 #define	DT_HASH		4	/* Address of symbol hash table. */
 #define	DT_STRTAB	5	/* Address of string table. */
 #define	DT_SYMTAB	6	/* Address of symbol table. */
 #define	DT_RELA		7	/* Address of ElfNN_Rela relocations. */
 #define	DT_RELASZ	8	/* Total size of ElfNN_Rela relocations. */
 #define	DT_RELAENT	9	/* Size of each ElfNN_Rela relocation entry. */
 #define	DT_STRSZ	10	/* Size of string table. */
 #define	DT_SYMENT	11	/* Size of each symbol table entry. */
 #define	DT_INIT		12	/* Address of initialization function. */
 #define	DT_FINI		13	/* Address of finalization function. */
 #define	DT_SONAME	14	/* String table offset of shared object
 				   name. */
 #define	DT_RPATH	15	/* String table offset of library path. [sup] */
 #define	DT_SYMBOLIC	16	/* Indicates "symbolic" linking. [sup] */
 #define	DT_REL		17	/* Address of ElfNN_Rel relocations. */
 #define	DT_RELSZ	18	/* Total size of ElfNN_Rel relocations. */
 #define	DT_RELENT	19	/* Size of each ElfNN_Rel relocation. */
 #define	DT_PLTREL	20	/* Type of relocation used for PLT. */
 #define	DT_DEBUG	21	/* Reserved (not used). */
 #define	DT_TEXTREL	22	/* Indicates there may be relocations in
 				   non-writable segments. [sup] */
 #define	DT_JMPREL	23	/* Address of PLT relocations. */
 #define	DT_BIND_NOW	24	/* [sup] */
 #define	DT_INIT_ARRAY	25	/* Address of the array of pointers to
 				   initialization functions */
 #define	DT_FINI_ARRAY	26	/* Address of the array of pointers to
 				   termination functions */
 #define	DT_INIT_ARRAYSZ	27	/* Size in bytes of the array of
 				   initialization functions. */
 #define	DT_FINI_ARRAYSZ	28	/* Size in bytes of the array of
 				   terminationfunctions. */
 #define	DT_RUNPATH	29	/* String table offset of a null-terminated
 				   library search path string. */
 #define	DT_FLAGS	30	/* Object specific flag values. */
 #define	DT_ENCODING	32	/* Values greater than or equal to DT_ENCODING
 				   and less than DT_LOOS follow the rules for
 				   the interpretation of the d_un union
 				   as follows: even == 'd_ptr', odd == 'd_val'
 				   or none */
 #define	DT_PREINIT_ARRAY 32	/* Address of the array of pointers to
 				   pre-initialization functions. */
 #define	DT_PREINIT_ARRAYSZ 33	/* Size in bytes of the array of
 				   pre-initialization functions. */
 #define	DT_MAXPOSTAGS	34	/* number of positive tags */
 #define	DT_LOOS		0x6000000d	/* First OS-specific */
 #define	DT_SUNW_AUXILIARY	0x6000000d	/* symbol auxiliary name */
 #define	DT_SUNW_RTLDINF		0x6000000e	/* ld.so.1 info (private) */
 #define	DT_SUNW_FILTER		0x6000000f	/* symbol filter name */
 #define	DT_SUNW_CAP		0x60000010	/* hardware/software */
 #define	DT_HIOS		0x6ffff000	/* Last OS-specific */
 
 /*
  * DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the
  * Dyn.d_un.d_val field of the Elf*_Dyn structure.
  */
 #define	DT_VALRNGLO	0x6ffffd00
 #define	DT_CHECKSUM	0x6ffffdf8	/* elf checksum */
 #define	DT_PLTPADSZ	0x6ffffdf9	/* pltpadding size */
 #define	DT_MOVEENT	0x6ffffdfa	/* move table entry size */
 #define	DT_MOVESZ	0x6ffffdfb	/* move table size */
 #define	DT_FEATURE_1	0x6ffffdfc	/* feature holder */
 #define	DT_POSFLAG_1	0x6ffffdfd	/* flags for DT_* entries, effecting */
 					/*	the following DT_* entry. */
 					/*	See DF_P1_* definitions */
 #define	DT_SYMINSZ	0x6ffffdfe	/* syminfo table size (in bytes) */
 #define	DT_SYMINENT	0x6ffffdff	/* syminfo entry size (in bytes) */
 #define	DT_VALRNGHI	0x6ffffdff
 
 /*
  * DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the
  * Dyn.d_un.d_ptr field of the Elf*_Dyn structure.
  *
  * If any adjustment is made to the ELF object after it has been
  * built, these entries will need to be adjusted.
  */
 #define	DT_ADDRRNGLO	0x6ffffe00
 #define	DT_GNU_HASH	0x6ffffef5	/* GNU-style hash table */
 #define	DT_CONFIG	0x6ffffefa	/* configuration information */
 #define	DT_DEPAUDIT	0x6ffffefb	/* dependency auditing */
 #define	DT_AUDIT	0x6ffffefc	/* object auditing */
 #define	DT_PLTPAD	0x6ffffefd	/* pltpadding (sparcv9) */
 #define	DT_MOVETAB	0x6ffffefe	/* move table */
 #define	DT_SYMINFO	0x6ffffeff	/* syminfo table */
 #define	DT_ADDRRNGHI	0x6ffffeff
 
 #define	DT_VERSYM	0x6ffffff0	/* Address of versym section. */
 #define	DT_RELACOUNT	0x6ffffff9	/* number of RELATIVE relocations */
 #define	DT_RELCOUNT	0x6ffffffa	/* number of RELATIVE relocations */
 #define	DT_FLAGS_1	0x6ffffffb	/* state flags - see DF_1_* defs */
 #define	DT_VERDEF	0x6ffffffc	/* Address of verdef section. */
 #define	DT_VERDEFNUM	0x6ffffffd	/* Number of elems in verdef section */
 #define	DT_VERNEED	0x6ffffffe	/* Address of verneed section. */
 #define	DT_VERNEEDNUM	0x6fffffff	/* Number of elems in verneed section */
 
 #define	DT_LOPROC	0x70000000	/* First processor-specific type. */
 #define	DT_DEPRECATED_SPARC_REGISTER	0x7000001
 #define	DT_AUXILIARY	0x7ffffffd	/* shared library auxiliary name */
 #define	DT_USED		0x7ffffffe	/* ignored - same as needed */
 #define	DT_FILTER	0x7fffffff	/* shared library filter name */
 #define	DT_HIPROC	0x7fffffff	/* Last processor-specific type. */
 
 /* Values for DT_FLAGS */
 #define	DF_ORIGIN	0x0001	/* Indicates that the object being loaded may
 				   make reference to the $ORIGIN substitution
 				   string */
 #define	DF_SYMBOLIC	0x0002	/* Indicates "symbolic" linking. */
 #define	DF_TEXTREL	0x0004	/* Indicates there may be relocations in
 				   non-writable segments. */
 #define	DF_BIND_NOW	0x0008	/* Indicates that the dynamic linker should
 				   process all relocations for the object
 				   containing this entry before transferring
 				   control to the program. */
 #define	DF_STATIC_TLS	0x0010	/* Indicates that the shared object or
 				   executable contains code using a static
 				   thread-local storage scheme. */
 
 /* Values for DT_FLAGS_1 */
 #define	DF_1_BIND_NOW	0x00000001	/* Same as DF_BIND_NOW */
 #define	DF_1_GLOBAL	0x00000002	/* Set the RTLD_GLOBAL for object */
 #define	DF_1_NODELETE	0x00000008	/* Set the RTLD_NODELETE for object */
 #define	DF_1_LOADFLTR	0x00000010	/* Immediate loading of filtees */
 #define	DF_1_NOOPEN     0x00000040	/* Do not allow loading on dlopen() */
 #define	DF_1_ORIGIN	0x00000080	/* Process $ORIGIN */
 
 /* Values for n_type.  Used in core files. */
 #define	NT_PRSTATUS	1	/* Process status. */
 #define	NT_FPREGSET	2	/* Floating point registers. */
 #define	NT_PRPSINFO	3	/* Process state info. */
 #define	NT_THRMISC	7	/* Thread miscellaneous info. */
 
 /* Symbol Binding - ELFNN_ST_BIND - st_info */
 #define	STB_LOCAL	0	/* Local symbol */
 #define	STB_GLOBAL	1	/* Global symbol */
 #define	STB_WEAK	2	/* like global - lower precedence */
 #define	STB_LOOS	10	/* Reserved range for operating system */
 #define	STB_HIOS	12	/*   specific semantics. */
 #define	STB_LOPROC	13	/* reserved range for processor */
 #define	STB_HIPROC	15	/*   specific semantics. */
 
 /* Symbol type - ELFNN_ST_TYPE - st_info */
 #define	STT_NOTYPE	0	/* Unspecified type. */
 #define	STT_OBJECT	1	/* Data object. */
 #define	STT_FUNC	2	/* Function. */
 #define	STT_SECTION	3	/* Section. */
 #define	STT_FILE	4	/* Source file. */
 #define	STT_COMMON	5	/* Uninitialized common block. */
 #define	STT_TLS		6	/* TLS object. */
 #define	STT_NUM		7
 #define	STT_LOOS	10	/* Reserved range for operating system */
 #define	STT_HIOS	12	/*   specific semantics. */
 #define	STT_LOPROC	13	/* reserved range for processor */
 #define	STT_HIPROC	15	/*   specific semantics. */
 
 /* Symbol visibility - ELFNN_ST_VISIBILITY - st_other */
 #define	STV_DEFAULT	0x0	/* Default visibility (see binding). */
 #define	STV_INTERNAL	0x1	/* Special meaning in relocatable objects. */
 #define	STV_HIDDEN	0x2	/* Not visible. */
 #define	STV_PROTECTED	0x3	/* Visible but not preemptible. */
 #define	STV_EXPORTED	0x4
 #define	STV_SINGLETON	0x5
 #define	STV_ELIMINATE	0x6
 
 /* Special symbol table indexes. */
 #define	STN_UNDEF	0	/* Undefined symbol index. */
 
 /* Symbol versioning flags. */
 #define	VER_DEF_CURRENT	1
 #define	VER_DEF_IDX(x)	VER_NDX(x)
 
 #define	VER_FLG_BASE	0x01
 #define	VER_FLG_WEAK	0x02
 
 #define	VER_NEED_CURRENT	1
 #define	VER_NEED_WEAK	(1u << 15)
 #define	VER_NEED_HIDDEN	VER_NDX_HIDDEN
 #define	VER_NEED_IDX(x)	VER_NDX(x)
 
 #define	VER_NDX_LOCAL	0
 #define	VER_NDX_GLOBAL	1
 #define	VER_NDX_GIVEN	2
 
 #define	VER_NDX_HIDDEN	(1u << 15)
 #define	VER_NDX(x)	((x) & ~(1u << 15))
 
 #define	CA_SUNW_NULL	0
 #define	CA_SUNW_HW_1	1		/* first hardware capabilities entry */
 #define	CA_SUNW_SF_1	2		/* first software capabilities entry */
 
 /*
  * Syminfo flag values
  */
 #define	SYMINFO_FLG_DIRECT	0x0001	/* symbol ref has direct association */
 					/*	to object containing defn. */
 #define	SYMINFO_FLG_PASSTHRU	0x0002	/* ignored - see SYMINFO_FLG_FILTER */
 #define	SYMINFO_FLG_COPY	0x0004	/* symbol is a copy-reloc */
 #define	SYMINFO_FLG_LAZYLOAD	0x0008	/* object containing defn should be */
 					/*	lazily-loaded */
 #define	SYMINFO_FLG_DIRECTBIND	0x0010	/* ref should be bound directly to */
 					/*	object containing defn. */
 #define	SYMINFO_FLG_NOEXTDIRECT	0x0020	/* don't let an external reference */
 					/*	directly bind to this symbol */
 #define	SYMINFO_FLG_FILTER	0x0002	/* symbol ref is associated to a */
 #define	SYMINFO_FLG_AUXILIARY	0x0040	/* 	standard or auxiliary filter */
 
 /*
  * Syminfo.si_boundto values.
  */
 #define	SYMINFO_BT_SELF		0xffff	/* symbol bound to self */
 #define	SYMINFO_BT_PARENT	0xfffe	/* symbol bound to parent */
 #define	SYMINFO_BT_NONE		0xfffd	/* no special symbol binding */
 #define	SYMINFO_BT_EXTERN	0xfffc	/* symbol defined as external */
 #define	SYMINFO_BT_LOWRESERVE	0xff00	/* beginning of reserved entries */
 
 /*
  * Syminfo version values.
  */
 #define	SYMINFO_NONE		0	/* Syminfo version */
 #define	SYMINFO_CURRENT		1
 #define	SYMINFO_NUM		2
 
 /*
  * Relocation types.
  *
  * All machine architectures are defined here to allow tools on one to
  * handle others.
  */
 
 #define	R_386_NONE		0	/* No relocation. */
 #define	R_386_32		1	/* Add symbol value. */
 #define	R_386_PC32		2	/* Add PC-relative symbol value. */
 #define	R_386_GOT32		3	/* Add PC-relative GOT offset. */
 #define	R_386_PLT32		4	/* Add PC-relative PLT offset. */
 #define	R_386_COPY		5	/* Copy data from shared object. */
 #define	R_386_GLOB_DAT		6	/* Set GOT entry to data address. */
 #define	R_386_JMP_SLOT		7	/* Set GOT entry to code address. */
 #define	R_386_RELATIVE		8	/* Add load address of shared object. */
 #define	R_386_GOTOFF		9	/* Add GOT-relative symbol address. */
 #define	R_386_GOTPC		10	/* Add PC-relative GOT table address. */
 #define	R_386_TLS_TPOFF		14	/* Negative offset in static TLS block */
 #define	R_386_TLS_IE		15	/* Absolute address of GOT for -ve static TLS */
 #define	R_386_TLS_GOTIE		16	/* GOT entry for negative static TLS block */
 #define	R_386_TLS_LE		17	/* Negative offset relative to static TLS */
 #define	R_386_TLS_GD		18	/* 32 bit offset to GOT (index,off) pair */
 #define	R_386_TLS_LDM		19	/* 32 bit offset to GOT (index,zero) pair */
 #define	R_386_TLS_GD_32		24	/* 32 bit offset to GOT (index,off) pair */
 #define	R_386_TLS_GD_PUSH	25	/* pushl instruction for Sun ABI GD sequence */
 #define	R_386_TLS_GD_CALL	26	/* call instruction for Sun ABI GD sequence */
 #define	R_386_TLS_GD_POP	27	/* popl instruction for Sun ABI GD sequence */
 #define	R_386_TLS_LDM_32	28	/* 32 bit offset to GOT (index,zero) pair */
 #define	R_386_TLS_LDM_PUSH	29	/* pushl instruction for Sun ABI LD sequence */
 #define	R_386_TLS_LDM_CALL	30	/* call instruction for Sun ABI LD sequence */
 #define	R_386_TLS_LDM_POP	31	/* popl instruction for Sun ABI LD sequence */
 #define	R_386_TLS_LDO_32	32	/* 32 bit offset from start of TLS block */
 #define	R_386_TLS_IE_32		33	/* 32 bit offset to GOT static TLS offset entry */
 #define	R_386_TLS_LE_32		34	/* 32 bit offset within static TLS block */
 #define	R_386_TLS_DTPMOD32	35	/* GOT entry containing TLS index */
 #define	R_386_TLS_DTPOFF32	36	/* GOT entry containing TLS offset */
 #define	R_386_TLS_TPOFF32	37	/* GOT entry of -ve static TLS offset */
 
 #define	R_ARM_NONE		0	/* No relocation. */
 #define	R_ARM_PC24		1
 #define	R_ARM_ABS32		2
 #define	R_ARM_REL32		3
 #define	R_ARM_PC13		4
 #define	R_ARM_ABS16		5
 #define	R_ARM_ABS12		6
 #define	R_ARM_THM_ABS5		7
 #define	R_ARM_ABS8		8
 #define	R_ARM_SBREL32		9
 #define	R_ARM_THM_PC22		10
 #define	R_ARM_THM_PC8		11
 #define	R_ARM_AMP_VCALL9	12
 #define	R_ARM_SWI24		13
 #define	R_ARM_THM_SWI8		14
 #define	R_ARM_XPC25		15
 #define	R_ARM_THM_XPC22		16
 #define	R_ARM_COPY		20	/* Copy data from shared object. */
 #define	R_ARM_GLOB_DAT		21	/* Set GOT entry to data address. */
 #define	R_ARM_JUMP_SLOT		22	/* Set GOT entry to code address. */
 #define	R_ARM_RELATIVE		23	/* Add load address of shared object. */
 #define	R_ARM_GOTOFF		24	/* Add GOT-relative symbol address. */
 #define	R_ARM_GOTPC		25	/* Add PC-relative GOT table address. */
 #define	R_ARM_GOT32		26	/* Add PC-relative GOT offset. */
 #define	R_ARM_PLT32		27	/* Add PC-relative PLT offset. */
 #define	R_ARM_GNU_VTENTRY	100
 #define	R_ARM_GNU_VTINHERIT	101
 #define	R_ARM_RSBREL32		250
 #define	R_ARM_THM_RPC22		251
 #define	R_ARM_RREL32		252
 #define	R_ARM_RABS32		253
 #define	R_ARM_RPC24		254
 #define	R_ARM_RBASE		255
 
 /*	Name			Value	   Field	Calculation */
 #define	R_IA_64_NONE		0	/* None */
 #define	R_IA_64_IMM14		0x21	/* immediate14	S + A */
 #define	R_IA_64_IMM22		0x22	/* immediate22	S + A */
 #define	R_IA_64_IMM64		0x23	/* immediate64	S + A */
 #define	R_IA_64_DIR32MSB	0x24	/* word32 MSB	S + A */
 #define	R_IA_64_DIR32LSB	0x25	/* word32 LSB	S + A */
 #define	R_IA_64_DIR64MSB	0x26	/* word64 MSB	S + A */
 #define	R_IA_64_DIR64LSB	0x27	/* word64 LSB	S + A */
 #define	R_IA_64_GPREL22		0x2a	/* immediate22	@gprel(S + A) */
 #define	R_IA_64_GPREL64I	0x2b	/* immediate64	@gprel(S + A) */
 #define	R_IA_64_GPREL32MSB	0x2c	/* word32 MSB	@gprel(S + A) */
 #define	R_IA_64_GPREL32LSB	0x2d	/* word32 LSB	@gprel(S + A) */
 #define	R_IA_64_GPREL64MSB	0x2e	/* word64 MSB	@gprel(S + A) */
 #define	R_IA_64_GPREL64LSB	0x2f	/* word64 LSB	@gprel(S + A) */
 #define	R_IA_64_LTOFF22		0x32	/* immediate22	@ltoff(S + A) */
 #define	R_IA_64_LTOFF64I	0x33	/* immediate64	@ltoff(S + A) */
 #define	R_IA_64_PLTOFF22	0x3a	/* immediate22	@pltoff(S + A) */
 #define	R_IA_64_PLTOFF64I	0x3b	/* immediate64	@pltoff(S + A) */
 #define	R_IA_64_PLTOFF64MSB	0x3e	/* word64 MSB	@pltoff(S + A) */
 #define	R_IA_64_PLTOFF64LSB	0x3f	/* word64 LSB	@pltoff(S + A) */
 #define	R_IA_64_FPTR64I		0x43	/* immediate64	@fptr(S + A) */
 #define	R_IA_64_FPTR32MSB	0x44	/* word32 MSB	@fptr(S + A) */
 #define	R_IA_64_FPTR32LSB	0x45	/* word32 LSB	@fptr(S + A) */
 #define	R_IA_64_FPTR64MSB	0x46	/* word64 MSB	@fptr(S + A) */
 #define	R_IA_64_FPTR64LSB	0x47	/* word64 LSB	@fptr(S + A) */
 #define	R_IA_64_PCREL60B	0x48	/* immediate60 form1 S + A - P */
 #define	R_IA_64_PCREL21B	0x49	/* immediate21 form1 S + A - P */
 #define	R_IA_64_PCREL21M	0x4a	/* immediate21 form2 S + A - P */
 #define	R_IA_64_PCREL21F	0x4b	/* immediate21 form3 S + A - P */
 #define	R_IA_64_PCREL32MSB	0x4c	/* word32 MSB	S + A - P */
 #define	R_IA_64_PCREL32LSB	0x4d	/* word32 LSB	S + A - P */
 #define	R_IA_64_PCREL64MSB	0x4e	/* word64 MSB	S + A - P */
 #define	R_IA_64_PCREL64LSB	0x4f	/* word64 LSB	S + A - P */
 #define	R_IA_64_LTOFF_FPTR22	0x52	/* immediate22	@ltoff(@fptr(S + A)) */
 #define	R_IA_64_LTOFF_FPTR64I	0x53	/* immediate64	@ltoff(@fptr(S + A)) */
 #define	R_IA_64_LTOFF_FPTR32MSB	0x54	/* word32 MSB	@ltoff(@fptr(S + A)) */
 #define	R_IA_64_LTOFF_FPTR32LSB	0x55	/* word32 LSB	@ltoff(@fptr(S + A)) */
 #define	R_IA_64_LTOFF_FPTR64MSB	0x56	/* word64 MSB	@ltoff(@fptr(S + A)) */
 #define	R_IA_64_LTOFF_FPTR64LSB	0x57	/* word64 LSB	@ltoff(@fptr(S + A)) */
 #define	R_IA_64_SEGREL32MSB	0x5c	/* word32 MSB	@segrel(S + A) */
 #define	R_IA_64_SEGREL32LSB	0x5d	/* word32 LSB	@segrel(S + A) */
 #define	R_IA_64_SEGREL64MSB	0x5e	/* word64 MSB	@segrel(S + A) */
 #define	R_IA_64_SEGREL64LSB	0x5f	/* word64 LSB	@segrel(S + A) */
 #define	R_IA_64_SECREL32MSB	0x64	/* word32 MSB	@secrel(S + A) */
 #define	R_IA_64_SECREL32LSB	0x65	/* word32 LSB	@secrel(S + A) */
 #define	R_IA_64_SECREL64MSB	0x66	/* word64 MSB	@secrel(S + A) */
 #define	R_IA_64_SECREL64LSB	0x67	/* word64 LSB	@secrel(S + A) */
 #define	R_IA_64_REL32MSB	0x6c	/* word32 MSB	BD + A */
 #define	R_IA_64_REL32LSB	0x6d	/* word32 LSB	BD + A */
 #define	R_IA_64_REL64MSB	0x6e	/* word64 MSB	BD + A */
 #define	R_IA_64_REL64LSB	0x6f	/* word64 LSB	BD + A */
 #define	R_IA_64_LTV32MSB	0x74	/* word32 MSB	S + A */
 #define	R_IA_64_LTV32LSB	0x75	/* word32 LSB	S + A */
 #define	R_IA_64_LTV64MSB	0x76	/* word64 MSB	S + A */
 #define	R_IA_64_LTV64LSB	0x77	/* word64 LSB	S + A */
 #define	R_IA_64_PCREL21BI	0x79	/* immediate21 form1 S + A - P */
 #define	R_IA_64_PCREL22		0x7a	/* immediate22	S + A - P */
 #define	R_IA_64_PCREL64I	0x7b	/* immediate64	S + A - P */
 #define	R_IA_64_IPLTMSB		0x80	/* function descriptor MSB special */
 #define	R_IA_64_IPLTLSB		0x81	/* function descriptor LSB special */
 #define	R_IA_64_SUB		0x85	/* immediate64	A - S */
 #define	R_IA_64_LTOFF22X	0x86	/* immediate22	special */
 #define	R_IA_64_LDXMOV		0x87	/* immediate22	special */
 #define	R_IA_64_TPREL14		0x91	/* imm14	@tprel(S + A) */
 #define	R_IA_64_TPREL22		0x92	/* imm22	@tprel(S + A) */
 #define	R_IA_64_TPREL64I	0x93	/* imm64	@tprel(S + A) */
 #define	R_IA_64_TPREL64MSB	0x96	/* word64 MSB	@tprel(S + A) */
 #define	R_IA_64_TPREL64LSB	0x97	/* word64 LSB	@tprel(S + A) */
 #define	R_IA_64_LTOFF_TPREL22	0x9a	/* imm22	@ltoff(@tprel(S+A)) */
 #define	R_IA_64_DTPMOD64MSB	0xa6	/* word64 MSB	@dtpmod(S + A) */
 #define	R_IA_64_DTPMOD64LSB	0xa7	/* word64 LSB	@dtpmod(S + A) */
 #define	R_IA_64_LTOFF_DTPMOD22	0xaa	/* imm22	@ltoff(@dtpmod(S+A)) */
 #define	R_IA_64_DTPREL14	0xb1	/* imm14	@dtprel(S + A) */
 #define	R_IA_64_DTPREL22	0xb2	/* imm22	@dtprel(S + A) */
 #define	R_IA_64_DTPREL64I	0xb3	/* imm64	@dtprel(S + A) */
 #define	R_IA_64_DTPREL32MSB	0xb4	/* word32 MSB	@dtprel(S + A) */
 #define	R_IA_64_DTPREL32LSB	0xb5	/* word32 LSB	@dtprel(S + A) */
 #define	R_IA_64_DTPREL64MSB	0xb6	/* word64 MSB	@dtprel(S + A) */
 #define	R_IA_64_DTPREL64LSB	0xb7	/* word64 LSB	@dtprel(S + A) */
 #define	R_IA_64_LTOFF_DTPREL22	0xba	/* imm22	@ltoff(@dtprel(S+A)) */
 
 #define	R_MIPS_NONE	0	/* No reloc */
 #define	R_MIPS_16	1	/* Direct 16 bit */
 #define	R_MIPS_32	2	/* Direct 32 bit */
 #define	R_MIPS_REL32	3	/* PC relative 32 bit */
 #define	R_MIPS_26	4	/* Direct 26 bit shifted */
 #define	R_MIPS_HI16	5	/* High 16 bit */
 #define	R_MIPS_LO16	6	/* Low 16 bit */
 #define	R_MIPS_GPREL16	7	/* GP relative 16 bit */
 #define	R_MIPS_LITERAL	8	/* 16 bit literal entry */
 #define	R_MIPS_GOT16	9	/* 16 bit GOT entry */
 #define	R_MIPS_PC16	10	/* PC relative 16 bit */
 #define	R_MIPS_CALL16	11	/* 16 bit GOT entry for function */
 #define	R_MIPS_GPREL32	12	/* GP relative 32 bit */
 #define	R_MIPS_GOTHI16	21	/* GOT HI 16 bit */
 #define	R_MIPS_GOTLO16	22	/* GOT LO 16 bit */
 #define	R_MIPS_CALLHI16 30	/* upper 16 bit GOT entry for function */
 #define	R_MIPS_CALLLO16 31	/* lower 16 bit GOT entry for function */
 
 #define	R_PPC_NONE		0	/* No relocation. */
 #define	R_PPC_ADDR32		1
 #define	R_PPC_ADDR24		2
 #define	R_PPC_ADDR16		3
 #define	R_PPC_ADDR16_LO		4
 #define	R_PPC_ADDR16_HI		5
 #define	R_PPC_ADDR16_HA		6
 #define	R_PPC_ADDR14		7
 #define	R_PPC_ADDR14_BRTAKEN	8
 #define	R_PPC_ADDR14_BRNTAKEN	9
 #define	R_PPC_REL24		10
 #define	R_PPC_REL14		11
 #define	R_PPC_REL14_BRTAKEN	12
 #define	R_PPC_REL14_BRNTAKEN	13
 #define	R_PPC_GOT16		14
 #define	R_PPC_GOT16_LO		15
 #define	R_PPC_GOT16_HI		16
 #define	R_PPC_GOT16_HA		17
 #define	R_PPC_PLTREL24		18
 #define	R_PPC_COPY		19
 #define	R_PPC_GLOB_DAT		20
 #define	R_PPC_JMP_SLOT		21
 #define	R_PPC_RELATIVE		22
 #define	R_PPC_LOCAL24PC		23
 #define	R_PPC_UADDR32		24
 #define	R_PPC_UADDR16		25
 #define	R_PPC_REL32		26
 #define	R_PPC_PLT32		27
 #define	R_PPC_PLTREL32		28
 #define	R_PPC_PLT16_LO		29
 #define	R_PPC_PLT16_HI		30
 #define	R_PPC_PLT16_HA		31
 #define	R_PPC_SDAREL16		32
 #define	R_PPC_SECTOFF		33
 #define	R_PPC_SECTOFF_LO	34
 #define	R_PPC_SECTOFF_HI	35
 #define	R_PPC_SECTOFF_HA	36
 
 /*
  * 64-bit relocations (values resume at 38; R_PPC_SECTOFF_HA above is 36)
  */
 #define	R_PPC64_ADDR64		38
 #define	R_PPC64_ADDR16_HIGHER	39
 #define	R_PPC64_ADDR16_HIGHERA	40
 #define	R_PPC64_ADDR16_HIGHEST	41
 #define	R_PPC64_ADDR16_HIGHESTA	42
 #define	R_PPC64_UADDR64		43
 #define	R_PPC64_REL64		44
 #define	R_PPC64_PLT64		45
 #define	R_PPC64_PLTREL64	46
 #define	R_PPC64_TOC16		47
 #define	R_PPC64_TOC16_LO	48
 #define	R_PPC64_TOC16_HI	49
 #define	R_PPC64_TOC16_HA	50
 #define	R_PPC64_TOC		51
 #define	R_PPC64_DTPMOD64	68	/* same value as R_PPC_DTPMOD32 */
 #define	R_PPC64_TPREL64		73	/* same value as R_PPC_TPREL32 */
 #define	R_PPC64_DTPREL64	78	/* same value as R_PPC_DTPREL32 */
 
 /*
  * TLS relocations
  */
 #define	R_PPC_TLS		67
 #define	R_PPC_DTPMOD32		68
 #define	R_PPC_TPREL16		69
 #define	R_PPC_TPREL16_LO	70
 #define	R_PPC_TPREL16_HI	71
 #define	R_PPC_TPREL16_HA	72
 #define	R_PPC_TPREL32		73
 #define	R_PPC_DTPREL16		74
 #define	R_PPC_DTPREL16_LO	75
 #define	R_PPC_DTPREL16_HI	76
 #define	R_PPC_DTPREL16_HA	77
 #define	R_PPC_DTPREL32		78
 #define	R_PPC_GOT_TLSGD16	79
 #define	R_PPC_GOT_TLSGD16_LO	80
 #define	R_PPC_GOT_TLSGD16_HI	81
 #define	R_PPC_GOT_TLSGD16_HA	82
 #define	R_PPC_GOT_TLSLD16	83
 #define	R_PPC_GOT_TLSLD16_LO	84
 #define	R_PPC_GOT_TLSLD16_HI	85
 #define	R_PPC_GOT_TLSLD16_HA	86
 #define	R_PPC_GOT_TPREL16	87
 #define	R_PPC_GOT_TPREL16_LO	88
 #define	R_PPC_GOT_TPREL16_HI	89
 #define	R_PPC_GOT_TPREL16_HA	90
 
 /*
  * The remaining relocs are from the Embedded ELF ABI, and are not in the
  *  SVR4 ELF ABI.
  */
 
 #define	R_PPC_EMB_NADDR32	101
 #define	R_PPC_EMB_NADDR16	102
 #define	R_PPC_EMB_NADDR16_LO	103
 #define	R_PPC_EMB_NADDR16_HI	104
 #define	R_PPC_EMB_NADDR16_HA	105
 #define	R_PPC_EMB_SDAI16	106
 #define	R_PPC_EMB_SDA2I16	107
 #define	R_PPC_EMB_SDA2REL	108
 #define	R_PPC_EMB_SDA21		109
 #define	R_PPC_EMB_MRKREF	110
 #define	R_PPC_EMB_RELSEC16	111
 #define	R_PPC_EMB_RELST_LO	112
 #define	R_PPC_EMB_RELST_HI	113
 #define	R_PPC_EMB_RELST_HA	114
 #define	R_PPC_EMB_BIT_FLD	115
 #define	R_PPC_EMB_RELSDA	116
 
 #define	R_SPARC_NONE		0
 #define	R_SPARC_8		1
 #define	R_SPARC_16		2
 #define	R_SPARC_32		3
 #define	R_SPARC_DISP8		4
 #define	R_SPARC_DISP16		5
 #define	R_SPARC_DISP32		6
 #define	R_SPARC_WDISP30		7
 #define	R_SPARC_WDISP22		8
 #define	R_SPARC_HI22		9
 #define	R_SPARC_22		10
 #define	R_SPARC_13		11
 #define	R_SPARC_LO10		12
 #define	R_SPARC_GOT10		13
 #define	R_SPARC_GOT13		14
 #define	R_SPARC_GOT22		15
 #define	R_SPARC_PC10		16
 #define	R_SPARC_PC22		17
 #define	R_SPARC_WPLT30		18
 #define	R_SPARC_COPY		19
 #define	R_SPARC_GLOB_DAT	20
 #define	R_SPARC_JMP_SLOT	21
 #define	R_SPARC_RELATIVE	22
 #define	R_SPARC_UA32		23
 #define	R_SPARC_PLT32		24
 #define	R_SPARC_HIPLT22		25
 #define	R_SPARC_LOPLT10		26
 #define	R_SPARC_PCPLT32		27
 #define	R_SPARC_PCPLT22		28
 #define	R_SPARC_PCPLT10		29
 #define	R_SPARC_10		30
 #define	R_SPARC_11		31
 #define	R_SPARC_64		32
 #define	R_SPARC_OLO10		33
 #define	R_SPARC_HH22		34
 #define	R_SPARC_HM10		35
 #define	R_SPARC_LM22		36
 #define	R_SPARC_PC_HH22		37
 #define	R_SPARC_PC_HM10		38
 #define	R_SPARC_PC_LM22		39
 #define	R_SPARC_WDISP16		40
 #define	R_SPARC_WDISP19		41
 #define	R_SPARC_GLOB_JMP	42
 #define	R_SPARC_7		43
 #define	R_SPARC_5		44
 #define	R_SPARC_6		45
 #define	R_SPARC_DISP64		46
 #define	R_SPARC_PLT64		47
 #define	R_SPARC_HIX22		48
 #define	R_SPARC_LOX10		49
 #define	R_SPARC_H44		50
 #define	R_SPARC_M44		51
 #define	R_SPARC_L44		52
 #define	R_SPARC_REGISTER	53
 #define	R_SPARC_UA64		54
 #define	R_SPARC_UA16		55
 #define	R_SPARC_TLS_GD_HI22	56
 #define	R_SPARC_TLS_GD_LO10	57
 #define	R_SPARC_TLS_GD_ADD	58
 #define	R_SPARC_TLS_GD_CALL	59
 #define	R_SPARC_TLS_LDM_HI22	60
 #define	R_SPARC_TLS_LDM_LO10	61
 #define	R_SPARC_TLS_LDM_ADD	62
 #define	R_SPARC_TLS_LDM_CALL	63
 #define	R_SPARC_TLS_LDO_HIX22	64
 #define	R_SPARC_TLS_LDO_LOX10	65
 #define	R_SPARC_TLS_LDO_ADD	66
 #define	R_SPARC_TLS_IE_HI22	67
 #define	R_SPARC_TLS_IE_LO10	68
 #define	R_SPARC_TLS_IE_LD	69
 #define	R_SPARC_TLS_IE_LDX	70
 #define	R_SPARC_TLS_IE_ADD	71
 #define	R_SPARC_TLS_LE_HIX22	72
 #define	R_SPARC_TLS_LE_LOX10	73
 #define	R_SPARC_TLS_DTPMOD32	74
 #define	R_SPARC_TLS_DTPMOD64	75
 #define	R_SPARC_TLS_DTPOFF32	76
 #define	R_SPARC_TLS_DTPOFF64	77
 #define	R_SPARC_TLS_TPOFF32	78
 #define	R_SPARC_TLS_TPOFF64	79
 
 #define	R_X86_64_NONE		0	/* No relocation. */
 #define	R_X86_64_64		1	/* Add 64 bit symbol value. */
 #define	R_X86_64_PC32		2	/* PC-relative 32 bit signed sym value. */
 #define	R_X86_64_GOT32		3	/* PC-relative 32 bit GOT offset. */
 #define	R_X86_64_PLT32		4	/* PC-relative 32 bit PLT offset. */
 #define	R_X86_64_COPY		5	/* Copy data from shared object. */
 #define	R_X86_64_GLOB_DAT	6	/* Set GOT entry to data address. */
 #define	R_X86_64_JMP_SLOT	7	/* Set GOT entry to code address. */
 #define	R_X86_64_RELATIVE	8	/* Add load address of shared object. */
 #define	R_X86_64_GOTPCREL	9	/* Add 32 bit signed pcrel offset to GOT. */
 #define	R_X86_64_32		10	/* Add 32 bit zero extended symbol value */
 #define	R_X86_64_32S		11	/* Add 32 bit sign extended symbol value */
 #define	R_X86_64_16		12	/* Add 16 bit zero extended symbol value */
 #define	R_X86_64_PC16		13	/* Add 16 bit sign extended pc relative symbol value */
 #define	R_X86_64_8		14	/* Add 8 bit zero extended symbol value */
 #define	R_X86_64_PC8		15	/* Add 8 bit sign extended pc relative symbol value */
 #define	R_X86_64_DTPMOD64	16	/* ID of module containing symbol */
 #define	R_X86_64_DTPOFF64	17	/* Offset in TLS block */
 #define	R_X86_64_TPOFF64	18	/* Offset in static TLS block */
 #define	R_X86_64_TLSGD		19	/* PC relative offset to GD GOT entry */
 #define	R_X86_64_TLSLD		20	/* PC relative offset to LD GOT entry */
 #define	R_X86_64_DTPOFF32	21	/* Offset in TLS block */
 #define	R_X86_64_GOTTPOFF	22	/* PC relative offset to IE GOT entry */
 #define	R_X86_64_TPOFF32	23	/* Offset in static TLS block */
 
 
 #endif /* !_SYS_ELF_COMMON_H_ */
Index: projects/binutils-2.17/sys/sys/sysctl.h
===================================================================
--- projects/binutils-2.17/sys/sys/sysctl.h	(revision 215829)
+++ projects/binutils-2.17/sys/sys/sysctl.h	(revision 215830)
@@ -1,726 +1,727 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Mike Karels at Berkeley Software Design, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)sysctl.h	8.1 (Berkeley) 6/2/93
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSCTL_H_
 #define	_SYS_SYSCTL_H_
 
 #include <sys/queue.h>
 
 struct thread;
 /*
  * Definitions for sysctl call.  The sysctl call uses a hierarchical name
  * for objects that can be examined or modified.  The name is expressed as
  * a sequence of integers.  Like a file path name, the meaning of each
  * component depends on its place in the hierarchy.  The top-level and kern
  * identifiers are defined here, and other identifiers are defined in the
  * respective subsystem header files.
  */
 
 #define CTL_MAXNAME	24	/* largest number of components supported */
 
 /*
  * Each subsystem defined by sysctl defines a list of variables
  * for that subsystem. Each name is either a node with further
  * levels defined below it, or it is a leaf of some particular
  * type given below. Each sysctl level defines a set of name/type
  * pairs to be used by sysctl(8) in manipulating the subsystem.
  */
 struct ctlname {
 	char	*ctl_name;	/* subsystem name */
 	int	ctl_type;	/* type of name */
 };
 
 #define CTLTYPE		0xf	/* Mask for the type */
 #define	CTLTYPE_NODE	1	/* name is a node */
 #define	CTLTYPE_INT	2	/* name describes an integer */
 #define	CTLTYPE_STRING	3	/* name describes a string */
 #define	CTLTYPE_QUAD	4	/* name describes a 64-bit number */
 #define	CTLTYPE_OPAQUE	5	/* name describes a structure */
 #define	CTLTYPE_STRUCT	CTLTYPE_OPAQUE	/* name describes a structure */
 #define	CTLTYPE_UINT	6	/* name describes an unsigned integer */
 #define	CTLTYPE_LONG	7	/* name describes a long */
 #define	CTLTYPE_ULONG	8	/* name describes an unsigned long */
 
 #define CTLFLAG_RD	0x80000000	/* Allow reads of variable */
 #define CTLFLAG_WR	0x40000000	/* Allow writes to the variable */
 #define CTLFLAG_RW	(CTLFLAG_RD|CTLFLAG_WR)	/* Allow reads and writes */
 #define CTLFLAG_NOLOCK	0x20000000	/* XXX Don't Lock */
 #define CTLFLAG_ANYBODY	0x10000000	/* All users can set this var */
 #define CTLFLAG_SECURE	0x08000000	/* Permit set only if securelevel<=0 */
 #define CTLFLAG_PRISON	0x04000000	/* Prisoned roots can fiddle */
 #define CTLFLAG_DYN	0x02000000	/* Dynamic oid - can be freed */
 #define CTLFLAG_SKIP	0x01000000	/* Skip this sysctl when listing */
 #define CTLMASK_SECURE	0x00F00000	/* Secure level field, see CTLSHIFT_SECURE */
 #define CTLFLAG_TUN	0x00080000	/* Tunable variable */
 #define CTLFLAG_MPSAFE	0x00040000	/* Handler is MP safe */
 #define CTLFLAG_VNET	0x00020000	/* Prisons with vnet can fiddle */
 #define CTLFLAG_RDTUN	(CTLFLAG_RD|CTLFLAG_TUN)	/* Readable tunable variable */
 
 /*
  * Secure level.   Note that CTLFLAG_SECURE == CTLFLAG_SECURE1.  
  *
  * Secure when the securelevel is raised to at least N.
  */
 #define CTLSHIFT_SECURE	20
 #define CTLFLAG_SECURE1	(CTLFLAG_SECURE | (0 << CTLSHIFT_SECURE))
 #define CTLFLAG_SECURE2	(CTLFLAG_SECURE | (1 << CTLSHIFT_SECURE))
 #define CTLFLAG_SECURE3	(CTLFLAG_SECURE | (2 << CTLSHIFT_SECURE))
 
 /*
  * USE THIS instead of a hardwired number from the categories below
  * to get dynamically assigned sysctl entries using the linker-set
  * technology. This is the way nearly all new sysctl variables should
  * be implemented.
  * e.g. SYSCTL_INT(_parent, OID_AUTO, name, CTLFLAG_RW, &variable, 0, "");
  */ 
 #define OID_AUTO	(-1)
 
 /*
  * The starting number for dynamically-assigned entries.  WARNING!
  * ALL static sysctl entries should have numbers LESS than this!
  */
 #define CTL_AUTO_START	0x100
 
 #ifdef _KERNEL
 #define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, int arg2, \
 	struct sysctl_req *req
 
 /* definitions for sysctl_req 'lock' member */
 #define REQ_UNLOCKED	0	/* not locked and not wired */
 #define REQ_LOCKED	1	/* locked and not wired */
 #define REQ_WIRED	2	/* locked and wired */
 
 /* definitions for sysctl_req 'flags' member */
 #if defined(__amd64__) || defined(__ia64__) || defined(__powerpc64__)
 #define	SCTL_MASK32	1	/* 32 bit emulation */
 #endif
 
 /*
  * This describes the access space for a sysctl request.  This is needed
  * so that we can use the interface from the kernel or from user-space.
  */
 struct sysctl_req {
 	struct thread	*td;		/* used for access checking */
 	int		lock;		/* locking/wiring state (REQ_*) */
 	void		*oldptr;
 	size_t		oldlen;
 	size_t		oldidx;
 	int		(*oldfunc)(struct sysctl_req *, const void *, size_t); /* invoked by SYSCTL_OUT() */
 	void		*newptr;
 	size_t		newlen;
 	size_t		newidx;
 	int		(*newfunc)(struct sysctl_req *, void *, size_t); /* invoked by SYSCTL_IN() */
 	size_t		validlen;
 	int		flags;		/* SCTL_MASK32, where defined */
 };
 
 SLIST_HEAD(sysctl_oid_list, sysctl_oid);
 
 /*
  * This describes one "oid" in the MIB tree.  Potentially more nodes can
  * be hidden behind it, expanded by the handler.
  */
 struct sysctl_oid {
 	struct sysctl_oid_list *oid_parent;	/* parent's list of children */
 	SLIST_ENTRY(sysctl_oid) oid_link;	/* SLIST linkage between oids */
 	int		oid_number;	/* "nbr" given to SYSCTL_OID, may be OID_AUTO */
 	u_int		oid_kind;	/* CTLTYPE_* ored with CTLFLAG_* bits */
 	void		*oid_arg1;	/* "a1"; for nodes, the children list */
 	int		oid_arg2;	/* "a2" given to SYSCTL_OID */
 	const char	*oid_name;	/* stringified "name" */
 	int 		(*oid_handler)(SYSCTL_HANDLER_ARGS);	/* handler function */
 	const char	*oid_fmt;	/* format string, e.g. "I", "A", "N" */
 	int		oid_refcnt;	/* SYSCTL_OID initializes this to 0 */
 	const char	*oid_descr;	/* description, "" under NO_SYSCTL_DESCR */
 };
 
 #define SYSCTL_IN(r, p, l) ((r)->newfunc)((r), (p), (l))	/* call r's newfunc callback */
 #define SYSCTL_OUT(r, p, l) ((r)->oldfunc)((r), (p), (l))	/* call r's oldfunc callback */
 
 int sysctl_handle_int(SYSCTL_HANDLER_ARGS);
 int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_long(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_quad(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_intptr(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_string(SYSCTL_HANDLER_ARGS);
 int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
 
 int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS);
 int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS);
 int sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS);
 
 /*
  * These functions are used to add/remove an oid from the mib.
  */
 void sysctl_register_oid(struct sysctl_oid *oidp);
 void sysctl_unregister_oid(struct sysctl_oid *oidp);
 
 /* Declare a static oid to allow child oids to be added to it. */
 #define SYSCTL_DECL(name)					\
 	extern struct sysctl_oid_list sysctl_##name##_children
 
 /* Hide these in macros; each expands to one parenthesized expression. */
 #define	SYSCTL_CHILDREN(oid_ptr) ((struct sysctl_oid_list *) \
 	(oid_ptr)->oid_arg1)
 #define	SYSCTL_CHILDREN_SET(oid_ptr, val) \
 	((oid_ptr)->oid_arg1 = (val))
 #define	SYSCTL_STATIC_CHILDREN(oid_name) \
 	(&sysctl_##oid_name##_children)
 
 /* === Structs and macros related to context handling === */
 
 /* All dynamically created sysctls can be tracked in a context list. */
 struct sysctl_ctx_entry {
 	struct sysctl_oid *entry;
 	TAILQ_ENTRY(sysctl_ctx_entry) link;
 };
 
 TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 
 #define SYSCTL_NODE_CHILDREN(parent, name) \
 	sysctl_##parent##_##name##_children
 
 #ifndef NO_SYSCTL_DESCR
 #define __DESCR(d) d
 #else
 #define __DESCR(d) ""
 #endif
 
 /* This constructs a "raw" MIB oid: a static sysctl_oid put in sysctl_set. */
 #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
 	static struct sysctl_oid sysctl__##parent##_##name = {		 \
 		&sysctl_##parent##_children, { NULL }, nbr, kind,	 \
 		a1, a2, #name, handler, fmt, 0, __DESCR(descr) };     \
 	DATA_SET(sysctl_set, sysctl__##parent##_##name)
 
 #define SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
 	sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr))
 
 /* This constructs a node from which other oids can hang. */
 #define SYSCTL_NODE(parent, nbr, name, access, handler, descr)		    \
 	struct sysctl_oid_list SYSCTL_NODE_CHILDREN(parent, name);	    \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|(access),		    \
 	    (void*)&SYSCTL_NODE_CHILDREN(parent, name), 0, handler, "N", descr)
 
 #define SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr)	    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_NODE|(access),	    \
 	NULL, 0, handler, "N", __DESCR(descr))
 
 /* Oid for a string.  len can be 0 to indicate '\0' termination. */
 #define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \
 		arg, len, sysctl_handle_string, "A", descr)
 
 #define SYSCTL_ADD_STRING(ctx, parent, nbr, name, access, arg, len, descr)  \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access),	    \
 	arg, len, sysctl_handle_string, "A", __DESCR(descr))
 
 /* Oid for an int.  If ptr is NULL, val is returned. */
 #define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_int, "I", descr)
 
 #define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr)	    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_INT|CTLFLAG_MPSAFE|(access),	    \
 	ptr, val, sysctl_handle_int, "I", __DESCR(descr))
 
 /* Oid for an unsigned int.  If ptr is NULL, val is returned. */
 #define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_int, "IU", descr)
 
 #define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr)    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access),	    \
 	ptr, val, sysctl_handle_int, "IU", __DESCR(descr))
 
 #define SYSCTL_XINT(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_int, "IX", descr)
 
 #define SYSCTL_ADD_XINT(ctx, parent, nbr, name, access, ptr, val, descr)    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access),	    \
 	ptr, val, sysctl_handle_int, "IX", __DESCR(descr))
 
 /* Oid for a long.  The pointer must be non NULL. */
 #define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_LONG|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_long, "L", descr)
 
 #define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr)	    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_LONG|CTLFLAG_MPSAFE|(access),	    \
 	ptr, 0, sysctl_handle_long, "L", __DESCR(descr))
 
 /* Oid for an unsigned long.  The pointer must be non NULL. */
 #define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_long, "LU", descr)
 
 #define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr)	    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access),	    \
 	ptr, 0, sysctl_handle_long, "LU", __DESCR(descr))
 
 #define SYSCTL_XLONG(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_long, "LX", descr)
 
 #define SYSCTL_ADD_XLONG(ctx, parent, nbr, name, access, ptr, descr)	    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access),	    \
 	ptr, 0, sysctl_handle_long, "LX", __DESCR(descr))
 
 /* Oid for a quad.  The pointer must be non NULL.  SYSCTL_OID applies __DESCR. */
 #define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_QUAD|CTLFLAG_MPSAFE|(access), \
 		ptr, val, sysctl_handle_quad, "Q", descr)
 
 #define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr)	    \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_QUAD|CTLFLAG_MPSAFE|(access),	    \
 	ptr, 0, sysctl_handle_quad, "Q", __DESCR(descr))
 
 /* Oid for an opaque object.  Specified by a pointer and a length. */
 #define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \
 		ptr, len, sysctl_handle_opaque, fmt, descr)
 
 #define SYSCTL_ADD_OPAQUE(ctx, parent, nbr, name, access, ptr, len, fmt, descr)\
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access),	    \
 	ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr))
 
 /* Oid for a struct.  Specified by a pointer and a type. */
 #define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \
 	SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \
 		ptr, sizeof(struct type), sysctl_handle_opaque, \
 		"S," #type, descr)
 
 #define SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access),	    \
 	ptr, sizeof(struct type), sysctl_handle_opaque, "S," #type, __DESCR(descr))
 
 /* Oid for a procedure.  Specified by a pointer and an arg. */
 #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
 	SYSCTL_OID(parent, nbr, name, (access), \
 		ptr, arg, handler, fmt, descr)
 
 #define SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
 	sysctl_add_oid(ctx, parent, nbr, name, (access),			    \
 	ptr, arg, handler, fmt, __DESCR(descr))
 
 /*
  * A macro to generate a read-only sysctl to indicate the presence of optional
  * kernel features.
  */
 #define	FEATURE(name, desc)						\
 	SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD, 0, 1, desc)
 
 #endif /* _KERNEL */
 
 /*
  * Top-level identifiers
  */
 #define	CTL_UNSPEC	0		/* unused */
 #define	CTL_KERN	1		/* "high kernel": proc, limits */
 #define	CTL_VM		2		/* virtual memory */
 #define	CTL_VFS		3		/* filesystem, mount type is next */
 #define	CTL_NET		4		/* network, see socket.h */
 #define	CTL_DEBUG	5		/* debugging parameters */
 #define	CTL_HW		6		/* generic cpu/io */
 #define	CTL_MACHDEP	7		/* machine dependent */
 #define	CTL_USER	8		/* user-level */
 #define	CTL_P1003_1B	9		/* POSIX 1003.1B */
 #define	CTL_MAXID	10		/* number of valid top-level ids */
 
 #define CTL_NAMES { \
 	{ 0, 0 }, \
 	{ "kern", CTLTYPE_NODE }, \
 	{ "vm", CTLTYPE_NODE }, \
 	{ "vfs", CTLTYPE_NODE }, \
 	{ "net", CTLTYPE_NODE }, \
 	{ "debug", CTLTYPE_NODE }, \
 	{ "hw", CTLTYPE_NODE }, \
 	{ "machdep", CTLTYPE_NODE }, \
 	{ "user", CTLTYPE_NODE }, \
 	{ "p1003_1b", CTLTYPE_NODE }, \
 }
 
 /*
  * CTL_KERN identifiers
  */
 #define	KERN_OSTYPE	 	 1	/* string: system version */
 #define	KERN_OSRELEASE	 	 2	/* string: system release */
 #define	KERN_OSREV	 	 3	/* int: system revision */
 #define	KERN_VERSION	 	 4	/* string: compile time info */
 #define	KERN_MAXVNODES	 	 5	/* int: max vnodes */
 #define	KERN_MAXPROC	 	 6	/* int: max processes */
 #define	KERN_MAXFILES	 	 7	/* int: max open files */
 #define	KERN_ARGMAX	 	 8	/* int: max arguments to exec */
 #define	KERN_SECURELVL	 	 9	/* int: system security level */
 #define	KERN_HOSTNAME		10	/* string: hostname */
 #define	KERN_HOSTID		11	/* int: host identifier */
 #define	KERN_CLOCKRATE		12	/* struct: struct clockrate */
 #define	KERN_VNODE		13	/* struct: vnode structures */
 #define	KERN_PROC		14	/* struct: process entries */
 #define	KERN_FILE		15	/* struct: file entries */
 #define	KERN_PROF		16	/* node: kernel profiling info */
 #define	KERN_POSIX1		17	/* int: POSIX.1 version */
 #define	KERN_NGROUPS		18	/* int: # of supplemental group ids */
 #define	KERN_JOB_CONTROL	19	/* int: is job control available */
 #define	KERN_SAVED_IDS		20	/* int: saved set-user/group-ID */
 #define	KERN_BOOTTIME		21	/* struct: time kernel was booted */
 #define KERN_NISDOMAINNAME	22	/* string: YP domain name */
 #define KERN_UPDATEINTERVAL	23	/* int: update process sleep time */
 #define KERN_OSRELDATE		24	/* int: kernel release date */
 #define KERN_NTP_PLL		25	/* node: NTP PLL control */
 #define	KERN_BOOTFILE		26	/* string: name of booted kernel */
 #define	KERN_MAXFILESPERPROC	27	/* int: max open files per proc */
 #define	KERN_MAXPROCPERUID 	28	/* int: max processes per uid */
 #define KERN_DUMPDEV		29	/* struct cdev *: device to dump on */
 #define	KERN_IPC		30	/* node: anything related to IPC */
 #define	KERN_DUMMY		31	/* unused */
 #define	KERN_PS_STRINGS		32	/* int: address of PS_STRINGS */
 #define	KERN_USRSTACK		33	/* int: address of USRSTACK */
 #define	KERN_LOGSIGEXIT		34	/* int: do we log sigexit procs? */
 #define	KERN_IOV_MAX		35	/* int: value of UIO_MAXIOV */
 #define	KERN_HOSTUUID		36	/* string: host UUID identifier */
 #define	KERN_ARND		37	/* int: from arc4rand() */
 #define	KERN_MAXID		38	/* number of valid kern ids */
 
 #define CTL_KERN_NAMES { \
 	{ 0, 0 }, \
 	{ "ostype", CTLTYPE_STRING }, \
 	{ "osrelease", CTLTYPE_STRING }, \
 	{ "osrevision", CTLTYPE_INT }, \
 	{ "version", CTLTYPE_STRING }, \
 	{ "maxvnodes", CTLTYPE_INT }, \
 	{ "maxproc", CTLTYPE_INT }, \
 	{ "maxfiles", CTLTYPE_INT }, \
 	{ "argmax", CTLTYPE_INT }, \
 	{ "securelevel", CTLTYPE_INT }, \
 	{ "hostname", CTLTYPE_STRING }, \
 	{ "hostid", CTLTYPE_UINT }, \
 	{ "clockrate", CTLTYPE_STRUCT }, \
 	{ "vnode", CTLTYPE_STRUCT }, \
 	{ "proc", CTLTYPE_STRUCT }, \
 	{ "file", CTLTYPE_STRUCT }, \
 	{ "profiling", CTLTYPE_NODE }, \
 	{ "posix1version", CTLTYPE_INT }, \
 	{ "ngroups", CTLTYPE_INT }, \
 	{ "job_control", CTLTYPE_INT }, \
 	{ "saved_ids", CTLTYPE_INT }, \
 	{ "boottime", CTLTYPE_STRUCT }, \
 	{ "nisdomainname", CTLTYPE_STRING }, \
 	{ "update", CTLTYPE_INT }, \
 	{ "osreldate", CTLTYPE_INT }, \
 	{ "ntp_pll", CTLTYPE_NODE }, \
 	{ "bootfile", CTLTYPE_STRING }, \
 	{ "maxfilesperproc", CTLTYPE_INT }, \
 	{ "maxprocperuid", CTLTYPE_INT }, \
 	{ "ipc", CTLTYPE_NODE }, \
 	{ "dummy", CTLTYPE_INT }, \
 	{ "ps_strings", CTLTYPE_INT }, \
 	{ "usrstack", CTLTYPE_INT }, \
 	{ "logsigexit", CTLTYPE_INT }, \
 	{ "iov_max", CTLTYPE_INT }, \
 	{ "hostuuid", CTLTYPE_STRING }, \
+	{ "arc4rand", CTLTYPE_OPAQUE }, \
 }
 
 /*
  * CTL_VFS identifiers
  */
 #define CTL_VFS_NAMES { \
 	{ "vfsconf", CTLTYPE_STRUCT }, \
 }
 
 /*
  * KERN_PROC subtypes
  */
 #define KERN_PROC_ALL		0	/* everything */
 #define	KERN_PROC_PID		1	/* by process id */
 #define	KERN_PROC_PGRP		2	/* by process group id */
 #define	KERN_PROC_SESSION	3	/* by session of pid */
 #define	KERN_PROC_TTY		4	/* by controlling tty */
 #define	KERN_PROC_UID		5	/* by effective uid */
 #define	KERN_PROC_RUID		6	/* by real uid */
 #define	KERN_PROC_ARGS		7	/* get/set arguments/proctitle */
 #define	KERN_PROC_PROC		8	/* only return procs */
 #define	KERN_PROC_SV_NAME	9	/* get syscall vector name */
 #define	KERN_PROC_RGID		10	/* by real group id */
 #define	KERN_PROC_GID		11	/* by effective group id */
 #define	KERN_PROC_PATHNAME	12	/* path to executable */
 #define	KERN_PROC_OVMMAP	13	/* Old VM map entries for process */
 #define	KERN_PROC_OFILEDESC	14	/* Old file descriptors for process */
 #define	KERN_PROC_KSTACK	15	/* Kernel stacks for process */
 #define	KERN_PROC_INC_THREAD	0x10	/*
 					 * modifier for pid, pgrp, tty,
 					 * uid, ruid, gid, rgid and proc
 					 * This effectively uses 16-31
 					 */
 #define	KERN_PROC_VMMAP		32	/* VM map entries for process */
 #define	KERN_PROC_FILEDESC	33	/* File descriptors for process */
 #define	KERN_PROC_GROUPS	34	/* process groups */
 
 /*
  * KERN_IPC identifiers
  */
 #define KIPC_MAXSOCKBUF		1	/* int: max size of a socket buffer */
 #define	KIPC_SOCKBUF_WASTE	2	/* int: wastage factor in sockbuf */
 #define	KIPC_SOMAXCONN		3	/* int: max length of connection q */
 #define	KIPC_MAX_LINKHDR	4	/* int: max length of link header */
 #define	KIPC_MAX_PROTOHDR	5	/* int: max length of network header */
 #define	KIPC_MAX_HDR		6	/* int: max total length of headers */
 #define	KIPC_MAX_DATALEN	7	/* int: max length of data? */
 
 /*
  * CTL_HW identifiers
  */
 #define	HW_MACHINE	 1		/* string: machine class */
 #define	HW_MODEL	 2		/* string: specific machine model */
 #define	HW_NCPU		 3		/* int: number of cpus */
 #define	HW_BYTEORDER	 4		/* int: machine byte order */
 #define	HW_PHYSMEM	 5		/* int: total memory */
 #define	HW_USERMEM	 6		/* int: non-kernel memory */
 #define	HW_PAGESIZE	 7		/* int: software page size */
 #define	HW_DISKNAMES	 8		/* strings: disk drive names */
 #define	HW_DISKSTATS	 9		/* struct: diskstats[] */
 #define HW_FLOATINGPT	10		/* int: has HW floating point? */
 #define HW_MACHINE_ARCH	11		/* string: machine architecture */
 #define	HW_REALMEM	12		/* int: 'real' memory */
 #define	HW_MAXID	13		/* number of valid hw ids */
 
 #define CTL_HW_NAMES { \
 	{ 0, 0 }, \
 	{ "machine", CTLTYPE_STRING }, \
 	{ "model", CTLTYPE_STRING }, \
 	{ "ncpu", CTLTYPE_INT }, \
 	{ "byteorder", CTLTYPE_INT }, \
 	{ "physmem", CTLTYPE_ULONG }, \
 	{ "usermem", CTLTYPE_ULONG }, \
 	{ "pagesize", CTLTYPE_INT }, \
 	{ "disknames", CTLTYPE_STRUCT }, \
 	{ "diskstats", CTLTYPE_STRUCT }, \
 	{ "floatingpoint", CTLTYPE_INT }, \
 	{ "machine_arch", CTLTYPE_STRING }, \
 	{ "realmem", CTLTYPE_ULONG }, \
 }
 
 /*
  * CTL_USER definitions
  */
 #define	USER_CS_PATH		 1	/* string: _CS_PATH */
 #define	USER_BC_BASE_MAX	 2	/* int: BC_BASE_MAX */
 #define	USER_BC_DIM_MAX		 3	/* int: BC_DIM_MAX */
 #define	USER_BC_SCALE_MAX	 4	/* int: BC_SCALE_MAX */
 #define	USER_BC_STRING_MAX	 5	/* int: BC_STRING_MAX */
 #define	USER_COLL_WEIGHTS_MAX	 6	/* int: COLL_WEIGHTS_MAX */
 #define	USER_EXPR_NEST_MAX	 7	/* int: EXPR_NEST_MAX */
 #define	USER_LINE_MAX		 8	/* int: LINE_MAX */
 #define	USER_RE_DUP_MAX		 9	/* int: RE_DUP_MAX */
 #define	USER_POSIX2_VERSION	10	/* int: POSIX2_VERSION */
 #define	USER_POSIX2_C_BIND	11	/* int: POSIX2_C_BIND */
 #define	USER_POSIX2_C_DEV	12	/* int: POSIX2_C_DEV */
 #define	USER_POSIX2_CHAR_TERM	13	/* int: POSIX2_CHAR_TERM */
 #define	USER_POSIX2_FORT_DEV	14	/* int: POSIX2_FORT_DEV */
 #define	USER_POSIX2_FORT_RUN	15	/* int: POSIX2_FORT_RUN */
 #define	USER_POSIX2_LOCALEDEF	16	/* int: POSIX2_LOCALEDEF */
 #define	USER_POSIX2_SW_DEV	17	/* int: POSIX2_SW_DEV */
 #define	USER_POSIX2_UPE		18	/* int: POSIX2_UPE */
 #define	USER_STREAM_MAX		19	/* int: POSIX2_STREAM_MAX */
 #define	USER_TZNAME_MAX		20	/* int: POSIX2_TZNAME_MAX */
 #define	USER_MAXID		21	/* number of valid user ids */
 
 #define	CTL_USER_NAMES { \
 	{ 0, 0 }, \
 	{ "cs_path", CTLTYPE_STRING }, \
 	{ "bc_base_max", CTLTYPE_INT }, \
 	{ "bc_dim_max", CTLTYPE_INT }, \
 	{ "bc_scale_max", CTLTYPE_INT }, \
 	{ "bc_string_max", CTLTYPE_INT }, \
 	{ "coll_weights_max", CTLTYPE_INT }, \
 	{ "expr_nest_max", CTLTYPE_INT }, \
 	{ "line_max", CTLTYPE_INT }, \
 	{ "re_dup_max", CTLTYPE_INT }, \
 	{ "posix2_version", CTLTYPE_INT }, \
 	{ "posix2_c_bind", CTLTYPE_INT }, \
 	{ "posix2_c_dev", CTLTYPE_INT }, \
 	{ "posix2_char_term", CTLTYPE_INT }, \
 	{ "posix2_fort_dev", CTLTYPE_INT }, \
 	{ "posix2_fort_run", CTLTYPE_INT }, \
 	{ "posix2_localedef", CTLTYPE_INT }, \
 	{ "posix2_sw_dev", CTLTYPE_INT }, \
 	{ "posix2_upe", CTLTYPE_INT }, \
 	{ "stream_max", CTLTYPE_INT }, \
 	{ "tzname_max", CTLTYPE_INT }, \
 }
 
 #define CTL_P1003_1B_ASYNCHRONOUS_IO		1	/* boolean */
 #define CTL_P1003_1B_MAPPED_FILES		2	/* boolean */
 #define CTL_P1003_1B_MEMLOCK			3	/* boolean */
 #define CTL_P1003_1B_MEMLOCK_RANGE		4	/* boolean */
 #define CTL_P1003_1B_MEMORY_PROTECTION		5	/* boolean */
 #define CTL_P1003_1B_MESSAGE_PASSING		6	/* boolean */
 #define CTL_P1003_1B_PRIORITIZED_IO		7	/* boolean */
 #define CTL_P1003_1B_PRIORITY_SCHEDULING	8	/* boolean */
 #define CTL_P1003_1B_REALTIME_SIGNALS		9	/* boolean */
 #define CTL_P1003_1B_SEMAPHORES			10	/* boolean */
 #define CTL_P1003_1B_FSYNC			11	/* boolean */
 #define CTL_P1003_1B_SHARED_MEMORY_OBJECTS	12	/* boolean */
 #define CTL_P1003_1B_SYNCHRONIZED_IO		13	/* boolean */
 #define CTL_P1003_1B_TIMERS			14	/* boolean */
 #define CTL_P1003_1B_AIO_LISTIO_MAX		15	/* int */
 #define CTL_P1003_1B_AIO_MAX			16	/* int */
 #define CTL_P1003_1B_AIO_PRIO_DELTA_MAX		17	/* int */
 #define CTL_P1003_1B_DELAYTIMER_MAX		18	/* int */
 #define CTL_P1003_1B_MQ_OPEN_MAX		19	/* int */
 #define CTL_P1003_1B_PAGESIZE			20	/* int */
 #define CTL_P1003_1B_RTSIG_MAX			21	/* int */
 #define CTL_P1003_1B_SEM_NSEMS_MAX		22	/* int */
 #define CTL_P1003_1B_SEM_VALUE_MAX		23	/* int */
 #define CTL_P1003_1B_SIGQUEUE_MAX		24	/* int */
 #define CTL_P1003_1B_TIMER_MAX			25	/* int */
 
 #define CTL_P1003_1B_MAXID		26
 
 #define	CTL_P1003_1B_NAMES { \
 	{ 0, 0 }, \
 	{ "asynchronous_io", CTLTYPE_INT }, \
 	{ "mapped_files", CTLTYPE_INT }, \
 	{ "memlock", CTLTYPE_INT }, \
 	{ "memlock_range", CTLTYPE_INT }, \
 	{ "memory_protection", CTLTYPE_INT }, \
 	{ "message_passing", CTLTYPE_INT }, \
 	{ "prioritized_io", CTLTYPE_INT }, \
 	{ "priority_scheduling", CTLTYPE_INT }, \
 	{ "realtime_signals", CTLTYPE_INT }, \
 	{ "semaphores", CTLTYPE_INT }, \
 	{ "fsync", CTLTYPE_INT }, \
 	{ "shared_memory_objects", CTLTYPE_INT }, \
 	{ "synchronized_io", CTLTYPE_INT }, \
 	{ "timers", CTLTYPE_INT }, \
 	{ "aio_listio_max", CTLTYPE_INT }, \
 	{ "aio_max", CTLTYPE_INT }, \
 	{ "aio_prio_delta_max", CTLTYPE_INT }, \
 	{ "delaytimer_max", CTLTYPE_INT }, \
 	{ "mq_open_max", CTLTYPE_INT }, \
 	{ "pagesize", CTLTYPE_INT }, \
 	{ "rtsig_max", CTLTYPE_INT }, \
 	{ "nsems_max", CTLTYPE_INT }, \
 	{ "sem_value_max", CTLTYPE_INT }, \
 	{ "sigqueue_max", CTLTYPE_INT }, \
 	{ "timer_max", CTLTYPE_INT }, \
 }
 
 #ifdef _KERNEL
 
 /*
  * Declare some common oids.
  */
 extern struct sysctl_oid_list sysctl__children;
 SYSCTL_DECL(_kern);
 SYSCTL_DECL(_kern_features);
 SYSCTL_DECL(_kern_ipc);
 SYSCTL_DECL(_kern_proc);
 SYSCTL_DECL(_kern_sched);
 SYSCTL_DECL(_kern_sched_stats);
 SYSCTL_DECL(_sysctl);
 SYSCTL_DECL(_vm);
 SYSCTL_DECL(_vm_stats);
 SYSCTL_DECL(_vm_stats_misc);
 SYSCTL_DECL(_vfs);
 SYSCTL_DECL(_net);
 SYSCTL_DECL(_debug);
 SYSCTL_DECL(_debug_sizeof);
 SYSCTL_DECL(_dev);
 SYSCTL_DECL(_hw);
 SYSCTL_DECL(_hw_bus);
 SYSCTL_DECL(_hw_bus_devices);
 SYSCTL_DECL(_hw_bus_info);
 SYSCTL_DECL(_machdep);
 SYSCTL_DECL(_user);
 SYSCTL_DECL(_compat);
 SYSCTL_DECL(_regression);
 SYSCTL_DECL(_security);
 SYSCTL_DECL(_security_bsd);
 
 extern char	machine[];
 extern char	osrelease[];
 extern char	ostype[];
 extern char	kern_ident[];
 
 /* Dynamic oid handling */
 struct sysctl_oid *sysctl_add_oid(struct sysctl_ctx_list *clist,
 		struct sysctl_oid_list *parent, int nbr, const char *name,
 		int kind, void *arg1, int arg2,
 		int (*handler) (SYSCTL_HANDLER_ARGS),
 		const char *fmt, const char *descr);
 void	sysctl_rename_oid(struct sysctl_oid *oidp, const char *name);
 int	sysctl_move_oid(struct sysctl_oid *oidp,
 		struct sysctl_oid_list *parent);
 int	sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse);
 int	sysctl_ctx_init(struct sysctl_ctx_list *clist);
 int	sysctl_ctx_free(struct sysctl_ctx_list *clist);
 struct	sysctl_ctx_entry *sysctl_ctx_entry_add(struct sysctl_ctx_list *clist,
 		struct sysctl_oid *oidp);
 struct	sysctl_ctx_entry *sysctl_ctx_entry_find(struct sysctl_ctx_list *clist,
 		struct sysctl_oid *oidp);
 int	sysctl_ctx_entry_del(struct sysctl_ctx_list *clist,
 		struct sysctl_oid *oidp);
 
 int	kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 		      size_t *oldlenp, void *new, size_t newlen,
 		      size_t *retval, int flags);
 int	kernel_sysctlbyname(struct thread *td, char *name,
 		void *old, size_t *oldlenp, void *new, size_t newlen,
 		size_t *retval, int flags);
 int	userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 			size_t *oldlenp, int inkernel, void *new, size_t newlen,
 			size_t *retval, int flags);
 int	sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
 			int *nindx, struct sysctl_req *req);
 void	sysctl_lock(void);
 void	sysctl_unlock(void);
 int	sysctl_wire_old_buffer(struct sysctl_req *req, size_t len);
 
 struct sbuf;
 struct sbuf	*sbuf_new_for_sysctl(struct sbuf *, char *, int,
 		    struct sysctl_req *);
 #else	/* !_KERNEL */
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
 int	sysctl(const int *, u_int, void *, size_t *, const void *, size_t);
 int	sysctlbyname(const char *, void *, size_t *, const void *, size_t);
 int	sysctlnametomib(const char *, int *, size_t *);
 __END_DECLS
 #endif	/* _KERNEL */
 
 #endif	/* !_SYS_SYSCTL_H_ */
Index: projects/binutils-2.17/sys/vm/vm_object.c
===================================================================
--- projects/binutils-2.17/sys/vm/vm_object.c	(revision 215829)
+++ projects/binutils-2.17/sys/vm/vm_object.c	(revision 215830)
@@ -1,2231 +1,2233 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	Virtual memory object module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>		/* for curproc, pageproc */
 #include <sys/socket.h>
 #include <sys/resourcevar.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/sx.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 static int old_msync;
 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
     "Use old (insecure) msync behavior");
 
 static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
 		    int pagerflags);
 static void	vm_object_qcollapse(vm_object_t object);
 static void	vm_object_vndeallocate(vm_object_t object);
 
 /*
  *	Virtual memory objects maintain the actual data
  *	associated with allocated virtual memory.  A given
  *	page of memory exists within exactly one object.
  *
  *	An object is only deallocated when all "references"
  *	are given up.  Only one "reference" to a given
  *	region of an object should be writeable.
  *
  *	Associated with each object is a list of all resident
  *	memory pages belonging to that object; this list is
  *	maintained by the "vm_page" module, and locked by the object's
  *	lock.
  *
  *	Each object also records a "pager" routine which is
  *	used to retrieve (and store) pages to the proper backing
  *	storage.  In addition, objects may be backed by other
  *	objects from which they were virtual-copied.
  *
  *	The only items within the object structure which are
  *	modified after time of creation are:
  *		reference count		locked by object's lock
  *		pager routine		locked by object's lock
  *
  */
 
 struct object_q vm_object_list;
 struct mtx vm_object_list_mtx;	/* lock for object list and count */
 
 struct vm_object kernel_object_store;
 struct vm_object kmem_object_store;
 
 SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats");
 
 static long object_collapses;
 SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
     &object_collapses, 0, "VM object collapses");
 
 static long object_bypasses;
 SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
     &object_bypasses, 0, "VM object bypasses");
 
 static uma_zone_t obj_zone;
 
 static int vm_object_zinit(void *mem, int size, int flags);
 
 #ifdef INVARIANTS
 static void vm_object_zdtor(void *mem, int size, void *arg);
 
 static void
 vm_object_zdtor(void *mem, int size, void *arg)
 {
 	vm_object_t object;
 
 	object = (vm_object_t)mem;
 	KASSERT(TAILQ_EMPTY(&object->memq),
 	    ("object %p has resident pages",
 	    object));
 #if VM_NRESERVLEVEL > 0
 	KASSERT(LIST_EMPTY(&object->rvq),
 	    ("object %p has reservations",
 	    object));
 #endif
 	KASSERT(object->cache == NULL,
 	    ("object %p has cached pages",
 	    object));
 	KASSERT(object->paging_in_progress == 0,
 	    ("object %p paging_in_progress = %d",
 	    object, object->paging_in_progress));
 	KASSERT(object->resident_page_count == 0,
 	    ("object %p resident_page_count = %d",
 	    object, object->resident_page_count));
 	KASSERT(object->shadow_count == 0,
 	    ("object %p shadow_count = %d",
 	    object, object->shadow_count));
 }
 #endif
 
 static int
 vm_object_zinit(void *mem, int size, int flags)
 {
 	vm_object_t object;
 
 	object = (vm_object_t)mem;
 	bzero(&object->mtx, sizeof(object->mtx));
 	VM_OBJECT_LOCK_INIT(object, "standard object");
 
 	/* These are true for any object that has been freed */
 	object->paging_in_progress = 0;
 	object->resident_page_count = 0;
 	object->shadow_count = 0;
 	return (0);
 }
 
 void
 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 {
 
 	TAILQ_INIT(&object->memq);
 	LIST_INIT(&object->shadow_head);
 
 	object->root = NULL;
 	object->type = type;
 	object->size = size;
 	object->generation = 1;
 	object->ref_count = 1;
 	object->memattr = VM_MEMATTR_DEFAULT;
 	object->flags = 0;
 	object->uip = NULL;
 	object->charge = 0;
 	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
 		object->flags = OBJ_ONEMAPPING;
 	object->pg_color = 0;
 	object->handle = NULL;
 	object->backing_object = NULL;
 	object->backing_object_offset = (vm_ooffset_t) 0;
 #if VM_NRESERVLEVEL > 0
 	LIST_INIT(&object->rvq);
 #endif
 	object->cache = NULL;
 
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
 	mtx_unlock(&vm_object_list_mtx);
 }
 
 /*
  *	vm_object_init:
  *
  *	Initialize the VM objects module.
  */
 void
 vm_object_init(void)
 {
 	TAILQ_INIT(&vm_object_list);
 	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
 	
 	VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
 	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
 	    kernel_object);
 #if VM_NRESERVLEVEL > 0
 	kernel_object->flags |= OBJ_COLORED;
 	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
 #endif
 
 	VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
 	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
 	    kmem_object);
 #if VM_NRESERVLEVEL > 0
 	kmem_object->flags |= OBJ_COLORED;
 	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
 #endif
 
 	/*
 	 * The lock portion of struct vm_object must be type stable due
 	 * to vm_pageout_fallback_object_lock locking a vm object
 	 * without holding any references to it.
 	 */
 	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
 #ifdef INVARIANTS
 	    vm_object_zdtor,
 #else
 	    NULL,
 #endif
 	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
 }
 
 void
 vm_object_clear_flag(vm_object_t object, u_short bits)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	object->flags &= ~bits;
 }
 
 /*
  *	Sets the default memory attribute for the specified object.  Pages
  *	that are allocated to this object are by default assigned this memory
  *	attribute.
  *
  *	Presently, this function must be called before any pages are allocated
  *	to the object.  In the future, this requirement may be relaxed for
  *	"default" and "swap" objects.
  */
 int
 vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	switch (object->type) {
 	case OBJT_DEFAULT:
 	case OBJT_DEVICE:
 	case OBJT_PHYS:
 	case OBJT_SG:
 	case OBJT_SWAP:
 	case OBJT_VNODE:
 		if (!TAILQ_EMPTY(&object->memq))
 			return (KERN_FAILURE);
 		break;
 	case OBJT_DEAD:
 		return (KERN_INVALID_ARGUMENT);
 	}
 	object->memattr = memattr;
 	return (KERN_SUCCESS);
 }
 
 void
 vm_object_pip_add(vm_object_t object, short i)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	object->paging_in_progress += i;
 }
 
 void
 vm_object_pip_subtract(vm_object_t object, short i)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	object->paging_in_progress -= i;
 }
 
 void
 vm_object_pip_wakeup(vm_object_t object)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	object->paging_in_progress--;
 	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
 		vm_object_clear_flag(object, OBJ_PIPWNT);
 		wakeup(object);
 	}
 }
 
 void
 vm_object_pip_wakeupn(vm_object_t object, short i)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	if (i)
 		object->paging_in_progress -= i;
 	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
 		vm_object_clear_flag(object, OBJ_PIPWNT);
 		wakeup(object);
 	}
 }
 
 void
 vm_object_pip_wait(vm_object_t object, char *waitid)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	while (object->paging_in_progress) {
 		object->flags |= OBJ_PIPWNT;
 		msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
 	}
 }
 
 /*
  *	vm_object_allocate:
  *
  *	Returns a new object with the given size.
  */
 vm_object_t
 vm_object_allocate(objtype_t type, vm_pindex_t size)
 {
 	vm_object_t object;
 
 	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
 	_vm_object_allocate(type, size, object);
 	return (object);
 }
 
 
 /*
  *	vm_object_reference:
  *
  *	Gets another reference to the given object.  Note: OBJ_DEAD
  *	objects can be referenced during final cleaning.
  */
 void
 vm_object_reference(vm_object_t object)
 {
 	if (object == NULL)
 		return;
 	VM_OBJECT_LOCK(object);
 	vm_object_reference_locked(object);
 	VM_OBJECT_UNLOCK(object);
 }
 
 /*
  *	vm_object_reference_locked:
  *
  *	Gets another reference to the given object.
  *
  *	The object must be locked.
  */
 void
 vm_object_reference_locked(vm_object_t object)
 {
 	struct vnode *vp;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	object->ref_count++;
 	if (object->type == OBJT_VNODE) {
 		vp = object->handle;
 		vref(vp);
 	}
 }
 
 /*
  * Handle deallocating an object of type OBJT_VNODE.
  */
 static void
 vm_object_vndeallocate(vm_object_t object)
 {
 	struct vnode *vp = (struct vnode *) object->handle;
 
 	VFS_ASSERT_GIANT(vp->v_mount);
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_VNODE,
 	    ("vm_object_vndeallocate: not a vnode object"));
 	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
 #ifdef INVARIANTS
 	if (object->ref_count == 0) {
 		vprint("vm_object_vndeallocate", vp);
 		panic("vm_object_vndeallocate: bad object reference count");
 	}
 #endif
 
 	object->ref_count--;
 	if (object->ref_count == 0) {
 		mp_fixme("Unlocked vflag access.");
 		vp->v_vflag &= ~VV_TEXT;
 	}
 	VM_OBJECT_UNLOCK(object);
 	/*
 	 * vrele may need a vop lock
 	 */
 	vrele(vp);
 }
 
 /*
  *	vm_object_deallocate:
  *
  *	Release a reference to the specified object,
  *	gained either through a vm_object_allocate
  *	or a vm_object_reference call.  When all references
  *	are gone, storage associated with this object
  *	may be relinquished.
  *
  *	No object may be locked.
  */
 void
 vm_object_deallocate(vm_object_t object)
 {
 	vm_object_t temp;
 
 	while (object != NULL) {
 		int vfslocked;
 
 		vfslocked = 0;
 	restart:
 		VM_OBJECT_LOCK(object);
 		if (object->type == OBJT_VNODE) {
 			struct vnode *vp = (struct vnode *) object->handle;
 
 			/*
 			 * Conditionally acquire Giant for a vnode-backed
 			 * object.  We have to be careful since the type of
 			 * a vnode object can change while the object is
 			 * unlocked.
 			 */
 			if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
 				vfslocked = 1;
 				if (!mtx_trylock(&Giant)) {
 					VM_OBJECT_UNLOCK(object);
 					mtx_lock(&Giant);
 					goto restart;
 				}
 			}
 			vm_object_vndeallocate(object);
 			VFS_UNLOCK_GIANT(vfslocked);
 			return;
 		} else
 			/*
 			 * This is to handle the case that the object
 			 * changed type while we dropped its lock to
 			 * obtain Giant.
 			 */
 			VFS_UNLOCK_GIANT(vfslocked);
 
 		KASSERT(object->ref_count != 0,
 			("vm_object_deallocate: object deallocated too many times: %d", object->type));
 
 		/*
 		 * If the reference count goes to 0 we start calling
 		 * vm_object_terminate() on the object chain.
 		 * A ref count of 1 may be a special case depending on the
 		 * shadow count being 0 or 1.
 		 */
 		object->ref_count--;
 		if (object->ref_count > 1) {
 			VM_OBJECT_UNLOCK(object);
 			return;
 		} else if (object->ref_count == 1) {
 			if (object->shadow_count == 0 &&
 			    object->handle == NULL &&
 			    (object->type == OBJT_DEFAULT ||
 			     object->type == OBJT_SWAP)) {
 				vm_object_set_flag(object, OBJ_ONEMAPPING);
 			} else if ((object->shadow_count == 1) &&
 			    (object->handle == NULL) &&
 			    (object->type == OBJT_DEFAULT ||
 			     object->type == OBJT_SWAP)) {
 				vm_object_t robject;
 
 				robject = LIST_FIRST(&object->shadow_head);
 				KASSERT(robject != NULL,
 				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
 					 object->ref_count,
 					 object->shadow_count));
 				if (!VM_OBJECT_TRYLOCK(robject)) {
 					/*
 					 * Avoid a potential deadlock.
 					 */
 					object->ref_count++;
 					VM_OBJECT_UNLOCK(object);
 					/*
 					 * More likely than not the thread
 					 * holding robject's lock has lower
 					 * priority than the current thread.
 					 * Let the lower priority thread run.
 					 */
 					pause("vmo_de", 1);
 					continue;
 				}
 				/*
 				 * Collapse object into its shadow unless its
 				 * shadow is dead.  In that case, object will
 				 * be deallocated by the thread that is
 				 * deallocating its shadow.
 				 */
 				if ((robject->flags & OBJ_DEAD) == 0 &&
 				    (robject->handle == NULL) &&
 				    (robject->type == OBJT_DEFAULT ||
 				     robject->type == OBJT_SWAP)) {
 
 					robject->ref_count++;
 retry:
 					if (robject->paging_in_progress) {
 						VM_OBJECT_UNLOCK(object);
 						vm_object_pip_wait(robject,
 						    "objde1");
 						temp = robject->backing_object;
 						if (object == temp) {
 							VM_OBJECT_LOCK(object);
 							goto retry;
 						}
 					} else if (object->paging_in_progress) {
 						VM_OBJECT_UNLOCK(robject);
 						object->flags |= OBJ_PIPWNT;
 						msleep(object,
 						    VM_OBJECT_MTX(object),
 						    PDROP | PVM, "objde2", 0);
 						VM_OBJECT_LOCK(robject);
 						temp = robject->backing_object;
 						if (object == temp) {
 							VM_OBJECT_LOCK(object);
 							goto retry;
 						}
 					} else
 						VM_OBJECT_UNLOCK(object);
 
 					if (robject->ref_count == 1) {
 						robject->ref_count--;
 						object = robject;
 						goto doterm;
 					}
 					object = robject;
 					vm_object_collapse(object);
 					VM_OBJECT_UNLOCK(object);
 					continue;
 				}
 				VM_OBJECT_UNLOCK(robject);
 			}
 			VM_OBJECT_UNLOCK(object);
 			return;
 		}
 doterm:
 		temp = object->backing_object;
 		if (temp != NULL) {
 			VM_OBJECT_LOCK(temp);
 			LIST_REMOVE(object, shadow_list);
 			temp->shadow_count--;
 			VM_OBJECT_UNLOCK(temp);
 			object->backing_object = NULL;
 		}
 		/*
 		 * Don't double-terminate, we could be in a termination
 		 * recursion due to the terminate having to sync data
 		 * to disk.
 		 */
 		if ((object->flags & OBJ_DEAD) == 0)
 			vm_object_terminate(object);
 		else
 			VM_OBJECT_UNLOCK(object);
 		object = temp;
 	}
 }
 
 /*
  *	vm_object_destroy removes the object from the global object list
  *      and frees the space for the object.
  */
 void
 vm_object_destroy(vm_object_t object)
 {
 
 	/*
 	 * Remove the object from the global object list.
 	 */
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_REMOVE(&vm_object_list, object, object_list);
 	mtx_unlock(&vm_object_list_mtx);
 
 	/*
 	 * Release the allocation charge.
 	 */
 	if (object->uip != NULL) {
 		KASSERT(object->type == OBJT_DEFAULT ||
 		    object->type == OBJT_SWAP,
 		    ("vm_object_terminate: non-swap obj %p has uip",
 		     object));
 		swap_release_by_uid(object->charge, object->uip);
 		object->charge = 0;
 		uifree(object->uip);
 		object->uip = NULL;
 	}
 
 	/*
 	 * Free the space for the object.
 	 */
 	uma_zfree(obj_zone, object);
 }
 
 /*
  *	vm_object_terminate actually destroys the specified object, freeing
  *	up all previously used resources.
  *
  *	The object must be locked.
  *	This routine may block.
  */
 void
 vm_object_terminate(vm_object_t object)
 {
 	vm_page_t p, p_next;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 
 	/*
 	 * Make sure no one uses us.
 	 */
 	vm_object_set_flag(object, OBJ_DEAD);
 
 	/*
 	 * wait for the pageout daemon to be done with the object
 	 */
 	vm_object_pip_wait(object, "objtrm");
 
 	KASSERT(!object->paging_in_progress,
 		("vm_object_terminate: pageout in progress"));
 
 	/*
 	 * Clean and free the pages, as appropriate. All references to the
 	 * object are gone, so we don't need to lock it.
 	 */
 	if (object->type == OBJT_VNODE) {
 		struct vnode *vp = (struct vnode *)object->handle;
 
 		/*
 		 * Clean pages and flush buffers.
 		 */
 		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
 		VM_OBJECT_UNLOCK(object);
 
 		vinvalbuf(vp, V_SAVE, 0, 0);
 
 		VM_OBJECT_LOCK(object);
 	}
 
 	KASSERT(object->ref_count == 0, 
 		("vm_object_terminate: object with references, ref_count=%d",
 		object->ref_count));
 
 	/*
 	 * Free any remaining pageable pages.  This also removes them from the
 	 * paging queues.  However, don't free wired pages, just remove them
 	 * from the object.  Rather than incrementally removing each page from
 	 * the object, the page and object are reset to any empty state. 
 	 */
 	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
 		KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
 		    ("vm_object_terminate: freeing busy page %p", p));
 		vm_page_lock(p);
 		/*
 		 * Optimize the page's removal from the object by resetting
 		 * its "object" field.  Specifically, if the page is not
 		 * wired, then the effect of this assignment is that
 		 * vm_page_free()'s call to vm_page_remove() will return
 		 * immediately without modifying the page or the object.
 		 */ 
 		p->object = NULL;
 		if (p->wire_count == 0) {
 			vm_page_free(p);
 			PCPU_INC(cnt.v_pfree);
 		}
 		vm_page_unlock(p);
 	}
 	/*
 	 * If the object contained any pages, then reset it to an empty state.
 	 * None of the object's fields, including "resident_page_count", were
 	 * modified by the preceding loop.
 	 */
 	if (object->resident_page_count != 0) {
 		object->root = NULL;
 		TAILQ_INIT(&object->memq);
 		object->resident_page_count = 0;
 		if (object->type == OBJT_VNODE)
 			vdrop(object->handle);
 	}
 
 #if VM_NRESERVLEVEL > 0
 	if (__predict_false(!LIST_EMPTY(&object->rvq)))
 		vm_reserv_break_all(object);
 #endif
 	if (__predict_false(object->cache != NULL))
 		vm_page_cache_free(object, 0, 0);
 
 	/*
 	 * Let the pager know object is dead.
 	 */
 	vm_pager_deallocate(object);
 	VM_OBJECT_UNLOCK(object);
 
 	vm_object_destroy(object);
 }
 
 /*
  *	vm_object_page_clean
  *
  *	Clean all dirty pages in the specified range of object.  Leaves page 
  * 	on whatever queue it is currently on.   If NOSYNC is set then do not
  *	write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
  *	leaving the object dirty.
  *
  *	When stuffing pages asynchronously, allow clustering.  XXX we need a
  *	synchronous clustering mode implementation.
  *
  *	Odd semantics: if start == end, we clean everything.
  *
  *	The object must be locked.
  */
 void
 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
     int flags)
 {
 	vm_page_t np, p;
 	vm_pindex_t pi, tend;
 	int clearobjflags, curgeneration, n, pagerflags;
 
 	mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_VNODE, ("Not a vnode object"));
 	if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 ||
 	    object->resident_page_count == 0)
 		return;
 
 	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
 	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
 	pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;
 
 	tend = (end == 0) ? object->size : end;
 
 	vm_object_set_flag(object, OBJ_CLEANING);
 
 	/*
 	 * Make the page read-only so we can then clear the object flags.
 	 *
 	 * However, if this is a nosync mmap then the object is likely to 
 	 * stay dirty so do not mess with the page and do not clear the
 	 * object flags.
 	 */
 	clearobjflags = 1;
 	for (p = vm_page_find_least(object, start);
 	    p != NULL && p->pindex < tend; p = TAILQ_NEXT(p, listq)) {
 		if ((flags & OBJPC_NOSYNC) != 0 &&
 		    (p->oflags & VPO_NOSYNC) != 0)
 			clearobjflags = 0;
 		else
 			pmap_remove_write(p);
 	}
 
 	if (clearobjflags && (start == 0) && (tend == object->size))
 		vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
 
 rescan:
 	curgeneration = object->generation;
 
 	for (p = vm_page_find_least(object, start); p != NULL; p = np) {
 		pi = p->pindex;
 		if (pi >= tend)
 			break;
 		np = TAILQ_NEXT(p, listq);
 		if (p->valid == 0)
 			continue;
-		while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
+		if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
 			if (object->generation != curgeneration)
 				goto rescan;
+			np = vm_page_find_least(object, pi);
+			continue;
 		}
 		vm_page_test_dirty(p);
 		if (p->dirty == 0)
 			continue;
 
 		/*
 		 * If we have been asked to skip nosync pages and this is a
 		 * nosync page, skip it.  Note that the object flags were
 		 * not cleared in this case so we do not have to set them.
 		 */
 		if ((flags & OBJPC_NOSYNC) != 0 &&
 		    (p->oflags & VPO_NOSYNC) != 0)
 			continue;
 
 		n = vm_object_page_collect_flush(object, p, pagerflags);
 		if (object->generation != curgeneration)
 			goto rescan;
 		np = vm_page_find_least(object, pi + n);
 	}
 #if 0
 	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
 #endif
 
 	vm_object_clear_flag(object, OBJ_CLEANING);
 }
 
 /*
  * Build a run of pages around "p" and hand it to vm_pageout_flush().
  *
  * Starting from "p", the run is first extended forward and then backward,
  * one page at a time, for as long as the neighbouring page exists, is not
  * busy, and is dirty.  The run never exceeds vm_pageout_page_count pages.
  * The position of "p" inside the run (the number of pages that were
  * prepended in front of it) is passed to vm_pageout_flush() together with
  * the run itself.
  *
  * The object lock must be held; the page queue mutex and the lock of "p"
  * must not be held (all three are asserted below).
  *
  * Returns the value that vm_pageout_flush() stores through its last
  * argument.  NOTE(review): presumably the number of pages written starting
  * at "p" -- the caller advances its scan by this amount; confirm against
  * vm_pageout_flush().
  */
 static int
 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags)
 {
 	vm_page_t cluster[vm_pageout_page_count], head, cur;
 	int before, idx, npages, written;
 
 	mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
 	vm_page_lock_assert(p, MA_NOTOWNED);
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 
 	npages = 1;		/* "p" itself */
 	before = 0;		/* pages placed in front of "p" */
 
 	/* Extend the run forward from "p". */
 	cur = p;
 	while (npages < vm_pageout_page_count) {
 		cur = vm_page_next(cur);
 		if (cur == NULL)
 			break;
 		if (cur->busy != 0 || (cur->oflags & VPO_BUSY) != 0)
 			break;
 		vm_page_test_dirty(cur);
 		if (cur->dirty == 0)
 			break;
 		npages++;
 	}
 
 	/* Use whatever room is left to extend the run backward. */
 	head = p;
 	while (npages < vm_pageout_page_count) {
 		cur = vm_page_prev(head);
 		if (cur == NULL)
 			break;
 		if (cur->busy != 0 || (cur->oflags & VPO_BUSY) != 0)
 			break;
 		vm_page_test_dirty(cur);
 		if (cur->dirty == 0)
 			break;
 		head = cur;
 		before++;
 		npages++;
 	}
 
 	/* Lay the run out in list order, starting at its first page. */
 	cur = head;
 	for (idx = 0; idx < npages; idx++) {
 		cluster[idx] = cur;
 		cur = TAILQ_NEXT(cur, listq);
 	}
 
 	vm_pageout_flush(cluster, npages, pagerflags, before, &written);
 	return (written);
 }
 
 /*
  *	vm_object_sync:
  *
  * Note that there is absolutely no sense in writing out
  * anonymous objects, so we track down the vnode object
  * to write out.
  * We invalidate (remove) all pages from the address space
  * for semantic correctness.
  *
  * Note: certain anonymous maps, such as MAP_NOSYNC maps,
  * may start out with a NULL object.
  *
  *	object		Top-level object; NULL is accepted and ignored.
  *			It is locked and unlocked internally.
  *	offset, size	Byte range within "object"; translated to the
  *			bottom-most backing object before use.
  *	syncio		If set, the flush is requested with OBJPC_SYNC.
  *	invalidate	If set, the flush uses OBJPC_SYNC | OBJPC_INVAL and
  *			the range is afterwards passed to
  *			vm_object_page_remove() for vnode and device objects.
  */
 void
 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
     boolean_t syncio, boolean_t invalidate)
 {
 	vm_object_t backing_object;
 	struct vnode *vp;
 	struct mount *mp;
 	int flags;
 
 	if (object == NULL)
 		return;
 	VM_OBJECT_LOCK(object);
 	/*
 	 * Descend to the bottom of the shadow chain.  The lock is handed
 	 * down (backing object locked before the current one is released),
 	 * the offset is rebased at each level, and the size is clamped so
 	 * that the range does not extend past the end of the backing object.
 	 * From here on "object" names the bottom-most object.
 	 */
 	while ((backing_object = object->backing_object) != NULL) {
 		VM_OBJECT_LOCK(backing_object);
 		offset += object->backing_object_offset;
 		VM_OBJECT_UNLOCK(object);
 		object = backing_object;
 		if (object->size < OFF_TO_IDX(offset + size))
 			size = IDX_TO_OFF(object->size) - offset;
 	}
 	/*
 	 * Flush pages if writing is allowed, invalidate them
 	 * if invalidation requested.  Pages undergoing I/O
 	 * will be ignored by vm_object_page_remove().
 	 *
 	 * We cannot lock the vnode and then wait for paging
 	 * to complete without deadlocking against vm_fault.
 	 * Instead we simply call vm_object_page_remove() and
 	 * allow it to block internally on a page-by-page
 	 * basis when it encounters pages undergoing async
 	 * I/O.
 	 */
 	if (object->type == OBJT_VNODE &&
 	    (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
 		int vfslocked;
 		vp = object->handle;
 		/*
 		 * The object lock is dropped while the write is started and
 		 * the vnode is locked, and is only retaken for the duration
 		 * of vm_object_page_clean().
 		 */
 		VM_OBJECT_UNLOCK(object);
 		(void) vn_start_write(vp, &mp, V_WAIT);
 		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 		flags |= invalidate ? OBJPC_INVAL : 0;
 		VM_OBJECT_LOCK(object);
 		/* The end index is rounded up to cover a partial last page. */
 		vm_object_page_clean(object,
 		    OFF_TO_IDX(offset),
 		    OFF_TO_IDX(offset + size + PAGE_MASK),
 		    flags);
 		VM_OBJECT_UNLOCK(object);
 		VOP_UNLOCK(vp, 0);
 		VFS_UNLOCK_GIANT(vfslocked);
 		vn_finished_write(mp);
 		VM_OBJECT_LOCK(object);
 	}
 	if ((object->type == OBJT_VNODE ||
 	     object->type == OBJT_DEVICE) && invalidate) {
 		boolean_t purge;
 		/*
 		 * With old_msync set, or for device objects, every page in
 		 * the range is removed (clean_only == FALSE); otherwise only
 		 * the clean pages are.
 		 */
 		purge = old_msync || (object->type == OBJT_DEVICE);
 		vm_object_page_remove(object,
 		    OFF_TO_IDX(offset),
 		    OFF_TO_IDX(offset + size + PAGE_MASK),
 		    purge ? FALSE : TRUE);
 	}
 	VM_OBJECT_UNLOCK(object);
 }
 
 /*
  *	vm_object_madvise:
  *
  *	Implements the madvise function at the object/page level.
  *
  *	MADV_WILLNEED	(any object)
  *
  *	    Activate the specified pages if they are resident.
  *
  *	MADV_DONTNEED	(any object)
  *
  *	    Deactivate the specified pages if they are resident.
  *
  *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
  *			 OBJ_ONEMAPPING only)
  *
  *	    Deactivate and clean the specified pages if they are
  *	    resident.  This permits the process to reuse the pages
  *	    without faulting or the kernel to reclaim the pages
  *	    without I/O.
  *
  *	The advice is applied to the "count" page indices starting at
  *	"pindex".  A NULL object is accepted and ignored.  The object is
  *	locked and unlocked internally.  For each index the shadow chain is
  *	searched downward until a page is found or the chain ends.
  */
 void
 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
 {
 	vm_pindex_t end, tpindex;
 	vm_object_t backing_object, tobject;
 	vm_page_t m;
 
 	if (object == NULL)
 		return;
 	VM_OBJECT_LOCK(object);
 	end = pindex + count;
 	/*
 	 * Locate and adjust resident pages
 	 */
 	for (; pindex < end; pindex += 1) {
 relookup:
 		/*
 		 * (Re)start the search for this index at the top-level
 		 * object.  "tobject"/"tpindex" track the object currently
 		 * being examined and the index translated into it.
 		 */
 		tobject = object;
 		tpindex = pindex;
 shadowlookup:
 		/*
 		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
 		 * and those pages must be OBJ_ONEMAPPING.
 		 */
 		if (advise == MADV_FREE) {
 			if ((tobject->type != OBJT_DEFAULT &&
 			     tobject->type != OBJT_SWAP) ||
 			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
 				goto unlock_tobject;
 			}
 		} else if (tobject->type == OBJT_PHYS)
 			goto unlock_tobject;
 		m = vm_page_lookup(tobject, tpindex);
 		if (m == NULL && advise == MADV_WILLNEED) {
 			/*
 			 * If the page is cached, reactivate it.
 			 */
 			m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
 			    VM_ALLOC_NOBUSY);
 		}
 		if (m == NULL) {
 			/*
 			 * There may be swap even if there is no backing page
 			 */
 			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
 				swap_pager_freespace(tobject, tpindex, 1);
 			/*
 			 * next object
 			 *
 			 * The backing object is locked before the current
 			 * one is released; the top-level object stays locked
 			 * for the whole loop.
 			 */
 			backing_object = tobject->backing_object;
 			if (backing_object == NULL)
 				goto unlock_tobject;
 			VM_OBJECT_LOCK(backing_object);
 			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
 			if (tobject != object)
 				VM_OBJECT_UNLOCK(tobject);
 			tobject = backing_object;
 			goto shadowlookup;
 		} else if (m->valid != VM_PAGE_BITS_ALL)
 			goto unlock_tobject;
 		/*
 		 * If the page is not in a normal state, skip it.
 		 */
 		vm_page_lock(m);
 		if (m->hold_count != 0 || m->wire_count != 0) {
 			vm_page_unlock(m);
 			goto unlock_tobject;
 		}
 		KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 		    ("vm_object_madvise: page %p is not managed", m));
 		if ((m->oflags & VPO_BUSY) || m->busy) {
 			if (advise == MADV_WILLNEED) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it. 
 				 */
 				vm_page_lock_queues();
 				vm_page_flag_set(m, PG_REFERENCED);
 				vm_page_unlock_queues();
 			}
 			vm_page_unlock(m);
 			/*
 			 * Sleep on the busy page.  msleep() releases
 			 * tobject's lock (PDROP), so when tobject is a
 			 * backing object the top-level lock is released
 			 * separately first.  Afterwards only the top-level
 			 * object is relocked and the same index is retried
 			 * from the top of the chain.
 			 */
 			if (object != tobject)
 				VM_OBJECT_UNLOCK(object);
 			m->oflags |= VPO_WANTED;
 			msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo",
 			    0);
 			VM_OBJECT_LOCK(object);
   			goto relookup;
 		}
 		if (advise == MADV_WILLNEED) {
 			vm_page_activate(m);
 		} else if (advise == MADV_DONTNEED) {
 			vm_page_dontneed(m);
 		} else if (advise == MADV_FREE) {
 			/*
 			 * Mark the page clean.  This will allow the page
 			 * to be freed up by the system.  However, such pages
 			 * are often reused quickly by malloc()/free()
 			 * so we do not do anything that would cause
 			 * a page fault if we can help it.
 			 *
 			 * Specifically, we do not try to actually free
 			 * the page now nor do we try to put it in the
 			 * cache (which would cause a page fault on reuse).
 			 *
 			 * But we do make the page as freeable as we
 			 * can without actually taking the step of unmapping
 			 * it.
 			 */
 			pmap_clear_modify(m);
 			m->dirty = 0;
 			m->act_count = 0;
 			vm_page_dontneed(m);
 		}
 		vm_page_unlock(m);
 		if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
 			swap_pager_freespace(tobject, tpindex, 1);
 unlock_tobject:
 		/* Release the backing object, if one was descended into. */
 		if (tobject != object)
 			VM_OBJECT_UNLOCK(tobject);
 	}	
 	VM_OBJECT_UNLOCK(object);
 }
 
 /*
  *	vm_object_shadow:
  *
  *	Place a newly allocated OBJT_DEFAULT object of the given length
  *	in front of the object referenced by *object, so that the new
  *	object is backed by the old one starting at *offset.
  *
  *	On return *object names the new object and *offset is zero.
  *	Nothing is done (and both remain unchanged) when the existing
  *	object is an unnamed OBJT_DEFAULT/OBJT_SWAP object with exactly
  *	one reference.  *object may be NULL, in which case the new
  *	object simply has no backing object.
  *
  *	This function does not itself adjust the reference count of the
  *	old object: the reference formerly held through *object becomes
  *	the new object's reference to its backing object.
  */
 void
 vm_object_shadow(
 	vm_object_t *object,	/* IN/OUT */
 	vm_ooffset_t *offset,	/* IN/OUT */
 	vm_size_t length)
 {
 	vm_object_t backing, shadow;
 	int unshared;
 
 	backing = *object;
 
 	/*
 	 * An anonymous object with a single reference has no other user,
 	 * so there is nothing to be gained from shadowing it.
 	 */
 	if (backing != NULL) {
 		VM_OBJECT_LOCK(backing);
 		unshared = backing->ref_count == 1 &&
 		    backing->handle == NULL &&
 		    (backing->type == OBJT_DEFAULT ||
 		    backing->type == OBJT_SWAP);
 		VM_OBJECT_UNLOCK(backing);
 		if (unshared)
 			return;
 	}
 
 	shadow = vm_object_allocate(OBJT_DEFAULT, length);
 
 	/*
 	 * Chain the new object onto the old one and remember where in the
 	 * old object the shadowed range begins.
 	 */
 	shadow->backing_object = backing;
 	shadow->backing_object_offset = *offset;
 	if (backing != NULL) {
 		VM_OBJECT_LOCK(backing);
 		LIST_INSERT_HEAD(&backing->shadow_head, shadow, shadow_list);
 		backing->shadow_count++;
 #if VM_NRESERVLEVEL > 0
 		/*
 		 * Carry the page colour over, shifted by the offset, to
 		 * keep colouring consistent across the combined object.
 		 */
 		shadow->flags |= backing->flags & OBJ_COLORED;
 		shadow->pg_color = (backing->pg_color + OFF_TO_IDX(*offset)) &
 		    ((1 << (VM_NFREEORDER - 1)) - 1);
 #endif
 		VM_OBJECT_UNLOCK(backing);
 	}
 
 	/* Hand the new object back; it is addressed from offset zero. */
 	*offset = 0;
 	*object = shadow;
 }
 
 /*
  *	vm_object_split:
  *
  * Split the pages in a map entry into a new object.  This affords
  * easier removal of unused pages, and keeps object inheritance from
  * being a negative impact on memory usage.
  *
  * Only OBJT_DEFAULT and OBJT_SWAP objects with more than one reference
  * are split; otherwise the function returns without touching anything.
  *
  * Locking: the entry's object is unlocked early in this function, so it
  * is expected to be locked on entry (NOTE(review): confirm against the
  * callers).  On return the object that the entry refers to is locked:
  * the original object on the early-return and OBJ_DEAD paths, the new
  * object after a successful split.
  */
 void
 vm_object_split(vm_map_entry_t entry)
 {
 	vm_page_t m, m_next;
 	vm_object_t orig_object, new_object, source;
 	vm_pindex_t idx, offidxstart;
 	vm_size_t size;
 
 	orig_object = entry->object.vm_object;
 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 		return;
 	if (orig_object->ref_count <= 1)
 		return;
 	VM_OBJECT_UNLOCK(orig_object);
 
 	/* Page index of the entry's start in orig_object, and its length. */
 	offidxstart = OFF_TO_IDX(entry->offset);
 	size = atop(entry->end - entry->start);
 
 	/*
 	 * If swap_pager_copy() is later called, it will convert new_object
 	 * into a swap object.
 	 */
 	new_object = vm_object_allocate(OBJT_DEFAULT, size);
 
 	/*
 	 * At this point, the new object is still private, so the order in
 	 * which the original and new objects are locked does not matter.
 	 */
 	VM_OBJECT_LOCK(new_object);
 	VM_OBJECT_LOCK(orig_object);
 	source = orig_object->backing_object;
 	if (source != NULL) {
 		VM_OBJECT_LOCK(source);
 		if ((source->flags & OBJ_DEAD) != 0) {
 			/*
 			 * The backing object is being torn down: abandon the
 			 * split, discard the new object, and return with
 			 * orig_object locked again.
 			 */
 			VM_OBJECT_UNLOCK(source);
 			VM_OBJECT_UNLOCK(orig_object);
 			VM_OBJECT_UNLOCK(new_object);
 			vm_object_deallocate(new_object);
 			VM_OBJECT_LOCK(orig_object);
 			return;
 		}
 		/* Make new_object an additional shadow of "source". */
 		LIST_INSERT_HEAD(&source->shadow_head,
 				  new_object, shadow_list);
 		source->shadow_count++;
 		vm_object_reference_locked(source);	/* for new_object */
 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
 		VM_OBJECT_UNLOCK(source);
 		new_object->backing_object_offset = 
 			orig_object->backing_object_offset + entry->offset;
 		new_object->backing_object = source;
 	}
 	if (orig_object->uip != NULL) {
 		/*
 		 * Move the charge for "size" pages from orig_object to
 		 * new_object, which takes its own hold on the same uip.
 		 */
 		new_object->uip = orig_object->uip;
 		uihold(orig_object->uip);
 		new_object->charge = ptoa(size);
 		KASSERT(orig_object->charge >= ptoa(size),
 		    ("orig_object->charge < 0"));
 		orig_object->charge -= ptoa(size);
 	}
 retry:
 	m = vm_page_find_least(orig_object, offidxstart);
 	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
 	    m = m_next) {
 		m_next = TAILQ_NEXT(m, listq);
 
 		/*
 		 * We must wait for pending I/O to complete before we can
 		 * rename the page.
 		 *
 		 * We do not have to VM_PROT_NONE the page as mappings should
 		 * not be changed by this operation.
 		 */
 		if ((m->oflags & VPO_BUSY) || m->busy) {
 			/*
 			 * new_object's lock is released across the sleep;
 			 * msleep() is called without PDROP, so orig_object's
 			 * lock is held again when it returns.  The scan is
 			 * then restarted from the beginning of the range.
 			 */
 			VM_OBJECT_UNLOCK(new_object);
 			m->oflags |= VPO_WANTED;
 			msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0);
 			VM_OBJECT_LOCK(new_object);
 			goto retry;
 		}
 		vm_page_lock(m);
 		vm_page_rename(m, new_object, idx);
 		vm_page_unlock(m);
 		/* page automatically made dirty by rename and cache handled */
 		/*
 		 * Moved pages are left busy until the whole transfer is done;
 		 * they are woken up below.
 		 */
 		vm_page_busy(m);
 	}
 	if (orig_object->type == OBJT_SWAP) {
 		/*
 		 * swap_pager_copy() can sleep, in which case the orig_object's
 		 * and new_object's locks are released and reacquired. 
 		 */
 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
 
 		/*
 		 * Transfer any cached pages from orig_object to new_object.
 		 */
 		if (__predict_false(orig_object->cache != NULL))
 			vm_page_cache_transfer(orig_object, offidxstart,
 			    new_object);
 	}
 	VM_OBJECT_UNLOCK(orig_object);
 	TAILQ_FOREACH(m, &new_object->memq, listq)
 		vm_page_wakeup(m);
 	VM_OBJECT_UNLOCK(new_object);
 	/*
 	 * Point the entry at the new object and drop the entry's reference
 	 * on the original.  Both objects are unlocked around
 	 * vm_object_deallocate(); new_object is locked again for the caller.
 	 */
 	entry->object.vm_object = new_object;
 	entry->offset = 0LL;
 	vm_object_deallocate(orig_object);
 	VM_OBJECT_LOCK(new_object);
 }
 
 /*
  * Operation flags for vm_object_backing_scan().
  */
 #define	OBSC_TEST_ALL_SHADOWED	0x0001	/* test only: is every page shadowed? */
 #define	OBSC_COLLAPSE_NOWAIT	0x0002	/* collapse, skipping busy/invalid pages */
 #define	OBSC_COLLAPSE_WAIT	0x0004	/* collapse, sleeping on busy pages */
 
 /*
  * Walk the resident pages of object's backing object and, depending on
  * "op", either test whether the parent shadows all of them or move/free
  * them as part of a collapse.
  *
  * Both the object and its backing object must be locked.  With
  * OBSC_COLLAPSE_WAIT both locks may be dropped and retaken while sleeping
  * on a busy page.
  *
  * Returns 0 when OBSC_TEST_ALL_SHADOWED was requested and the test failed
  * (the backing object is not OBJT_DEFAULT, or some page in range is
  * neither validly resident in the parent nor held by the parent's pager);
  * returns 1 otherwise.
  */
 static int
 vm_object_backing_scan(vm_object_t object, int op)
 {
 	int r = 1;
 	vm_page_t p;
 	vm_object_t backing_object;
 	vm_pindex_t backing_offset_index;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
 
 	backing_object = object->backing_object;
 	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
 
 	/*
 	 * Initial conditions
 	 */
 	if (op & OBSC_TEST_ALL_SHADOWED) {
 		/*
 		 * We do not want to have to test for the existence of cache
 		 * or swap pages in the backing object.  XXX but with the
 		 * new swapper this would be pretty easy to do.
 		 *
 		 * XXX what about anonymous MAP_SHARED memory that hasn't
 		 * been ZFOD faulted yet?  If we do not test for this, the
 		 * shadow test may succeed! XXX
 		 */
 		if (backing_object->type != OBJT_DEFAULT) {
 			return (0);
 		}
 	}
 	if (op & OBSC_COLLAPSE_WAIT) {
 		vm_object_set_flag(backing_object, OBJ_DEAD);
 	}
 
 	/*
 	 * Our scan
 	 */
 	p = TAILQ_FIRST(&backing_object->memq);
 	while (p) {
 		/*
 		 * "next" is captured up front because "p" may be freed or
 		 * renamed below.  "new_pindex" is p's index as seen from the
 		 * parent; it is only meaningful once the range checks below
 		 * have passed.
 		 */
 		vm_page_t next = TAILQ_NEXT(p, listq);
 		vm_pindex_t new_pindex = p->pindex - backing_offset_index;
 
 		if (op & OBSC_TEST_ALL_SHADOWED) {
 			vm_page_t pp;
 
 			/*
 			 * Ignore pages outside the parent object's range
 			 * and outside the parent object's mapping of the 
 			 * backing object.
 			 *
 			 * note that we do not busy the backing object's
 			 * page.
 			 */
 			if (
 			    p->pindex < backing_offset_index ||
 			    new_pindex >= object->size
 			) {
 				p = next;
 				continue;
 			}
 
 			/*
 			 * See if the parent has the page or if the parent's
 			 * object pager has the page.  If the parent has the
 			 * page but the page is not valid, the parent's
 			 * object pager must have the page.
 			 *
 			 * If this fails, the parent does not completely shadow
 			 * the object and we might as well give up now.
 			 */
 
 			pp = vm_page_lookup(object, new_pindex);
 			if (
 			    (pp == NULL || pp->valid == 0) &&
 			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
 			) {
 				r = 0;
 				break;
 			}
 		}
 
 		/*
 		 * Check for busy page
 		 */
 		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
 			vm_page_t pp;
 
 			if (op & OBSC_COLLAPSE_NOWAIT) {
 				/* Never sleep: just skip pages we cannot move. */
 				if ((p->oflags & VPO_BUSY) ||
 				    !p->valid || 
 				    p->busy) {
 					p = next;
 					continue;
 				}
 			} else if (op & OBSC_COLLAPSE_WAIT) {
 				if ((p->oflags & VPO_BUSY) || p->busy) {
 					/*
 					 * Drop the parent's lock, sleep with
 					 * the backing object's lock released
 					 * by msleep() (PDROP), then retake
 					 * both, parent first.
 					 */
 					VM_OBJECT_UNLOCK(object);
 					p->oflags |= VPO_WANTED;
 					msleep(p, VM_OBJECT_MTX(backing_object),
 					    PDROP | PVM, "vmocol", 0);
 					VM_OBJECT_LOCK(object);
 					VM_OBJECT_LOCK(backing_object);
 					/*
 					 * If we slept, anything could have
 					 * happened.  Since the object is
 					 * marked dead, the backing offset
 					 * should not have changed so we
 					 * just restart our scan.
 					 */
 					p = TAILQ_FIRST(&backing_object->memq);
 					continue;
 				}
 			}
 
 			KASSERT(
 			    p->object == backing_object,
 			    ("vm_object_backing_scan: object mismatch")
 			);
 
 			/*
 			 * Destroy any associated swap
 			 */
 			if (backing_object->type == OBJT_SWAP) {
 				swap_pager_freespace(
 				    backing_object, 
 				    p->pindex,
 				    1
 				);
 			}
 
 			if (
 			    p->pindex < backing_offset_index ||
 			    new_pindex >= object->size
 			) {
 				/*
 				 * Page is out of the parent object's range, we 
 				 * can simply destroy it. 
 				 */
 				vm_page_lock(p);
 				KASSERT(!pmap_page_is_mapped(p),
 				    ("freeing mapped page %p", p));
 				/* A wired page is only detached, not freed. */
 				if (p->wire_count == 0)
 					vm_page_free(p);
 				else
 					vm_page_remove(p);
 				vm_page_unlock(p);
 				p = next;
 				continue;
 			}
 
 			pp = vm_page_lookup(object, new_pindex);
 			if (
 			    pp != NULL ||
 			    vm_pager_has_page(object, new_pindex, NULL, NULL)
 			) {
 				/*
 				 * page already exists in parent OR swap exists
 				 * for this location in the parent.  Destroy 
 				 * the original page from the backing object.
 				 *
 				 * Leave the parent's page alone
 				 */
 				vm_page_lock(p);
 				KASSERT(!pmap_page_is_mapped(p),
 				    ("freeing mapped page %p", p));
 				if (p->wire_count == 0)
 					vm_page_free(p);
 				else
 					vm_page_remove(p);
 				vm_page_unlock(p);
 				p = next;
 				continue;
 			}
 
 #if VM_NRESERVLEVEL > 0
 			/*
 			 * Rename the reservation.
 			 */
 			vm_reserv_rename(p, object, backing_object,
 			    backing_offset_index);
 #endif
 
 			/*
 			 * Page does not exist in parent, rename the
 			 * page from the backing object to the main object. 
 			 *
 			 * If the page was mapped to a process, it can remain 
 			 * mapped through the rename.
 			 */
 			vm_page_lock(p);
 			vm_page_rename(p, object, new_pindex);
 			vm_page_unlock(p);
 			/* page automatically made dirty by rename */
 		}
 		p = next;
 	}
 	return (r);
 }
 
 
 /*
  * Quick collapse: a partial variant of vm_object_collapse() that can be
  * applied while paging is in progress on either object.  It runs a
  * non-sleeping backing scan, which skips pages it cannot handle right
  * away, and is attempted only when the parent holds the sole reference
  * to the backing object.
  *
  * Both the object and its backing object must be locked.
  */
 static void
 vm_object_qcollapse(vm_object_t object)
 {
 	vm_object_t backing;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	backing = object->backing_object;
 	VM_OBJECT_LOCK_ASSERT(backing, MA_OWNED);
 
 	if (backing->ref_count == 1)
 		(void)vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
 }
 
 /*
  *	vm_object_collapse:
  *
  *	Collapse an object with the object backing it.
  *	Pages in the backing object are moved into the
  *	parent, and the backing object is deallocated.
  *
  *	The object must be locked.  The loop below repeats for as long
  *	as a backing object can either be collapsed into the parent
  *	(the parent holds the only reference) or bypassed (the parent
  *	shadows all of its pages), and stops at the first backing
  *	object for which neither is possible.
  */
 void
 vm_object_collapse(vm_object_t object)
 {
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	
 	while (TRUE) {
 		vm_object_t backing_object;
 
 		/*
 		 * Verify that the conditions are right for collapse:
 		 *
 		 * The object exists and the backing object exists.
 		 */
 		if ((backing_object = object->backing_object) == NULL)
 			break;
 
 		/*
 		 * we check the backing object first, because it is most likely
 		 * not collapsable.
 		 *
 		 * Both objects must be unnamed (no handle), of type
 		 * OBJT_DEFAULT or OBJT_SWAP, and not marked OBJ_DEAD.
 		 */
 		VM_OBJECT_LOCK(backing_object);
 		if (backing_object->handle != NULL ||
 		    (backing_object->type != OBJT_DEFAULT &&
 		     backing_object->type != OBJT_SWAP) ||
 		    (backing_object->flags & OBJ_DEAD) ||
 		    object->handle != NULL ||
 		    (object->type != OBJT_DEFAULT &&
 		     object->type != OBJT_SWAP) ||
 		    (object->flags & OBJ_DEAD)) {
 			VM_OBJECT_UNLOCK(backing_object);
 			break;
 		}
 
 		/*
 		 * With paging in progress on either object only the partial,
 		 * non-sleeping collapse is attempted, and the loop ends.
 		 */
 		if (
 		    object->paging_in_progress != 0 ||
 		    backing_object->paging_in_progress != 0
 		) {
 			vm_object_qcollapse(object);
 			VM_OBJECT_UNLOCK(backing_object);
 			break;
 		}
 		/*
 		 * We know that we can either collapse the backing object (if
 		 * the parent is the only reference to it) or (perhaps) have
 		 * the parent bypass the object if the parent happens to shadow
 		 * all the resident pages in the entire backing object.
 		 *
 		 * This is ignoring pager-backed pages such as swap pages.
 		 * vm_object_backing_scan fails the shadowing test in this
 		 * case.
 		 */
 		if (backing_object->ref_count == 1) {
 			/*
 			 * If there is exactly one reference to the backing
 			 * object, we can collapse it into the parent.  
 			 */
 			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
 
 #if VM_NRESERVLEVEL > 0
 			/*
 			 * Break any reservations from backing_object.
 			 */
 			if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
 				vm_reserv_break_all(backing_object);
 #endif
 
 			/*
 			 * Move the pager from backing_object to object.
 			 */
 			if (backing_object->type == OBJT_SWAP) {
 				/*
 				 * swap_pager_copy() can sleep, in which case
 				 * the backing_object's and object's locks are
 				 * released and reacquired.
 				 */
 				swap_pager_copy(
 				    backing_object,
 				    object,
 				    OFF_TO_IDX(object->backing_object_offset), TRUE);
 
 				/*
 				 * Free any cached pages from backing_object.
 				 */
 				if (__predict_false(backing_object->cache != NULL))
 					vm_page_cache_free(backing_object, 0, 0);
 			}
 			/*
 			 * Object now shadows whatever backing_object did.
 			 * Note that the reference to 
 			 * backing_object->backing_object moves from within 
 			 * backing_object to within object.
 			 */
 			LIST_REMOVE(object, shadow_list);
 			backing_object->shadow_count--;
 			if (backing_object->backing_object) {
 				VM_OBJECT_LOCK(backing_object->backing_object);
 				LIST_REMOVE(backing_object, shadow_list);
 				LIST_INSERT_HEAD(
 				    &backing_object->backing_object->shadow_head,
 				    object, shadow_list);
 				/*
 				 * The shadow_count has not changed.
 				 */
 				VM_OBJECT_UNLOCK(backing_object->backing_object);
 			}
 			object->backing_object = backing_object->backing_object;
 			object->backing_object_offset +=
 			    backing_object->backing_object_offset;
 
 			/*
 			 * Discard backing_object.
 			 *
 			 * Since the backing object has no pages, no pager left,
 			 * and no object references within it, all that is
 			 * necessary is to dispose of it.
 			 */
 			KASSERT(backing_object->ref_count == 1, (
 "backing_object %p was somehow re-referenced during collapse!",
 			    backing_object));
 			VM_OBJECT_UNLOCK(backing_object);
 			vm_object_destroy(backing_object);
 
 			object_collapses++;
 		} else {
 			vm_object_t new_backing_object;
 
 			/*
 			 * If we do not entirely shadow the backing object,
 			 * there is nothing we can do so we give up.
 			 *
 			 * The scan is skipped when every page of the parent
 			 * is resident, since the parent then shadows the
 			 * backing object completely.
 			 */
 			if (object->resident_page_count != object->size &&
 			    vm_object_backing_scan(object,
 			    OBSC_TEST_ALL_SHADOWED) == 0) {
 				VM_OBJECT_UNLOCK(backing_object);
 				break;
 			}
 
 			/*
 			 * Make the parent shadow the next object in the
 			 * chain.  Deallocating backing_object will not remove
 			 * it, since its reference count is at least 2.
 			 */
 			LIST_REMOVE(object, shadow_list);
 			backing_object->shadow_count--;
 
 			new_backing_object = backing_object->backing_object;
 			if ((object->backing_object = new_backing_object) != NULL) {
 				VM_OBJECT_LOCK(new_backing_object);
 				LIST_INSERT_HEAD(
 				    &new_backing_object->shadow_head,
 				    object,
 				    shadow_list
 				);
 				new_backing_object->shadow_count++;
 				vm_object_reference_locked(new_backing_object);
 				VM_OBJECT_UNLOCK(new_backing_object);
 				object->backing_object_offset +=
 					backing_object->backing_object_offset;
 			}
 
 			/*
 			 * Drop the reference count on backing_object. Since
 			 * its ref_count was at least 2, it will not vanish.
 			 */
 			backing_object->ref_count--;
 			VM_OBJECT_UNLOCK(backing_object);
 			object_bypasses++;
 		}
 
 		/*
 		 * Try again with this object's new backing object.
 		 */
 	}
 }
 
 /*
  *	vm_object_page_remove:
  *
  *	For the given object, either frees or invalidates each of the
  *	specified pages.  In general, a page is freed.  However, if a
  *	page is wired for any reason other than the existence of a
  *	managed, wired mapping, then it may be invalidated but not
  *	removed from the object.  Pages are specified by the given
  *	range ["start", "end") and Boolean "clean_only".  As a
  *	special case, if "end" is zero, then the range extends from
  *	"start" to the end of the object.  If "clean_only" is TRUE,
  *	then only the non-dirty pages within the specified range are
  *	affected.
  *
  *	In general, this operation should only be performed on objects
  *	that contain managed pages.  There are two exceptions.  First,
  *	it may be performed on the kernel and kmem objects.  Second,
  *	it may be used by msync(..., MS_INVALIDATE) to invalidate
  *	device-backed pages.  In both of these cases, "clean_only"
  *	must be FALSE.
  *
  *	The object must be locked.
  */
 void
 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
     boolean_t clean_only)
 {
 	vm_page_t p, next;
 	int wirings;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	/* No resident pages: only the cached pages need attention. */
 	if (object->resident_page_count == 0)
 		goto skipmemq;
 
 	/*
 	 * Since physically-backed objects do not use managed pages, we can't
 	 * remove pages from the object (we must instead remove the page
 	 * references, and then destroy the object).
 	 */
 	KASSERT(object->type != OBJT_PHYS || object == kernel_object ||
 	    object == kmem_object,
 	    ("attempt to remove pages from a physical object"));
 
 	/*
 	 * Count this scan as paging in progress on the object; the matching
 	 * vm_object_pip_wakeup() follows the loop.
 	 */
 	vm_object_pip_add(object, 1);
 again:
 	p = vm_page_find_least(object, start);
 
 	/*
 	 * Assert: the variable p is either (1) the page with the
 	 * least pindex greater than or equal to the parameter "start"
 	 * or (2) NULL.
 	 */
 	for (;
 	     p != NULL && (p->pindex < end || end == 0);
 	     p = next) {
 		next = TAILQ_NEXT(p, listq);
 
 		/*
 		 * If the page is wired for any reason besides the
 		 * existence of managed, wired mappings, then it cannot
 		 * be freed.  For example, fictitious pages, which
 		 * represent device memory, are inherently wired and
 		 * cannot be freed.  They can, however, be invalidated
 		 * if "clean_only" is FALSE.
 		 *
 		 * After the test below, "wirings" is zero for an unwired
 		 * page and otherwise holds the number of managed, wired
 		 * mappings of the page.
 		 */
 		vm_page_lock(p);
 		if ((wirings = p->wire_count) != 0 &&
 		    (wirings = pmap_page_wired_mappings(p)) != p->wire_count) {
 			/* Fictitious pages do not have managed mappings. */
 			if ((p->flags & PG_FICTITIOUS) == 0)
 				pmap_remove_all(p);
 			/* Account for removal of managed, wired mappings. */
 			p->wire_count -= wirings;
 			if (!clean_only) {
 				p->valid = 0;
 				vm_page_undirty(p);
 			}
 			vm_page_unlock(p);
 			continue;
 		}
 		/*
 		 * If we had to sleep on a busy page the list may have
 		 * changed, so the scan restarts from "start".
 		 * NOTE(review): no vm_page_unlock() appears on this path;
 		 * presumably vm_page_sleep_if_busy() releases the page lock
 		 * before sleeping -- confirm.
 		 */
 		if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
 			goto again;
 		KASSERT((p->flags & PG_FICTITIOUS) == 0,
 		    ("vm_object_page_remove: page %p is fictitious", p));
 		if (clean_only && p->valid) {
 			/*
 			 * Write access is revoked before "dirty" is examined;
 			 * a dirty page is left in place.
 			 */
 			pmap_remove_write(p);
 			if (p->dirty) {
 				vm_page_unlock(p);
 				continue;
 			}
 		}
 		pmap_remove_all(p);
 		/* Account for removal of managed, wired mappings. */
 		if (wirings != 0)
 			p->wire_count -= wirings;
 		vm_page_free(p);
 		vm_page_unlock(p);
 	}
 	vm_object_pip_wakeup(object);
 skipmemq:
 	if (__predict_false(object->cache != NULL))
 		vm_page_cache_free(object, start, end);
 }
 
 /*
  *	Fill the page range ["start", "end") of the object with fully valid
  *	pages, paging each missing or partially valid page in through the
  *	object's pager one at a time.
  *
  *	Returns TRUE when every page in the range was populated and FALSE
  *	as soon as one page could not be (it vanished during the pager call
  *	or the pager reported an error, in which case it is freed).
  *
  *	Note: before this is used on anything other than OBJT_DEVICE
  *	objects it should be taught to hand vm_pager_get_pages() more than
  *	one page per call.
  *
  *	The object must be locked.
  */
 boolean_t
 vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
 	vm_page_t pg, req[1];
 	vm_pindex_t idx;
 	int status;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 
 	/*
 	 * Pages are not woken up inside this loop, because a later
 	 * iteration may unlock the object; they are all released together
 	 * once the loop is over.
 	 */
 	for (idx = start; idx < end; idx++) {
 		pg = vm_page_grab(object, idx, VM_ALLOC_NORMAL |
 		    VM_ALLOC_RETRY);
 		if (pg->valid == VM_PAGE_BITS_ALL)
 			continue;
 
 		req[0] = pg;
 		status = vm_pager_get_pages(object, req, 1, 0);
 
 		/* Look the page up again after the pager call. */
 		pg = vm_page_lookup(object, idx);
 		if (pg == NULL)
 			break;
 		if (status != VM_PAGER_OK) {
 			vm_page_lock(pg);
 			vm_page_free(pg);
 			vm_page_unlock(pg);
 			break;
 		}
 	}
 
 	/* Wake up every page in front of the index where the loop stopped. */
 	if (idx > start) {
 		for (pg = vm_page_lookup(object, start);
 		    pg != NULL && pg->pindex < idx;
 		    pg = TAILQ_NEXT(pg, listq))
 			vm_page_wakeup(pg);
 	}
 	return (idx == end);
 }
 
 /*
  *	Routine:	vm_object_coalesce
  *	Function:	Coalesces two objects backing up adjoining
  *			regions of memory into a single object.
  *
  *	returns TRUE if objects were combined.
  *
  *	NOTE:	Only works at the moment if the second object is NULL -
  *		if it's not, which object do we lock first?
  *
  *	Parameters:
  *		prev_object	First object to coalesce
  *		prev_offset	Offset into prev_object
  *		prev_size	Size of reference to prev_object
  *		next_size	Size of reference to the second object
  *		reserved	Indicator that extension region has
  *				swap accounted for
  *
  *	Conditions:
  *	The object must *not* be locked.  It is locked internally and
  *	is unlocked again on every return path.
  */
 boolean_t
 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
     vm_size_t prev_size, vm_size_t next_size, boolean_t reserved)
 {
 	vm_pindex_t next_pindex;
 
 	if (prev_object == NULL)
 		return (TRUE);
 	VM_OBJECT_LOCK(prev_object);
 	if (prev_object->type != OBJT_DEFAULT &&
 	    prev_object->type != OBJT_SWAP) {
 		VM_OBJECT_UNLOCK(prev_object);
 		return (FALSE);
 	}
 
 	/*
 	 * Try to collapse the object first
 	 */
 	vm_object_collapse(prev_object);
 
 	/*
 	 * Can't coalesce if: . more than one reference . paged out . shadows
 	 * another object . has a copy elsewhere (any of which mean that the
 	 * pages not mapped to prev_entry may be in use anyway)
 	 */
 	if (prev_object->backing_object != NULL) {
 		VM_OBJECT_UNLOCK(prev_object);
 		return (FALSE);
 	}
 
 	/* Convert the byte sizes to page counts. */
 	prev_size >>= PAGE_SHIFT;
 	next_size >>= PAGE_SHIFT;
 	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
 
 	/*
 	 * A shared object may only be extended at its very end.
 	 */
 	if ((prev_object->ref_count > 1) &&
 	    (prev_object->size != next_pindex)) {
 		VM_OBJECT_UNLOCK(prev_object);
 		return (FALSE);
 	}
 
 	/*
 	 * Account for the charge.
 	 */
 	if (prev_object->uip != NULL) {
 
 		/*
 		 * If prev_object was charged, then this mapping,
 		 * although not charged now, may become writable
 		 * later. Non-NULL uip in the object would prevent
 		 * swap reservation during enabling of the write
 		 * access, so reserve swap now. Failed reservation
 		 * cause allocation of the separate object for the map
 		 * entry, and swap reservation for this entry is
 		 * managed in appropriate time.
 		 */
 		if (!reserved && !swap_reserve_by_uid(ptoa(next_size),
 		    prev_object->uip)) {
 			/*
 			 * The object lock taken above must be released on
 			 * this failure path as well.
 			 */
 			VM_OBJECT_UNLOCK(prev_object);
 			return (FALSE);
 		}
 		prev_object->charge += ptoa(next_size);
 	}
 
 	/*
 	 * Remove any pages that may still be in the object from a previous
 	 * deallocation.
 	 */
 	if (next_pindex < prev_object->size) {
 		vm_object_page_remove(prev_object,
 				      next_pindex,
 				      next_pindex + next_size, FALSE);
 		if (prev_object->type == OBJT_SWAP)
 			swap_pager_freespace(prev_object,
 					     next_pindex, next_size);
 #if 0
 		if (prev_object->uip != NULL) {
 			KASSERT(prev_object->charge >=
 			    ptoa(prev_object->size - next_pindex),
 			    ("object %p overcharged 1 %jx %jx", prev_object,
 				(uintmax_t)next_pindex, (uintmax_t)next_size));
 			prev_object->charge -= ptoa(prev_object->size -
 			    next_pindex);
 		}
 #endif
 	}
 
 	/*
 	 * Extend the object if necessary.
 	 */
 	if (next_pindex + next_size > prev_object->size)
 		prev_object->size = next_pindex + next_size;
 
 	VM_OBJECT_UNLOCK(prev_object);
 	return (TRUE);
 }
 
 /*
  * Flag a vnode-backed object as possibly holding dirty pages.  Objects of
  * any other type, and objects that already carry OBJ_MIGHTBEDIRTY, are
  * left untouched.  The object must be locked.
  */
 void
 vm_object_set_writeable_dirty(vm_object_t object)
 {
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	if (object->type == OBJT_VNODE &&
 	    (object->flags & OBJ_MIGHTBEDIRTY) == 0)
 		vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
 }
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
 
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 /*
  * Debugger helper: report whether "object" can be reached from "map".
  *
  * With a NULL "entry" every entry of "map" is examined.  With a submap
  * entry every entry of that submap is examined.  With an ordinary entry
  * the entry's object and its chain of backing objects are compared
  * against "object".
  *
  * Returns 1 if the object was found and 0 otherwise (including when
  * "map" is NULL).  No locks are taken here.
  */
 static int
 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
 {
 	vm_map_t scan_map;
 	vm_map_entry_t cur;
 	vm_object_t obj;
 	int remaining;
 
 	if (map == NULL)
 		return (0);
 
 	/* Ordinary entry: walk its object's shadow chain. */
 	if (entry != NULL && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 		for (obj = entry->object.vm_object; obj != NULL;
 		    obj = obj->backing_object) {
 			if (obj == object)
 				return (1);
 		}
 		return (0);
 	}
 
 	/*
 	 * Whole map, or submap entry: recurse on each entry of the chosen
 	 * map.  The walk is bounded both by the map's entry count and by
 	 * reaching the list header again.
 	 */
 	scan_map = (entry == NULL) ? map : entry->object.sub_map;
 	cur = scan_map->header.next;
 	for (remaining = scan_map->nentries;
 	    remaining != 0 && cur != &scan_map->header; remaining--) {
 		if (_vm_object_in_map(scan_map, object, cur))
 			return (1);
 		cur = cur->next;
 	}
 	return (0);
 }
 
 /*
  * Report whether 'object' is reachable from any process address space or
  * from kernel_map, kmem_map, pager_map or buffer_map.  Returns 1 if so and
  * 0 otherwise.  The process list is walked without taking allproc_lock.
  */
 static int
 vm_object_in_map(vm_object_t object)
 {
 	struct proc *proc;
 
 	FOREACH_PROC_IN_SYSTEM(proc) {
 		/* Skip processes that have no vmspace attached. */
 		if (proc->p_vmspace == NULL)
 			continue;
 		if (_vm_object_in_map(&proc->p_vmspace->vm_map, object, 0))
 			return (1);
 	}
 
 	/* Fall back to the kernel maps, in the same order as before. */
 	return (_vm_object_in_map(kernel_map, object, 0) ||
 	    _vm_object_in_map(kmem_map, object, 0) ||
 	    _vm_object_in_map(pager_map, object, 0) ||
 	    _vm_object_in_map(buffer_map, object, 0));
 }
 
 /*
  * "show vmochk": walk vm_object_list and report every handle-less
  * OBJT_DEFAULT/OBJT_SWAP object that has a zero reference count or that
  * cannot be found in any map.
  */
 DB_SHOW_COMMAND(vmochk, vm_object_check)
 {
 	vm_object_t obj;
 
 	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
 		/* Only internal (anonymous) objects are of interest. */
 		if (obj->handle != NULL)
 			continue;
 		if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP)
 			continue;
 
 		if (obj->ref_count == 0)
 			db_printf("vmochk: internal obj has zero ref count: %ld\n",
 			    (long)obj->size);
 		if (vm_object_in_map(obj))
 			continue;
 		db_printf(
 		    "vmochk: internal obj is not in a map: "
 		    "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
 		    obj->ref_count, (u_long)obj->size,
 		    (u_long)obj->size,
 		    (void *)obj->backing_object);
 	}
 }
 
 /*
  *	vm_object_print:	[ debug ]
  *
  *	"show object <addr>": print a summary of the vm_object at 'addr'
  *	(type, size, resident/ref counts, flags, uip/charge and backing
  *	object).  When have_addr is non-zero the resident pages are listed
  *	as well, six per output line.
  */
 DB_SHOW_COMMAND(object, vm_object_print_static)
 {
 	/* XXX convert args. */
 	vm_object_t object = (vm_object_t)addr;
 	boolean_t full = have_addr;
 
 	vm_page_t p;
 
 	/* XXX count is an (unused) arg.  Avoid shadowing it. */
 #define	count	was_count
 
 	/* Because of the macro above this local is really 'was_count'. */
 	int count;
 
 	if (object == NULL)
 		return;
 
 	db_iprintf(
 	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x uip %d charge %jx\n",
 	    object, (int)object->type, (uintmax_t)object->size,
 	    object->resident_page_count, object->ref_count, object->flags,
 	    object->uip ? object->uip->ui_uid : -1, (uintmax_t)object->charge);
 	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
 	    object->shadow_count, 
 	    object->backing_object ? object->backing_object->ref_count : 0,
 	    object->backing_object, (uintmax_t)object->backing_object_offset);
 
 	if (!full)
 		return;
 
 	db_indent += 2;
 	/* 'count' tracks how many pages were printed on the current line. */
 	count = 0;
 	TAILQ_FOREACH(p, &object->memq, listq) {
 		if (count == 0)
 			db_iprintf("memory:=");
 		else if (count == 6) {
 			/* Line is full: start a continuation line. */
 			db_printf("\n");
 			db_iprintf(" ...");
 			count = 0;
 		} else
 			db_printf(",");
 		count++;
 
 		db_printf("(off=0x%jx,page=0x%jx)",
 		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
 	}
 	/* Terminate the last, partially filled line. */
 	if (count != 0)
 		db_printf("\n");
 	db_indent -= 2;
 }
 
 /* XXX. */
 #undef count
 
 /*
  * Non-static entry point so that vm_map_print can reach the "show object"
  * implementation; all arguments are forwarded unchanged.
  */
 void
 vm_object_print(
 	/* db_expr_t */ long addr,
 	boolean_t have_addr,
 	/* db_expr_t */ long count,
 	char *modif)
 {
 
 	vm_object_print_static(addr, have_addr, count, modif);
 }
 
 /*
  * Pager for "show vmopag".  Once more than 18 lines have been counted a
  * key is read from the console; any key other than space aborts the
  * listing (returns 0), space resets the counter.  The line just printed
  * is then counted and 1 is returned.
  */
 static int
 vmopag_line_done(int *nlines)
 {
 
 	if (*nlines > 18) {
 		if (cngetc() != ' ')
 			return (0);
 		*nlines = 0;
 	}
 	(*nlines)++;
 	return (1);
 }
 
 /*
  * "show vmopag": for every object on vm_object_list, print the runs of
  * resident pages (page index <= 128) whose physical addresses are
  * consecutive, as "index(first)run(length)pa(physical address)".
  */
 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
 {
 	vm_object_t object;
 	vm_pindex_t run_start;
 	vm_paddr_t run_pa;
 	vm_page_t m, prev_m;
 	int lines, run_len;
 
 	lines = 0;
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		db_printf("new object: %p\n", (void *)object);
 		if (!vmopag_line_done(&lines))
 			return;
 		run_len = 0;
 		run_start = 0;
 		run_pa = -1;
 		TAILQ_FOREACH(m, &object->memq, listq) {
 			if (m->pindex > 128)
 				break;
 
 			/* A hole in the page indices ends the current run. */
 			prev_m = TAILQ_PREV(m, pglist, listq);
 			if (prev_m != NULL &&
 			    prev_m->pindex + 1 != m->pindex &&
 			    run_len != 0) {
 				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 				    (long)run_start, run_len, (long)run_pa);
 				if (!vmopag_line_done(&lines))
 					return;
 				run_len = 0;
 			}
 
 			/* Physically contiguous with the run: extend it. */
 			if (run_len != 0 &&
 			    VM_PAGE_TO_PHYS(m) == run_pa + run_len * PAGE_SIZE) {
 				++run_len;
 				continue;
 			}
 
 			/* Otherwise flush the run and start a new one at m. */
 			if (run_len != 0) {
 				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 				    (long)run_start, run_len, (long)run_pa);
 				if (!vmopag_line_done(&lines))
 					return;
 			}
 			run_start = m->pindex;
 			run_pa = VM_PAGE_TO_PHYS(m);
 			run_len = 1;
 		}
 		if (run_len != 0) {
 			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 			    (long)run_start, run_len, (long)run_pa);
 			if (!vmopag_line_done(&lines))
 				return;
 		}
 	}
 }
 #endif /* DDB */
Index: projects/binutils-2.17/sys/x86/x86/local_apic.c
===================================================================
--- projects/binutils-2.17/sys/x86/x86/local_apic.c	(revision 215829)
+++ projects/binutils-2.17/sys/x86/x86/local_apic.c	(revision 215830)
@@ -1,1522 +1,1522 @@
 /*-
  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Local APIC support on Pentium and later processors.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_hwpmc_hooks.h"
 #include "opt_kdtrace.h"
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/timeet.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <x86/apicreg.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
 #include <machine/apicvar.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 
 #ifdef DDB
 #include <sys/interrupt.h>
 #include <ddb/ddb.h>
 #endif
 
 /*
  * Per-architecture arguments for the setidt() calls in this file:
  * SDT_APIC / SDT_APICT are the gate types and GSEL_APIC the selector
  * argument.  amd64 uses the same gate type for both and a selector of 0.
  */
 #ifdef __amd64__
 #define	SDT_APIC	SDT_SYSIGT
 #define	SDT_APICT	SDT_SYSIGT
 #define	GSEL_APIC	0
 #else
 #define	SDT_APIC	SDT_SYS386IGT
 #define	SDT_APICT	SDT_SYS386TGT
 #define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
 #endif
 
 /* Sanity checks on IDT vectors. */
 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
 CTASSERT(APIC_LOCAL_INTS == 240);
 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
 
 /* Magic IRQ values for the timer, syscall and DTrace return vectors. */
 #define	IRQ_TIMER	(NUM_IO_INTS + 1)
 #define	IRQ_SYSCALL	(NUM_IO_INTS + 2)
 #define	IRQ_DTRACE_RET	(NUM_IO_INTS + 3)
 
 /*
  * Support for local APICs.  Local APICs manage interrupts on each
  * individual processor as opposed to I/O APICs which receive interrupts
  * from I/O devices and then forward them on to the local APICs.
  *
  * Local APICs can also send interrupts to each other thus providing the
  * mechanism for IPIs.
  */
 
 /*
  * Software description of one Local Vector Table entry.  lvt_mode()
  * translates these fields into the register value.
  */
 struct lvt {
 	u_int lvt_edgetrigger:1;	/* 0: lvt_mode() sets APIC_LVT_TM */
 	u_int lvt_activehi:1;		/* 0: sets APIC_LVT_IIPP_INTALO */
 	u_int lvt_masked:1;		/* 1: sets APIC_LVT_M */
 	u_int lvt_active:1;		/* 1: per-APIC entry overrides lvts[] */
 	u_int lvt_mode:16;		/* APIC_LVT_DM_* delivery mode */
 	u_int lvt_vector:8;		/* vector, used for APIC_LVT_DM_FIXED */
 };
 
 /*
  * Per-local-APIC software state, indexed by APIC ID.
  *
  * The storage-class specifier is placed first: putting 'static' after the
  * struct body is an obsolescent form in ISO C.
  */
 static struct lapic {
 	/* Per-APIC LVT settings; consulted only when lvt_active is set. */
 	struct lvt la_lvts[LVT_MAX + 1];
 	u_int la_id:8;			/* APIC ID of this entry */
 	u_int la_cluster:4;		/* logical cluster */
 	u_int la_cluster_id:2;		/* ID within the cluster */
 	u_int la_present:1;		/* set by lapic_create() */
 	u_long *la_timer_count;		/* counter bumped on each timer intr */
 	u_long la_timer_period;		/* programmed timer count */
 	u_int la_timer_mode;		/* 0 stopped, 1 periodic, 2 one-shot */
 	/*
 	 * IRQ assigned to each vector starting at APIC_IO_INTS, or -1 if
 	 * the vector is free.
 	 */
 	/* Include IDT_SYSCALL to make indexing easier. */
 	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
 } lapics[MAX_APIC_ID + 1];
 
 /*
  * Global defaults for local APIC LVT entries.
  *
  * Initializers follow the field order of struct lvt:
  * { edgetrigger, activehi, masked, active, mode, vector }.
  * lvt_mode() falls back to these when a local APIC has no active
  * override for the pin.
  */
 static struct lvt lvts[LVT_MAX + 1] = {
 	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
 	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
 	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
 	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
 };
 
 /*
  * Interrupt entry points, indexed by (vector / 32).  The first slot is
  * NULL, so apic_enable_vector() asserts that no handler is requested for
  * vectors 0 - 31.
  */
 static inthand_t *ioint_handlers[] = {
 	NULL,			/* 0 - 31 */
 	IDTVEC(apic_isr1),	/* 32 - 63 */
 	IDTVEC(apic_isr2),	/* 64 - 95 */
 	IDTVEC(apic_isr3),	/* 96 - 127 */
 	IDTVEC(apic_isr4),	/* 128 - 159 */
 	IDTVEC(apic_isr5),	/* 160 - 191 */
 	IDTVEC(apic_isr6),	/* 192 - 223 */
 	IDTVEC(apic_isr7),	/* 224 - 255 */
 };
 
 
 /*
  * Divide-configuration register values for divisors 1, 2, 4, ... 128.
  * lapic_timer_set_divisor() indexes this table with ffs(divisor) - 1.
  */
 static u_int32_t lapic_timer_divisors[] = {
 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
 	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
 };
 
 extern inthand_t IDTVEC(rsvd);
 
 /* Register window of the local APIC; mapped by lapic_init(). */
 volatile lapic_t *lapic;
 /* Physical address passed to lapic_init(). */
 vm_paddr_t lapic_paddr;
 /* Timer divisor chosen during calibration in lapic_et_start(). */
 static u_long lapic_timer_divisor;
 /* Event timer registered by lapic_init(). */
 static struct eventtimer lapic_et;
 
 static void	lapic_enable(void);
 static void	lapic_resume(struct pic *pic);
 static void	lapic_timer_enable_intr(void);
 static void	lapic_timer_oneshot(u_int count);
 static void	lapic_timer_periodic(u_int count);
 static void	lapic_timer_stop(void);
 static void	lapic_timer_set_divisor(u_int divisor);
 static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
 static int	lapic_et_start(struct eventtimer *et,
     struct bintime *first, struct bintime *period);
 static int	lapic_et_stop(struct eventtimer *et);
 
 /* PIC descriptor for the local APIC; only the resume hook is provided. */
 struct pic lapic_pic = { .pic_resume = lapic_resume };
 
 /*
  * Compute the value to program into the LVT register for 'pin'.
  *
  * The settings come from the per-APIC entry in 'la' when it is marked
  * active, otherwise from the global defaults in lvts[].  'value' is the
  * current register content; the mask, trigger mode, polarity, delivery
  * mode and vector fields are replaced and all other bits are preserved.
  *
  * NMI, SMI, INIT and ExtINT delivery are forced to edge trigger and use a
  * vector of 0.  Panics on an unknown delivery mode.
  */
 static uint32_t
 lvt_mode(struct lapic *la, u_int pin, uint32_t value)
 {
 	struct lvt *lvt;
 
 	KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin));
 	if (la->la_lvts[pin].lvt_active)
 		lvt = &la->la_lvts[pin];
 	else
 		lvt = &lvts[pin];
 
 	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
 	    APIC_LVT_VECTOR);
 	/* APIC_LVT_TM set means level trigger; clear means edge trigger. */
 	if (lvt->lvt_edgetrigger == 0)
 		value |= APIC_LVT_TM;
 	if (lvt->lvt_activehi == 0)
 		value |= APIC_LVT_IIPP_INTALO;
 	if (lvt->lvt_masked)
 		value |= APIC_LVT_M;
 	value |= lvt->lvt_mode;
 	switch (lvt->lvt_mode) {
 	case APIC_LVT_DM_NMI:
 	case APIC_LVT_DM_SMI:
 	case APIC_LVT_DM_INIT:
 	case APIC_LVT_DM_EXTINT:
 		if (!lvt->lvt_edgetrigger) {
 			printf("lapic%u: Forcing LINT%u to edge trigger\n",
 			    la->la_id, pin);
 			/*
 			 * Forcing edge trigger means clearing the level
 			 * bit that was set above, not setting it again.
 			 */
 			value &= ~APIC_LVT_TM;
 		}
 		/* Use a vector of 0. */
 		break;
 	case APIC_LVT_DM_FIXED:
 		value |= lvt->lvt_vector;
 		break;
 	default:
 		panic("bad APIC LVT delivery mode: %#x\n", value);
 	}
 	return (value);
 }
 
 /*
  * Map the local APIC and setup necessary interrupt vectors.
  *
  * 'addr' is the page-aligned physical address of the local APIC.  Unless
  * the "apic.0.clock" hint is present and zero, the LAPIC timer is also
  * registered as an event timer.
  */
 void
 lapic_init(vm_paddr_t addr)
 {
 	u_int regs[4];
 	int i, arat;
 
 	/* Map the local APIC and setup the spurious interrupt handler. */
 	KASSERT(trunc_page(addr) == addr,
 	    ("local APIC not aligned on a page boundary"));
 	lapic = pmap_mapdev(addr, sizeof(lapic_t));
 	lapic_paddr = addr;
 	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
 	    GSEL_APIC);
 
 	/* Perform basic initialization of the BSP's local APIC. */
 	lapic_enable();
 
 	/* Set BSP's per-CPU local APIC ID. */
 	PCPU_SET(apic_id, lapic_id());
 
 	/* Local APIC timer interrupt. */
 	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC);
 
 	/* Local APIC error interrupt. */
 	setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC);
 
 	/* XXX: Thermal interrupt */
 
 	/* Local APIC CMCI. */
 	setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC);
 
 	/* Register the event timer unless the hint disables it. */
 	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
 		arat = 0;
 		/* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */
 		if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) {
 			do_cpuid(0x06, regs);
-			if (regs[0] & 0x4)
+			if ((regs[0] & CPUTPM1_ARAT) != 0)
 				arat = 1;
 		}
 		bzero(&lapic_et, sizeof(lapic_et));
 		lapic_et.et_name = "LAPIC";
 		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
 		    ET_FLAGS_PERCPU;
 		lapic_et.et_quality = 600;
 		/* Without ARAT the timer stops in C3: flag it and rank lower. */
 		if (!arat) {
 			lapic_et.et_flags |= ET_FLAGS_C3STOP;
 			lapic_et.et_quality -= 200;
 		}
 		/* Frequency 0 makes lapic_et_start() calibrate on first use. */
 		lapic_et.et_frequency = 0;
 		/* We don't know frequency yet, so trying to guess. */
 		lapic_et.et_min_period.sec = 0;
 		lapic_et.et_min_period.frac = 0x00001000LL << 32;
 		lapic_et.et_max_period.sec = 1;
 		lapic_et.et_max_period.frac = 0;
 		lapic_et.et_start = lapic_et_start;
 		lapic_et.et_stop = lapic_et_stop;
 		lapic_et.et_priv = NULL;
 		et_register(&lapic_et);
 	}
 }
 
 /*
  * Create a local APIC instance.
  *
  * Records the APIC with ID 'apic_id' in lapics[], seeds its LVT entries
  * from the global defaults (marked inactive), marks every I/O interrupt
  * vector free and then reserves the syscall, timer and (with
  * KDTRACE_HOOKS) DTrace return slots.  IDs above MAX_APIC_ID are ignored,
  * which is fatal if the ID belongs to the boot CPU.
  */
 void
 lapic_create(u_int apic_id, int boot_cpu)
 {
 	int i;
 
 	if (apic_id > MAX_APIC_ID) {
 		/* apic_id is unsigned, so print it with %u. */
 		printf("APIC: Ignoring local APIC with ID %u\n", apic_id);
 		if (boot_cpu)
 			panic("Can't ignore BSP");
 		return;
 	}
 	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
 	    apic_id));
 
 	/*
 	 * Assume no local LVT overrides and a cluster of 0 and
 	 * intra-cluster ID of 0.
 	 */
 	lapics[apic_id].la_present = 1;
 	lapics[apic_id].la_id = apic_id;
 	for (i = 0; i <= LVT_MAX; i++) {
 		lapics[apic_id].la_lvts[i] = lvts[i];
 		lapics[apic_id].la_lvts[i].lvt_active = 0;
 	}
 	/* -1 marks a vector slot as unallocated. */
 	for (i = 0; i <= APIC_NUM_IOINTS; i++)
 		lapics[apic_id].la_ioint_irqs[i] = -1;
 	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
 	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
 	    IRQ_TIMER;
 #ifdef KDTRACE_HOOKS
 	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
 	    IRQ_DTRACE_RET;
 #endif
 
 #ifdef SMP
 	cpu_add(apic_id, boot_cpu);
 #endif
 }
 
 /*
  * Print the main registers of the current CPU's local APIC, prefixed by
  * the CPU id and the caller-supplied label 'str'.  The PMC and CMCI
  * entries are printed only if the version register reports them.
  */
 void
 lapic_dump(const char *str)
 {
 	uint32_t lvt_limit;
 
 	lvt_limit = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 
 	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
 	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
 	    lapic->id, lapic->version, lapic->ldr, lapic->dfr);
 	printf("  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
 	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
 	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
 	    lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error);
 	if (lvt_limit >= LVT_PMC) {
 		printf(" pmc: 0x%08x", lapic->lvt_pcint);
 	}
 	printf("\n");
 	if (lvt_limit >= LVT_CMCI) {
 		printf("   cmci: 0x%08x\n", lapic->lvt_cmci);
 	}
 }
 
 /*
  * Program the current CPU's local APIC from its lapics[] entry: TPR,
  * spurious vector, LINT0/1, PMC, timer, error and CMCI LVT entries.
  * Runs with interrupts disabled.  When 'boot' is non-zero the per-CPU
  * timer interrupt counter is registered as well.
  */
 void
 lapic_setup(int boot)
 {
 	struct lapic *la;
 	u_int32_t maxlvt;
 	register_t saveintr;
 	char buf[MAXCOMLEN + 1];
 
 	la = &lapics[lapic_id()];
 	KASSERT(la->la_present, ("missing APIC structure"));
 	saveintr = intr_disable();
 	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
 
 	/* Setup spurious vector and enable the local APIC. */
 	lapic_enable();
 
 	/* Program LINT[01] LVT entries. */
 	lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0);
 	lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1);
 
 	/* Program the PMC LVT entry if present. */
 	if (maxlvt >= LVT_PMC)
 		lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint);
 
 	/* Program timer LVT and setup handler. */
 	lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer);
 	if (boot) {
 		snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid));
 		intrcnt_add(buf, &la->la_timer_count);
 	}
 
 	/* Setup the timer if configured. */
 	if (la->la_timer_mode != 0) {
 		/* NOTE(review): the message says "divisor" but checks the period. */
 		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
 		    lapic_id()));
 		lapic_timer_stop();
 		lapic_timer_set_divisor(lapic_timer_divisor);
 		lapic_timer_enable_intr();
 		/* Mode 1 is periodic; any other non-zero mode is one-shot. */
 		if (la->la_timer_mode == 1)
 			lapic_timer_periodic(la->la_timer_period);
 		else
 			lapic_timer_oneshot(la->la_timer_period);
 	}
 
 	/* Program error LVT and clear any existing errors. */
 	lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error);
 	lapic->esr = 0;
 
 	/* XXX: Thermal LVT */
 
 	/* Program the CMCI LVT entry if present. */
 	if (maxlvt >= LVT_CMCI)
 		lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci);
 	    
 	intr_restore(saveintr);
 }
 
 /*
  * Unmask the PMC LVT entry of the current CPU.  Does nothing unless the
  * kernel is built with HWPMC_HOOKS.
  */
 void
 lapic_reenable_pmc(void)
 {
 #ifdef HWPMC_HOOKS
 	uint32_t pcint;
 
 	/* One read and one write; only the mask bit changes. */
 	pcint = lapic->lvt_pcint;
 	lapic->lvt_pcint = pcint & ~APIC_LVT_M;
 #endif
 }
 
 #ifdef HWPMC_HOOKS
 /*
  * Reprogram the PMC LVT entry of the current CPU from its software
  * state.  The argument is unused; the signature matches what
  * smp_rendezvous() is given in lapic_enable_pmc()/lapic_disable_pmc().
  */
 static void
 lapic_update_pmc(void *dummy)
 {
 	struct lapic *self;
 	uint32_t current;
 
 	self = &lapics[lapic_id()];
 	current = lapic->lvt_pcint;
 	lapic->lvt_pcint = lvt_mode(self, LVT_PMC, current);
 }
 #endif
 
 /*
  * Unmask the PMC LVT entry in the global defaults and push the change to
  * the CPUs.  Returns 1 on success and 0 if the local APIC is not mapped,
  * has no PMC LVT entry, or the kernel lacks HWPMC_HOOKS.
  */
 int
 lapic_enable_pmc(void)
 {
 #ifdef HWPMC_HOOKS
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
 	if (lapic == NULL)
 		return (0);
 
 	/* Fail if the PMC LVT is not present. */
 	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < LVT_PMC)
 		return (0);
 
 	lvts[LVT_PMC].lvt_masked = 0;
 
 #ifdef SMP
 	/*
 	 * If hwpmc was loaded at boot time then the APs may not be
 	 * started yet.  In that case, don't forward the request to
 	 * them as they will program the lvt when they start.
 	 */
 	if (smp_started)
 		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
 	else
 #endif
 		/* Without SMP this call is unconditional (no 'else'). */
 		lapic_update_pmc(NULL);
 	return (1);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Mask the PMC LVT entry in the global defaults and reprogram it through
  * smp_rendezvous().  Returns silently if the local APIC is not mapped or
  * has no PMC LVT entry; does nothing without HWPMC_HOOKS.
  */
 void
 lapic_disable_pmc(void)
 {
 #ifdef HWPMC_HOOKS
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
 	if (lapic == NULL)
 		return;
 
 	/* Fail if the PMC LVT is not present. */
 	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < LVT_PMC)
 		return;
 
 	lvts[LVT_PMC].lvt_masked = 1;
 
 #ifdef SMP
 	/* The APs should always be started when hwpmc is unloaded. */
 	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
 #endif
 	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
 #endif
 }
 
 /*
  * Event timer start method.
  *
  * On first use (et_frequency == 0) the timer is calibrated: it is run
  * one-shot from APIC_TIMER_MAX_COUNT across DELAY(1000000) and the number
  * of elapsed counts becomes the frequency, doubling the divisor while the
  * counter runs out.  The timer is then programmed periodic when 'period'
  * is non-NULL, otherwise one-shot using 'first'.  Always returns 0.
  */
 static int
 lapic_et_start(struct eventtimer *et,
     struct bintime *first, struct bintime *period)
 {
 	struct lapic *la;
 	u_long value;
 
 	if (et->et_frequency == 0) {
 		/* Start off with a divisor of 2 (power on reset default). */
 		lapic_timer_divisor = 2;
 		/* Try to calibrate the local APIC timer. */
 		do {
 			lapic_timer_set_divisor(lapic_timer_divisor);
 			lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
 			DELAY(1000000);
 			/* Counts consumed while DELAY() ran. */
 			value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
 			/* Counter did not run out: this divisor is usable. */
 			if (value != APIC_TIMER_MAX_COUNT)
 				break;
 			lapic_timer_divisor <<= 1;
 		} while (lapic_timer_divisor <= 128);
 		if (lapic_timer_divisor > 128)
 			panic("lapic: Divisor too big");
 		if (bootverbose)
 			printf("lapic: Divisor %lu, Frequency %lu Hz\n",
 			    lapic_timer_divisor, value);
 		et->et_frequency = value;
 		/* Replace the guessed period limits set in lapic_init(). */
 		et->et_min_period.sec = 0;
 		et->et_min_period.frac =
 		    ((0x00000002LLU << 32) / et->et_frequency) << 32;
 		et->et_max_period.sec = 0xfffffffeLLU / et->et_frequency;
 		et->et_max_period.frac =
 		    ((0xfffffffeLLU << 32) / et->et_frequency) << 32;
 	}
 	lapic_timer_stop();
 	lapic_timer_set_divisor(lapic_timer_divisor);
 	lapic_timer_enable_intr();
 	la = &lapics[lapic_id()];
 	/*
 	 * Convert the requested time to timer counts using the upper 32
 	 * bits of 'frac' plus whole seconds.
 	 * NOTE(review): assumes frac is a 64-bit binary fraction of a
 	 * second -- confirm against struct bintime.
 	 */
 	if (period != NULL) {
 		la->la_timer_mode = 1;
 		la->la_timer_period =
 		    (et->et_frequency * (period->frac >> 32)) >> 32;
 		if (period->sec != 0)
 			la->la_timer_period += et->et_frequency * period->sec;
 		lapic_timer_periodic(la->la_timer_period);
 	} else {
 		la->la_timer_mode = 2;
 		la->la_timer_period =
 		    (et->et_frequency * (first->frac >> 32)) >> 32;
 		if (first->sec != 0)
 			la->la_timer_period += et->et_frequency * first->sec;
 		lapic_timer_oneshot(la->la_timer_period);
 	}
 	return (0);
 }
 
 /*
  * Event timer stop method: mark the current CPU's timer as unconfigured
  * (mode 0) and stop the hardware timer.  Always returns 0.
  */
 static int
 lapic_et_stop(struct eventtimer *et)
 {
 	struct lapic *self;
 
 	self = &lapics[lapic_id()];
 	self->la_timer_mode = 0;
 	lapic_timer_stop();
 	return (0);
 }
 
 /*
  * Software-disable the local APIC by clearing APIC_SVR_SWEN in the
  * spurious vector register.
  */
 void
 lapic_disable(void)
 {
 	uint32_t svr;
 
 	svr = lapic->svr;
 	lapic->svr = svr & ~APIC_SVR_SWEN;
 }
 
 /*
  * Enable the local APIC: replace the vector and focus fields of the
  * spurious vector register with APIC_SVR_FEN, APIC_SVR_SWEN and
  * APIC_SPURIOUS_INT, leaving the remaining bits untouched.
  */
 static void
 lapic_enable(void)
 {
 	u_int32_t svr;
 
 	svr = lapic->svr & ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
 	svr |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
 	lapic->svr = svr;
 }
 
 /*
  * Reset the local APIC on the BSP during resume.
  *
  * Installed as lapic_pic.pic_resume.  'pic' is unused; lapic_setup() is
  * called with boot == 0 so the timer counter is not registered again.
  */
 static void
 lapic_resume(struct pic *pic)
 {
 
 	lapic_setup(0);
 }
 
 /*
  * Return the APIC ID of the current CPU's local APIC, taken from the ID
  * register shifted down by APIC_ID_SHIFT.  The APIC must be mapped.
  */
 int
 lapic_id(void)
 {
 
 	KASSERT(lapic != NULL, ("local APIC is not mapped"));
 	return (lapic->id >> APIC_ID_SHIFT);
 }
 
 /*
  * Report whether 'vector' is pending in the local APIC's interrupt
  * request register.  Returns non-zero (1) if pending and 0 otherwise.
  */
 int
 lapic_intr_pending(u_int vector)
 {
 	volatile u_int32_t *irr;
 
 	/*
 	 * The IRR registers are an array of 128-bit registers each of
 	 * which only describes 32 interrupts in the low 32 bits.  Thus,
 	 * we divide the vector by 32 to get the 128-bit index.  We then
 	 * multiply that index by 4 to get the equivalent index from
 	 * treating the IRR as an array of 32-bit registers.  Finally, we
 	 * modulus the vector by 32 to determine the individual bit to
 	 * test.
 	 */
 	irr = &lapic->irr0;
 	/*
 	 * Use an unsigned mask: shifting a signed 1 into bit 31 is not
 	 * defined.  Comparing against 0 also keeps the result out of the
 	 * sign bit when it is converted to int.
 	 */
 	return ((irr[(vector / 32) * 4] & (1u << (vector % 32))) != 0);
 }
 
 /*
  * Record the logical cluster and intra-cluster ID for the local APIC
  * 'apic_id'.  Only the software state in lapics[] is updated here.
  */
 void
 lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
 {
 
 	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
 	    __func__, apic_id));
 	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
 	    __func__, cluster));
 	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
 	    ("%s: intra cluster id %u too big", __func__, cluster_id));
 
 	lapics[apic_id].la_cluster = cluster;
 	lapics[apic_id].la_cluster_id = cluster_id;
 }
 
 /*
  * Set the masked state of LVT pin 'pin'.  With APIC_ID_ALL the global
  * default is changed; otherwise the per-APIC entry is changed and marked
  * active.  Returns EINVAL for an out-of-range pin and 0 otherwise.
  */
 int
 lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
 {
 	struct lvt *entry;
 
 	if (pin > LVT_MAX)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_active = 1;
 	} else
 		entry = &lvts[pin];
 	entry->lvt_masked = masked;
 
 	if (bootverbose) {
 		if (apic_id != APIC_ID_ALL)
 			printf("lapic%u:", apic_id);
 		else
 			printf("lapic:");
 		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
 	}
 	return (0);
 }
 
 /*
  * Set the delivery mode of LVT pin 'pin' to NMI, SMI, INIT or ExtINT.
  * The entry becomes edge triggered and active high; ExtINT entries are
  * masked and the others unmasked.  With APIC_ID_ALL the global default
  * is changed, otherwise the per-APIC entry is changed and marked active.
  * Returns EINVAL for an out-of-range pin, 0 otherwise, and panics on any
  * other delivery mode.
  */
 int
 lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
 {
 	struct lvt *entry;
 
 	if (pin > LVT_MAX)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_active = 1;
 		if (bootverbose)
 			printf("lapic%u:", apic_id);
 	} else {
 		entry = &lvts[pin];
 		if (bootverbose)
 			printf("lapic:");
 	}
 
 	/* The mode is stored before it is validated, as before. */
 	entry->lvt_mode = mode;
 	switch (mode) {
 	case APIC_LVT_DM_NMI:
 	case APIC_LVT_DM_SMI:
 	case APIC_LVT_DM_INIT:
 	case APIC_LVT_DM_EXTINT:
 		break;
 	default:
 		panic("Unsupported delivery mode: 0x%x\n", mode);
 	}
 	entry->lvt_edgetrigger = 1;
 	entry->lvt_activehi = 1;
 	entry->lvt_masked = (mode == APIC_LVT_DM_EXTINT) ? 1 : 0;
 
 	if (!bootverbose)
 		return (0);
 
 	printf(" Routing ");
 	switch (mode) {
 	case APIC_LVT_DM_NMI:
 		printf("NMI");
 		break;
 	case APIC_LVT_DM_SMI:
 		printf("SMI");
 		break;
 	case APIC_LVT_DM_INIT:
 		printf("INIT");
 		break;
 	case APIC_LVT_DM_EXTINT:
 		printf("ExtINT");
 		break;
 	}
 	printf(" -> LINT%u\n", pin);
 	return (0);
 }
 
 /*
  * Set the polarity of LVT pin 'pin'.  With APIC_ID_ALL the global default
  * is changed; otherwise the per-APIC entry is changed and marked active.
  * Returns EINVAL for an out-of-range pin or INTR_POLARITY_CONFORM, and 0
  * otherwise.
  */
 int
 lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
 {
 	struct lvt *entry;
 
 	if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_active = 1;
 	} else
 		entry = &lvts[pin];
 	entry->lvt_activehi = (pol == INTR_POLARITY_HIGH);
 
 	if (bootverbose) {
 		if (apic_id != APIC_ID_ALL)
 			printf("lapic%u:", apic_id);
 		else
 			printf("lapic:");
 		printf(" LINT%u polarity: %s\n", pin,
 		    pol == INTR_POLARITY_HIGH ? "high" : "low");
 	}
 	return (0);
 }
 
 /*
  * Set the trigger mode of LVT pin 'pin'.  With APIC_ID_ALL the global
  * default is changed; otherwise the per-APIC entry is changed and marked
  * active.  Returns EINVAL for an out-of-range pin or
  * INTR_TRIGGER_CONFORM, and 0 otherwise.
  */
 int
 lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger)
 {
 	struct lvt *entry;
 
 	if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
 		return (EINVAL);
 
 	if (apic_id != APIC_ID_ALL) {
 		KASSERT(lapics[apic_id].la_present,
 		    ("%s: missing APIC %u", __func__, apic_id));
 		entry = &lapics[apic_id].la_lvts[pin];
 		entry->lvt_active = 1;
 	} else
 		entry = &lvts[pin];
 	entry->lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
 
 	if (bootverbose) {
 		if (apic_id != APIC_ID_ALL)
 			printf("lapic%u:", apic_id);
 		else
 			printf("lapic:");
 		printf(" LINT%u trigger: %s\n", pin,
 		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
 	}
 	return (0);
 }
 
 /*
  * Raise or lower the task priority of the current CPU so that interrupts
  * below 'vector' are blocked.  With CHEAP_TPR the register is simply
  * overwritten; otherwise only the priority field is replaced.
  */
 void
 lapic_set_tpr(u_int vector)
 {
 #ifdef CHEAP_TPR
 	lapic->tpr = vector;
 #else
 	u_int32_t prio;
 
 	prio = (lapic->tpr & ~APIC_TPR_PRIO) | vector;
 	lapic->tpr = prio;
 #endif
 }
 
 /* Signal end-of-interrupt by writing 0 to the local APIC EOI register. */
 void
 lapic_eoi(void)
 {
 
 	lapic->eoi = 0;
 }
 
 /*
  * Dispatch an I/O interrupt: translate the IDT 'vector' on the current
  * CPU's APIC into an IRQ, look up its interrupt source and run the
  * handlers with 'frame'.  Panics if the vector is -1.
  */
 void
 lapic_handle_intr(int vector, struct trapframe *frame)
 {
 	struct intsrc *isrc;
 
 	if (vector == -1)
 		panic("Couldn't get vector from ISR!");
 	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
 	    vector));
 	intr_execute_handlers(isrc, frame);
 }
 
 /*
  * Local APIC timer interrupt handler: acknowledge the interrupt, bump
  * the per-CPU timer counter and, if the event timer is active, invoke
  * its callback with 'frame' installed as the thread's interrupt frame.
  */
 void
 lapic_handle_timer(struct trapframe *frame)
 {
 	struct lapic *la;
 	struct trapframe *oldframe;
 	struct thread *td;
 
 	/* Send EOI first thing. */
 	lapic_eoi();
 
 #if defined(SMP) && !defined(SCHED_ULE)
 	/*
 	 * Don't do any accounting for the disabled HTT cores, since it
 	 * will provide misleading numbers for the userland.
 	 *
 	 * No locking is necessary here, since even if we lose the race
 	 * when hlt_cpus_mask changes it is not a big deal, really.
 	 *
 	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
 	 * and unlike other schedulers it actually schedules threads to
 	 * those CPUs.
 	 */
 	if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0)
 		return;
 #endif
 
 	/* Look up our local APIC structure for the tick counters. */
 	la = &lapics[PCPU_GET(apic_id)];
 	(*la->la_timer_count)++;
 	critical_enter();
 	if (lapic_et.et_active) {
 		/* Save and restore the previous frame around the callback. */
 		td = curthread;
 		td->td_intr_nesting_level++;
 		oldframe = td->td_intr_frame;
 		td->td_intr_frame = frame;
 		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
 		td->td_intr_frame = oldframe;
 		td->td_intr_nesting_level--;
 	}
 	critical_exit();
 }
 
 /*
  * Program the timer divide configuration register for 'divisor', which
  * must be a power of two that has an entry in lapic_timer_divisors[].
  */
 static void
 lapic_timer_set_divisor(u_int divisor)
 {
 
 	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
 	KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) /
 	    sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor));
 	/* ffs() is 1-based, so divisor 1 maps to index 0. */
 	lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1];
 }
 
 /*
  * Switch the timer LVT to one-shot mode and load 'count' into the
  * initial count register.
  */
 static void
 lapic_timer_oneshot(u_int count)
 {
 	u_int32_t lvt;
 
 	lvt = (lapic->lvt_timer & ~APIC_LVTT_TM) | APIC_LVTT_TM_ONE_SHOT;
 	lapic->lvt_timer = lvt;
 	lapic->icr_timer = count;
 }
 
 /*
  * Switch the timer LVT to periodic mode and load 'count' into the
  * initial count register.
  */
 static void
 lapic_timer_periodic(u_int count)
 {
 	u_int32_t lvt;
 
 	lvt = (lapic->lvt_timer & ~APIC_LVTT_TM) | APIC_LVTT_TM_PERIODIC;
 	lapic->lvt_timer = lvt;
 	lapic->icr_timer = count;
 }
 
 /*
  * Stop the timer: clear the timer-mode field, mask the timer LVT entry
  * and write 0 to the initial count register.
  */
 static void
 lapic_timer_stop(void)
 {
 	u_int32_t lvt;
 
 	lvt = (lapic->lvt_timer & ~APIC_LVTT_TM) | APIC_LVT_M;
 	lapic->lvt_timer = lvt;
 	lapic->icr_timer = 0;
 }
 
 /* Unmask the timer LVT entry, leaving all other bits unchanged. */
 static void
 lapic_timer_enable_intr(void)
 {
 	u_int32_t lvt;
 
 	lvt = lapic->lvt_timer;
 	lapic->lvt_timer = lvt & ~APIC_LVT_M;
 }
 
 /* CMC interrupt handler: send EOI, then call cmc_intr(). */
 void
 lapic_handle_cmc(void)
 {
 
 	lapic_eoi();
 	cmc_intr();
 }
 
 /*
  * Called from the mca_init() to activate the CMC interrupt if this CPU is
  * responsible for monitoring any MC banks for CMC events.  Since mca_init()
  * is called prior to lapic_setup() during boot, this just needs to unmask
  * this CPU's LVT_CMCI entry.
  */
 void
 lapic_enable_cmc(void)
 {
 	u_int apic_id;
 
 	apic_id = PCPU_GET(apic_id);
 	KASSERT(lapics[apic_id].la_present,
 	    ("%s: missing APIC %u", __func__, apic_id));
 	lapics[apic_id].la_lvts[LVT_CMCI].lvt_masked = 0;
 	lapics[apic_id].la_lvts[LVT_CMCI].lvt_active = 1;
 	if (bootverbose)
 		printf("lapic%u: CMCI unmasked\n", apic_id);
 }
 
 /*
  * Local APIC error interrupt handler: latch and read the error status
  * register, log it with the CPU id and send EOI.
  */
 void
 lapic_handle_error(void)
 {
 	u_int32_t esr;
 
 	/*
 	 * Read the contents of the error status register.  Write to
 	 * the register first before reading from it to force the APIC
 	 * to update its value to indicate any errors that have
 	 * occurred since the previous write to the register.
 	 */
 	lapic->esr = 0;
 	esr = lapic->esr;
 
 	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
 	lapic_eoi();
 }
 
 /*
  * Map an APIC ID to a CPU id through apic_cpuids[].  Kernels built
  * without SMP always return 0.
  */
 u_int
 apic_cpuid(u_int apic_id)
 {
 #ifdef SMP
 	return (apic_cpuids[apic_id]);
 #else
 	return (0);
 #endif
 }
 
 /*
  * Request a free IDT vector to be used by the specified IRQ.
  *
  * The first free slot of the APIC's vector table is claimed under
  * icu_lock.  Returns the IDT vector, or 0 if every slot is in use.
  */
 u_int
 apic_alloc_vector(u_int apic_id, u_int irq)
 {
 	u_int found, slot;
 
 	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
 
 	found = 0;
 	mtx_lock_spin(&icu_lock);
 	for (slot = 0; slot < APIC_NUM_IOINTS; slot++) {
 		if (lapics[apic_id].la_ioint_irqs[slot] == -1) {
 			lapics[apic_id].la_ioint_irqs[slot] = irq;
 			found = slot + APIC_IO_INTS;
 			break;
 		}
 	}
 	mtx_unlock_spin(&icu_lock);
 	return (found);
 }
 
 /*
  * Allocate 'count' contiguous free IDT vectors for the IRQs in 'irqs'.
  * 'count' and 'align' must be powers of two with align >= count; the
  * first vector's slot index is a multiple of 'align'.  Returns the first
  * IDT vector, or 0 (after logging a message) if no suitable run exists.
  */
 u_int
 apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
 {
 	u_int base, i, len, slot;
 
 	KASSERT(powerof2(count), ("bad count"));
 	KASSERT(powerof2(align), ("bad align"));
 	KASSERT(align >= count, ("align < count"));
 #ifdef INVARIANTS
 	for (i = 0; i < count; i++)
 		KASSERT(irqs[i] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
 		    irqs[i], i));
 #endif
 
 	/* First fit: 'base' starts the current free run, 'len' is its size. */
 	len = 0;
 	base = 0;
 	mtx_lock_spin(&icu_lock);
 	for (slot = 0; slot < APIC_NUM_IOINTS; slot++) {
 
 		/* A slot in use breaks the run. */
 		if (lapics[apic_id].la_ioint_irqs[slot] != -1) {
 			len = 0;
 			base = 0;
 			continue;
 		}
 
 		/* A run may only begin on an aligned slot. */
 		if (len == 0) {
 			if ((slot & (align - 1)) != 0)
 				continue;
 			base = slot;
 		}
 
 		/* Count this slot; keep scanning until the run is complete. */
 		if (++len < count)
 			continue;
 
 		/* Claim the whole run and hand back its first vector. */
 		for (i = 0; i < count; i++)
 			lapics[apic_id].la_ioint_irqs[base + i] = irqs[i];
 		mtx_unlock_spin(&icu_lock);
 		return (base + APIC_IO_INTS);
 	}
 	mtx_unlock_spin(&icu_lock);
 	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
 	return (0);
 }
 
 /*
  * Enable a vector for a particular apic_id.  Since all lapics share idt
  * entries and ioint_handlers this enables the vector on all lapics.  lapics
  * which do not have the vector configured would report spurious interrupts
  * should it fire.
  *
  * The 'apic_id' argument is not referenced by the body.  The handler
  * installed in the IDT is picked from ioint_handlers[] by vector / 32,
  * i.e. one handler entry per group of 32 vectors.
  */
 void
 apic_enable_vector(u_int apic_id, u_int vector)
 {
 
 	/* Refuse to clobber entries reserved for other purposes. */
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
 	KASSERT(ioint_handlers[vector / 32] != NULL,
 	    ("No ISR handler for vector %u", vector));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 	setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL,
 	    GSEL_APIC);
 }
 
 /*
  * Counterpart of apic_enable_vector().  At present this only runs the
  * sanity checks: the code that would reset the IDT entry is compiled out
  * under 'notyet', so the entry is left installed.  'apic_id' is not
  * referenced by the body.
  */
 void
 apic_disable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 	KASSERT(ioint_handlers[vector / 32] != NULL,
 	    ("No ISR handler for vector %u", vector));
 #ifdef notyet
 	/*
 	 * We can not currently clear the idt entry because other cpus
 	 * may have a valid vector at this offset.
 	 */
 	setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC);
 #endif
 }
 
 /*
  * Release an APIC vector when it's no longer in use.
  *
  * Marks the slot for 'vector' on local APIC 'apic_id' as free (-1) under
  * icu_lock.  Unless the system is rebooting, the calling thread is bound
  * to the CPU returned by apic_cpuid(apic_id) around the update and
  * unbound afterwards; the function panics if the thread is already bound
  * on entry.  'irq' is used only by the assertions, which check that it
  * matches the IRQ currently recorded for the slot.
  */
 void
 apic_free_vector(u_int apic_id, u_int vector, u_int irq)
 {
 	struct thread *td;
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
 	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
 	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
 	    irq, ("IRQ mismatch"));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 
 	/*
 	 * Bind us to the cpu that owned the vector before freeing it so
 	 * we don't lose an interrupt delivery race.
 	 */
 	td = curthread;
 	if (!rebooting) {
 		thread_lock(td);
 		if (sched_is_bound(td))
 			panic("apic_free_vector: Thread already bound.\n");
 		sched_bind(td, apic_cpuid(apic_id));
 		thread_unlock(td);
 	}
 	/* Slot indices are vector numbers relative to APIC_IO_INTS. */
 	mtx_lock_spin(&icu_lock);
 	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1;
 	mtx_unlock_spin(&icu_lock);
 	/* 'rebooting' is re-read here rather than cached from above. */
 	if (!rebooting) {
 		thread_lock(td);
 		sched_unbind(td);
 		thread_unlock(td);
 	}
 }
 
 /*
  * Look up the IRQ recorded for an IDT vector on the given local APIC.
  * A slot holding a negative value is reported as IRQ 0.
  */
 u_int
 apic_idt_to_irq(u_int apic_id, u_int vector)
 {
 	int mapped;
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
 #ifdef KDTRACE_HOOKS
 	KASSERT(vector != IDT_DTRACE_RET,
 	    ("Attempt to overwrite DTrace entry"));
 #endif
 	/* The table is indexed relative to APIC_IO_INTS. */
 	mapped = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
 	return (mapped < 0 ? 0 : mapped);
 }
 
 #ifdef DDB
 /*
  * DDB "show apic" command: for every local APIC marked present, list the
  * slots that have an IRQ recorded.  The "v" modifier prints the interrupt
  * event for each source and "vv" requests the more detailed event dump.
  */
 DB_SHOW_COMMAND(apic, db_show_apic)
 {
 	struct intsrc *src;
 	u_int id, irq;
 	int detail, slot;
 
 	detail = 0;
 	if (strcmp(modif, "vv") == 0)
 		detail = 2;
 	else if (strcmp(modif, "v") == 0)
 		detail = 1;
 
 	for (id = 0; id <= MAX_APIC_ID; id++) {
 		if (lapics[id].la_present == 0)
 			continue;
 		db_printf("Interrupts bound to lapic %u\n", id);
 		for (slot = 0;
 		    slot < APIC_NUM_IOINTS + 1 && !db_pager_quit; slot++) {
 			/*
 			 * The table stores ints; -1 (free) converts to the
 			 * largest u_int, so the comparison below still
 			 * matches it.
 			 */
 			irq = lapics[id].la_ioint_irqs[slot];
 			if (irq == -1 || irq == IRQ_SYSCALL)
 				continue;
 #ifdef KDTRACE_HOOKS
 			if (irq == IRQ_DTRACE_RET)
 				continue;
 #endif
 			db_printf("vec 0x%2x -> ", slot + APIC_IO_INTS);
 			if (irq == IRQ_TIMER) {
 				db_printf("lapic timer\n");
 				continue;
 			}
 			if (irq >= NUM_IO_INTS) {
 				db_printf("IRQ %u ???\n", irq);
 				continue;
 			}
 			src = intr_lookup_source(irq);
 			if (src != NULL && detail != 0)
 				db_dump_intr_event(src->is_event,
 				    detail == 2);
 			else
 				db_printf("IRQ %u\n", irq);
 		}
 	}
 }
 
 /*
  * Print the positions of the bits set in 'v', each offset by 'base', as
  * two-digit hex values on one line introduced by "prefix:".  Nothing at
  * all is printed when 'v' is zero.
  */
 static void
 dump_mask(const char *prefix, uint32_t v, int base)
 {
 	int i, first;
 
 	first = 1;
 	for (i = 0; i < 32; i++) {
 		/*
 		 * Shift an unsigned one: "1 << 31" on a signed int is
 		 * undefined behavior.
 		 */
 		if ((v & ((uint32_t)1 << i)) == 0)
 			continue;
 		if (first) {
 			db_printf("%s:", prefix);
 			first = 0;
 		}
 		db_printf(" %02x", base + i);
 	}
 	if (!first)
 		db_printf("\n");
 }
 
 /*
  * Show info from the lapic regs for this CPU.
  *
  * Prints the ID, version, max LVT, SVR and TPR values, then the set bits
  * of the eight isr, tmr and irr register words via dump_mask().
  */
 DB_SHOW_COMMAND(lapic, db_show_lapic)
 {
 	uint32_t v;
 
 	db_printf("lapic ID = %d\n", lapic_id());
 	v = lapic->version;
 	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
 	    v & 0xf);
 	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
 	v = lapic->svr;
 	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
 	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
 	db_printf("TPR      = %02x\n", lapic->tpr);
 
 /*
  * dump_field(isr, 3) expands to dump_mask("isr3", lapic->isr3, 3 * 32):
  * word N of a register set covers bit numbers N * 32 .. N * 32 + 31.
  */
 #define dump_field(prefix, index)					\
 	dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index,	\
 	    index * 32)
 
 	db_printf("In-service Interrupts:\n");
 	dump_field(isr, 0);
 	dump_field(isr, 1);
 	dump_field(isr, 2);
 	dump_field(isr, 3);
 	dump_field(isr, 4);
 	dump_field(isr, 5);
 	dump_field(isr, 6);
 	dump_field(isr, 7);
 
 	db_printf("TMR Interrupts:\n");
 	dump_field(tmr, 0);
 	dump_field(tmr, 1);
 	dump_field(tmr, 2);
 	dump_field(tmr, 3);
 	dump_field(tmr, 4);
 	dump_field(tmr, 5);
 	dump_field(tmr, 6);
 	dump_field(tmr, 7);
 
 	db_printf("IRR Interrupts:\n");
 	dump_field(irr, 0);
 	dump_field(irr, 1);
 	dump_field(irr, 2);
 	dump_field(irr, 3);
 	dump_field(irr, 4);
 	dump_field(irr, 5);
 	dump_field(irr, 6);
 	dump_field(irr, 7);
 
 #undef dump_field
 }
 #endif
 
 /*
  * APIC probing support code.  This includes code to manage enumerators.
  */
 
 /* Every enumerator handed to apic_register_enumerator(). */
 static SLIST_HEAD(, apic_enumerator) enumerators =
 	SLIST_HEAD_INITIALIZER(enumerators);
 /* Enumerator chosen by apic_init(); NULL when none was selected. */
 static struct apic_enumerator *best_enum;
 
 /*
  * Add an enumerator to the head of the list of candidates.  Kernels built
  * with INVARIANTS panic if the same enumerator is registered twice.
  */
 void
 apic_register_enumerator(struct apic_enumerator *enumerator)
 {
 #ifdef INVARIANTS
 	struct apic_enumerator *cur;
 
 	SLIST_FOREACH(cur, &enumerators, apic_next) {
 		if (cur != enumerator)
 			continue;
 		panic("%s: Duplicate register of %s", __func__,
 		    enumerator->apic_name);
 	}
 #endif
 	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
 }
 
 /*
  * We have to look for CPU's very, very early because certain subsystems
  * want to know how many CPU's we have extremely early on in the boot
  * process.
  *
  * Selects the enumerator to use (stored in best_enum) and asks it to
  * probe the CPUs.  Returns without selecting anything when the CPU lacks
  * the CPUID_APIC feature, when the "apic" resource is disabled, or when
  * no enumerator probes successfully.
  */
 static void
 apic_init(void *dummy __unused)
 {
 	struct apic_enumerator *enumerator;
 #ifndef __amd64__
 	uint64_t apic_base;
 #endif
 	int retval, best;
 
 	/* We only support built in local APICs. */
 	if (!(cpu_feature & CPUID_APIC))
 		return;
 
 	/* Don't probe if APIC mode is disabled. */
 	if (resource_disabled("apic", 0))
 		return;
 
 	/*
 	 * Probe all the enumerators to find the best match.  Enumerators
 	 * whose probe returns a positive value are skipped; among the rest
 	 * the largest return value wins, and on a tie the one seen first
 	 * is kept.
 	 */
 	best_enum = NULL;
 	best = 0;
 	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
 		retval = enumerator->apic_probe();
 		if (retval > 0)
 			continue;
 		if (best_enum == NULL || best < retval) {
 			best_enum = enumerator;
 			best = retval;
 		}
 	}
 	if (best_enum == NULL) {
 		if (bootverbose)
 			printf("APIC: Could not find any APICs.\n");
 		return;
 	}
 
 	if (bootverbose)
 		printf("APIC: Using the %s enumerator.\n",
 		    best_enum->apic_name);
 
 #ifndef __amd64__
 	/*
 	 * To work around an errata, we disable the local APIC on some
 	 * CPUs during early startup.  We need to turn the local APIC back
 	 * on on such CPUs now.
 	 */
 	if (cpu == CPU_686 && cpu_vendor_id == CPU_VENDOR_INTEL &&
 	    (cpu_id & 0xff0) == 0x610) {
 		apic_base = rdmsr(MSR_APICBASE);
 		apic_base |= APICBASE_ENABLED;
 		wrmsr(MSR_APICBASE, apic_base);
 	}
 #endif
 
 	/* Probe the CPU's in the system.  A failure is only reported. */
 	retval = best_enum->apic_probe_cpus();
 	if (retval != 0)
 		printf("%s: Failed to probe CPUs: returned %d\n",
 		    best_enum->apic_name, retval);
 
 }
 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
 
 /*
  * Have the selected enumerator set up the local APIC.  This has to
  * happen before the APs are started in the SMP case.  Does nothing when
  * no enumerator was selected; a failure is reported but not acted upon.
  */
 static void
 apic_setup_local(void *dummy __unused)
 {
 	int error;
 
 	if (best_enum != NULL) {
 		error = best_enum->apic_setup_local();
 		if (error != 0)
 			printf(
 			    "%s: Failed to setup the local APIC: returned %d\n",
 			    best_enum->apic_name, error);
 	}
 }
 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
 
 /*
  * Setup the I/O APICs.
  *
  * Does nothing when no enumerator was selected.  A failure from the
  * enumerator is reported, and the rest of the setup still runs.  On XEN
  * builds the function returns right after the enumerator call, so the
  * BSP local APIC setup, PIC registration and MSI initialization below
  * are skipped.
  */
 static void
 apic_setup_io(void *dummy __unused)
 {
 	int retval;
 
 	if (best_enum == NULL)
 		return;
 	retval = best_enum->apic_setup_io();
 	if (retval != 0)
 		printf("%s: Failed to setup I/O APICs: returned %d\n",
 		    best_enum->apic_name, retval);
 
 #ifdef XEN
 	return;
 #endif
 	/*
 	 * Finish setting up the local APIC on the BSP once we know how to
 	 * properly program the LINT pins.
 	 */
 	lapic_setup(1);
 	intr_register_pic(&lapic_pic);
 	if (bootverbose)
 		lapic_dump("BSP");
 
 	/* Enable the MSI "pic". */
 	msi_init();
 }
 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL);
 
 #ifdef SMP
 /*
  * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
  * private to the MD code.  The public interface for the rest of the
  * kernel is defined in mp_machdep.c.
  */
 /*
  * Poll the ICR delivery status until it reads idle.  Returns 1 once it
  * does, or 0 after 'delay' polls without success.  A delay of -1 polls
  * without limit.
  */
 int
 lapic_ipi_wait(int delay)
 {
 	int forever, polls;
 
 	/*
 	 * The limit is a poll count rather than a time, so how long it
 	 * lasts depends on CPU clock speed.
 	 */
 	forever = (delay == -1);
 	polls = 0;
 	while (forever || polls < delay) {
 		if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
 			return (1);
 		ia32_pause();
 		/* Leave the counter alone when there is no limit. */
 		if (!forever)
 			polls++;
 	}
 	return (0);
 }
 
 /*
  * Write an IPI request to the ICR with interrupts disabled.  'icrlo' is
  * the value for the low register and must have no reserved bits set.
  * 'dest' is written to the ID field of the high register, but only when
  * 'icrlo' selects the destination-field mode.
  */
 void
 lapic_ipi_raw(register_t icrlo, u_int dest)
 {
 	register_t value, saveintr;
 
 	/* XXX: Need more sanity checking of icrlo? */
 	KASSERT(lapic != NULL, ("%s called too early", __func__));
 	KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
 	    ("%s: invalid dest field", __func__));
 	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
 	    ("%s: reserved bits set in ICR LO register", __func__));
 
 	/* Set destination in ICR HI register if it is being used. */
 	saveintr = intr_disable();
 	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
 		value = lapic->icr_hi;
 		value &= ~APIC_ID_MASK;
 		value |= dest << APIC_ID_SHIFT;
 		lapic->icr_hi = value;
 	}
 
 	/*
 	 * Program the contents of the IPI and dispatch it.  Only the
 	 * reserved bits of the current register value are carried over;
 	 * everything else comes from 'icrlo'.
 	 */
 	value = lapic->icr_lo;
 	value &= APIC_ICRLO_RESV_MASK;
 	value |= icrlo;
 	lapic->icr_lo = value;
 	intr_restore(saveintr);
 }
 
 #define	BEFORE_SPIN	1000000
 #ifdef DETECT_DEADLOCK
 #define	AFTER_SPIN	1000
 #endif
 
 /*
  * Send an IPI for 'vector' to 'dest'.  'dest' is one of the
  * APIC_IPI_DEST_* shorthands or, failing that, a value placed in the ICR
  * destination field.  Waits up to BEFORE_SPIN polls for a previous IPI to
  * finish; if it does not, the function panics, or returns silently when
  * panicstr is already set.
  */
 void
 lapic_ipi_vectored(u_int vector, int dest)
 {
 	register_t icrlo, destfield;
 
 	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
 	    ("%s: invalid vector %d", __func__, vector));
 
 	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
 
 	/*
 	 * IPI_STOP_HARD is just a "fake" vector used to send a NMI.
 	 * Use the special NMI delivery settings if it is passed,
 	 * otherwise specify the vector.
 	 */
 	if (vector == IPI_STOP_HARD)
 		icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
 	else
 		icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
 	destfield = 0;
 	switch (dest) {
 	case APIC_IPI_DEST_SELF:
 		icrlo |= APIC_DEST_SELF;
 		break;
 	case APIC_IPI_DEST_ALL:
 		icrlo |= APIC_DEST_ALLISELF;
 		break;
 	case APIC_IPI_DEST_OTHERS:
 		icrlo |= APIC_DEST_ALLESELF;
 		break;
 	default:
 		/* Not a shorthand: pass it on as the destination field. */
 		KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
 		    ("%s: invalid destination 0x%x", __func__, dest));
 		destfield = dest;
 	}
 
 	/* Wait for an earlier IPI to finish. */
 	if (!lapic_ipi_wait(BEFORE_SPIN)) {
 		if (panicstr != NULL)
 			return;
 		else
 			panic("APIC: Previous IPI is stuck");
 	}
 
 	lapic_ipi_raw(icrlo, destfield);
 
 #ifdef DETECT_DEADLOCK
 	/* Wait for IPI to be delivered. */
 	if (!lapic_ipi_wait(AFTER_SPIN)) {
 #ifdef needsattention
 		/*
 		 * XXX FIXME:
 		 *
 		 * The above function waits for the message to actually be
 		 * delivered.  It breaks out after an arbitrary timeout
 		 * since the message should eventually be delivered (at
 		 * least in theory) and that if it wasn't we would catch
 		 * the failure with the check above when the next IPI is
 		 * sent.
 		 *
 		 * We could skip this wait entirely, EXCEPT it probably
 		 * protects us from other routines that assume that the
 		 * message was delivered and acted upon when this function
 		 * returns.
 		 */
 		printf("APIC: IPI might be stuck\n");
 #else /* !needsattention */
 		/* Wait until message is sent without a timeout. */
 		while (lapic->icr_lo & APIC_DELSTAT_PEND)
 			ia32_pause();
 #endif /* needsattention */
 	}
 #endif /* DETECT_DEADLOCK */
 }
 #endif /* SMP */
Index: projects/binutils-2.17/sys
===================================================================
--- projects/binutils-2.17/sys	(revision 215829)
+++ projects/binutils-2.17/sys	(revision 215830)

Property changes on: projects/binutils-2.17/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r215709-215824
Index: projects/binutils-2.17/tools/tools/sysbuild/sysbuild.sh
===================================================================
--- projects/binutils-2.17/tools/tools/sysbuild/sysbuild.sh	(revision 215829)
+++ projects/binutils-2.17/tools/tools/sysbuild/sysbuild.sh	(revision 215830)
@@ -1,550 +1,577 @@
 #!/bin/sh
 #
 # Copyright (c) 1994-2009 Poul-Henning Kamp.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 # $FreeBSD$
 #
 
 set -e
 
 exec < /dev/null
 
 if [ `uname -m` = "i386" ] ; then
 	TARGET_PART=`df / | sed '
 	1d
 	s/[    ].*//
 	s,/dev/,,
 	s,s1a,s3a,
 	s,s2a,s1a,
 	s,s3a,s2a,
 	'`
 
 	# Where our build-bits are to be found
 	FREEBSD_PART=`echo $TARGET_PART | sed 's/s[12]a/s3/'`
 elif [ `uname -m` = "amd64" ] ; then
 	TARGET_PART=`df / | sed '
 	1d
 	s/[    ].*//
 	s,/dev/,,
 	s,s1a,s3a,
 	s,s2a,s1a,
 	s,s3a,s2a,
 	'`
 
 	# Where our build-bits are to be found
 	FREEBSD_PART=`echo $TARGET_PART | sed 's/s[12]a/s3/'`
 else
 	TARGET_PART=unknown
 	FREEBSD_PART=unknown
 fi
 
 # Relative to /freebsd
 PORTS_PATH=ports
 SRC_PATH=src
 # OBJ_PATH=obj
 
 # Name of kernel
 KERNCONF=GENERIC
 
 # srcconf
 #SRCCONF="SRCCONF=/usr/src/src.conf"
 
 # -j arg to make(1)
 
 ncpu=`sysctl -n kern.smp.cpus`
 if [ $ncpu -gt 1 ] ; then
 	JARG="-j $ncpu"
 fi
 
 # serial console ?
 SERCONS=false
 
 # Remotely mounted distfiles
 # REMOTEDISTFILES=fs:/rdonly/distfiles
 
 # Proxy
 #FTP_PROXY=http://127.0.0.1:3128/
 #HTTP_PROXY=http://127.0.0.1:3128/
 #export FTP_PROXY HTTP_PROXY
 
 PORTS_WE_WANT='
 '
 
 PORTS_OPTS="BATCH=YES MAKE_IDEA=YES A4=yes"
 
 CONFIGFILES='
 '
 
 cleanup() (
 )
 
 before_ports() (
 )
 
 before_ports_chroot() (
 )
 
 final_root() (
 )
 
 final_chroot() (
 )
 
 #######################################################################
 #######################################################################
 
 # Print the command-line synopsis on stderr and exit with status 2.
 # The option list mirrors the getopt string "bc:hkpw" used further down.
 usage () {
 	(
 	echo "Usage: $0 [-b/-k/-w] [-p] [-c config_file]"
 	echo "  -b      suppress builds (both kernel and world)"
 	echo "  -k      suppress buildkernel"
 	echo "  -w      suppress buildworld"
 	echo "  -p      use cached packages"
 	echo "  -c      specify config file"
 	echo "  -h      show this help"
 	) 1>&2
 	exit 2
 }
 
 #######################################################################
 #######################################################################
 
 if [ ! -f $0 ] ; then
 	echo "Must be able to access self ($0)" 1>&2
 	exit 1
 fi
 
 if grep -q 'Magic String: 0`0nQT40W%l,CX&' $0 ; then
 	true
 else
 	echo "self ($0) does not contain magic string" 1>&2
 	exit 1
 fi
 
 #######################################################################
 
 set -e
 
 # Print a timestamped progress line built from the arguments.
 # /tmp/_sb_log holds two epoch times, "<first> <previous>".  The output
 # shows the seconds elapsed since each of them, and the file is rewritten
 # with the first value unchanged and the current time as the new second
 # value.  The body runs in a subshell, so the variables and positional
 # parameters it sets do not leak into the caller.
 log_it() (
 	set +x
 	a="$*"
 	# Load the two stored timestamps into $1 and $2.
 	set `cat /tmp/_sb_log`
 	TX=`date +%s`
 	echo "$1 $TX" > /tmp/_sb_log
 	# expr exits non-zero when its result is 0; "|| true" hides that.
 	DT=`expr $TX - $1 || true`
 	DL=`expr $TX - $2 || true`
 	echo -n "### `date +%H:%M:%S`"
 	printf " ### %5d ### %5d ### %s\n" $DT $DL "$a"
 )
 
 #######################################################################
 
 
 ports_recurse() (
 	set +x
+	t=$1
+	shift
+	if [ "x$t" = "x." ] ; then
+		true > /tmp/_.plist
+		true > /tmp/_.plist.tdone
+		echo 'digraph {' > /tmp/_.plist.dot
+	fi
+	if grep -q "^$t\$" /tmp/_.plist.tdone ; then
+		return
+	fi
+	echo "$t" >> /tmp/_.plist.tdone
 	for d
 	do
 		if [ ! -d $d ] ; then
 			echo "Missing port $d" 1>&2
 			exit 2
 		fi
+		if [ "x$t" != "x." ] ; then
+			echo "\"$t\" -> \"$d\"" >> /tmp/_.plist.dot
+		fi
 		if grep -q "^$d\$" /tmp/_.plist ; then
 			true
+		elif grep -q "^$d\$" /tmp/_.plist.tdone ; then
+			true
 		else
 			(
 			cd $d
-			ports_recurse `make -V _DEPEND_DIRS ${PORTS_OPTS}`
+			ports_recurse $d `make -V _DEPEND_DIRS ${PORTS_OPTS}`
 			)
-			echo $d >> /tmp/_.plist
+			echo "$d" >> /tmp/_.plist
 		fi
 	done
+	if [ "x$t" = "x." ] ; then
+		echo '}' >> /tmp/_.plist.dot
+	fi
 )
 
 ports_build() (
 	set +x
 
-	true > /tmp/_.plist
-	ports_recurse $PORTS_WE_WANT 
+	ports_recurse . $PORTS_WE_WANT 
 
 	# Now build & install them
 	for p in `cat /tmp/_.plist`
 	do
 		t=`echo $p | sed 's,/usr/ports/,,'`
 		pn=`cd $p && make package-name`
 		if [ "x${PKG_DIR}" != "x" -a -f ${PKG_DIR}/$pn.tbz ] ; then
 			if [ "x$use_pkg" = "x-p" ] ; then
 				log_it "install $p from ${PKG_DIR}/$pn.tbz"
 				pkg_add ${PKG_DIR}/$pn.tbz
 			fi
 		fi
 		i=`pkg_info -qO $t`
 		if [ -z "$i" ] ; then
 			log_it "build $p"
 			b=`echo $p | tr / _`
 			(
 				set -x
 				cd /usr/ports
 				cd $p
 				set +e
 				make clean ${PORTS_OPTS}
 				if make install ${PORTS_OPTS} ; then
 					if [ "x${PKG_DIR}" != "x" ] ; then
 						make package ${PORTS_OPTS}
 						mv *.tbz ${PKG_DIR}
 					fi
 				else
 					log_it FAIL build $p
 				fi
 				make clean
 			) > _.$b 2>&1 < /dev/null
 			date
 		fi
 	done
 )
 
 ports_prefetch() (
 	(
 	set +x
-	true > /tmp/_.plist
-	ports_recurse $PORTS_WE_WANT
-
 	true > /mnt/_.prefetch
+	echo "Building /tmp/_.plist" >> /mnt/_.prefetch
+
+	ports_recurse . $PORTS_WE_WANT
+
+	echo "Completed /tmp/_.plist" >> /mnt/_.prefetch
 	# Now checksum/fetch them
 	for p in `cat /tmp/_.plist`
 	do
-		echo "Prefetching $p" >> /mnt/_.prefetch
 		b=`echo $p | tr / _`
 		(
 			cd $p
 			if make checksum $PORTS_OPTS ; then
-				true
+				rm -f /mnt/_.prefetch.$b
+				echo "OK $p" >> /mnt/_.prefetch
+				exit 0
+			fi
+			make distclean
+			make checksum $PORTS_OPTS || true
+
+			if make checksum $PORTS_OPTS > /dev/null 2>&1 ; then
+				rm -f /mnt/_.prefetch.$b
+				echo "OK $p" >> /mnt/_.prefetch
 			else
-				make distclean
-				make checksum $PORTS_OPTS || true
+				echo "BAD $p" >> /mnt/_.prefetch
 			fi
 		) > /mnt/_.prefetch.$b 2>&1
 	done
 	) 
 )
 
 #######################################################################
 
 do_world=true
 do_kernel=true
 use_pkg=""
 c_arg=""
 
 set +e
 args=`getopt bc:hkpw $*`
 if [ $? -ne 0 ] ; then
 	usage
 fi
 set -e
 
 set -- $args
 for i
 do
 	case "$i"
 	in
 	-b)
 		shift;
 		do_world=false
 		do_kernel=false
 		;;
 	-c)
 		c_arg=$2
 		if [ ! -f "$c_arg" ] ; then
 			echo "Cannot read $c_arg" 1>&2
 			usage
 		fi
 		. "$2"
 		shift
 		shift
 		;;
 	-h)
 		usage
 		;;
 	-k)
 		shift;
 		do_kernel=false
 		;;
 	-p)
 		shift;
 		use_pkg="-p"
 		;;
 	-w)
 		shift;
 		do_world=false
 		;;
 	--)
 		shift
 		break;
 		;;
 	esac
 done
 
 #######################################################################
 
 if [ "x$1" = "xchroot_script" ] ; then
 	set +x
 	set -e
 
 	shift
 
 	before_ports_chroot
 
 	ports_build
 
 	exit 0
 fi
 
 if [ "x$1" = "xfinal_chroot" ] ; then
 	final_chroot
 	exit 0
 fi
 
 if [ $# -gt 0 ] ; then
         echo "$0: Extraneous arguments supplied"
         usage
 fi
 
 #######################################################################
 
 T0=`date +%s`
 echo $T0 $T0 > /tmp/_sb_log
 
 log_it Unmount everything
 (
 	( cleanup )
 	umount /freebsd/distfiles || true
 	umount /mnt/freebsd/distfiles || true
 	umount /dev/${FREEBSD_PART} || true
 	umount /mnt/freebsd || true
 	umount /mnt/dev || true
 	umount /mnt || true
 	umount /dev/${TARGET_PART} || true
 ) # > /dev/null 2>&1
 
 log_it Prepare running image
 mkdir -p /freebsd
 mount /dev/${FREEBSD_PART} /freebsd
 
 #######################################################################
 
 if [ ! -d /freebsd/${PORTS_PATH} ] ;  then
 	echo PORTS_PATH does not exist 1>&2
 	exit 1
 fi
 
 if [ ! -d /freebsd/${SRC_PATH} ] ;  then
 	echo SRC_PATH does not exist 1>&2
 	exit 1
 fi
 
 log_it TARGET_PART $TARGET_PART
 sleep 5
 
 rm -rf /usr/ports
 ln -s /freebsd/${PORTS_PATH} /usr/ports
 
 rm -rf /usr/src
 ln -s /freebsd/${SRC_PATH} /usr/src
 
 if $do_world ; then
 	if [ "x${OBJ_PATH}" != "x" ] ; then
 		rm -rf /usr/obj
 		mkdir -p /freebsd/${OBJ_PATH}
 		ln -s /freebsd/${OBJ_PATH} /usr/obj
 	else
 		rm -rf /usr/obj
 		mkdir -p /usr/obj
 	fi
 fi
 
 #######################################################################
 
 for i in ${PORTS_WE_WANT}
 do
 	if [ ! -d $i ]  ; then
 		echo "Port $i not found" 1>&2
 		exit 2
 	fi
 done
 
 export PORTS_WE_WANT
 export PORTS_OPTS
 
 #######################################################################
 
 log_it Prepare destination partition
 newfs -O2 -U /dev/${TARGET_PART} > /dev/null
 mount /dev/${TARGET_PART} /mnt
 mkdir -p /mnt/dev
 mount -t devfs devfs /mnt/dev
 
 if [ "x${REMOTEDISTFILES}" != "x" ] ; then
 	rm -rf /freebsd/${PORTS_PATH}/distfiles
 	ln -s /freebsd/distfiles /freebsd/${PORTS_PATH}/distfiles
 	mkdir -p /freebsd/distfiles
 	mount  ${REMOTEDISTFILES} /freebsd/distfiles
 fi
 
 log_it copy ports config files
-(cd / ; find var/db/ports -print | cpio -dumpv /mnt )
+(cd / ; find var/db/ports -print | cpio -dumpv /mnt > /dev/null 2>&1)
 
 log_it "Start prefetch of ports distfiles"
 ports_prefetch &
 
 if $do_world ; then
 	(
 	cd /usr/src
 	log_it "Buildworld"
 	make ${JARG} -s buildworld ${SRCCONF} > /mnt/_.bw 2>&1
 	)
 fi
 
 if $do_kernel ; then
 	(
 	cd /usr/src
 	log_it "Buildkernel"
 	make ${JARG} -s buildkernel KERNCONF=$KERNCONF > /mnt/_.bk 2>&1
 	)
 fi
 
 
 log_it Installworld
 (cd /usr/src && make ${JARG} installworld DESTDIR=/mnt ${SRCCONF} ) \
 	> /mnt/_.iw 2>&1
 
 log_it distribution
 (cd /usr/src/etc && make -m /usr/src/share/mk distribution DESTDIR=/mnt ${SRCCONF} ) \
 	> /mnt/_.dist 2>&1
 
 log_it Installkernel
 (cd /usr/src && make ${JARG} installkernel DESTDIR=/mnt KERNCONF=$KERNCONF ) \
 	> /mnt/_.ik 2>&1
 
 if [ "x${OBJ_PATH}" != "x" ] ; then
 	rmdir /mnt/usr/obj
 	ln -s /freebsd/${OBJ_PATH} /mnt/usr/obj
 fi
 
 log_it Wait for ports prefetch
 log_it "(Tail /mnt/_.prefetch for progress)"
 wait
 
 log_it Move filesystems
 
 if [ "x${REMOTEDISTFILES}" != "x" ] ; then
 	umount /freebsd/distfiles
 fi
 umount /dev/${FREEBSD_PART} || true
 mkdir -p /mnt/freebsd
 mount /dev/${FREEBSD_PART} /mnt/freebsd
 if [ "x${REMOTEDISTFILES}" != "x" ] ; then
 	mount  ${REMOTEDISTFILES} /mnt/freebsd/distfiles
 fi
 
 rm -rf /mnt/usr/ports || true
 ln -s /freebsd/${PORTS_PATH} /mnt/usr/ports
 
 rm -rf /mnt/usr/src || true
 ln -s /freebsd/${SRC_PATH} /mnt/usr/src
 
 log_it Build and install ports
 
 # Make sure fetching will work in the chroot
 if [ -f /etc/resolv.conf ] ; then
 	log_it copy resolv.conf
 	cp /etc/resolv.conf /mnt/etc
 	chflags schg /mnt/etc/resolv.conf
 fi
 
 if [ -f /etc/localtime ] ; then
 	log_it copy localtime
 	cp /etc/localtime /mnt/etc
 fi
 
 log_it ldconfig in chroot
 chroot /mnt sh /etc/rc.d/ldconfig start
 
 log_it before_ports
 ( 
 	before_ports 
 )
 
 log_it build ports
 pwd
 # Re-run this script inside the chroot to build the ports.  It is copied
 # into /mnt/root under its basename, so it must be invoked by that name:
 # using $0 directly breaks as soon as $0 carries a directory component.
 cp $0 /mnt/root
 cp /tmp/_sb_log /mnt/tmp
 b=`basename $0`
 if [ "x$c_arg" != "x" ] ; then
 	cp $c_arg /mnt/root
 	chroot /mnt sh /root/$b -c /root/`basename $c_arg` $use_pkg chroot_script
 else
 	chroot /mnt sh /root/$b $use_pkg chroot_script
 fi
 # Bring back the log timestamps updated inside the chroot.
 cp /mnt/tmp/_sb_log /tmp
 
 log_it fixing fstab
 sed "/[ 	]\/[ 	]/s;^[^ 	]*[ 	];/dev/${TARGET_PART}	;" \
 	/etc/fstab > /mnt/etc/fstab
 
 log_it create all mountpoints
 grep -v '^[ 	]*#' /mnt/etc/fstab | 
 while read a b c
 do
 	mkdir -p /mnt/$b
 done
 
 if [ "x$SERCONS" != "xfalse" ] ; then
 	log_it serial console
 	echo " -h" > /mnt/boot.config
 	sed -i "" -e /ttyd0/s/off/on/ /mnt/etc/ttys
 	sed -i "" -e /ttyu0/s/off/on/ /mnt/etc/ttys
 	sed -i "" -e '/^ttyv[0-8]/s/	on/	off/' /mnt/etc/ttys
 fi
 
 log_it move config files
 (
 	cd /mnt
 	mkdir root/configfiles_dist
 	find ${CONFIGFILES} -print | cpio -dumpv root/configfiles_dist
 )
 
 (cd / && find ${CONFIGFILES} -print | cpio -dumpv /mnt)
 
 log_it final_root
 ( final_root )
 log_it final_chroot
 cp /tmp/_sb_log /mnt/tmp
 # The copy of this script in /mnt/root is named after its basename, so
 # call it by that name rather than by $0, which may include a directory.
 self=`basename $0`
 if [ "x$c_arg" != "x" ] ; then
 	chroot /mnt sh /root/$self -c /root/`basename $c_arg` final_chroot
 else
 	chroot /mnt sh /root/$self final_chroot
 fi
 cp /mnt/tmp/_sb_log /tmp
 log_it "Check these messages (if any):"
 grep '^Stop' /mnt/_* || true
 log_it DONE
Index: projects/binutils-2.17/usr.bin/calendar
===================================================================
--- projects/binutils-2.17/usr.bin/calendar	(revision 215829)
+++ projects/binutils-2.17/usr.bin/calendar	(revision 215830)

Property changes on: projects/binutils-2.17/usr.bin/calendar
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/usr.bin/calendar:r215709-215824
Index: projects/binutils-2.17/usr.bin/csup
===================================================================
--- projects/binutils-2.17/usr.bin/csup	(revision 215829)
+++ projects/binutils-2.17/usr.bin/csup	(revision 215830)

Property changes on: projects/binutils-2.17/usr.bin/csup
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/usr.bin/csup:r215709-215824
Index: projects/binutils-2.17/usr.bin/locate/locate/locate.h
===================================================================
--- projects/binutils-2.17/usr.bin/locate/locate/locate.h	(revision 215829)
+++ projects/binutils-2.17/usr.bin/locate/locate/locate.h	(revision 215830)
@@ -1,72 +1,72 @@
 /*
  * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)locate.h	8.1 (Berkeley) 6/6/93
  * $FreeBSD$
  */
 
 /* Symbolic constants shared by locate.c and code.c */
 
 #define	NBG		128		/* number of bigrams considered */
 #define	OFFSET		14		/* abs value of max likely diff */
 #define	PARITY		0200		/* parity bit */
 #define	SWITCH		30		/* switch code */
 #define UMLAUT          31              /* an 8 bit char followed */
 
 /* 	0-28	likeliest differential counts + offset to make nonnegative */
 #define LDC_MIN         0
 #define LDC_MAX        28
 
 /*	128-255 bigram codes (128 most common, as determined by 'updatedb') */
-#define BIGRAM_MIN    (UCHAR_MAX - CHAR_MAX) 
+#define BIGRAM_MIN    (UCHAR_MAX - SCHAR_MAX) 
 #define BIGRAM_MAX    UCHAR_MAX
 
 /*	32-127  single character (printable) ascii residue (ie, literal) */
 #define ASCII_MIN      32
-#define ASCII_MAX     CHAR_MAX
+#define ASCII_MAX     SCHAR_MAX
 
-/* #define TO7BIT(x)     (x = ( ((u_char)x) & CHAR_MAX )) */
-#define TO7BIT(x)     (x = x & CHAR_MAX )
+/* #define TO7BIT(x)     (x = ( ((u_char)x) & SCHAR_MAX )) */
+#define TO7BIT(x)     (x = x & SCHAR_MAX )
 
 
 /*
  * Lower-casing helper.  When UCHAR_MAX is at least 4096, TOLOWER maps to
  * tolower(); otherwise it is a lookup in the myctype[] table, which has
  * one entry per unsigned char value.
  */
 #if UCHAR_MAX >= 4096
 #  define TOLOWER(ch)	  tolower(ch)
 #else
 
 u_char myctype[UCHAR_MAX + 1];
 #define TOLOWER(ch)	(myctype[ch])
 #endif
 
 #define INTSIZE (sizeof(int))
 
 #define LOCATE_REG "*?[]\\"  /* fnmatch(3) meta characters */
Index: projects/binutils-2.17/usr.bin/netstat/inet.c
===================================================================
--- projects/binutils-2.17/usr.bin/netstat/inet.c	(revision 215829)
+++ projects/binutils-2.17/usr.bin/netstat/inet.c	(revision 215830)
@@ -1,1309 +1,1310 @@
 /*-
  * Copyright (c) 1983, 1988, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)inet.c	8.5 (Berkeley) 5/24/95";
 #endif /* not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <net/route.h>
 #include <net/if_arp.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_carp.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif /* INET6 */
 #include <netinet/in_pcb.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 #include <netinet/igmp_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/pim_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_seq.h>
 #define	TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_debug.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <arpa/inet.h>
 #include <err.h>
 #include <errno.h>
 #include <libutil.h>
 #include <netdb.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "netstat.h"
 
 char	*inetname(struct in_addr *);
 void	inetprint(struct in_addr *, int, const char *, int);
 #ifdef INET6
 static int udp_done, tcp_done;
 #endif /* INET6 */
 
 static int
 pcblist_sysctl(int proto, char **bufp, int istcp)
 {
 	const char *mibvar;
 	char *buf;
 	size_t len;
 
 	switch (proto) {
 	case IPPROTO_TCP:
 		mibvar = "net.inet.tcp.pcblist";
 		break;
 	case IPPROTO_UDP:
 		mibvar = "net.inet.udp.pcblist";
 		break;
 	case IPPROTO_DIVERT:
 		mibvar = "net.inet.divert.pcblist";
 		break;
 	default:
 		mibvar = "net.inet.raw.pcblist";
 		break;
 	}
 
 	len = 0;
 	if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
 		if (errno != ENOENT)
 			warn("sysctl: %s", mibvar);
 		return (0);
 	}
 	if ((buf = malloc(len)) == 0) {
 		warnx("malloc %lu bytes", (u_long)len);
 		return (0);
 	}
 	if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) {
 		warn("sysctl: %s", mibvar);
 		free(buf);
 		return (0);
 	}
 	*bufp = buf;
 	return (1);
 }
 
 /*
  * Copied directly from uipc_socket2.c.  We leave out some fields that are in
  * nested structures that aren't used to avoid extra work.
  */
 static void
 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 {
 	xsb->sb_cc = sb->sb_cc;
 	xsb->sb_hiwat = sb->sb_hiwat;
 	xsb->sb_mbcnt = sb->sb_mbcnt;
 	xsb->sb_mcnt = sb->sb_mcnt;
 	xsb->sb_ccnt = sb->sb_ccnt;
 	xsb->sb_mbmax = sb->sb_mbmax;
 	xsb->sb_lowat = sb->sb_lowat;
 	xsb->sb_flags = sb->sb_flags;
 	xsb->sb_timeo = sb->sb_timeo;
 }
 
 int
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
 	struct protosw proto;
 	struct domain domain;
 
 	bzero(xso, sizeof *xso);
 	xso->xso_len = sizeof *xso;
 	xso->xso_so = so;
 	xso->so_type = so->so_type;
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
 	xso->so_state = so->so_state;
 	xso->so_pcb = so->so_pcb;
 	if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0)
 		return (-1);
 	xso->xso_protocol = proto.pr_protocol;
 	if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
 		return (-1);
 	xso->xso_family = domain.dom_family;
 	xso->so_qlen = so->so_qlen;
 	xso->so_incqlen = so->so_incqlen;
 	xso->so_qlimit = so->so_qlimit;
 	xso->so_timeo = so->so_timeo;
 	xso->so_error = so->so_error;
 	xso->so_oobmark = so->so_oobmark;
 	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
 	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 	return (0);
 }
 
 static int
 pcblist_kvm(u_long off, char **bufp, int istcp)
 {
 	struct inpcbinfo pcbinfo;
 	struct inpcbhead listhead;
 	struct inpcb *inp;
 	struct xinpcb xi;
 	struct xinpgen xig;
 	struct xtcpcb xt;
 	struct socket so;
 	struct xsocket *xso;
 	char *buf, *p;
 	size_t len;
 
 	if (off == 0)
 		return (0);
 	kread(off, &pcbinfo, sizeof(pcbinfo));
 	if (istcp)
 		len = 2 * sizeof(xig) +
 		    (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) *
 		    sizeof(struct xtcpcb);
 	else
 		len = 2 * sizeof(xig) +
 		    (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) *
 		    sizeof(struct xinpcb);
 	if ((buf = malloc(len)) == 0) {
 		warnx("malloc %lu bytes", (u_long)len);
 		return (0);
 	}
 	p = buf;
 
 #define	COPYOUT(obj, size) do {						\
 	if (len < (size)) {						\
 		warnx("buffer size exceeded");				\
 		goto fail;						\
 	}								\
 	bcopy((obj), p, (size));					\
 	len -= (size);							\
 	p += (size);							\
 } while (0)
 
 #define	KREAD(off, buf, len) do {					\
 	if (kread((uintptr_t)(off), (buf), (len)) != 0)			\
 		goto fail;						\
 } while (0)
 
 	/* Write out header. */
 	xig.xig_len = sizeof xig;
 	xig.xig_count = pcbinfo.ipi_count;
 	xig.xig_gen = pcbinfo.ipi_gencnt;
 	xig.xig_sogen = 0;
 	COPYOUT(&xig, sizeof xig);
 
 	/* Walk the PCB list. */
 	xt.xt_len = sizeof xt;
 	xi.xi_len = sizeof xi;
 	if (istcp)
 		xso = &xt.xt_socket;
 	else
 		xso = &xi.xi_socket;
 	KREAD(pcbinfo.ipi_listhead, &listhead, sizeof(listhead));
 	LIST_FOREACH(inp, &listhead, inp_list) {
 		if (istcp) {
 			KREAD(inp, &xt.xt_inp, sizeof(*inp));
 			inp = &xt.xt_inp;
 		} else {
 			KREAD(inp, &xi.xi_inp, sizeof(*inp));
 			inp = &xi.xi_inp;
 		}
 
 		if (inp->inp_gencnt > pcbinfo.ipi_gencnt)
 			continue;
 
 		if (istcp) {
 			if (inp->inp_ppcb == NULL)
 				bzero(&xt.xt_tp, sizeof xt.xt_tp);
 			else if (inp->inp_flags & INP_TIMEWAIT) {
 				bzero(&xt.xt_tp, sizeof xt.xt_tp);
 				xt.xt_tp.t_state = TCPS_TIME_WAIT;
 			} else
 				KREAD(inp->inp_ppcb, &xt.xt_tp,
 				    sizeof xt.xt_tp);
 		}
 		if (inp->inp_socket) {
 			KREAD(inp->inp_socket, &so, sizeof(so));
 			if (sotoxsocket(&so, xso) != 0)
 				goto fail;
 		} else {
 			bzero(xso, sizeof(*xso));
 			if (istcp)
 				xso->xso_protocol = IPPROTO_TCP;
 		}
 		if (istcp)
 			COPYOUT(&xt, sizeof xt);
 		else
 			COPYOUT(&xi, sizeof xi);
 	}
 
 	/* Reread the pcbinfo and write out the footer. */
 	kread(off, &pcbinfo, sizeof(pcbinfo));
 	xig.xig_count = pcbinfo.ipi_count;
 	xig.xig_gen = pcbinfo.ipi_gencnt;
 	COPYOUT(&xig, sizeof xig);
 
 	*bufp = buf;
 	return (1);
 
 fail:
 	free(buf);
 	return (0);
 #undef COPYOUT
 #undef KREAD
 }
 
 /*
  * Print a summary of connections related to an Internet
  * protocol.  For TCP, also give state of connection.
  * Listening processes (aflag) are suppressed unless the
  * -a (all) flag is specified.
  */
 void
 protopr(u_long off, const char *name, int af1, int proto)
 {
 	int istcp;
 	static int first = 1;
 	char *buf;
 	const char *vchar;
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp;
 	struct xinpgen *xig, *oxig;
 	struct xsocket *so;
 	struct xtcp_timer *timer;
 
 	istcp = 0;
 	switch (proto) {
 	case IPPROTO_TCP:
 #ifdef INET6
 		if (tcp_done != 0)
 			return;
 		else
 			tcp_done = 1;
 #endif
 		istcp = 1;
 		break;
 	case IPPROTO_UDP:
 #ifdef INET6
 		if (udp_done != 0)
 			return;
 		else
 			udp_done = 1;
 #endif
 		break;
 	}
 	if (live) {
 		if (!pcblist_sysctl(proto, &buf, istcp))
 			return;
 	} else {
 		if (!pcblist_kvm(off, &buf, istcp))
 			return;
 	}
 
 	oxig = xig = (struct xinpgen *)buf;
 	for (xig = (struct xinpgen *)((char *)xig + xig->xig_len);
 	     xig->xig_len > sizeof(struct xinpgen);
 	     xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
 		if (istcp) {
 			timer = &((struct xtcpcb *)xig)->xt_timer;
 			tp = &((struct xtcpcb *)xig)->xt_tp;
 			inp = &((struct xtcpcb *)xig)->xt_inp;
 			so = &((struct xtcpcb *)xig)->xt_socket;
 		} else {
 			inp = &((struct xinpcb *)xig)->xi_inp;
 			so = &((struct xinpcb *)xig)->xi_socket;
 			timer = NULL;
 		}
 
 		/* Ignore sockets for protocols other than the desired one. */
 		if (so->xso_protocol != proto)
 			continue;
 
 		/* Ignore PCBs which were freed during copyout. */
 		if (inp->inp_gencnt > oxig->xig_gen)
 			continue;
 
 		if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0)
 #ifdef INET6
 		    || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0)
 #endif /* INET6 */
 		    || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0
 #ifdef INET6
 					  && (inp->inp_vflag & INP_IPV6) == 0
 #endif /* INET6 */
 			))
 		    )
 			continue;
 		if (!aflag &&
 		    (
 		     (istcp && tp->t_state == TCPS_LISTEN)
 		     || (af1 == AF_INET &&
 		      inet_lnaof(inp->inp_laddr) == INADDR_ANY)
 #ifdef INET6
 		     || (af1 == AF_INET6 &&
 			 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 #endif /* INET6 */
 		     || (af1 == AF_UNSPEC &&
 			 (((inp->inp_vflag & INP_IPV4) != 0 &&
 			   inet_lnaof(inp->inp_laddr) == INADDR_ANY)
 #ifdef INET6
 			  || ((inp->inp_vflag & INP_IPV6) != 0 &&
 			      IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 #endif
 			  ))
 		     ))
 			continue;
 
 		if (first) {
 			if (!Lflag) {
 				printf("Active Internet connections");
 				if (aflag)
 					printf(" (including servers)");
 			} else
 				printf(
 	"Current listen queue sizes (qlen/incqlen/maxqlen)");
 			putchar('\n');
 			if (Aflag)
 				printf("%-8.8s ", "Tcpcb");
 			if (Lflag)
 				printf("%-5.5s %-14.14s %-22.22s\n",
 				    "Proto", "Listen", "Local Address");
 			if (Tflag) 
 				printf((Aflag && !Wflag) ?
 			    "%-5.5s %-6.6s %-6.6s %-6.6s %-18.18s %s\n" :
 			    "%-5.5s %-6.6s %-6.6s %-6.6s %-22.22s %s\n",
 				    "Proto", "Rexmit", "OOORcv", "0-win",
 				    "Local Address", "Foreign Address");
 			if (xflag) {
 				printf("%-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s ",
 				       "R-MBUF", "S-MBUF", "R-CLUS", 
 				       "S-CLUS", "R-HIWA", "S-HIWA", 
 				       "R-LOWA", "S-LOWA", "R-BCNT", 
 				       "S-BCNT", "R-BMAX", "S-BMAX");
 				printf("%7.7s %7.7s %7.7s %7.7s %7.7s %7.7s %s\n",
 				       "rexmt", "persist", "keep",
 				       "2msl", "delack", "rcvtime",
 				       "(state)");
 			}
-			if (!xflag && !Tflag) 
+			if (!xflag && !Tflag) {
 				printf((Aflag && !Wflag) ? 
 				       "%-5.5s %-6.6s %-6.6s  %-18.18s %-18.18s" :
 				       "%-5.5s %-6.6s %-6.6s  %-22.22s %-22.22s",
 				       "Proto", "Recv-Q", "Send-Q",
 				       "Local Address", "Foreign Address");
-
+				printf("(state)\n");
+			}
 			first = 0;
 		}
 		if (Lflag && so->so_qlimit == 0)
 			continue;
 		if (Aflag) {
 			if (istcp)
 				printf("%8lx ", (u_long)inp->inp_ppcb);
 			else
 				printf("%8lx ", (u_long)so->so_pcb);
 		}
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV6) != 0)
 			vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
 			    "46" : "6 ";
 		else
 #endif
 		vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
 		    "4 " : "  ";
 		printf("%-3.3s%-2.2s ", name, vchar);
 		if (Lflag) {
 			char buf1[15];
 
 			snprintf(buf1, 15, "%d/%d/%d", so->so_qlen,
 			    so->so_incqlen, so->so_qlimit);
 			printf("%-14.14s ", buf1);
 		} else if (Tflag) {
 			if (istcp)
 				printf("%6u %6u %6u ", tp->t_sndrexmitpack,
 				       tp->t_rcvoopack, tp->t_sndzerowin);
 		} else {
 			printf("%6u %6u ", so->so_rcv.sb_cc, so->so_snd.sb_cc);
 		}
 		if (numeric_port) {
 			if (inp->inp_vflag & INP_IPV4) {
 				inetprint(&inp->inp_laddr, (int)inp->inp_lport,
 				    name, 1);
 				if (!Lflag)
 					inetprint(&inp->inp_faddr,
 					    (int)inp->inp_fport, name, 1);
 			}
 #ifdef INET6
 			else if (inp->inp_vflag & INP_IPV6) {
 				inet6print(&inp->in6p_laddr,
 				    (int)inp->inp_lport, name, 1);
 				if (!Lflag)
 					inet6print(&inp->in6p_faddr,
 					    (int)inp->inp_fport, name, 1);
 			} /* else nothing printed now */
 #endif /* INET6 */
 		} else if (inp->inp_flags & INP_ANONPORT) {
 			if (inp->inp_vflag & INP_IPV4) {
 				inetprint(&inp->inp_laddr, (int)inp->inp_lport,
 				    name, 1);
 				if (!Lflag)
 					inetprint(&inp->inp_faddr,
 					    (int)inp->inp_fport, name, 0);
 			}
 #ifdef INET6
 			else if (inp->inp_vflag & INP_IPV6) {
 				inet6print(&inp->in6p_laddr,
 				    (int)inp->inp_lport, name, 1);
 				if (!Lflag)
 					inet6print(&inp->in6p_faddr,
 					    (int)inp->inp_fport, name, 0);
 			} /* else nothing printed now */
 #endif /* INET6 */
 		} else {
 			if (inp->inp_vflag & INP_IPV4) {
 				inetprint(&inp->inp_laddr, (int)inp->inp_lport,
 				    name, 0);
 				if (!Lflag)
 					inetprint(&inp->inp_faddr,
 					    (int)inp->inp_fport, name,
 					    inp->inp_lport != inp->inp_fport);
 			}
 #ifdef INET6
 			else if (inp->inp_vflag & INP_IPV6) {
 				inet6print(&inp->in6p_laddr,
 				    (int)inp->inp_lport, name, 0);
 				if (!Lflag)
 					inet6print(&inp->in6p_faddr,
 					    (int)inp->inp_fport, name,
 					    inp->inp_lport != inp->inp_fport);
 			} /* else nothing printed now */
 #endif /* INET6 */
 		}
 		if (xflag) {
 			if (Lflag)
 				printf("%21s %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u ",
 				       " ",
 				       so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt,
 				       so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt,
 				       so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat,
 				       so->so_rcv.sb_lowat, so->so_snd.sb_lowat,
 				       so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt,
 				       so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax);
 			else {
 				printf("%6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u ",
 				       so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt,
 				       so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt,
 				       so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat,
 				       so->so_rcv.sb_lowat, so->so_snd.sb_lowat,
 				       so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt,
 				       so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax);
 				if (timer != NULL)
 					printf("%4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d ",
 					    timer->tt_rexmt / 1000, (timer->tt_rexmt % 1000) / 10,
 					    timer->tt_persist / 1000, (timer->tt_persist % 1000) / 10,
 					    timer->tt_keep / 1000, (timer->tt_keep % 1000) / 10,
 					    timer->tt_2msl / 1000, (timer->tt_2msl % 1000) / 10,
 					    timer->tt_delack / 1000, (timer->tt_delack % 1000) / 10,
 					    timer->t_rcvtime / 1000, (timer->t_rcvtime % 1000) / 10);
 			}
 		}
 		if (istcp && !Lflag && !xflag && !Tflag) {
 			if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES)
 				printf("%d", tp->t_state);
 			else {
 				printf("%s", tcpstates[tp->t_state]);
 #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN)
 				/* Show T/TCP `hidden state' */
 				if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))
 					putchar('*');
 #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */
 			}
 		} 		
 		putchar('\n');
 	}
 	if (xig != oxig && xig->xig_gen != oxig->xig_gen) {
 		if (oxig->xig_count > xig->xig_count) {
 			printf("Some %s sockets may have been deleted.\n",
 			    name);
 		} else if (oxig->xig_count < xig->xig_count) {
 			printf("Some %s sockets may have been created.\n",
 			    name);
 		} else {
 			printf(
 	"Some %s sockets may have been created or deleted.\n",
 			    name);
 		}
 	}
 	free(buf);
 }
 
 /*
  * Dump TCP statistics structure.
  */
 void
 tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct tcpstat tcpstat, zerostat;
 	size_t len = sizeof tcpstat;
 
 #ifdef INET6
 	if (tcp_done != 0)
 		return;
 	else
 		tcp_done = 1;
 #endif
 
 	if (live) {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.tcp.stats", &tcpstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			warn("sysctl: net.inet.tcp.stats");
 			return;
 		}
 	} else
 		kread(off, &tcpstat, len);
 
 	printf ("%s:\n", name);
 
 #define	p(f, m) if (tcpstat.f || sflag <= 1) \
     printf(m, tcpstat.f, plural(tcpstat.f))
 #define	p1a(f, m) if (tcpstat.f || sflag <= 1) \
     printf(m, tcpstat.f)
 #define	p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \
     printf(m, tcpstat.f1, plural(tcpstat.f1), tcpstat.f2, plural(tcpstat.f2))
 #define	p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \
     printf(m, tcpstat.f1, plural(tcpstat.f1), tcpstat.f2)
 #define	p3(f, m) if (tcpstat.f || sflag <= 1) \
     printf(m, tcpstat.f, pluralies(tcpstat.f))
 
 	p(tcps_sndtotal, "\t%lu packet%s sent\n");
 	p2(tcps_sndpack,tcps_sndbyte, "\t\t%lu data packet%s (%lu byte%s)\n");
 	p2(tcps_sndrexmitpack, tcps_sndrexmitbyte,
 	    "\t\t%lu data packet%s (%lu byte%s) retransmitted\n");
 	p(tcps_sndrexmitbad,
 	    "\t\t%lu data packet%s unnecessarily retransmitted\n");
 	p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n");
 	p2a(tcps_sndacks, tcps_delack,
 	    "\t\t%lu ack-only packet%s (%lu delayed)\n");
 	p(tcps_sndurg, "\t\t%lu URG only packet%s\n");
 	p(tcps_sndprobe, "\t\t%lu window probe packet%s\n");
 	p(tcps_sndwinup, "\t\t%lu window update packet%s\n");
 	p(tcps_sndctrl, "\t\t%lu control packet%s\n");
 	p(tcps_rcvtotal, "\t%lu packet%s received\n");
 	p2(tcps_rcvackpack, tcps_rcvackbyte,
 	    "\t\t%lu ack%s (for %lu byte%s)\n");
 	p(tcps_rcvdupack, "\t\t%lu duplicate ack%s\n");
 	p(tcps_rcvacktoomuch, "\t\t%lu ack%s for unsent data\n");
 	p2(tcps_rcvpack, tcps_rcvbyte,
 	    "\t\t%lu packet%s (%lu byte%s) received in-sequence\n");
 	p2(tcps_rcvduppack, tcps_rcvdupbyte,
 	    "\t\t%lu completely duplicate packet%s (%lu byte%s)\n");
 	p(tcps_pawsdrop, "\t\t%lu old duplicate packet%s\n");
 	p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte,
 	    "\t\t%lu packet%s with some dup. data (%lu byte%s duped)\n");
 	p2(tcps_rcvoopack, tcps_rcvoobyte,
 	    "\t\t%lu out-of-order packet%s (%lu byte%s)\n");
 	p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin,
 	    "\t\t%lu packet%s (%lu byte%s) of data after window\n");
 	p(tcps_rcvwinprobe, "\t\t%lu window probe%s\n");
 	p(tcps_rcvwinupd, "\t\t%lu window update packet%s\n");
 	p(tcps_rcvafterclose, "\t\t%lu packet%s received after close\n");
 	p(tcps_rcvbadsum, "\t\t%lu discarded for bad checksum%s\n");
 	p(tcps_rcvbadoff, "\t\t%lu discarded for bad header offset field%s\n");
 	p1a(tcps_rcvshort, "\t\t%lu discarded because packet too short\n");
 	p1a(tcps_rcvmemdrop, "\t\t%lu discarded due to memory problems\n");
 	p(tcps_connattempt, "\t%lu connection request%s\n");
 	p(tcps_accepts, "\t%lu connection accept%s\n");
 	p(tcps_badsyn, "\t%lu bad connection attempt%s\n");
 	p(tcps_listendrop, "\t%lu listen queue overflow%s\n");
 	p(tcps_badrst, "\t%lu ignored RSTs in the window%s\n");
 	p(tcps_connects, "\t%lu connection%s established (including accepts)\n");
 	p2(tcps_closed, tcps_drops,
 	    "\t%lu connection%s closed (including %lu drop%s)\n");
 	p(tcps_cachedrtt, "\t\t%lu connection%s updated cached RTT on close\n");
 	p(tcps_cachedrttvar,
 	    "\t\t%lu connection%s updated cached RTT variance on close\n");
 	p(tcps_cachedssthresh,
 	    "\t\t%lu connection%s updated cached ssthresh on close\n");
 	p(tcps_conndrops, "\t%lu embryonic connection%s dropped\n");
 	p2(tcps_rttupdated, tcps_segstimed,
 	    "\t%lu segment%s updated rtt (of %lu attempt%s)\n");
 	p(tcps_rexmttimeo, "\t%lu retransmit timeout%s\n");
 	p(tcps_timeoutdrop, "\t\t%lu connection%s dropped by rexmit timeout\n");
 	p(tcps_persisttimeo, "\t%lu persist timeout%s\n");
 	p(tcps_persistdrop, "\t\t%lu connection%s dropped by persist timeout\n");
 	p(tcps_finwait2_drops,
 	    "\t%lu Connection%s (fin_wait_2) dropped because of timeout\n");
 	p(tcps_keeptimeo, "\t%lu keepalive timeout%s\n");
 	p(tcps_keepprobe, "\t\t%lu keepalive probe%s sent\n");
 	p(tcps_keepdrops, "\t\t%lu connection%s dropped by keepalive\n");
 	p(tcps_predack, "\t%lu correct ACK header prediction%s\n");
 	p(tcps_preddat, "\t%lu correct data packet header prediction%s\n");
 
 	p3(tcps_sc_added, "\t%lu syncache entr%s added\n");
 	p1a(tcps_sc_retransmitted, "\t\t%lu retransmitted\n");
 	p1a(tcps_sc_dupsyn, "\t\t%lu dupsyn\n");
 	p1a(tcps_sc_dropped, "\t\t%lu dropped\n");
 	p1a(tcps_sc_completed, "\t\t%lu completed\n");
 	p1a(tcps_sc_bucketoverflow, "\t\t%lu bucket overflow\n");
 	p1a(tcps_sc_cacheoverflow, "\t\t%lu cache overflow\n");
 	p1a(tcps_sc_reset, "\t\t%lu reset\n");
 	p1a(tcps_sc_stale, "\t\t%lu stale\n");
 	p1a(tcps_sc_aborted, "\t\t%lu aborted\n");
 	p1a(tcps_sc_badack, "\t\t%lu badack\n");
 	p1a(tcps_sc_unreach, "\t\t%lu unreach\n");
 	p(tcps_sc_zonefail, "\t\t%lu zone failure%s\n");
 	p(tcps_sc_sendcookie, "\t%lu cookie%s sent\n");
 	p(tcps_sc_recvcookie, "\t%lu cookie%s received\n");
 
 	p(tcps_hc_added, "\t%lu hostcache entrie%s added\n");
 	p1a(tcps_hc_bucketoverflow, "\t\t%lu bucket overflow\n");
 
 	p(tcps_sack_recovery_episode, "\t%lu SACK recovery episode%s\n");
 	p(tcps_sack_rexmits,
 	    "\t%lu segment rexmit%s in SACK recovery episodes\n");
 	p(tcps_sack_rexmit_bytes,
 	    "\t%lu byte rexmit%s in SACK recovery episodes\n");
 	p(tcps_sack_rcv_blocks,
 	    "\t%lu SACK option%s (SACK blocks) received\n");
 	p(tcps_sack_send_blocks, "\t%lu SACK option%s (SACK blocks) sent\n");
 	p1a(tcps_sack_sboverflow, "\t%lu SACK scoreboard overflow\n");
 
 	p(tcps_ecn_ce, "\t%lu packet%s with ECN CE bit set\n");
 	p(tcps_ecn_ect0, "\t%lu packet%s with ECN ECT(0) bit set\n");
 	p(tcps_ecn_ect1, "\t%lu packet%s with ECN ECT(1) bit set\n");
 	p(tcps_ecn_shs, "\t%lu successful ECN handshake%s\n");
 	p(tcps_ecn_rcwnd, "\t%lu time%s ECN reduced the congestion window\n");
 #undef p
 #undef p1a
 #undef p2
 #undef p2a
 #undef p3
 }
 
 /*
  * Dump UDP statistics structure.
  */
 void
 udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct udpstat udpstat, zerostat;
 	size_t len = sizeof udpstat;
 	u_long delivered;
 
 #ifdef INET6
 	if (udp_done != 0)
 		return;
 	else
 		udp_done = 1;
 #endif
 
 	if (live) {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.udp.stats", &udpstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			warn("sysctl: net.inet.udp.stats");
 			return;
 		}
 	} else
 		kread(off, &udpstat, len);
 
 	printf("%s:\n", name);
 #define	p(f, m) if (udpstat.f || sflag <= 1) \
     printf(m, udpstat.f, plural(udpstat.f))
 #define	p1a(f, m) if (udpstat.f || sflag <= 1) \
     printf(m, udpstat.f)
 	p(udps_ipackets, "\t%lu datagram%s received\n");
 	p1a(udps_hdrops, "\t%lu with incomplete header\n");
 	p1a(udps_badlen, "\t%lu with bad data length field\n");
 	p1a(udps_badsum, "\t%lu with bad checksum\n");
 	p1a(udps_nosum, "\t%lu with no checksum\n");
 	p1a(udps_noport, "\t%lu dropped due to no socket\n");
 	p(udps_noportbcast,
 	    "\t%lu broadcast/multicast datagram%s undelivered\n");
 	p1a(udps_fullsock, "\t%lu dropped due to full socket buffers\n");
 	p1a(udpps_pcbhashmiss, "\t%lu not for hashed pcb\n");
 	delivered = udpstat.udps_ipackets -
 		    udpstat.udps_hdrops -
 		    udpstat.udps_badlen -
 		    udpstat.udps_badsum -
 		    udpstat.udps_noport -
 		    udpstat.udps_noportbcast -
 		    udpstat.udps_fullsock;
 	if (delivered || sflag <= 1)
 		printf("\t%lu delivered\n", delivered);
 	p(udps_opackets, "\t%lu datagram%s output\n");
 	/* the next statistic is cumulative in udps_noportbcast */
 	p(udps_filtermcast,
 	    "\t%lu time%s multicast source filter matched\n");
 #undef p
 #undef p1a
 }
 
 /*
  * Dump CARP statistics structure.
  */
 void
 carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct carpstats carpstat, zerostat;
 	size_t len = sizeof(struct carpstats);
 
 	if (live) {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.carp.stats", &carpstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			if (errno != ENOENT)
 				warn("sysctl: net.inet.carp.stats");
 			return;
 		}
 	} else {
 		if (off == 0)
 			return;
 		kread(off, &carpstat, len);
 	}
 
 	printf("%s:\n", name);
 
 #define	p(f, m) if (carpstat.f || sflag <= 1) \
 	printf(m, (uintmax_t)carpstat.f, plural(carpstat.f))
 #define	p2(f, m) if (carpstat.f || sflag <= 1) \
 	printf(m, (uintmax_t)carpstat.f)
 
 	p(carps_ipackets, "\t%ju packet%s received (IPv4)\n");
 	p(carps_ipackets6, "\t%ju packet%s received (IPv6)\n");
 	p(carps_badttl, "\t\t%ju packet%s discarded for wrong TTL\n");
 	p(carps_hdrops, "\t\t%ju packet%s shorter than header\n");
 	p(carps_badsum, "\t\t%ju discarded for bad checksum%s\n");
 	p(carps_badver,	"\t\t%ju discarded packet%s with a bad version\n");
 	p2(carps_badlen, "\t\t%ju discarded because packet too short\n");
 	p2(carps_badauth, "\t\t%ju discarded for bad authentication\n");
 	p2(carps_badvhid, "\t\t%ju discarded for bad vhid\n");
 	p2(carps_badaddrs, "\t\t%ju discarded because of a bad address list\n");
 	p(carps_opackets, "\t%ju packet%s sent (IPv4)\n");
 	p(carps_opackets6, "\t%ju packet%s sent (IPv6)\n");
 	p2(carps_onomem, "\t\t%ju send failed due to mbuf memory error\n");
 #if notyet
 	p(carps_ostates, "\t\t%s state update%s sent\n");
 #endif
 #undef p
 #undef p2
 }
 
 /*
  * Dump IP statistics structure.
  */
 void
 ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct ipstat ipstat, zerostat;
 	size_t len = sizeof ipstat;
 
 	if (live) {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.ip.stats", &ipstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			warn("sysctl: net.inet.ip.stats");
 			return;
 		}
 	} else
 		kread(off, &ipstat, len);
 
 	printf("%s:\n", name);
 
 #define	p(f, m) if (ipstat.f || sflag <= 1) \
     printf(m, ipstat.f, plural(ipstat.f))
 #define	p1a(f, m) if (ipstat.f || sflag <= 1) \
     printf(m, ipstat.f)
 
 	p(ips_total, "\t%lu total packet%s received\n");
 	p(ips_badsum, "\t%lu bad header checksum%s\n");
 	p1a(ips_toosmall, "\t%lu with size smaller than minimum\n");
 	p1a(ips_tooshort, "\t%lu with data size < data length\n");
 	p1a(ips_toolong, "\t%lu with ip length > max ip packet size\n");
 	p1a(ips_badhlen, "\t%lu with header length < data size\n");
 	p1a(ips_badlen, "\t%lu with data length < header length\n");
 	p1a(ips_badoptions, "\t%lu with bad options\n");
 	p1a(ips_badvers, "\t%lu with incorrect version number\n");
 	p(ips_fragments, "\t%lu fragment%s received\n");
 	p(ips_fragdropped, "\t%lu fragment%s dropped (dup or out of space)\n");
 	p(ips_fragtimeout, "\t%lu fragment%s dropped after timeout\n");
 	p(ips_reassembled, "\t%lu packet%s reassembled ok\n");
 	p(ips_delivered, "\t%lu packet%s for this host\n");
 	p(ips_noproto, "\t%lu packet%s for unknown/unsupported protocol\n");
 	p(ips_forward, "\t%lu packet%s forwarded");
 	p(ips_fastforward, " (%lu packet%s fast forwarded)");
 	if (ipstat.ips_forward || sflag <= 1)
 		putchar('\n');
 	p(ips_cantforward, "\t%lu packet%s not forwardable\n");
 	p(ips_notmember,
 	    "\t%lu packet%s received for unknown multicast group\n");
 	p(ips_redirectsent, "\t%lu redirect%s sent\n");
 	p(ips_localout, "\t%lu packet%s sent from this host\n");
 	p(ips_rawout, "\t%lu packet%s sent with fabricated ip header\n");
 	p(ips_odropped,
 	    "\t%lu output packet%s dropped due to no bufs, etc.\n");
 	p(ips_noroute, "\t%lu output packet%s discarded due to no route\n");
 	p(ips_fragmented, "\t%lu output datagram%s fragmented\n");
 	p(ips_ofragments, "\t%lu fragment%s created\n");
 	p(ips_cantfrag, "\t%lu datagram%s that can't be fragmented\n");
 	p(ips_nogif, "\t%lu tunneling packet%s that can't find gif\n");
 	p(ips_badaddr, "\t%lu datagram%s with bad address in header\n");
 #undef p
 #undef p1a
 }
 
 /*
  * Dump ARP statistics structure.
  */
 void
 arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct arpstat arpstat, zerostat;
 	size_t len = sizeof(arpstat);
 
 	if (live) {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			warn("sysctl: net.link.ether.arp.stats");
 			return;
 		}
 	} else
 		kread(off, &arpstat, len);
 
 	printf("%s:\n", name);
 
 #define	p(f, m) if (arpstat.f || sflag <= 1) \
     printf(m, arpstat.f, plural(arpstat.f))
 #define	p2(f, m) if (arpstat.f || sflag <= 1) \
     printf(m, arpstat.f, pluralies(arpstat.f))
 
 	p(txrequests, "\t%lu ARP request%s sent\n");
 	p2(txreplies, "\t%lu ARP repl%s sent\n");
 	p(rxrequests, "\t%lu ARP request%s received\n");
 	p2(rxreplies, "\t%lu ARP repl%s received\n");
 	p(received, "\t%lu ARP packet%s received\n");
 	p(dropped, "\t%lu total packet%s dropped due to no ARP entry\n");
 	p(timeouts, "\t%lu ARP entry%s timed out\n");
 	p(dupips, "\t%lu Duplicate IP%s seen\n");
 #undef p
 #undef p2
 }
 
 
 
 static	const char *icmpnames[ICMP_MAXTYPE + 1] = {
 	"echo reply",			/* RFC 792 */
 	"#1",
 	"#2",
 	"destination unreachable",	/* RFC 792 */
 	"source quench",		/* RFC 792 */
 	"routing redirect",		/* RFC 792 */
 	"#6",
 	"#7",
 	"echo",				/* RFC 792 */
 	"router advertisement",		/* RFC 1256 */
 	"router solicitation",		/* RFC 1256 */
 	"time exceeded",		/* RFC 792 */
 	"parameter problem",		/* RFC 792 */
 	"time stamp",			/* RFC 792 */
 	"time stamp reply",		/* RFC 792 */
 	"information request",		/* RFC 792 */
 	"information request reply",	/* RFC 792 */
 	"address mask request",		/* RFC 950 */
 	"address mask reply",		/* RFC 950 */
 	"#19",
 	"#20",
 	"#21",
 	"#22",
 	"#23",
 	"#24",
 	"#25",
 	"#26",
 	"#27",
 	"#28",
 	"#29",
 	"icmp traceroute",		/* RFC 1393 */
 	"datagram conversion error",	/* RFC 1475 */
 	"mobile host redirect",
 	"IPv6 where-are-you",
 	"IPv6 i-am-here",
 	"mobile registration req",
 	"mobile registration reply",
 	"domain name request",		/* RFC 1788 */
 	"domain name reply",		/* RFC 1788 */
 	"icmp SKIP",
 	"icmp photuris",		/* RFC 2521 */
 };
 
 /*
  * Print the ICMP counters under the heading `name'.
  *
  * On a live system the counters come from the net.inet.icmp.stats sysctl;
  * when zflag is set a zero-filled structure is handed to the same call as
  * the new value.  Otherwise the structure is read with kread() at `off'.
  * A counter whose value is zero is only printed while sflag <= 1.
  * On a live system the net.inet.icmp.maskrepl setting is reported last.
  */
 void
 icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct icmpstat icmpstat, zerostat;
 	size_t len = sizeof icmpstat;
 	int first, i;
 
 	if (!live) {
 		kread(off, &icmpstat, len);
 	} else {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.icmp.stats", &icmpstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			warn("sysctl: net.inet.icmp.stats");
 			return;
 		}
 	}
 
 	printf("%s:\n", name);
 
 /* p: value plus plural() suffix; p1a: value only; p2: plurales() suffix. */
 #define	p(f, m) do {							\
 	if (icmpstat.f || sflag <= 1)					\
 		printf(m, icmpstat.f, plural(icmpstat.f));		\
 } while (0)
 #define	p1a(f, m) do {							\
 	if (icmpstat.f || sflag <= 1)					\
 		printf(m, icmpstat.f);					\
 } while (0)
 #define	p2(f, m) do {							\
 	if (icmpstat.f || sflag <= 1)					\
 		printf(m, icmpstat.f, plurales(icmpstat.f));		\
 } while (0)
 
 	p(icps_error, "\t%lu call%s to icmp_error\n");
 	p(icps_oldicmp,
 	    "\t%lu error%s not generated in response to an icmp message\n");
 
 	/* Per-type output counts; the heading appears with the first hit. */
 	first = 1;
 	for (i = 0; i <= ICMP_MAXTYPE; i++) {
 		if (icmpstat.icps_outhist[i] == 0)
 			continue;
 		if (first) {
 			printf("\tOutput histogram:\n");
 			first = 0;
 		}
 		if (icmpnames[i] == NULL)
 			printf("\t\tunknown ICMP #%d: %lu\n", i,
 			    icmpstat.icps_outhist[i]);
 		else
 			printf("\t\t%s: %lu\n", icmpnames[i],
 			    icmpstat.icps_outhist[i]);
 	}
 
 	p(icps_badcode, "\t%lu message%s with bad code fields\n");
 	p(icps_tooshort, "\t%lu message%s less than the minimum length\n");
 	p(icps_checksum, "\t%lu message%s with bad checksum\n");
 	p(icps_badlen, "\t%lu message%s with bad length\n");
 	p1a(icps_bmcastecho, "\t%lu multicast echo requests ignored\n");
 	p1a(icps_bmcasttstamp, "\t%lu multicast timestamp requests ignored\n");
 
 	/* Per-type input counts, same layout as the output histogram. */
 	first = 1;
 	for (i = 0; i <= ICMP_MAXTYPE; i++) {
 		if (icmpstat.icps_inhist[i] == 0)
 			continue;
 		if (first) {
 			printf("\tInput histogram:\n");
 			first = 0;
 		}
 		if (icmpnames[i] == NULL)
 			printf("\t\tunknown ICMP #%d: %lu\n", i,
 			    icmpstat.icps_inhist[i]);
 		else
 			printf("\t\t%s: %lu\n", icmpnames[i],
 			    icmpstat.icps_inhist[i]);
 	}
 
 	p(icps_reflect, "\t%lu message response%s generated\n");
 	p2(icps_badaddr, "\t%lu invalid return address%s\n");
 	p(icps_noroute, "\t%lu no return route%s\n");
 #undef p
 #undef p1a
 #undef p2
 
 	/* The mask-reply setting is only queried on a live system. */
 	if (!live)
 		return;
 	len = sizeof i;
 	if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) < 0)
 		return;
 	printf("\tICMP address mask responses are %sabled\n",
 	    i ? "en" : "dis");
 }
 
 #ifndef BURN_BRIDGES
 /*
  * Print IGMP statistics using the legacy struct oigmpstat layout
  * (pre 8.x kernel).
  *
  * The counters are always fetched through the net.inet.igmp.stats sysctl;
  * `off' is accepted but not read here.  When zflag is set a zero-filled
  * structure is passed to the same call as the new value.
  */
 static void
 igmp_stats_live_old(u_long off, const char *name)
 {
 	struct oigmpstat oigmpstat, zerostat;
 	size_t len;
 
 	len = sizeof(oigmpstat);
 	if (zflag)
 		memset(&zerostat, 0, len);
 	if (sysctlbyname("net.inet.igmp.stats", &oigmpstat, &len,
 	    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 		warn("sysctl: net.inet.igmp.stats");
 		return;
 	}
 
 	printf("%s:\n", name);
 
 /* p: regular plural via plural(); py: "y"/"ies" ending (quer%s). */
 #define	p(f, m) do {							\
 	if (oigmpstat.f || sflag <= 1)					\
 		printf(m, oigmpstat.f, plural(oigmpstat.f));		\
 } while (0)
 #define	py(f, m) do {							\
 	if (oigmpstat.f || sflag <= 1)					\
 		printf(m, oigmpstat.f, oigmpstat.f != 1 ? "ies" : "y");	\
 } while (0)
 
 	p(igps_rcv_total, "\t%u message%s received\n");
 	p(igps_rcv_tooshort, "\t%u message%s received with too few bytes\n");
 	p(igps_rcv_badsum, "\t%u message%s received with bad checksum\n");
 	py(igps_rcv_queries, "\t%u membership quer%s received\n");
 	py(igps_rcv_badqueries,
 	    "\t%u membership quer%s received with invalid field(s)\n");
 	p(igps_rcv_reports, "\t%u membership report%s received\n");
 	p(igps_rcv_badreports,
 	    "\t%u membership report%s received with invalid field(s)\n");
 	p(igps_rcv_ourreports,
 	    "\t%u membership report%s received for groups to which we belong\n");
 	p(igps_snd_reports, "\t%u membership report%s sent\n");
 #undef p
 #undef py
 }
 #endif /* !BURN_BRIDGES */
 
 /*
  * Print the IGMP counters (struct igmpstat) under the heading `name'.
  *
  * Live system: read through the net.inet.igmp.stats sysctl, passing a
  * zero-filled structure as the new value when zflag is set.  Core file:
  * read with kread() at `off'.  A version or length field that does not
  * match IGPS_VERSION_3 / IGPS_VERSION3_LEN only produces a warning; the
  * counters are printed regardless.
  */
 void
 igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct igmpstat igmpstat, zerostat;
 	size_t len;
 
 #ifndef BURN_BRIDGES
 	if (live) {
 		/*
 		 * Ask the kernel how large its statistics structure is.  A
 		 * size smaller than ours means a pre-IGMPv3 kernel, which is
 		 * handled by the legacy printer.  The same check is not
 		 * possible for a core file: the legacy struct igmpstat has no
 		 * size field and its size is not exported through any
 		 * readily-available symbol.
 		 */
 		len = 0;
 		if (sysctlbyname("net.inet.igmp.stats", NULL, &len, NULL,
 		    0) < 0) {
 			warn("sysctl: net.inet.igmp.stats");
 			return;
 		}
 		if (len < sizeof(igmpstat)) {
 			igmp_stats_live_old(off, name);
 			return;
 		}
 	}
 #endif /* !BURN_BRIDGES */
 
 	len = sizeof(igmpstat);
 	if (!live) {
 		kread(off, &igmpstat, len);
 	} else {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.igmp.stats", &igmpstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			warn("sysctl: net.inet.igmp.stats");
 			return;
 		}
 	}
 
 	/* Mismatches are reported but do not stop the dump. */
 	if (igmpstat.igps_version != IGPS_VERSION_3)
 		warnx("%s: version mismatch (%d != %d)", __func__,
 		    igmpstat.igps_version, IGPS_VERSION_3);
 	if (igmpstat.igps_len != IGPS_VERSION3_LEN)
 		warnx("%s: size mismatch (%d != %d)", __func__,
 		    igmpstat.igps_len, IGPS_VERSION3_LEN);
 
 	printf("%s:\n", name);
 
 /* p64: plural() suffix; py64: pluralies() suffix (quer%s). */
 #define	p64(f, m) do {							\
 	if (igmpstat.f || sflag <= 1)					\
 		printf(m, (uintmax_t) igmpstat.f, plural(igmpstat.f));	\
 } while (0)
 #define	py64(f, m) do {							\
 	if (igmpstat.f || sflag <= 1)					\
 		printf(m, (uintmax_t) igmpstat.f,			\
 		    pluralies(igmpstat.f));				\
 } while (0)
 
 	p64(igps_rcv_total, "\t%ju message%s received\n");
 	p64(igps_rcv_tooshort, "\t%ju message%s received with too few bytes\n");
 	p64(igps_rcv_badttl, "\t%ju message%s received with wrong TTL\n");
 	p64(igps_rcv_badsum, "\t%ju message%s received with bad checksum\n");
 	py64(igps_rcv_v1v2_queries, "\t%ju V1/V2 membership quer%s received\n");
 	py64(igps_rcv_v3_queries, "\t%ju V3 membership quer%s received\n");
 	py64(igps_rcv_badqueries,
 	    "\t%ju membership quer%s received with invalid field(s)\n");
 	py64(igps_rcv_gen_queries, "\t%ju general quer%s received\n");
 	py64(igps_rcv_group_queries, "\t%ju group quer%s received\n");
 	py64(igps_rcv_gsr_queries, "\t%ju group-source quer%s received\n");
 	py64(igps_drop_gsr_queries, "\t%ju group-source quer%s dropped\n");
 	p64(igps_rcv_reports, "\t%ju membership report%s received\n");
 	p64(igps_rcv_badreports,
 	    "\t%ju membership report%s received with invalid field(s)\n");
 	p64(igps_rcv_ourreports,
 	    "\t%ju membership report%s received for groups to which we belong\n");
 	p64(igps_rcv_nora, "\t%ju V3 report%s received without Router Alert\n");
 	p64(igps_snd_reports, "\t%ju membership report%s sent\n");
 #undef p64
 #undef py64
 }
 
 /*
  * Print the PIM counters (struct pimstat) under the heading `name'.
  *
  * Live system: read through the net.inet.pim.stats sysctl, passing a
  * zero-filled structure as the new value when zflag is set; a failure
  * with errno == ENOENT returns silently, any other failure is warned
  * about.  Core file: nothing is printed when `off' is 0, otherwise the
  * structure is read with kread() at `off'.
  *
  * NOTE(review): `off' carries __unused although the core-file path reads
  * it; the attribute is kept so the signature stays as it was.
  */
 void
 pim_stats(u_long off __unused, const char *name, int af1 __unused,
     int proto __unused)
 {
 	struct pimstat pimstat, zerostat;
 	size_t len;
 
 	len = sizeof pimstat;
 	if (!live) {
 		if (off == 0)
 			return;
 		kread(off, &pimstat, len);
 	} else {
 		if (zflag)
 			memset(&zerostat, 0, len);
 		if (sysctlbyname("net.inet.pim.stats", &pimstat, &len,
 		    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
 			if (errno != ENOENT)
 				warn("sysctl: net.inet.pim.stats");
 			return;
 		}
 	}
 
 	printf("%s:\n", name);
 
 /* Print one counter with its plural() suffix; zeros only if sflag <= 1. */
 #define	p(f, m) do {							\
 	if (pimstat.f || sflag <= 1)					\
 		printf(m, (uintmax_t)pimstat.f, plural(pimstat.f));	\
 } while (0)
 
 	p(pims_rcv_total_msgs, "\t%ju message%s received\n");
 	p(pims_rcv_total_bytes, "\t%ju byte%s received\n");
 	p(pims_rcv_tooshort, "\t%ju message%s received with too few bytes\n");
 	p(pims_rcv_badsum, "\t%ju message%s received with bad checksum\n");
 	p(pims_rcv_badversion, "\t%ju message%s received with bad version\n");
 	p(pims_rcv_registers_msgs, "\t%ju data register message%s received\n");
 	p(pims_rcv_registers_bytes, "\t%ju data register byte%s received\n");
 	p(pims_rcv_registers_wrongiif,
 	    "\t%ju data register message%s received on wrong iif\n");
 	p(pims_rcv_badregisters, "\t%ju bad register%s received\n");
 	p(pims_snd_registers_msgs, "\t%ju data register message%s sent\n");
 	p(pims_snd_registers_bytes, "\t%ju data register byte%s sent\n");
 #undef p
 }
 
 /*
  * Pretty print an Internet address (net address + port).
  *
  *	in	  address to print; its text form comes from inetname().
  *	port	  port number as handed in by the caller; it is given to
  *		  getservbyport() and ntohs() unchanged.  0 prints as "*".
  *	proto	  protocol name used for the service lookup.
  *	num_port  non-zero: skip the service lookup and print the number.
  *
  * The text is assembled in a fixed-size local buffer.  inetname() can
  * return a name much longer than that buffer, so every formatting step is
  * bounded with snprintf(): an over-long name is truncated instead of
  * overrunning the stack.
  */
 void
 inetprint(struct in_addr *in, int port, const char *proto, int num_port)
 {
 	struct servent *sp = NULL;
 	char line[80], *cp;
 	size_t alen;
 	int width;
 
 	/* Address part, followed by the '.' separating it from the port. */
 	if (Wflag)
 		snprintf(line, sizeof(line), "%s.", inetname(in));
 	else
 		snprintf(line, sizeof(line), "%.*s.",
 		    (Aflag && !num_port) ? 12 : 16, inetname(in));
 
 	/* snprintf() always terminates, so strlen() stays inside line[]. */
 	alen = strlen(line);
 	cp = line + alen;
 
 	/* Port part: service name, "*" for port 0, or the number. */
 	if (!num_port && port)
 		sp = getservbyport((int)port, proto);
 	if (sp || port == 0)
 		snprintf(cp, sizeof(line) - alen, "%.15s ",
 		    sp ? sp->s_name : "*");
 	else
 		snprintf(cp, sizeof(line) - alen, "%d ",
 		    ntohs((u_short)port));
 
 	width = (Aflag && !Wflag) ? 18 : 22;
 	if (Wflag)
 		printf("%-*s ", width, line);
 	else
 		printf("%-*.*s ", width, width, line);
 }
 
 /*
  * Construct an Internet address representation.
  * If numeric_addr has been supplied, give
  * numeric value, otherwise try for symbolic name.
  */
 char *
 inetname(struct in_addr *inp)
 {
 	char *cp;
 	static char line[MAXHOSTNAMELEN];
 	struct hostent *hp;
 	struct netent *np;
 
 	cp = 0;
 	if (!numeric_addr && inp->s_addr != INADDR_ANY) {
 		int net = inet_netof(*inp);
 		int lna = inet_lnaof(*inp);
 
 		if (lna == INADDR_ANY) {
 			np = getnetbyaddr(net, AF_INET);
 			if (np)
 				cp = np->n_name;
 		}
 		if (cp == 0) {
 			hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET);
 			if (hp) {
 				cp = hp->h_name;
 				trimdomain(cp, strlen(cp));
 			}
 		}
 	}
 	if (inp->s_addr == INADDR_ANY)
 		strcpy(line, "*");
 	else if (cp) {
 		strlcpy(line, cp, sizeof(line));
 	} else {
 		inp->s_addr = ntohl(inp->s_addr);
 #define	C(x)	((u_int)((x) & 0xff))
 		sprintf(line, "%u.%u.%u.%u", C(inp->s_addr >> 24),
 		    C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr));
 	}
 	return (line);
 }
Index: projects/binutils-2.17/usr.bin/procstat
===================================================================
--- projects/binutils-2.17/usr.bin/procstat	(revision 215829)
+++ projects/binutils-2.17/usr.bin/procstat	(revision 215830)

Property changes on: projects/binutils-2.17/usr.bin/procstat
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/usr.bin/procstat:r215709-215824
Index: projects/binutils-2.17/usr.sbin/kernbb/kernbb.8
===================================================================
--- projects/binutils-2.17/usr.sbin/kernbb/kernbb.8	(revision 215829)
+++ projects/binutils-2.17/usr.sbin/kernbb/kernbb.8	(nonexistent)
@@ -1,82 +0,0 @@
-.\" Copyright (c) 1983, 1991, 1993
-.\"	The Regents of the University of California.  All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 4. Neither the name of the University nor the names of its contributors
-.\"    may be used to endorse or promote products derived from this software
-.\"    without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD$
-.\"
-.Dd May 22, 1995
-.Dt KERNBB 8
-.Os
-.Sh NAME
-.Nm kernbb
-.Nd generate a dump of the kernels basic-block profile buffers
-.Sh SYNOPSIS
-.Nm
-.Sh DESCRIPTION
-The
-.Nm
-utility is used to extract the basic-block profiling buffers of the running
-kernel into the files needed for the
-.Xr gcov 1
-tool.
-.Pp
-At least one source file in the running kernel must have been compiled
-with the
-.Fl Fl test-coverage
-and
-.Fl Fl profile-arcs
-options.
-.Pp
-The output is stored in the filenames compiled into the kernel by
-.Xr gcc 1 .
-If the absolute pathname cannot be written to, the directory part
-of the filename is discarded and the file stored in the current
-directory under its basename.
-.Pp
-The output files are named
-.Pa *.da ,
-and the
-.Xr gcov 1
-program will extract the counts and merge them with the source
-file to show actual execution counts.
-.Sh FILES
-.Bl -tag -width /boot/kernel/kernel -compact
-.It Pa /boot/kernel/kernel
-the default system
-.It Pa /dev/kmem
-the default memory
-.El
-.Sh SEE ALSO
-.Xr cc 1 ,
-.Xr gcov 1
-.Sh AUTHORS
-The
-.Nm
-utility was written by
-.An Poul-Henning Kamp ,
-along with the kernel-support.
-.Sh BUGS
-There are far too much magic and internal knowledge from GCC in this.

Property changes on: projects/binutils-2.17/usr.sbin/kernbb/kernbb.8
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/binutils-2.17/usr.sbin/kernbb/kernbb.c
===================================================================
--- projects/binutils-2.17/usr.sbin/kernbb/kernbb.c	(revision 215829)
+++ projects/binutils-2.17/usr.sbin/kernbb/kernbb.c	(nonexistent)
@@ -1,145 +0,0 @@
-/*
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
- * ----------------------------------------------------------------------------
- *
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <err.h>
-#include <fcntl.h>
-#include <kvm.h>
-#include <nlist.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/endian.h>
-
-typedef long long gcov_type;
-
-#define PARAMS(foo)	foo
-#define ATTRIBUTE_UNUSED __unused
-#include "gcov-io.h"
-
-struct bbf {
-	long	checksum;
-	int	arc_count;
-	u_long	name;
-};
-
-struct bb {
-	u_long	zero_one;
-	u_long	filename;
-	u_long	counts;
-	u_long	ncounts;
-	u_long	next;
-	u_long	sizeof_bb;
-	u_long	funcs;
-};
-
-struct nlist namelist[] = {
-	{ "bbhead", 0, 0, 0, 0 },
-	{ NULL, 0, 0, 0, 0 }
-};
-
-kvm_t	*kv;
-
-int
-main(int argc __unused, char **argv __unused)
-{
-	int i, funcs;
-	u_long l1,l2,l4;
-	struct bb bb;
-	struct bbf bbf;
-	char buf[BUFSIZ], *p;
-	gcov_type *q, *qr;
-	
-	FILE *f;
-
-	kv = kvm_open(NULL,NULL,NULL,O_RDWR,"dnc");
-	if (!kv) 
-		err(1,"kvm_open");
-	i = kvm_nlist(kv,namelist);
-	if (i)
-		err(1,"kvm_nlist");
-
-	l1 = namelist[0].n_value;
-	kvm_read(kv,l1,&l2,sizeof l2);
-	while(l2) {
-		l1 += sizeof l1;
-		kvm_read(kv,l2,&bb,sizeof bb);
-#if 0
-printf("%lx\n%lx\n%lx\n%lx\n%lx\n%lx\n%lx\n",
-	bb.zero_one, bb.filename, bb.counts, bb.ncounts, bb.next,
-	bb.sizeof_bb, bb.funcs);
-#endif
-
-		funcs = 0;
-		for (l4 = bb.funcs; ; l4 += sizeof (bbf)) {
-			kvm_read(kv, l4, &bbf, sizeof(bbf));
-			if (bbf.arc_count == -1)
-				break;
-			funcs++;
-		}
-		
-		l2 = bb.next;
-
-		kvm_read(kv, bb.filename, buf, sizeof(buf));
-		p = buf;
-		f = fopen(p, "w");
-		if (f != NULL) {
-			printf("Writing \"%s\"\n", p);
-		} else {
-			p = strrchr(buf, '/');
-			if (p == NULL)
-				p = buf;
-			else
-				p++;
-			printf("Writing \"%s\" (spec \"%s\")\n", p, buf);
-			f = fopen(p, "w");
-		}
-		if (f == NULL)
-			err(1,"%s", p);
-		__write_long(-123, f, 4);
-
-		__write_long(funcs, f, 4);
-
-		__write_long(4 + 8 + 8 + 4 + 8 + 8, f, 4);
-
-		__write_long(bb.ncounts, f, 4);
-		__write_long(0, f, 8);
-		__write_long(0, f, 8);
-
-		__write_long(bb.ncounts, f, 4);
-		__write_long(0, f, 8);
-		__write_long(0, f, 8);
-
-		qr = malloc(bb.ncounts * 8);
-		kvm_read(kv, bb.counts, qr, bb.ncounts * 8);
-		q = qr;
-		for (l4 = bb.funcs; ; l4 += sizeof (bbf)) {
-			kvm_read(kv, l4, &bbf, sizeof(bbf));
-			if (bbf.arc_count == -1)
-				break;
-			kvm_read(kv, bbf.name, buf, sizeof(buf));
-
-			__write_gcov_string(buf, strlen(buf), f, -1);
-			
-			__write_long(bbf.checksum, f, 4);
-			__write_long(bbf.arc_count, f, 4);
-			for (i = 0; i < bbf.arc_count; i++) {
-				__write_gcov_type(*q, f, 8);
-				q++;
-			}
-		}
-		fclose(f);
-		free(qr);
-	}
-	return 0;
-}

Property changes on: projects/binutils-2.17/usr.sbin/kernbb/kernbb.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/binutils-2.17/usr.sbin/kernbb/Makefile
===================================================================
--- projects/binutils-2.17/usr.sbin/kernbb/Makefile	(revision 215829)
+++ projects/binutils-2.17/usr.sbin/kernbb/Makefile	(nonexistent)
@@ -1,12 +0,0 @@
-# $FreeBSD$
-
-PROG=	kernbb
-MAN=	kernbb.8
-
-DPADD=	${LIBKVM}
-LDADD=	-lkvm
-
-CFLAGS+= -I${.CURDIR}/../../contrib/gcc
-
-.include <bsd.prog.mk>
-

Property changes on: projects/binutils-2.17/usr.sbin/kernbb/Makefile
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/binutils-2.17/usr.sbin/iostat/iostat.8
===================================================================
--- projects/binutils-2.17/usr.sbin/iostat/iostat.8	(revision 215829)
+++ projects/binutils-2.17/usr.sbin/iostat/iostat.8	(revision 215830)
@@ -1,479 +1,493 @@
 .\"
 .\" Copyright (c) 1997 Kenneth D. Merry.
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. The name of the author may not be used to endorse or promote products
 .\"    derived from this software without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
 .\" Copyright (c) 1985, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. All advertising materials mentioning features or use of this software
 .\"    must display the following acknowledgement:
 .\"	This product includes software developed by the University of
 .\"	California, Berkeley and its contributors.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)iostat.8	8.1 (Berkeley) 6/6/93
 .\"
-.Dd April 17, 2006
+.Dd November 24, 2010
 .Dt IOSTAT 8
 .Os
 .Sh NAME
 .Nm iostat
 .Nd report
 .Tn I/O
 statistics
 .Sh SYNOPSIS
 .Nm
 .Op Fl CdhIKoTxz?\&
 .Op Fl c Ar count
 .Op Fl M Ar core
 .Op Fl n Ar devs
 .Op Fl N Ar system
 .Oo
 .Fl t
 .Sm off
 .Ar type , if , pass
 .Sm on
 .Oc
 .Op Fl w Ar wait
 .Op Ar drives
 .Sh DESCRIPTION
 The
 .Nm
 utility displays kernel
 .Tn I/O
 statistics on terminal, device and cpu operations.
 The first statistics that are printed are averaged over the system uptime.
 To get information about the current activity, a suitable wait time should
 be specified, so that the subsequent sets of printed statistics will be
 averaged over that time.
 .Pp
 The options are as follows:
 .Bl -tag -width flag
 .It Fl c
 Repeat the display
 .Ar count
 times.
 If no repeat
 .Ar count
-is specified, the default is infinity.
+is specified, the default depends on whether
+.Fl w
+is specified.
+With
+.Fl w
+the default repeat count is infinity, otherwise it is 1.
 .It Fl C
 Display CPU statistics.
 This is on by default, unless
 .Fl d
+or
+.Fl x
 is specified.
 .It Fl d
 Display only device statistics.
 If this flag is turned on, only device statistics will be displayed, unless
 .Fl C
 or
 .Fl T
 is also specified to enable the display of CPU or TTY statistics.
 .It Fl h
 Put
 .Nm
 in
 .Sq top
 mode.
 In this mode,
 .Nm
 will show devices in order from highest to lowest bytes
 per measurement cycle.
 .It Fl I
 Display total statistics for a given time period, rather than average
 statistics for each second during that time period.
 .It Fl K
 In the blocks transferred display (-o), display block count in kilobytes rather
 than the device native block size.
 .It Fl M
 Extract values associated with the name list from the specified core
 instead of the default
 .Dq Pa /dev/kmem .
 .It Fl n
 Display up to
 .Ar devs
 number of devices.
 The
 .Nm
 utility will display fewer devices if there are not
 .Ar devs
 devices present.
 .It Fl N
 Extract the name list from the specified system instead of the default
 .Dq Pa /boot/kernel/kernel .
 .It Fl o
 Display old-style
 .Nm
 device statistics.
 Sectors per second, transfers per second, and milliseconds per seek are
 displayed.
 If
 .Fl I
 is specified, total blocks/sectors, total transfers, and
 milliseconds per seek are displayed.
 .It Fl t
 Specify which types of devices to display.
 There are three different categories of devices:
 .Pp
 .Bl -tag -width indent -compact
 .It device type:
 .Bl -tag -width 9n -compact
 .It da
 Direct Access devices
 .It sa
 Sequential Access devices
 .It printer
 Printers
 .It proc
 Processor devices
 .It worm
 Write Once Read Multiple devices
 .It cd
 CD devices
 .It scanner
 Scanner devices
 .It optical
 Optical Memory devices
 .It changer
 Medium Changer devices
 .It comm
 Communication devices
 .It array
 Storage Array devices
 .It enclosure
 Enclosure Services devices
 .It floppy
 Floppy devices
 .El
 .Pp
 .It interface:
 .Bl -tag -width 9n -compact
 .It IDE
 Integrated Drive Electronics devices
 .It SCSI
 Small Computer System Interface devices
 .It other
 Any other device interface
 .El
 .Pp
 .It passthrough:
 .Bl -tag -width 9n -compact
 .It pass
 Passthrough devices
 .El
 .El
 .Pp
 The user must specify at least one device type, and may specify at most
 one device type from each category.
 Multiple device types in a single device type statement must be separated by
 commas.
 .Pp
 Any number of
 .Fl t
 arguments may be specified on the command line.
 All
 .Fl t
 arguments are ORed together to form a matching expression against which
 all devices in the system are compared.
 Any device that fully matches any
 .Fl t
 argument will be included in the
 .Nm
 output, up to the number of devices that can be displayed in
 80 columns, or the maximum number of devices specified by the user.
 .It Fl T
 Display TTY statistics.
 This is on by default, unless
 .Fl d
+or
+.Fl x
 is specified.
 .It Fl w
 Pause
 .Ar wait
 seconds between each display.
 If no
 .Ar wait
 interval is specified, the default is 1 second.
 .Pp
 The
 .Nm
 command will accept and honor a non-integer number of seconds.
 Note that the interval only has millisecond granularity.
 Finer values will be truncated.
 E.g.,
 .Dq Li -w1.0001
 is the same as
 .Dq Li -w1.000 .
 The interval will also suffer from modifications to
 .Va kern.hz
 so your mileage may vary.
 .It Fl x
 Show extended disk statistics.
 Each disk is displayed on a line of its own with all available statistics.
+If this flag is turned on, only disk statistics will be displayed, unless
+.Fl C
+or
+.Fl T
+is also specified to enable the display of CPU or TTY statistics.
 .It Fl z
 If
 .Fl x
 is specified, omit lines for devices with no activity.
 .It Fl ?\&
 Display a usage statement and exit.
 .El
 .Pp
 The
 .Nm
 utility displays its information in the following format:
 .Bl -tag -width flag
 .It tty
 .Bl -tag -width indent -compact
 .It tin
 characters read from terminals
 .It tout
 characters written to terminals
 .El
 .It devices
 Device operations.
 The header of the field is the device name and unit number.
 The
 .Nm
 utility
 will display as many devices as will fit in a standard 80 column screen, or
 the maximum number of devices in the system, whichever is smaller.
 If
 .Fl n
 is specified on the command line,
 .Nm
 will display the smaller of the
 requested number of devices, and the maximum number of devices in the system.
 To force
 .Nm
 to display specific drives, their names may be supplied on the command
 line.
 The
 .Nm
 utility
 will not display more devices than will fit in an 80 column screen, unless
 the
 .Fl n
 argument is given on the command line to specify a maximum number of
 devices to display.
 If fewer devices are specified on the command line than will fit in an 80
 column screen,
 .Nm
 will show only the specified devices.
 .Pp
 The standard
 .Nm
 device display shows the following statistics:
 .Pp
 .Bl -tag -width indent -compact
 .It KB/t
 kilobytes per transfer
 .It tps
 transfers per second
 .It MB/s
 megabytes per second
 .El
 .Pp
 The standard
 .Nm
 device display, with the
 .Fl I
 flag specified, shows the following statistics:
 .Pp
 .Bl -tag -width indent -compact
 .It KB/t
 kilobytes per transfer
 .It xfrs
 total number of transfers
 .It MB
 total number of megabytes transferred
 .El
 .Pp
 The extended
 .Nm
 device display, with the
 .Fl x
 flag specified, shows the following statistics:
 .Pp
 .Bl -tag -width indent -compact
 .It r/s
 read operations per second
 .It w/s
 write operations per second
 .It kr/s
 kilobytes read per second
 .It kw/s
 kilobytes written per second
 .It qlen
 transactions queue length
 .It svc_t
 average duration of transactions, in milliseconds
 .It %b
 % of time the device had one or more outstanding transactions
 .El
 .Pp
 The old-style
 .Nm
 display (using
 .Fl o )
 shows the following statistics:
 .Pp
 .Bl -tag -width indent -compact
 .It sps
 sectors transferred per second
 .It tps
 transfers per second
 .It msps
 average milliseconds per transaction
 .El
 .Pp
 The old-style
 .Nm
 display, with the
 .Fl I
 flag specified, shows the following statistics:
 .Pp
 .Bl -tag -width indent -compact
 .It blk
 total blocks/sectors transferred
 .It xfr
 total transfers
 .It msps
 average milliseconds per transaction
 .El
 .It cpu
 .Bl -tag -width indent -compact
 .It \&us
 % of cpu time in user mode
 .It \&ni
 % of cpu time in user mode running niced processes
 .It \&sy
 % of cpu time in system mode
 .It \&in
 % of cpu time in interrupt mode
 .It \&id
 % of cpu time in idle mode
 .El
 .El
 .Sh FILES
 .Bl -tag -width /boot/kernel/kernel -compact
 .It Pa /boot/kernel/kernel
 Default kernel namelist.
 .It Pa /dev/kmem
 Default memory file.
 .El
 .Sh EXAMPLES
 .Dl iostat -w 1 da0 da1 cd0
 .Pp
 Display statistics for the first two Direct Access devices and the first
 CDROM device every second ad infinitum.
 .Pp
 .Dl iostat -c 2
 .Pp
 Display the statistics for the first four devices in the system twice, with
 a one second display interval.
 .Pp
 .Dl iostat -t da -t cd -w 1
 .Pp
 Display statistics for all CDROM and Direct Access devices every second
 ad infinitum.
 .Pp
 .Dl iostat -t da,scsi,pass -t cd,scsi,pass
 .Pp
 Display statistics once for all SCSI passthrough devices that provide access
 to either Direct Access or CDROM devices.
 .Pp
 .Dl iostat -h -n 8 -w 1
 .Pp
 Display up to 8 devices with the most I/O every second ad infinitum.
 .Pp
 .Dl iostat -dh -t da -w 1
 .Pp
 Omit the TTY and CPU displays, show devices in order of performance and
 show only Direct Access devices every second ad infinitum.
 .Pp
 .Dl iostat -Iw 3
 .Pp
 Display total statistics every three seconds ad infinitum.
 .Pp
 .Dl iostat -odICTw 2 -c 9
 .Pp
 Display total statistics using the old-style output format 9 times, with
 a two second interval between each measurement/display.
 The
 .Fl d
 flag generally disables the TTY and CPU displays, but since the
 .Fl T
 and
 .Fl C
 flags are given, the TTY and CPU displays will be displayed.
 .Sh SEE ALSO
 .Xr fstat 1 ,
 .Xr netstat 1 ,
 .Xr nfsstat 1 ,
 .Xr ps 1 ,
 .Xr systat 1 ,
 .Xr devstat 3 ,
 .Xr gstat 8 ,
 .Xr pstat 8 ,
 .Xr vmstat 8
 .Pp
 The sections starting with ``Interpreting system activity'' in
 .%T "Installing and Operating 4.3BSD" .
 .Sh HISTORY
 This version of
 .Nm
 first appeared in
 .Fx 3.0 .
 .Sh AUTHORS
 .An Kenneth Merry Aq ken@FreeBSD.org
 .Sh BUGS
 The use of
 .Nm
 as a debugging tool for crash dumps is probably limited because there is
 currently no way to get statistics that only cover the time immediately before
 the crash.
Index: projects/binutils-2.17/usr.sbin/usbdump/usbdump.c
===================================================================
--- projects/binutils-2.17/usr.sbin/usbdump/usbdump.c	(revision 215829)
+++ projects/binutils-2.17/usr.sbin/usbdump/usbdump.c	(revision 215830)
@@ -1,542 +1,544 @@
 /*-
  * Copyright (c) 2010 Weongyo Jeong <weongyo@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/endian.h>
 #include <sys/ioctl.h>
+#include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/utsname.h>
+#include <net/if.h>
+#include <net/bpf.h>
 #include <dev/usb/usb.h>
 #include <dev/usb/usb_pf.h>
 #include <dev/usb/usbdi.h>
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 
 /*
  * Per-run capture state.  main() zeroes the whole structure before option
  * parsing, so rfd and wfd stay 0 unless -r or -w opened a file.
  */
 struct usbcap {
 	int		fd;		/* capture device fd, opened by main() */
 	u_int		bufsize;	/* size of buffer, in bytes */
 	char		*buffer;	/* filled by read() in do_loop() */
 
 	/* for -w option: output file opened by init_wfile() */
 	int		wfd;
 	/* for -r option: input file opened by init_rfile() */
 	int		rfd;
 };
 
 /*
  * Header found at the start of a capture file: init_wfile() writes it and
  * init_rfile() verifies it.  The magic is stored little-endian, and the
  * only version written and accepted is major 0, minor 1.  init_wfile()
  * zeroes the structure first, so the reserved bytes are written as zero.
  * NOTE(review): the packed size is presumably 32 bytes (4 + 1 + 1 + 26),
  * assuming a 4-byte u_int -- confirm before relying on it.
  */
 struct usbcap_filehdr {
 	u_int		magic;
 #define	USBCAP_FILEHDR_MAGIC	0x9a90000e
 	u_char		major;
 	u_char		minor;
 	u_char		reserved[26];
 } __packed;
 
 /* Set to 1 by handle_sigint(); do_loop() runs only while it is 0. */
 static int doexit = 0;
 /* Incremented by print_packets() for every packet record it walks. */
 static int pkt_captured = 0;
 /* Incremented once per -v option. */
 static int verbose = 0;
 /* -i argument: interface name handed to the capture device. */
 static const char *i_arg = "usbus0";
 /* -r argument: capture file to read, or NULL when -r was not given. */
 static const char *r_arg = NULL;
 /* -w argument: capture file to write, or NULL when -w was not given. */
 static const char *w_arg = NULL;
 /*
  * Printable names for the USB_ERR_* codes, indexed by the code itself.
  * print_apacket() indexes this table with up_error.  The array has
  * USB_ERR_MAX slots; any slot without an initializer below is NULL.
  */
 static const char *errstr_table[USB_ERR_MAX] = {
 	[USB_ERR_NORMAL_COMPLETION]	= "NORMAL_COMPLETION",
 	[USB_ERR_PENDING_REQUESTS]	= "PENDING_REQUESTS",
 	[USB_ERR_NOT_STARTED]		= "NOT_STARTED",
 	[USB_ERR_INVAL]			= "INVAL",
 	[USB_ERR_NOMEM]			= "NOMEM",
 	[USB_ERR_CANCELLED]		= "CANCELLED",
 	[USB_ERR_BAD_ADDRESS]		= "BAD_ADDRESS",
 	[USB_ERR_BAD_BUFSIZE]		= "BAD_BUFSIZE",
 	[USB_ERR_BAD_FLAG]		= "BAD_FLAG",
 	[USB_ERR_NO_CALLBACK]		= "NO_CALLBACK",
 	[USB_ERR_IN_USE]		= "IN_USE",
 	[USB_ERR_NO_ADDR]		= "NO_ADDR",
 	[USB_ERR_NO_PIPE]		= "NO_PIPE",
 	[USB_ERR_ZERO_NFRAMES]		= "ZERO_NFRAMES",
 	[USB_ERR_ZERO_MAXP]		= "ZERO_MAXP",
 	[USB_ERR_SET_ADDR_FAILED]	= "SET_ADDR_FAILED",
 	[USB_ERR_NO_POWER]		= "NO_POWER",
 	[USB_ERR_TOO_DEEP]		= "TOO_DEEP",
 	[USB_ERR_IOERROR]		= "IOERROR",
 	[USB_ERR_NOT_CONFIGURED]	= "NOT_CONFIGURED",
 	[USB_ERR_TIMEOUT]		= "TIMEOUT",
 	[USB_ERR_SHORT_XFER]		= "SHORT_XFER",
 	[USB_ERR_STALLED]		= "STALLED",
 	[USB_ERR_INTERRUPTED]		= "INTERRUPTED",
 	[USB_ERR_DMA_LOAD_FAILED]	= "DMA_LOAD_FAILED",
 	[USB_ERR_BAD_CONTEXT]		= "BAD_CONTEXT",
 	[USB_ERR_NO_ROOT_HUB]		= "NO_ROOT_HUB",
 	[USB_ERR_NO_INTR_THREAD]	= "NO_INTR_THREAD",
 	[USB_ERR_NOT_LOCKED]		= "NOT_LOCKED",
 };
 
 /*
  * Short labels for the endpoint transfer types, indexed by the UE_*
  * constant.  print_apacket() indexes this table with up_xfertype.
  */
 static const char *xfertype_table[] = {
 	[UE_CONTROL]			= "CTRL",
 	[UE_ISOCHRONOUS]		= "ISOC",
 	[UE_BULK]			= "BULK",
 	[UE_INTERRUPT]			= "INTR"
 };
 
 /*
  * Signal handler installed for SIGINT by main(): raise the doexit flag
  * so that do_loop() stops at its next loop test.
  */
 static void
 handle_sigint(int sig)
 {
 
 	doexit = 1;
 	(void)sig;	/* the signal number itself is not needed */
 }
 
 /*
  * Print the raw flags word in hex, followed by the name of every
  * USBPF_FLAG_* bit set in it, in the form " flags 0x.. < NAME NAME >".
  */
 static void
 print_flags(u_int32_t flags)
 {
 #define	FLAG_ENTRY(name)	{ USBPF_FLAG_##name, #name }
 	const struct {
 		u_int32_t	 bit;
 		const char	*label;
 	} tab[] = {
 		FLAG_ENTRY(FORCE_SHORT_XFER),
 		FLAG_ENTRY(SHORT_XFER_OK),
 		FLAG_ENTRY(SHORT_FRAMES_OK),
 		FLAG_ENTRY(PIPE_BOF),
 		FLAG_ENTRY(PROXY_BUFFER),
 		FLAG_ENTRY(EXT_BUFFER),
 		FLAG_ENTRY(MANUAL_STATUS),
 		FLAG_ENTRY(NO_PIPE_OK),
 		FLAG_ENTRY(STALL_PIPE),
 	};
 #undef FLAG_ENTRY
 	size_t i;
 
 	printf(" flags %#x < ", flags);
 	/* Names are emitted in table order, each followed by one space. */
 	for (i = 0; i < sizeof(tab) / sizeof(tab[0]); i++) {
 		if ((flags & tab[i].bit) != 0)
 			printf("%s ", tab[i].label);
 	}
 	printf(">\n");
 }
 
 /*
  * Print the raw status word in hex, followed by the name of every
  * USBPF_STATUS_* bit set in it, in the form " status 0x.. < NAME NAME >".
  * The BDMA_* names are only considered when USB_HAVE_BUSDMA is non-zero.
  */
 static void
 print_status(u_int32_t status)
 {
 #define	STATUS_ENTRY(name)	{ USBPF_STATUS_##name, #name }
 	const struct {
 		u_int32_t	 bit;
 		const char	*label;
 	} tab[] = {
 		STATUS_ENTRY(OPEN),
 		STATUS_ENTRY(TRANSFERRING),
 		STATUS_ENTRY(DID_DMA_DELAY),
 		STATUS_ENTRY(DID_CLOSE),
 		STATUS_ENTRY(DRAINING),
 		STATUS_ENTRY(STARTED),
 		STATUS_ENTRY(BW_RECLAIMED),
 		STATUS_ENTRY(CONTROL_XFR),
 		STATUS_ENTRY(CONTROL_HDR),
 		STATUS_ENTRY(CONTROL_ACT),
 		STATUS_ENTRY(CONTROL_STALL),
 		STATUS_ENTRY(SHORT_FRAMES_OK),
 		STATUS_ENTRY(SHORT_XFER_OK),
 #if USB_HAVE_BUSDMA
 		STATUS_ENTRY(BDMA_ENABLE),
 		STATUS_ENTRY(BDMA_NO_POST_SYNC),
 		STATUS_ENTRY(BDMA_SETUP),
 #endif
 		STATUS_ENTRY(ISOCHRONOUS_XFR),
 		STATUS_ENTRY(CURR_DMA_SET),
 		STATUS_ENTRY(CAN_CANCEL_IMMED),
 		STATUS_ENTRY(DOING_CALLBACK),
 	};
 #undef STATUS_ENTRY
 	size_t i;
 
 	printf(" status %#x < ", status);
 	/* Names are emitted in table order, each followed by one space. */
 	for (i = 0; i < sizeof(tab) / sizeof(tab[0]); i++) {
 		if ((status & tab[i].bit) != 0)
 			printf("%s ", tab[i].label);
 	}
 	printf(">\n");
 }
 
 /*
  * Write len bytes starting at region to stdout as a hexdump, 16 bytes per
  * row: a hexadecimal offset, the bytes in hex (with an extra space after
  * the eighth), and the same bytes as text between '|' characters.
  * Positions past the end of the region are shown as "--" in the hex
  * column and as a blank in the text column; bytes outside ' '..'~' are
  * shown as '.'.  Nothing is printed when len is 0.
  */
 static void
 hexdump(const char *region, size_t len)
 {
 	const unsigned char *bytes = (const unsigned char *)region;
 	size_t base, col;
 	int ch;
 
 	for (base = 0; base < len; base += 16) {
 		printf(" %04lx  ", (long)base);
 
 		/* Hex column. */
 		for (col = 0; col < 16; col++) {
 			if (base + col < len)
 				printf("%02x ", bytes[base + col]);
 			else
 				fputs("-- ", stdout);
 			if (col == 7)
 				putchar(' ');
 		}
 
 		/* Text column. */
 		fputs(" |", stdout);
 		for (col = 0; col < 16; col++) {
 			if (base + col >= len) {
 				putchar(' ');
 				continue;
 			}
 			ch = bytes[base + col];
 			if (ch < ' ' || ch > '~')
 				ch = '.';
 			putchar(ch);
 		}
 		fputs("|\n", stdout);
 	}
 }
 
 /*
  * Print one captured transfer.
  *
  * hdr     - capture header; only its timestamp is used here.
  * up      - USB packet header.  Its multi-byte fields are converted from
  *           little-endian to host order IN PLACE, so the caller sees the
  *           converted values afterwards.
  * payload - data following the packet header: for each of up_frames
  *           frames, a little-endian 32-bit length followed by that many
  *           bytes.
  *
  * The summary line is always printed.  With verbose >= 1 every frame is
  * hexdumped, and with verbose >= 2 the flags and status words are decoded.
  */
 static void
-print_apacket(const struct usbpf_xhdr *hdr, struct usbpf_pkthdr *up,
+print_apacket(const struct bpf_xhdr *hdr, struct usbpf_pkthdr *up,
     const char *payload)
 {
 	struct tm *tm;
 	struct timeval tv;
 	size_t len;
 	u_int32_t framelen, x;
 	const char *ptr = payload;
 	char buf[64];
 
 	/* A packet from the kernel is based on little endian byte order. */
 	up->up_busunit = le32toh(up->up_busunit);
 	up->up_flags = le32toh(up->up_flags);
 	up->up_status = le32toh(up->up_status);
 	up->up_length = le32toh(up->up_length);
 	up->up_frames = le32toh(up->up_frames);
 	up->up_error = le32toh(up->up_error);
 	up->up_interval = le32toh(up->up_interval);
 
 	/*
 	 * NOTE(review): the fractional part of the timestamp is copied into
 	 * tv_usec unchanged and later printed as six digits; confirm that it
 	 * really is expressed in microseconds.
 	 */
-	tv.tv_sec = hdr->uh_tstamp.ut_sec;
-	tv.tv_usec = hdr->uh_tstamp.ut_frac;
+	tv.tv_sec = hdr->bh_tstamp.bt_sec;
+	tv.tv_usec = hdr->bh_tstamp.bt_frac;
 	tm = localtime(&tv.tv_sec);
 
 	len = strftime(buf, sizeof(buf), "%H:%M:%S", tm);
 	/*
 	 * NOTE(review): %ju expects a uintmax_t argument; confirm that
 	 * tv_usec has that type on all targets, or cast it.
 	 */
 	printf("%.*s.%06ju", (int)len, buf, tv.tv_usec);
 	/*
 	 * NOTE(review): up_xfertype and up_error index xfertype_table and
 	 * errstr_table without a range check, and errstr_table may hold NULL
 	 * entries; both values come from the kernel or from a capture file.
 	 */
 	printf(" usbus%d.%d 0x%02x %s %s", up->up_busunit, up->up_address,
 	    up->up_endpoint,
 	    xfertype_table[up->up_xfertype],
 	    up->up_type == USBPF_XFERTAP_SUBMIT ? ">" : "<");
 	printf(" (%d/%d)", up->up_frames, up->up_length);
 	/* The error name is only printed for completed transfers. */
 	if (up->up_type == USBPF_XFERTAP_DONE)
 		printf(" %s", errstr_table[up->up_error]);
 	if (up->up_xfertype == UE_BULK || up->up_xfertype == UE_ISOCHRONOUS)
 		printf(" %d", up->up_interval);
 	printf("\n");
 
 	if (verbose >= 1) {
 		/* Each frame: 32-bit little-endian length, then the data. */
 		for (x = 0; x < up->up_frames; x++) {
 			framelen = le32toh(*((const u_int32_t *)ptr));
 			ptr += sizeof(u_int32_t);
 			printf(" frame[%u] len %d\n", x, framelen);
 			/*
 			 * NOTE(review): this size limit is enforced only by
 			 * assert(), so it disappears when built with NDEBUG.
 			 */
 			assert(framelen < (1024 * 4));
 			hexdump(ptr, framelen);
 			ptr += framelen;
 		}
 	}
 	if (verbose >= 2) {
 		print_flags(up->up_flags);
 		print_status(up->up_status);
 	}
 }
-    
 
 /*
  * Walk a buffer of captured records and count (and, unless -w is active,
  * print) each one.
  *
  * data    - start of the buffer.  It is not const because print_apacket()
  *           byte-swaps the packet headers in place.
  * datalen - number of valid bytes in data.
  *
  * Each record starts with a capture header.  The USB packet header is
  * located hdrlen bytes after the record start, and the next record starts
  * at the word-aligned offset hdrlen + caplen.  pkt_captured is incremented
  * once per record.
  */
 static void
 print_packets(char *data, const int datalen)
 {
 	struct usbpf_pkthdr *up;
-	const struct usbpf_xhdr *hdr;
+	const struct bpf_xhdr *hdr;
 	u_int32_t framelen, x;
 	char *ptr, *next;
 
 	for (ptr = data; ptr < (data + datalen); ptr = next) {
-		hdr = (const struct usbpf_xhdr *)ptr;
-		up = (struct usbpf_pkthdr *)(ptr + hdr->uh_hdrlen);
-		next = ptr + USBPF_WORDALIGN(hdr->uh_hdrlen + hdr->uh_caplen);
+		hdr = (const struct bpf_xhdr *)ptr;
+		up = (struct usbpf_pkthdr *)(ptr + hdr->bh_hdrlen);
+		next = ptr + BPF_WORDALIGN(hdr->bh_hdrlen + hdr->bh_caplen);
 
 		/* The frame data starts right after the USB packet header. */
 		ptr = ((char *)up) + sizeof(struct usbpf_pkthdr);
 		if (w_arg == NULL)
 			print_apacket(hdr, up, ptr);
 		pkt_captured++;
 		/*
 		 * NOTE(review): this loop steps ptr over the frames, but the
 		 * for statement then overwrites ptr with next, so the value
 		 * computed here is never used.  Also, when -w is active
 		 * print_apacket() has not run, so up_frames is still in
 		 * little-endian order here -- confirm this is intended.
 		 */
 		for (x = 0; x < up->up_frames; x++) {
 			framelen = le32toh(*((const u_int32_t *)ptr));
 			ptr += sizeof(u_int32_t) + framelen;
 		}
 	}
 }
 
 /*
  * Append one capture buffer to the -w output file as a single record:
  * the byte count as a little-endian int, followed by the raw data.
  * read_file() consumes the same layout.
  *
  * p       - capture state; p->wfd must be the open output file.
  * data    - buffer to store.
  * datalen - number of bytes in data.
  *
  * A failed or short write is fatal: a diagnostic is printed and the
  * program exits with EXIT_FAILURE.  The result is checked explicitly
  * rather than with assert() so the check survives an NDEBUG build.
  */
 static void
 write_packets(struct usbcap *p, const char *data, const int datalen)
 {
 	int len = htole32(datalen);
 	ssize_t ret;
 
 	ret = write(p->wfd, &len, sizeof(len));
 	if (ret != (ssize_t)sizeof(len)) {
 		fprintf(stderr, "write: %s\n",
 		    ret < 0 ? strerror(errno) : "short write");
 		exit(EXIT_FAILURE);
 	}
 	ret = write(p->wfd, data, datalen);
 	if (ret != datalen) {
 		fprintf(stderr, "write: %s\n",
 		    ret < 0 ? strerror(errno) : "short write");
 		exit(EXIT_FAILURE);
 	}
 }
 
 static void
 read_file(struct usbcap *p)
 {
 	int datalen, ret;
 	char *data;
 
 	while ((ret = read(p->rfd, &datalen, sizeof(int))) == sizeof(int)) {
 		datalen = le32toh(datalen);
 		data = malloc(datalen);
 		assert(data != NULL);
 		ret = read(p->rfd, data, datalen);
 		assert(ret == datalen);
 		print_packets(data, datalen);
 		free(data);
 	}
 	if (ret == -1)
 		fprintf(stderr, "read: %s\n", strerror(errno));
 }
 
 /*
  * Live capture loop: keep reading from the capture device into
  * p->buffer until doexit becomes non-zero or read() fails with an error
  * other than EINTR.  Every non-empty buffer is stored with
  * write_packets() when -w is active and is always passed to
  * print_packets().
  */
 static void
 do_loop(struct usbcap *p)
 {
 	int nread;
 
 	while (doexit == 0) {
 		nread = read(p->fd, (char *)p->buffer, p->bufsize);
 		if (nread < 0 && errno != EINTR) {
 			fprintf(stderr, "read: %s\n", strerror(errno));
 			return;
 		}
 		/* Interrupted or empty read: go back and re-test doexit. */
 		if (nread <= 0)
 			continue;
 		if (w_arg != NULL)
 			write_packets(p, p->buffer, nread);
 		print_packets(p->buffer, nread);
 	}
 }
 
 static void
 init_rfile(struct usbcap *p)
 {
 	struct usbcap_filehdr uf;
 	int ret;
 
 	p->rfd = open(r_arg, O_RDONLY);
 	if (p->rfd < 0) {
 		fprintf(stderr, "open: %s (%s)\n", r_arg, strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	ret = read(p->rfd, &uf, sizeof(uf));
 	assert(ret == sizeof(uf));
 	assert(le32toh(uf.magic) == USBCAP_FILEHDR_MAGIC);
 	assert(uf.major == 0);
 	assert(uf.minor == 1);
 }
 
 /*
  * Create (or truncate) the capture file named by w_arg with owner
  * read/write permission, store the descriptor in p->wfd, and write the
  * file header: little-endian magic, version 0.1, reserved bytes zero.
  *
  * An open failure or a failed or short header write prints a diagnostic
  * and exits with EXIT_FAILURE.  The write is checked explicitly rather
  * than with assert(), which is compiled out under NDEBUG.
  */
 static void
 init_wfile(struct usbcap *p)
 {
 	struct usbcap_filehdr uf;
 	ssize_t ret;
 
 	p->wfd = open(w_arg, O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR);
 	if (p->wfd < 0) {
 		fprintf(stderr, "open: %s (%s)\n", w_arg, strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	/* Zero everything first so the reserved bytes are well defined. */
 	memset(&uf, 0, sizeof(uf));
 	uf.magic = htole32(USBCAP_FILEHDR_MAGIC);
 	uf.major = 0;
 	uf.minor = 1;
 	ret = write(p->wfd, (const void *)&uf, sizeof(uf));
 	if (ret != (ssize_t)sizeof(uf)) {
 		fprintf(stderr, "write: %s (%s)\n", w_arg,
 		    ret < 0 ? strerror(errno) : "short write");
 		exit(EXIT_FAILURE);
 	}
 }
 
 /*
  * Print the option summary to stderr and exit with status 1.
  */
 static void
 usage(void)
 {
 	static const struct {
 		const char	*opt;
 		const char	*desc;
 	} opts[] = {
 		{ "-i ifname",	"Listen on USB bus interface" },
 		{ "-r file",	"Read the raw packets from file" },
 		{ "-s snaplen",	"Snapshot bytes from each packet" },
 		{ "-v",		"Increases the verbose level" },
 		{ "-w file",	"Write the raw packets to file" },
 	};
 	size_t i;
 
 	fprintf(stderr, "usage: usbdump [options]\n");
 	for (i = 0; i < sizeof(opts) / sizeof(opts[0]); i++)
 		fprintf(stderr, "    %-14s %s\n", opts[i].opt, opts[i].desc);
 	exit(1);
 }
 
 /*
  * usbdump entry point.
  *
  * Parses the options (-i, -r, -s, -v, -w).  With -r the capture file is
  * replayed and the program exits.  Otherwise the capture device is
  * opened, bound to the interface named by -i (default "usbus0"), given a
  * one-instruction accept-all filter and a one second read timeout, and
  * do_loop() runs until SIGINT.  Capture statistics are printed before
  * exit.
  *
  * Returns EXIT_SUCCESS after a completed capture, EXIT_FAILURE when the
  * device cannot be opened or configured.
  */
 int
 main(int argc, char *argv[])
 {
 	struct timeval tv;
-	struct usbpf_insn total_insn;
-	struct usbpf_program total_prog;
-	struct usbpf_stat us;
-	struct usbpf_version uv;
+	struct bpf_insn total_insn;
+	struct bpf_program total_prog;
+	struct bpf_stat us;
+	struct bpf_version bv;
 	struct usbcap uc, *p = &uc;
-	struct usbpf_ifreq ufr;
+	struct ifreq ifr;
 	long snapshot = 192;
 	u_int v;
 	int fd, o;
 	const char *optstring;
 
 	/* Leaves fd, rfd and wfd at 0, which the cleanup below relies on. */
 	bzero(&uc, sizeof(struct usbcap));
 
 	optstring = "i:r:s:vw:";
 	while ((o = getopt(argc, argv, optstring)) != -1) {
 		switch (o) {
 		case 'i':
 			i_arg = optarg;
 			break;
 		case 'r':
 			/* The file is opened and validated right away. */
 			r_arg = optarg;
 			init_rfile(p);
 			break;
 		case 's':
 			/*
 			 * NOTE(review): errno is cleared AFTER strtol(), so
 			 * the EINVAL test below can never be true; the reset
 			 * presumably belongs before the call.
 			 */
 			snapshot = strtol(optarg, NULL, 10);
 			errno = 0;
 			if (snapshot == 0 && errno == EINVAL)
 				usage();
 			/* snapshot == 0 is special */
 			if (snapshot == 0)
 				snapshot = -1;
 			break;
 		case 'v':
 			verbose++;
 			break;
 		case 'w':
 			/* The file is created and its header written now. */
 			w_arg = optarg;
 			init_wfile(p);
 			break;
 		default:
 			usage();
 			/* NOTREACHED */
 		}
 	}
 
 	/* Replay mode: no capture device is needed. */
 	if (r_arg != NULL) {
 		read_file(p);
 		exit(EXIT_SUCCESS);
 	}
 
-	p->fd = fd = open("/dev/usbpf", O_RDONLY);
+	p->fd = fd = open("/dev/bpf", O_RDONLY);
 	if (p->fd < 0) {
 		fprintf(stderr, "(no devices found)\n");
 		return (EXIT_FAILURE);
 	}
 
 	/* Refuse to run against a kernel with an incompatible version. */
-	if (ioctl(fd, UIOCVERSION, (caddr_t)&uv) < 0) {
-		fprintf(stderr, "UIOCVERSION: %s\n", strerror(errno));
+	if (ioctl(fd, BIOCVERSION, (caddr_t)&bv) < 0) {
+		fprintf(stderr, "BIOCVERSION: %s\n", strerror(errno));
 		return (EXIT_FAILURE);
 	}
-	if (uv.uv_major != USBPF_MAJOR_VERSION ||
-	    uv.uv_minor < USBPF_MINOR_VERSION) {
+	if (bv.bv_major != BPF_MAJOR_VERSION ||
+	    bv.bv_minor < BPF_MINOR_VERSION) {
 		fprintf(stderr, "kernel bpf filter out of date");
 		return (EXIT_FAILURE);
 	}
 
 	/*
 	 * Start from the reported buffer length (or the minimum used here
 	 * when the query fails or reports less) and halve it until the
 	 * interface can be attached.  v reaching 0 means no size worked.
 	 * NOTE(review): strncpy() does not NUL-terminate the name when
 	 * i_arg is as long as the destination or longer.
 	 */
-	if ((ioctl(fd, UIOCGBLEN, (caddr_t)&v) < 0) || v < 65536)
-		v = 65536;
+	if ((ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) || v < 4096)
+		v = 4096;
 	for ( ; v != 0; v >>= 1) {
-		(void)ioctl(fd, UIOCSBLEN, (caddr_t)&v);
-		(void)strncpy(ufr.ufr_name, i_arg, sizeof(ufr.ufr_name));
-		if (ioctl(fd, UIOCSETIF, (caddr_t)&ufr) >= 0)
+		(void)ioctl(fd, BIOCSBLEN, (caddr_t)&v);
+		(void)strncpy(ifr.ifr_name, i_arg, sizeof(ifr.ifr_name));
+		if (ioctl(fd, BIOCSETIF, (caddr_t)&ifr) >= 0)
 			break;
 	}
 	if (v == 0) {
-		fprintf(stderr, "UIOCSBLEN: %s: No buffer size worked", i_arg);
+		fprintf(stderr, "BIOCSBLEN: %s: No buffer size worked", i_arg);
 		return (EXIT_FAILURE);
 	}
 
 	/* Query the buffer length again and size the read buffer from it. */
-	if (ioctl(fd, UIOCGBLEN, (caddr_t)&v) < 0) {
-		fprintf(stderr, "UIOCGBLEN: %s", strerror(errno));
+	if (ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) {
+		fprintf(stderr, "BIOCGBLEN: %s", strerror(errno));
 		return (EXIT_FAILURE);
 	}
 
 	p->bufsize = v;
 	p->buffer = (u_char *)malloc(p->bufsize);
 	if (p->buffer == NULL) {
 		fprintf(stderr, "malloc: %s", strerror(errno));
 		return (EXIT_FAILURE);
 	}
 
 	/* XXX no read filter rules yet so at this moment accept everything */
 	/* Single return instruction whose constant operand is the snapshot. */
-	total_insn.code = (u_short)(USBPF_RET | USBPF_K);
+	total_insn.code = (u_short)(BPF_RET | BPF_K);
 	total_insn.jt = 0;
 	total_insn.jf = 0;
 	total_insn.k = snapshot;
 
-	total_prog.uf_len = 1;
-	total_prog.uf_insns = &total_insn;
-	if (ioctl(p->fd, UIOCSETF, (caddr_t)&total_prog) < 0) {
-		fprintf(stderr, "UIOCSETF: %s", strerror(errno));
+	total_prog.bf_len = 1;
+	total_prog.bf_insns = &total_insn;
+	if (ioctl(p->fd, BIOCSETF, (caddr_t)&total_prog) < 0) {
+		fprintf(stderr, "BIOCSETF: %s", strerror(errno));
 		return (EXIT_FAILURE);
 	}
 
 	/* 1 second read timeout */
 	tv.tv_sec = 1;
 	tv.tv_usec = 0;
-	if (ioctl(p->fd, UIOCSRTIMEOUT, (caddr_t)&tv) < 0) {
-		fprintf(stderr, "UIOCSRTIMEOUT: %s", strerror(errno));
+	if (ioctl(p->fd, BIOCSRTIMEOUT, (caddr_t)&tv) < 0) {
+		fprintf(stderr, "BIOCSRTIMEOUT: %s", strerror(errno));
 		return (EXIT_FAILURE);
 	}
 
 	/* handle_sigint() sets doexit, which ends do_loop(). */
 	(void)signal(SIGINT, handle_sigint);
 
 	do_loop(p);
 
-	if (ioctl(fd, UIOCGSTATS, (caddr_t)&us) < 0) {
-		fprintf(stderr, "UIOCGSTATS: %s", strerror(errno));
+	if (ioctl(fd, BIOCGSTATS, (caddr_t)&us) < 0) {
+		fprintf(stderr, "BIOCGSTATS: %s", strerror(errno));
 		return (EXIT_FAILURE);
 	}
 
 	/* XXX what's difference between pkt_captured and us.us_recv? */
 	printf("\n");
 	printf("%d packets captured\n", pkt_captured);
-	printf("%d packets received by filter\n", us.us_recv);
-	printf("%d packets dropped by kernel\n", us.us_drop);
+	printf("%d packets received by filter\n", us.bs_recv);
+	printf("%d packets dropped by kernel\n", us.bs_drop);
 
 	/*
 	 * A descriptor value of 0 is treated as "never opened" (see the
 	 * bzero() above), so only positive descriptors are closed.
 	 */
 	if (p->fd > 0)
 		close(p->fd);
 	if (p->rfd > 0)
 		close(p->rfd);
 	if (p->wfd > 0)
 		close(p->wfd);
 
 	return (EXIT_SUCCESS);
 }
Index: projects/binutils-2.17/usr.sbin/zic
===================================================================
--- projects/binutils-2.17/usr.sbin/zic	(revision 215829)
+++ projects/binutils-2.17/usr.sbin/zic	(revision 215830)

Property changes on: projects/binutils-2.17/usr.sbin/zic
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/usr.sbin/zic:r215709-215824
Index: projects/binutils-2.17
===================================================================
--- projects/binutils-2.17	(revision 215829)
+++ projects/binutils-2.17	(revision 215830)

Property changes on: projects/binutils-2.17
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r215709-215824