Index: stable/11/lib/libc/include/reentrant.h =================================================================== --- stable/11/lib/libc/include/reentrant.h (revision 330445) +++ stable/11/lib/libc/include/reentrant.h (revision 330446) @@ -1,128 +1,128 @@ /*- * Copyright (c) 1997,98 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by J.T. Conklin. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Requirements: * * 1. The thread safe mechanism should be lightweight so the library can * be used by non-threaded applications without unreasonable overhead. * * 2. There should be no dependency on a thread engine for non-threaded * applications. * * 3. There should be no dependency on any particular thread engine. * * 4. The library should be able to be compiled without support for thread * safety. * * * Rationale: * * One approach for thread safety is to provide discrete versions of the * library: one thread safe, the other not. The disadvantage of this is * that libc is rather large, and two copies of a library which are 99%+ * identical is not an efficient use of resources. * * Another approach is to provide a single thread safe library. However, * it should not add significant run time or code size overhead to non- * threaded applications. * * Since the NetBSD C library is used in other projects, it should be * easy to replace the mutual exclusion primitives with ones provided by * another system. Similarly, it should also be easy to remove all * support for thread safety completely if the target environment does * not support threads. * * * Implementation Details: * * The mutex primitives used by the library (mutex_t, mutex_lock, etc.) - * are macros which expand to the cooresponding primitives provided by + * are macros which expand to the corresponding primitives provided by * the thread engine or to nothing. The latter is used so that code is * not unreasonably cluttered with #ifdefs when all thread safe support * is removed. * * The mutex macros can be directly mapped to the mutex primitives from * pthreads, however it should be reasonably easy to wrap another mutex * implementation so it presents a similar interface. 
* * Stub implementations of the mutex functions are provided with *weak* * linkage. These functions simply return success. When linked with a * thread library (i.e. -lpthread), the functions will override the * stubs. */ #include <pthread.h> #include <pthread_np.h> #include "libc_private.h" #define mutex_t pthread_mutex_t #define cond_t pthread_cond_t #define rwlock_t pthread_rwlock_t #define once_t pthread_once_t #define thread_key_t pthread_key_t #define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER #define ONCE_INITIALIZER PTHREAD_ONCE_INIT #define mutex_init(m, a) _pthread_mutex_init(m, a) #define mutex_lock(m) if (__isthreaded) \ _pthread_mutex_lock(m) #define mutex_unlock(m) if (__isthreaded) \ _pthread_mutex_unlock(m) #define mutex_trylock(m) (__isthreaded ? 0 : _pthread_mutex_trylock(m)) #define cond_init(c, a, p) _pthread_cond_init(c, a) #define cond_signal(m) if (__isthreaded) \ _pthread_cond_signal(m) #define cond_broadcast(m) if (__isthreaded) \ _pthread_cond_broadcast(m) #define cond_wait(c, m) if (__isthreaded) \ _pthread_cond_wait(c, m) #define rwlock_init(l, a) _pthread_rwlock_init(l, a) #define rwlock_rdlock(l) if (__isthreaded) \ _pthread_rwlock_rdlock(l) #define rwlock_wrlock(l) if (__isthreaded) \ _pthread_rwlock_wrlock(l) #define rwlock_unlock(l) if (__isthreaded) \ _pthread_rwlock_unlock(l) #define thr_keycreate(k, d) _pthread_key_create(k, d) #define thr_setspecific(k, p) _pthread_setspecific(k, p) #define thr_getspecific(k) _pthread_getspecific(k) #define thr_sigsetmask(f, n, o) _pthread_sigmask(f, n, o) #define thr_once(o, i) _pthread_once(o, i) #define thr_self() _pthread_self() #define thr_exit(x) _pthread_exit(x) #define thr_main() _pthread_main_np() Index: stable/11/lib/libc/sparc64/fpu/fpu_reg.h =================================================================== --- stable/11/lib/libc/sparc64/fpu/fpu_reg.h (revision 330445) +++ stable/11/lib/libc/sparc64/fpu/fpu_reg.h (revision 330446) @@ -1,88 +1,88 @@ /*- * Copyright (c) 2002 by Thomas Moestl <tmm@FreeBSD.org>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LIBC_SPARC64_FPU_FPU_REG_H_ #define _LIBC_SPARC64_FPU_FPU_REG_H_ /* - * These are not really of type char[]. They are are arrays of functions defined + * These are not really of type char[]. 
They are arrays of functions defined * in fpu_reg.S; each array member loads/stores a certain fpu register of the * given size. */ extern char __fpu_ld32[]; extern char __fpu_st32[]; extern char __fpu_ld64[]; extern char __fpu_st64[]; /* Size of the functions in the arrays. */ #define FPU_LD32_SZ 8 #define FPU_ST32_SZ 8 #define FPU_LD64_SZ 8 #define FPU_ST64_SZ 8 /* Typedefs for convenient casts in the functions below. */ typedef void (fp_ldst32_fn)(u_int32_t *); typedef void (fp_ldst64_fn)(u_int64_t *); /* * These are the functions that are actually used in the fpu emulation code to * access the fp registers. They are usually not used more than once, so * caching needs not be done here. */ static __inline u_int32_t __fpu_getreg(int r) { u_int32_t rv; ((fp_ldst32_fn *)&__fpu_st32[r * FPU_ST32_SZ])(&rv); return (rv); } static __inline u_int64_t __fpu_getreg64(int r) { u_int64_t rv; ((fp_ldst64_fn *)&__fpu_st64[(r >> 1) * FPU_ST64_SZ])(&rv); return (rv); } static __inline void __fpu_setreg(int r, u_int32_t v) { ((fp_ldst32_fn *)&__fpu_ld32[r * FPU_LD32_SZ])(&v); } static __inline void __fpu_setreg64(int r, u_int64_t v) { ((fp_ldst64_fn *)&__fpu_ld64[(r >> 1) * FPU_LD64_SZ])(&v); } #endif /* _LIBC_SPARC64_FPU_FPU_REG_H_ */ Index: stable/11/lib/libedit/map.c =================================================================== --- stable/11/lib/libedit/map.c (revision 330445) +++ stable/11/lib/libedit/map.c (revision 330446) @@ -1,1429 +1,1429 @@ /* $NetBSD: map.c,v 1.43 2016/02/17 19:47:49 christos Exp $ */ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Christos Zoulas of Cornell University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "config.h" #if !defined(lint) && !defined(SCCSID) #if 0 static char sccsid[] = "@(#)map.c 8.1 (Berkeley) 6/4/93"; #else __RCSID("$NetBSD: map.c,v 1.43 2016/02/17 19:47:49 christos Exp $"); #endif #endif /* not lint && not SCCSID */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); /* * map.c: Editor function definitions */ #include <stdlib.h> #include <string.h> #include "el.h" #include "help.h" #include "parse.h" private void map_print_key(EditLine *, el_action_t *, const Char *); private void map_print_some_keys(EditLine *, el_action_t *, wint_t, wint_t); private void map_print_all_keys(EditLine *); private void map_init_nls(EditLine *); private void map_init_meta(EditLine *); /* keymap tables ; should be N_KEYS*sizeof(KEYCMD) bytes long */ private const el_action_t el_map_emacs[] = { /* 0 */ EM_SET_MARK, /* ^@ */ /* 1 */ ED_MOVE_TO_BEG, /* ^A */ /* 2 */ ED_PREV_CHAR, /* ^B */ /* 3 */ ED_TTY_SIGINT, /* ^C */ /* 4 */ EM_DELETE_OR_LIST, /* ^D */ /* 5 */ ED_MOVE_TO_END, /* ^E */ /* 6 */ ED_NEXT_CHAR, /* ^F */ /* 7 */ ED_UNASSIGNED, /* ^G */ /* 8 */ EM_DELETE_PREV_CHAR, /* ^H */ /* 9 */ ED_UNASSIGNED, /* ^I */ /* 10 */ ED_NEWLINE, /* ^J */ /* 11 */ ED_KILL_LINE, /* ^K */ /* 12 */ ED_CLEAR_SCREEN, /* ^L */ /* 13 */ ED_NEWLINE, /* ^M */ /* 14 */ ED_NEXT_HISTORY, /* ^N */ /* 15 */ ED_TTY_FLUSH_OUTPUT, /* ^O */ /* 16 */ ED_PREV_HISTORY, /* ^P */ /* 17 */ ED_TTY_START_OUTPUT, /* ^Q */ /* 18 */ ED_REDISPLAY, /* ^R */ /* 19 */ ED_TTY_STOP_OUTPUT, /* ^S */ /* 20 */ ED_TRANSPOSE_CHARS, /* ^T */ /* 21 */ EM_KILL_LINE, /* ^U */ /* 22 */ ED_QUOTED_INSERT, /* ^V */ /* 23 */ EM_KILL_REGION, /* ^W */ /* 24 */ ED_SEQUENCE_LEAD_IN, /* ^X */ /* 25 */ EM_YANK, /* ^Y */ /* 26 */ ED_TTY_SIGTSTP, /* ^Z */ /* 27 */ EM_META_NEXT, /* ^[ */ /* 28 */ ED_TTY_SIGQUIT, /* ^\ */ /* 29 */ ED_TTY_DSUSP, /* ^] */ /* 30 */ ED_UNASSIGNED, /* ^^ */ /* 31 */ ED_UNASSIGNED, /* ^_ */ /* 32 */ ED_INSERT, /* SPACE */ /* 33 */ ED_INSERT, /* ! */ /* 34 */ ED_INSERT, /* " */ /* 35 */ ED_INSERT, /* # */ /* 36 */ ED_INSERT, /* $ */ /* 37 */ ED_INSERT, /* % */ /* 38 */ ED_INSERT, /* & */ /* 39 */ ED_INSERT, /* ' */ /* 40 */ ED_INSERT, /* ( */ /* 41 */ ED_INSERT, /* ) */ /* 42 */ ED_INSERT, /* * */ /* 43 */ ED_INSERT, /* + */ /* 44 */ ED_INSERT, /* , */ /* 45 */ ED_INSERT, /* - */ /* 46 */ ED_INSERT, /* . */ /* 47 */ ED_INSERT, /* / */ /* 48 */ ED_DIGIT, /* 0 */ /* 49 */ ED_DIGIT, /* 1 */ /* 50 */ ED_DIGIT, /* 2 */ /* 51 */ ED_DIGIT, /* 3 */ /* 52 */ ED_DIGIT, /* 4 */ /* 53 */ ED_DIGIT, /* 5 */ /* 54 */ ED_DIGIT, /* 6 */ /* 55 */ ED_DIGIT, /* 7 */ /* 56 */ ED_DIGIT, /* 8 */ /* 57 */ ED_DIGIT, /* 9 */ /* 58 */ ED_INSERT, /* : */ /* 59 */ ED_INSERT, /* ; */ /* 60 */ ED_INSERT, /* < */ /* 61 */ ED_INSERT, /* = */ /* 62 */ ED_INSERT, /* > */ /* 63 */ ED_INSERT, /* ? 
*/ /* 64 */ ED_INSERT, /* @ */ /* 65 */ ED_INSERT, /* A */ /* 66 */ ED_INSERT, /* B */ /* 67 */ ED_INSERT, /* C */ /* 68 */ ED_INSERT, /* D */ /* 69 */ ED_INSERT, /* E */ /* 70 */ ED_INSERT, /* F */ /* 71 */ ED_INSERT, /* G */ /* 72 */ ED_INSERT, /* H */ /* 73 */ ED_INSERT, /* I */ /* 74 */ ED_INSERT, /* J */ /* 75 */ ED_INSERT, /* K */ /* 76 */ ED_INSERT, /* L */ /* 77 */ ED_INSERT, /* M */ /* 78 */ ED_INSERT, /* N */ /* 79 */ ED_INSERT, /* O */ /* 80 */ ED_INSERT, /* P */ /* 81 */ ED_INSERT, /* Q */ /* 82 */ ED_INSERT, /* R */ /* 83 */ ED_INSERT, /* S */ /* 84 */ ED_INSERT, /* T */ /* 85 */ ED_INSERT, /* U */ /* 86 */ ED_INSERT, /* V */ /* 87 */ ED_INSERT, /* W */ /* 88 */ ED_INSERT, /* X */ /* 89 */ ED_INSERT, /* Y */ /* 90 */ ED_INSERT, /* Z */ /* 91 */ ED_INSERT, /* [ */ /* 92 */ ED_INSERT, /* \ */ /* 93 */ ED_INSERT, /* ] */ /* 94 */ ED_INSERT, /* ^ */ /* 95 */ ED_INSERT, /* _ */ /* 96 */ ED_INSERT, /* ` */ /* 97 */ ED_INSERT, /* a */ /* 98 */ ED_INSERT, /* b */ /* 99 */ ED_INSERT, /* c */ /* 100 */ ED_INSERT, /* d */ /* 101 */ ED_INSERT, /* e */ /* 102 */ ED_INSERT, /* f */ /* 103 */ ED_INSERT, /* g */ /* 104 */ ED_INSERT, /* h */ /* 105 */ ED_INSERT, /* i */ /* 106 */ ED_INSERT, /* j */ /* 107 */ ED_INSERT, /* k */ /* 108 */ ED_INSERT, /* l */ /* 109 */ ED_INSERT, /* m */ /* 110 */ ED_INSERT, /* n */ /* 111 */ ED_INSERT, /* o */ /* 112 */ ED_INSERT, /* p */ /* 113 */ ED_INSERT, /* q */ /* 114 */ ED_INSERT, /* r */ /* 115 */ ED_INSERT, /* s */ /* 116 */ ED_INSERT, /* t */ /* 117 */ ED_INSERT, /* u */ /* 118 */ ED_INSERT, /* v */ /* 119 */ ED_INSERT, /* w */ /* 120 */ ED_INSERT, /* x */ /* 121 */ ED_INSERT, /* y */ /* 122 */ ED_INSERT, /* z */ /* 123 */ ED_INSERT, /* { */ /* 124 */ ED_INSERT, /* | */ /* 125 */ ED_INSERT, /* } */ /* 126 */ ED_INSERT, /* ~ */ /* 127 */ EM_DELETE_PREV_CHAR, /* ^? */ /* 128 */ ED_UNASSIGNED, /* M-^@ */ /* 129 */ ED_UNASSIGNED, /* M-^A */ /* 130 */ ED_UNASSIGNED, /* M-^B */ /* 131 */ ED_UNASSIGNED, /* M-^C */ /* 132 */ ED_UNASSIGNED, /* M-^D */ /* 133 */ ED_UNASSIGNED, /* M-^E */ /* 134 */ ED_UNASSIGNED, /* M-^F */ /* 135 */ ED_UNASSIGNED, /* M-^G */ /* 136 */ ED_DELETE_PREV_WORD, /* M-^H */ /* 137 */ ED_UNASSIGNED, /* M-^I */ /* 138 */ ED_UNASSIGNED, /* M-^J */ /* 139 */ ED_UNASSIGNED, /* M-^K */ /* 140 */ ED_CLEAR_SCREEN, /* M-^L */ /* 141 */ ED_UNASSIGNED, /* M-^M */ /* 142 */ ED_UNASSIGNED, /* M-^N */ /* 143 */ ED_UNASSIGNED, /* M-^O */ /* 144 */ ED_UNASSIGNED, /* M-^P */ /* 145 */ ED_UNASSIGNED, /* M-^Q */ /* 146 */ ED_UNASSIGNED, /* M-^R */ /* 147 */ ED_UNASSIGNED, /* M-^S */ /* 148 */ ED_UNASSIGNED, /* M-^T */ /* 149 */ ED_UNASSIGNED, /* M-^U */ /* 150 */ ED_UNASSIGNED, /* M-^V */ /* 151 */ ED_UNASSIGNED, /* M-^W */ /* 152 */ ED_UNASSIGNED, /* M-^X */ /* 153 */ ED_UNASSIGNED, /* M-^Y */ /* 154 */ ED_UNASSIGNED, /* M-^Z */ /* 155 */ ED_UNASSIGNED, /* M-^[ */ /* 156 */ ED_UNASSIGNED, /* M-^\ */ /* 157 */ ED_UNASSIGNED, /* M-^] */ /* 158 */ ED_UNASSIGNED, /* M-^^ */ /* 159 */ EM_COPY_PREV_WORD, /* M-^_ */ /* 160 */ ED_UNASSIGNED, /* M-SPACE */ /* 161 */ ED_UNASSIGNED, /* M-! */ /* 162 */ ED_UNASSIGNED, /* M-" */ /* 163 */ ED_UNASSIGNED, /* M-# */ /* 164 */ ED_UNASSIGNED, /* M-$ */ /* 165 */ ED_UNASSIGNED, /* M-% */ /* 166 */ ED_UNASSIGNED, /* M-& */ /* 167 */ ED_UNASSIGNED, /* M-' */ /* 168 */ ED_UNASSIGNED, /* M-( */ /* 169 */ ED_UNASSIGNED, /* M-) */ /* 170 */ ED_UNASSIGNED, /* M-* */ /* 171 */ ED_UNASSIGNED, /* M-+ */ /* 172 */ ED_UNASSIGNED, /* M-, */ /* 173 */ ED_UNASSIGNED, /* M-- */ /* 174 */ ED_UNASSIGNED, /* M-. 
*/ /* 175 */ ED_UNASSIGNED, /* M-/ */ /* 176 */ ED_ARGUMENT_DIGIT, /* M-0 */ /* 177 */ ED_ARGUMENT_DIGIT, /* M-1 */ /* 178 */ ED_ARGUMENT_DIGIT, /* M-2 */ /* 179 */ ED_ARGUMENT_DIGIT, /* M-3 */ /* 180 */ ED_ARGUMENT_DIGIT, /* M-4 */ /* 181 */ ED_ARGUMENT_DIGIT, /* M-5 */ /* 182 */ ED_ARGUMENT_DIGIT, /* M-6 */ /* 183 */ ED_ARGUMENT_DIGIT, /* M-7 */ /* 184 */ ED_ARGUMENT_DIGIT, /* M-8 */ /* 185 */ ED_ARGUMENT_DIGIT, /* M-9 */ /* 186 */ ED_UNASSIGNED, /* M-: */ /* 187 */ ED_UNASSIGNED, /* M-; */ /* 188 */ ED_UNASSIGNED, /* M-< */ /* 189 */ ED_UNASSIGNED, /* M-= */ /* 190 */ ED_UNASSIGNED, /* M-> */ /* 191 */ ED_UNASSIGNED, /* M-? */ /* 192 */ ED_UNASSIGNED, /* M-@ */ /* 193 */ ED_UNASSIGNED, /* M-A */ /* 194 */ ED_PREV_WORD, /* M-B */ /* 195 */ EM_CAPITOL_CASE, /* M-C */ /* 196 */ EM_DELETE_NEXT_WORD, /* M-D */ /* 197 */ ED_UNASSIGNED, /* M-E */ /* 198 */ EM_NEXT_WORD, /* M-F */ /* 199 */ ED_UNASSIGNED, /* M-G */ /* 200 */ ED_UNASSIGNED, /* M-H */ /* 201 */ ED_UNASSIGNED, /* M-I */ /* 202 */ ED_UNASSIGNED, /* M-J */ /* 203 */ ED_UNASSIGNED, /* M-K */ /* 204 */ EM_LOWER_CASE, /* M-L */ /* 205 */ ED_UNASSIGNED, /* M-M */ /* 206 */ ED_SEARCH_NEXT_HISTORY, /* M-N */ /* 207 */ ED_SEQUENCE_LEAD_IN, /* M-O */ /* 208 */ ED_SEARCH_PREV_HISTORY, /* M-P */ /* 209 */ ED_UNASSIGNED, /* M-Q */ /* 210 */ ED_UNASSIGNED, /* M-R */ /* 211 */ ED_UNASSIGNED, /* M-S */ /* 212 */ ED_UNASSIGNED, /* M-T */ /* 213 */ EM_UPPER_CASE, /* M-U */ /* 214 */ ED_UNASSIGNED, /* M-V */ /* 215 */ EM_COPY_REGION, /* M-W */ /* 216 */ ED_COMMAND, /* M-X */ /* 217 */ ED_UNASSIGNED, /* M-Y */ /* 218 */ ED_UNASSIGNED, /* M-Z */ /* 219 */ ED_SEQUENCE_LEAD_IN, /* M-[ */ /* 220 */ ED_UNASSIGNED, /* M-\ */ /* 221 */ ED_UNASSIGNED, /* M-] */ /* 222 */ ED_UNASSIGNED, /* M-^ */ /* 223 */ ED_UNASSIGNED, /* M-_ */ /* 223 */ ED_UNASSIGNED, /* M-` */ /* 224 */ ED_UNASSIGNED, /* M-a */ /* 225 */ ED_PREV_WORD, /* M-b */ /* 226 */ EM_CAPITOL_CASE, /* M-c */ /* 227 */ EM_DELETE_NEXT_WORD, /* M-d */ /* 228 */ ED_UNASSIGNED, /* M-e */ /* 229 */ EM_NEXT_WORD, /* M-f */ /* 230 */ ED_UNASSIGNED, /* M-g */ /* 231 */ ED_UNASSIGNED, /* M-h */ /* 232 */ ED_UNASSIGNED, /* M-i */ /* 233 */ ED_UNASSIGNED, /* M-j */ /* 234 */ ED_UNASSIGNED, /* M-k */ /* 235 */ EM_LOWER_CASE, /* M-l */ /* 236 */ ED_UNASSIGNED, /* M-m */ /* 237 */ ED_SEARCH_NEXT_HISTORY, /* M-n */ /* 238 */ ED_UNASSIGNED, /* M-o */ /* 239 */ ED_SEARCH_PREV_HISTORY, /* M-p */ /* 240 */ ED_UNASSIGNED, /* M-q */ /* 241 */ ED_UNASSIGNED, /* M-r */ /* 242 */ ED_UNASSIGNED, /* M-s */ /* 243 */ ED_UNASSIGNED, /* M-t */ /* 244 */ EM_UPPER_CASE, /* M-u */ /* 245 */ ED_UNASSIGNED, /* M-v */ /* 246 */ EM_COPY_REGION, /* M-w */ /* 247 */ ED_COMMAND, /* M-x */ /* 248 */ ED_UNASSIGNED, /* M-y */ /* 249 */ ED_UNASSIGNED, /* M-z */ /* 250 */ ED_UNASSIGNED, /* M-{ */ /* 251 */ ED_UNASSIGNED, /* M-| */ /* 252 */ ED_UNASSIGNED, /* M-} */ /* 253 */ ED_UNASSIGNED, /* M-~ */ /* 254 */ ED_DELETE_PREV_WORD /* M-^? */ /* 255 */ }; /* * keymap table for vi. Each index into above tbl; should be * N_KEYS entries long. Vi mode uses a sticky-extend to do command mode: * insert mode characters are in the normal keymap, and command mode * in the extended keymap. 
*/ private const el_action_t el_map_vi_insert[] = { #ifdef KSHVI /* 0 */ ED_UNASSIGNED, /* ^@ */ /* 1 */ ED_INSERT, /* ^A */ /* 2 */ ED_INSERT, /* ^B */ /* 3 */ ED_INSERT, /* ^C */ /* 4 */ VI_LIST_OR_EOF, /* ^D */ /* 5 */ ED_INSERT, /* ^E */ /* 6 */ ED_INSERT, /* ^F */ /* 7 */ ED_INSERT, /* ^G */ /* 8 */ VI_DELETE_PREV_CHAR, /* ^H */ /* BackSpace key */ /* 9 */ ED_INSERT, /* ^I */ /* Tab Key */ /* 10 */ ED_NEWLINE, /* ^J */ /* 11 */ ED_INSERT, /* ^K */ /* 12 */ ED_INSERT, /* ^L */ /* 13 */ ED_NEWLINE, /* ^M */ /* 14 */ ED_INSERT, /* ^N */ /* 15 */ ED_INSERT, /* ^O */ /* 16 */ ED_INSERT, /* ^P */ /* 17 */ ED_TTY_START_OUTPUT, /* ^Q */ /* 18 */ ED_INSERT, /* ^R */ /* 19 */ ED_TTY_STOP_OUTPUT, /* ^S */ /* 20 */ ED_INSERT, /* ^T */ /* 21 */ VI_KILL_LINE_PREV, /* ^U */ /* 22 */ ED_QUOTED_INSERT, /* ^V */ /* 23 */ ED_DELETE_PREV_WORD, /* ^W */ /* ED_DELETE_PREV_WORD: Only until strt edit pos */ /* 24 */ ED_INSERT, /* ^X */ /* 25 */ ED_INSERT, /* ^Y */ /* 26 */ ED_INSERT, /* ^Z */ /* 27 */ VI_COMMAND_MODE, /* ^[ */ /* [ Esc ] key */ /* 28 */ ED_TTY_SIGQUIT, /* ^\ */ /* 29 */ ED_INSERT, /* ^] */ /* 30 */ ED_INSERT, /* ^^ */ /* 31 */ ED_INSERT, /* ^_ */ #else /* !KSHVI */ /* * NOTE: These mappings do NOT Correspond well * to the KSH VI editing assignments. * On the other and they are convenient and - * many people have have gotten used to them. + * many people have gotten used to them. */ /* 0 */ ED_UNASSIGNED, /* ^@ */ /* 1 */ ED_MOVE_TO_BEG, /* ^A */ /* 2 */ ED_PREV_CHAR, /* ^B */ /* 3 */ ED_TTY_SIGINT, /* ^C */ /* 4 */ VI_LIST_OR_EOF, /* ^D */ /* 5 */ ED_MOVE_TO_END, /* ^E */ /* 6 */ ED_NEXT_CHAR, /* ^F */ /* 7 */ ED_UNASSIGNED, /* ^G */ /* 8 */ VI_DELETE_PREV_CHAR, /* ^H */ /* BackSpace key */ /* 9 */ ED_UNASSIGNED, /* ^I */ /* Tab Key */ /* 10 */ ED_NEWLINE, /* ^J */ /* 11 */ ED_KILL_LINE, /* ^K */ /* 12 */ ED_CLEAR_SCREEN, /* ^L */ /* 13 */ ED_NEWLINE, /* ^M */ /* 14 */ ED_NEXT_HISTORY, /* ^N */ /* 15 */ ED_TTY_FLUSH_OUTPUT, /* ^O */ /* 16 */ ED_PREV_HISTORY, /* ^P */ /* 17 */ ED_TTY_START_OUTPUT, /* ^Q */ /* 18 */ ED_REDISPLAY, /* ^R */ /* 19 */ ED_TTY_STOP_OUTPUT, /* ^S */ /* 20 */ ED_TRANSPOSE_CHARS, /* ^T */ /* 21 */ VI_KILL_LINE_PREV, /* ^U */ /* 22 */ ED_QUOTED_INSERT, /* ^V */ /* 23 */ ED_DELETE_PREV_WORD, /* ^W */ /* 24 */ ED_UNASSIGNED, /* ^X */ /* 25 */ ED_TTY_DSUSP, /* ^Y */ /* 26 */ ED_TTY_SIGTSTP, /* ^Z */ /* 27 */ VI_COMMAND_MODE, /* ^[ */ /* 28 */ ED_TTY_SIGQUIT, /* ^\ */ /* 29 */ ED_UNASSIGNED, /* ^] */ /* 30 */ ED_UNASSIGNED, /* ^^ */ /* 31 */ ED_UNASSIGNED, /* ^_ */ #endif /* KSHVI */ /* 32 */ ED_INSERT, /* SPACE */ /* 33 */ ED_INSERT, /* ! */ /* 34 */ ED_INSERT, /* " */ /* 35 */ ED_INSERT, /* # */ /* 36 */ ED_INSERT, /* $ */ /* 37 */ ED_INSERT, /* % */ /* 38 */ ED_INSERT, /* & */ /* 39 */ ED_INSERT, /* ' */ /* 40 */ ED_INSERT, /* ( */ /* 41 */ ED_INSERT, /* ) */ /* 42 */ ED_INSERT, /* * */ /* 43 */ ED_INSERT, /* + */ /* 44 */ ED_INSERT, /* , */ /* 45 */ ED_INSERT, /* - */ /* 46 */ ED_INSERT, /* . */ /* 47 */ ED_INSERT, /* / */ /* 48 */ ED_INSERT, /* 0 */ /* 49 */ ED_INSERT, /* 1 */ /* 50 */ ED_INSERT, /* 2 */ /* 51 */ ED_INSERT, /* 3 */ /* 52 */ ED_INSERT, /* 4 */ /* 53 */ ED_INSERT, /* 5 */ /* 54 */ ED_INSERT, /* 6 */ /* 55 */ ED_INSERT, /* 7 */ /* 56 */ ED_INSERT, /* 8 */ /* 57 */ ED_INSERT, /* 9 */ /* 58 */ ED_INSERT, /* : */ /* 59 */ ED_INSERT, /* ; */ /* 60 */ ED_INSERT, /* < */ /* 61 */ ED_INSERT, /* = */ /* 62 */ ED_INSERT, /* > */ /* 63 */ ED_INSERT, /* ? 
*/ /* 64 */ ED_INSERT, /* @ */ /* 65 */ ED_INSERT, /* A */ /* 66 */ ED_INSERT, /* B */ /* 67 */ ED_INSERT, /* C */ /* 68 */ ED_INSERT, /* D */ /* 69 */ ED_INSERT, /* E */ /* 70 */ ED_INSERT, /* F */ /* 71 */ ED_INSERT, /* G */ /* 72 */ ED_INSERT, /* H */ /* 73 */ ED_INSERT, /* I */ /* 74 */ ED_INSERT, /* J */ /* 75 */ ED_INSERT, /* K */ /* 76 */ ED_INSERT, /* L */ /* 77 */ ED_INSERT, /* M */ /* 78 */ ED_INSERT, /* N */ /* 79 */ ED_INSERT, /* O */ /* 80 */ ED_INSERT, /* P */ /* 81 */ ED_INSERT, /* Q */ /* 82 */ ED_INSERT, /* R */ /* 83 */ ED_INSERT, /* S */ /* 84 */ ED_INSERT, /* T */ /* 85 */ ED_INSERT, /* U */ /* 86 */ ED_INSERT, /* V */ /* 87 */ ED_INSERT, /* W */ /* 88 */ ED_INSERT, /* X */ /* 89 */ ED_INSERT, /* Y */ /* 90 */ ED_INSERT, /* Z */ /* 91 */ ED_INSERT, /* [ */ /* 92 */ ED_INSERT, /* \ */ /* 93 */ ED_INSERT, /* ] */ /* 94 */ ED_INSERT, /* ^ */ /* 95 */ ED_INSERT, /* _ */ /* 96 */ ED_INSERT, /* ` */ /* 97 */ ED_INSERT, /* a */ /* 98 */ ED_INSERT, /* b */ /* 99 */ ED_INSERT, /* c */ /* 100 */ ED_INSERT, /* d */ /* 101 */ ED_INSERT, /* e */ /* 102 */ ED_INSERT, /* f */ /* 103 */ ED_INSERT, /* g */ /* 104 */ ED_INSERT, /* h */ /* 105 */ ED_INSERT, /* i */ /* 106 */ ED_INSERT, /* j */ /* 107 */ ED_INSERT, /* k */ /* 108 */ ED_INSERT, /* l */ /* 109 */ ED_INSERT, /* m */ /* 110 */ ED_INSERT, /* n */ /* 111 */ ED_INSERT, /* o */ /* 112 */ ED_INSERT, /* p */ /* 113 */ ED_INSERT, /* q */ /* 114 */ ED_INSERT, /* r */ /* 115 */ ED_INSERT, /* s */ /* 116 */ ED_INSERT, /* t */ /* 117 */ ED_INSERT, /* u */ /* 118 */ ED_INSERT, /* v */ /* 119 */ ED_INSERT, /* w */ /* 120 */ ED_INSERT, /* x */ /* 121 */ ED_INSERT, /* y */ /* 122 */ ED_INSERT, /* z */ /* 123 */ ED_INSERT, /* { */ /* 124 */ ED_INSERT, /* | */ /* 125 */ ED_INSERT, /* } */ /* 126 */ ED_INSERT, /* ~ */ /* 127 */ VI_DELETE_PREV_CHAR, /* ^? */ /* 128 */ ED_INSERT, /* M-^@ */ /* 129 */ ED_INSERT, /* M-^A */ /* 130 */ ED_INSERT, /* M-^B */ /* 131 */ ED_INSERT, /* M-^C */ /* 132 */ ED_INSERT, /* M-^D */ /* 133 */ ED_INSERT, /* M-^E */ /* 134 */ ED_INSERT, /* M-^F */ /* 135 */ ED_INSERT, /* M-^G */ /* 136 */ ED_INSERT, /* M-^H */ /* 137 */ ED_INSERT, /* M-^I */ /* 138 */ ED_INSERT, /* M-^J */ /* 139 */ ED_INSERT, /* M-^K */ /* 140 */ ED_INSERT, /* M-^L */ /* 141 */ ED_INSERT, /* M-^M */ /* 142 */ ED_INSERT, /* M-^N */ /* 143 */ ED_INSERT, /* M-^O */ /* 144 */ ED_INSERT, /* M-^P */ /* 145 */ ED_INSERT, /* M-^Q */ /* 146 */ ED_INSERT, /* M-^R */ /* 147 */ ED_INSERT, /* M-^S */ /* 148 */ ED_INSERT, /* M-^T */ /* 149 */ ED_INSERT, /* M-^U */ /* 150 */ ED_INSERT, /* M-^V */ /* 151 */ ED_INSERT, /* M-^W */ /* 152 */ ED_INSERT, /* M-^X */ /* 153 */ ED_INSERT, /* M-^Y */ /* 154 */ ED_INSERT, /* M-^Z */ /* 155 */ ED_INSERT, /* M-^[ */ /* 156 */ ED_INSERT, /* M-^\ */ /* 157 */ ED_INSERT, /* M-^] */ /* 158 */ ED_INSERT, /* M-^^ */ /* 159 */ ED_INSERT, /* M-^_ */ /* 160 */ ED_INSERT, /* M-SPACE */ /* 161 */ ED_INSERT, /* M-! */ /* 162 */ ED_INSERT, /* M-" */ /* 163 */ ED_INSERT, /* M-# */ /* 164 */ ED_INSERT, /* M-$ */ /* 165 */ ED_INSERT, /* M-% */ /* 166 */ ED_INSERT, /* M-& */ /* 167 */ ED_INSERT, /* M-' */ /* 168 */ ED_INSERT, /* M-( */ /* 169 */ ED_INSERT, /* M-) */ /* 170 */ ED_INSERT, /* M-* */ /* 171 */ ED_INSERT, /* M-+ */ /* 172 */ ED_INSERT, /* M-, */ /* 173 */ ED_INSERT, /* M-- */ /* 174 */ ED_INSERT, /* M-. 
*/ /* 175 */ ED_INSERT, /* M-/ */ /* 176 */ ED_INSERT, /* M-0 */ /* 177 */ ED_INSERT, /* M-1 */ /* 178 */ ED_INSERT, /* M-2 */ /* 179 */ ED_INSERT, /* M-3 */ /* 180 */ ED_INSERT, /* M-4 */ /* 181 */ ED_INSERT, /* M-5 */ /* 182 */ ED_INSERT, /* M-6 */ /* 183 */ ED_INSERT, /* M-7 */ /* 184 */ ED_INSERT, /* M-8 */ /* 185 */ ED_INSERT, /* M-9 */ /* 186 */ ED_INSERT, /* M-: */ /* 187 */ ED_INSERT, /* M-; */ /* 188 */ ED_INSERT, /* M-< */ /* 189 */ ED_INSERT, /* M-= */ /* 190 */ ED_INSERT, /* M-> */ /* 191 */ ED_INSERT, /* M-? */ /* 192 */ ED_INSERT, /* M-@ */ /* 193 */ ED_INSERT, /* M-A */ /* 194 */ ED_INSERT, /* M-B */ /* 195 */ ED_INSERT, /* M-C */ /* 196 */ ED_INSERT, /* M-D */ /* 197 */ ED_INSERT, /* M-E */ /* 198 */ ED_INSERT, /* M-F */ /* 199 */ ED_INSERT, /* M-G */ /* 200 */ ED_INSERT, /* M-H */ /* 201 */ ED_INSERT, /* M-I */ /* 202 */ ED_INSERT, /* M-J */ /* 203 */ ED_INSERT, /* M-K */ /* 204 */ ED_INSERT, /* M-L */ /* 205 */ ED_INSERT, /* M-M */ /* 206 */ ED_INSERT, /* M-N */ /* 207 */ ED_INSERT, /* M-O */ /* 208 */ ED_INSERT, /* M-P */ /* 209 */ ED_INSERT, /* M-Q */ /* 210 */ ED_INSERT, /* M-R */ /* 211 */ ED_INSERT, /* M-S */ /* 212 */ ED_INSERT, /* M-T */ /* 213 */ ED_INSERT, /* M-U */ /* 214 */ ED_INSERT, /* M-V */ /* 215 */ ED_INSERT, /* M-W */ /* 216 */ ED_INSERT, /* M-X */ /* 217 */ ED_INSERT, /* M-Y */ /* 218 */ ED_INSERT, /* M-Z */ /* 219 */ ED_INSERT, /* M-[ */ /* 220 */ ED_INSERT, /* M-\ */ /* 221 */ ED_INSERT, /* M-] */ /* 222 */ ED_INSERT, /* M-^ */ /* 223 */ ED_INSERT, /* M-_ */ /* 224 */ ED_INSERT, /* M-` */ /* 225 */ ED_INSERT, /* M-a */ /* 226 */ ED_INSERT, /* M-b */ /* 227 */ ED_INSERT, /* M-c */ /* 228 */ ED_INSERT, /* M-d */ /* 229 */ ED_INSERT, /* M-e */ /* 230 */ ED_INSERT, /* M-f */ /* 231 */ ED_INSERT, /* M-g */ /* 232 */ ED_INSERT, /* M-h */ /* 233 */ ED_INSERT, /* M-i */ /* 234 */ ED_INSERT, /* M-j */ /* 235 */ ED_INSERT, /* M-k */ /* 236 */ ED_INSERT, /* M-l */ /* 237 */ ED_INSERT, /* M-m */ /* 238 */ ED_INSERT, /* M-n */ /* 239 */ ED_INSERT, /* M-o */ /* 240 */ ED_INSERT, /* M-p */ /* 241 */ ED_INSERT, /* M-q */ /* 242 */ ED_INSERT, /* M-r */ /* 243 */ ED_INSERT, /* M-s */ /* 244 */ ED_INSERT, /* M-t */ /* 245 */ ED_INSERT, /* M-u */ /* 246 */ ED_INSERT, /* M-v */ /* 247 */ ED_INSERT, /* M-w */ /* 248 */ ED_INSERT, /* M-x */ /* 249 */ ED_INSERT, /* M-y */ /* 250 */ ED_INSERT, /* M-z */ /* 251 */ ED_INSERT, /* M-{ */ /* 252 */ ED_INSERT, /* M-| */ /* 253 */ ED_INSERT, /* M-} */ /* 254 */ ED_INSERT, /* M-~ */ /* 255 */ ED_INSERT /* M-^? 
*/ }; private const el_action_t el_map_vi_command[] = { /* 0 */ ED_UNASSIGNED, /* ^@ */ /* 1 */ ED_MOVE_TO_BEG, /* ^A */ /* 2 */ ED_UNASSIGNED, /* ^B */ /* 3 */ ED_TTY_SIGINT, /* ^C */ /* 4 */ ED_UNASSIGNED, /* ^D */ /* 5 */ ED_MOVE_TO_END, /* ^E */ /* 6 */ ED_UNASSIGNED, /* ^F */ /* 7 */ ED_UNASSIGNED, /* ^G */ /* 8 */ ED_DELETE_PREV_CHAR, /* ^H */ /* 9 */ ED_UNASSIGNED, /* ^I */ /* 10 */ ED_NEWLINE, /* ^J */ /* 11 */ ED_KILL_LINE, /* ^K */ /* 12 */ ED_CLEAR_SCREEN, /* ^L */ /* 13 */ ED_NEWLINE, /* ^M */ /* 14 */ ED_NEXT_HISTORY, /* ^N */ /* 15 */ ED_TTY_FLUSH_OUTPUT, /* ^O */ /* 16 */ ED_PREV_HISTORY, /* ^P */ /* 17 */ ED_TTY_START_OUTPUT, /* ^Q */ /* 18 */ ED_REDISPLAY, /* ^R */ /* 19 */ ED_TTY_STOP_OUTPUT, /* ^S */ /* 20 */ ED_UNASSIGNED, /* ^T */ /* 21 */ VI_KILL_LINE_PREV, /* ^U */ /* 22 */ ED_UNASSIGNED, /* ^V */ /* 23 */ ED_DELETE_PREV_WORD, /* ^W */ /* 24 */ ED_UNASSIGNED, /* ^X */ /* 25 */ ED_UNASSIGNED, /* ^Y */ /* 26 */ ED_UNASSIGNED, /* ^Z */ /* 27 */ EM_META_NEXT, /* ^[ */ /* 28 */ ED_TTY_SIGQUIT, /* ^\ */ /* 29 */ ED_UNASSIGNED, /* ^] */ /* 30 */ ED_UNASSIGNED, /* ^^ */ /* 31 */ ED_UNASSIGNED, /* ^_ */ /* 32 */ ED_NEXT_CHAR, /* SPACE */ /* 33 */ ED_UNASSIGNED, /* ! */ /* 34 */ ED_UNASSIGNED, /* " */ /* 35 */ VI_COMMENT_OUT, /* # */ /* 36 */ ED_MOVE_TO_END, /* $ */ /* 37 */ VI_MATCH, /* % */ /* 38 */ ED_UNASSIGNED, /* & */ /* 39 */ ED_UNASSIGNED, /* ' */ /* 40 */ ED_UNASSIGNED, /* ( */ /* 41 */ ED_UNASSIGNED, /* ) */ /* 42 */ ED_UNASSIGNED, /* * */ /* 43 */ ED_NEXT_HISTORY, /* + */ /* 44 */ VI_REPEAT_PREV_CHAR, /* , */ /* 45 */ ED_PREV_HISTORY, /* - */ /* 46 */ VI_REDO, /* . */ /* 47 */ VI_SEARCH_PREV, /* / */ /* 48 */ VI_ZERO, /* 0 */ /* 49 */ ED_ARGUMENT_DIGIT, /* 1 */ /* 50 */ ED_ARGUMENT_DIGIT, /* 2 */ /* 51 */ ED_ARGUMENT_DIGIT, /* 3 */ /* 52 */ ED_ARGUMENT_DIGIT, /* 4 */ /* 53 */ ED_ARGUMENT_DIGIT, /* 5 */ /* 54 */ ED_ARGUMENT_DIGIT, /* 6 */ /* 55 */ ED_ARGUMENT_DIGIT, /* 7 */ /* 56 */ ED_ARGUMENT_DIGIT, /* 8 */ /* 57 */ ED_ARGUMENT_DIGIT, /* 9 */ /* 58 */ ED_COMMAND, /* : */ /* 59 */ VI_REPEAT_NEXT_CHAR, /* ; */ /* 60 */ ED_UNASSIGNED, /* < */ /* 61 */ ED_UNASSIGNED, /* = */ /* 62 */ ED_UNASSIGNED, /* > */ /* 63 */ VI_SEARCH_NEXT, /* ? 
*/ /* 64 */ VI_ALIAS, /* @ */ /* 65 */ VI_ADD_AT_EOL, /* A */ /* 66 */ VI_PREV_BIG_WORD, /* B */ /* 67 */ VI_CHANGE_TO_EOL, /* C */ /* 68 */ ED_KILL_LINE, /* D */ /* 69 */ VI_END_BIG_WORD, /* E */ /* 70 */ VI_PREV_CHAR, /* F */ /* 71 */ VI_TO_HISTORY_LINE, /* G */ /* 72 */ ED_UNASSIGNED, /* H */ /* 73 */ VI_INSERT_AT_BOL, /* I */ /* 74 */ ED_SEARCH_NEXT_HISTORY, /* J */ /* 75 */ ED_SEARCH_PREV_HISTORY, /* K */ /* 76 */ ED_UNASSIGNED, /* L */ /* 77 */ ED_UNASSIGNED, /* M */ /* 78 */ VI_REPEAT_SEARCH_PREV, /* N */ /* 79 */ ED_SEQUENCE_LEAD_IN, /* O */ /* 80 */ VI_PASTE_PREV, /* P */ /* 81 */ ED_UNASSIGNED, /* Q */ /* 82 */ VI_REPLACE_MODE, /* R */ /* 83 */ VI_SUBSTITUTE_LINE, /* S */ /* 84 */ VI_TO_PREV_CHAR, /* T */ /* 85 */ VI_UNDO_LINE, /* U */ /* 86 */ ED_UNASSIGNED, /* V */ /* 87 */ VI_NEXT_BIG_WORD, /* W */ /* 88 */ ED_DELETE_PREV_CHAR, /* X */ /* 89 */ VI_YANK_END, /* Y */ /* 90 */ ED_UNASSIGNED, /* Z */ /* 91 */ ED_SEQUENCE_LEAD_IN, /* [ */ /* 92 */ ED_UNASSIGNED, /* \ */ /* 93 */ ED_UNASSIGNED, /* ] */ /* 94 */ ED_MOVE_TO_BEG, /* ^ */ /* 95 */ VI_HISTORY_WORD, /* _ */ /* 96 */ ED_UNASSIGNED, /* ` */ /* 97 */ VI_ADD, /* a */ /* 98 */ VI_PREV_WORD, /* b */ /* 99 */ VI_CHANGE_META, /* c */ /* 100 */ VI_DELETE_META, /* d */ /* 101 */ VI_END_WORD, /* e */ /* 102 */ VI_NEXT_CHAR, /* f */ /* 103 */ ED_UNASSIGNED, /* g */ /* 104 */ ED_PREV_CHAR, /* h */ /* 105 */ VI_INSERT, /* i */ /* 106 */ ED_NEXT_HISTORY, /* j */ /* 107 */ ED_PREV_HISTORY, /* k */ /* 108 */ ED_NEXT_CHAR, /* l */ /* 109 */ ED_UNASSIGNED, /* m */ /* 110 */ VI_REPEAT_SEARCH_NEXT, /* n */ /* 111 */ ED_UNASSIGNED, /* o */ /* 112 */ VI_PASTE_NEXT, /* p */ /* 113 */ ED_UNASSIGNED, /* q */ /* 114 */ VI_REPLACE_CHAR, /* r */ /* 115 */ VI_SUBSTITUTE_CHAR, /* s */ /* 116 */ VI_TO_NEXT_CHAR, /* t */ /* 117 */ VI_UNDO, /* u */ /* 118 */ VI_HISTEDIT, /* v */ /* 119 */ VI_NEXT_WORD, /* w */ /* 120 */ ED_DELETE_NEXT_CHAR, /* x */ /* 121 */ VI_YANK, /* y */ /* 122 */ ED_UNASSIGNED, /* z */ /* 123 */ ED_UNASSIGNED, /* { */ /* 124 */ VI_TO_COLUMN, /* | */ /* 125 */ ED_UNASSIGNED, /* } */ /* 126 */ VI_CHANGE_CASE, /* ~ */ /* 127 */ ED_DELETE_PREV_CHAR, /* ^? */ /* 128 */ ED_UNASSIGNED, /* M-^@ */ /* 129 */ ED_UNASSIGNED, /* M-^A */ /* 130 */ ED_UNASSIGNED, /* M-^B */ /* 131 */ ED_UNASSIGNED, /* M-^C */ /* 132 */ ED_UNASSIGNED, /* M-^D */ /* 133 */ ED_UNASSIGNED, /* M-^E */ /* 134 */ ED_UNASSIGNED, /* M-^F */ /* 135 */ ED_UNASSIGNED, /* M-^G */ /* 136 */ ED_UNASSIGNED, /* M-^H */ /* 137 */ ED_UNASSIGNED, /* M-^I */ /* 138 */ ED_UNASSIGNED, /* M-^J */ /* 139 */ ED_UNASSIGNED, /* M-^K */ /* 140 */ ED_UNASSIGNED, /* M-^L */ /* 141 */ ED_UNASSIGNED, /* M-^M */ /* 142 */ ED_UNASSIGNED, /* M-^N */ /* 143 */ ED_UNASSIGNED, /* M-^O */ /* 144 */ ED_UNASSIGNED, /* M-^P */ /* 145 */ ED_UNASSIGNED, /* M-^Q */ /* 146 */ ED_UNASSIGNED, /* M-^R */ /* 147 */ ED_UNASSIGNED, /* M-^S */ /* 148 */ ED_UNASSIGNED, /* M-^T */ /* 149 */ ED_UNASSIGNED, /* M-^U */ /* 150 */ ED_UNASSIGNED, /* M-^V */ /* 151 */ ED_UNASSIGNED, /* M-^W */ /* 152 */ ED_UNASSIGNED, /* M-^X */ /* 153 */ ED_UNASSIGNED, /* M-^Y */ /* 154 */ ED_UNASSIGNED, /* M-^Z */ /* 155 */ ED_UNASSIGNED, /* M-^[ */ /* 156 */ ED_UNASSIGNED, /* M-^\ */ /* 157 */ ED_UNASSIGNED, /* M-^] */ /* 158 */ ED_UNASSIGNED, /* M-^^ */ /* 159 */ ED_UNASSIGNED, /* M-^_ */ /* 160 */ ED_UNASSIGNED, /* M-SPACE */ /* 161 */ ED_UNASSIGNED, /* M-! 
*/ /* 162 */ ED_UNASSIGNED, /* M-" */ /* 163 */ ED_UNASSIGNED, /* M-# */ /* 164 */ ED_UNASSIGNED, /* M-$ */ /* 165 */ ED_UNASSIGNED, /* M-% */ /* 166 */ ED_UNASSIGNED, /* M-& */ /* 167 */ ED_UNASSIGNED, /* M-' */ /* 168 */ ED_UNASSIGNED, /* M-( */ /* 169 */ ED_UNASSIGNED, /* M-) */ /* 170 */ ED_UNASSIGNED, /* M-* */ /* 171 */ ED_UNASSIGNED, /* M-+ */ /* 172 */ ED_UNASSIGNED, /* M-, */ /* 173 */ ED_UNASSIGNED, /* M-- */ /* 174 */ ED_UNASSIGNED, /* M-. */ /* 175 */ ED_UNASSIGNED, /* M-/ */ /* 176 */ ED_UNASSIGNED, /* M-0 */ /* 177 */ ED_UNASSIGNED, /* M-1 */ /* 178 */ ED_UNASSIGNED, /* M-2 */ /* 179 */ ED_UNASSIGNED, /* M-3 */ /* 180 */ ED_UNASSIGNED, /* M-4 */ /* 181 */ ED_UNASSIGNED, /* M-5 */ /* 182 */ ED_UNASSIGNED, /* M-6 */ /* 183 */ ED_UNASSIGNED, /* M-7 */ /* 184 */ ED_UNASSIGNED, /* M-8 */ /* 185 */ ED_UNASSIGNED, /* M-9 */ /* 186 */ ED_UNASSIGNED, /* M-: */ /* 187 */ ED_UNASSIGNED, /* M-; */ /* 188 */ ED_UNASSIGNED, /* M-< */ /* 189 */ ED_UNASSIGNED, /* M-= */ /* 190 */ ED_UNASSIGNED, /* M-> */ /* 191 */ ED_UNASSIGNED, /* M-? */ /* 192 */ ED_UNASSIGNED, /* M-@ */ /* 193 */ ED_UNASSIGNED, /* M-A */ /* 194 */ ED_UNASSIGNED, /* M-B */ /* 195 */ ED_UNASSIGNED, /* M-C */ /* 196 */ ED_UNASSIGNED, /* M-D */ /* 197 */ ED_UNASSIGNED, /* M-E */ /* 198 */ ED_UNASSIGNED, /* M-F */ /* 199 */ ED_UNASSIGNED, /* M-G */ /* 200 */ ED_UNASSIGNED, /* M-H */ /* 201 */ ED_UNASSIGNED, /* M-I */ /* 202 */ ED_UNASSIGNED, /* M-J */ /* 203 */ ED_UNASSIGNED, /* M-K */ /* 204 */ ED_UNASSIGNED, /* M-L */ /* 205 */ ED_UNASSIGNED, /* M-M */ /* 206 */ ED_UNASSIGNED, /* M-N */ /* 207 */ ED_SEQUENCE_LEAD_IN, /* M-O */ /* 208 */ ED_UNASSIGNED, /* M-P */ /* 209 */ ED_UNASSIGNED, /* M-Q */ /* 210 */ ED_UNASSIGNED, /* M-R */ /* 211 */ ED_UNASSIGNED, /* M-S */ /* 212 */ ED_UNASSIGNED, /* M-T */ /* 213 */ ED_UNASSIGNED, /* M-U */ /* 214 */ ED_UNASSIGNED, /* M-V */ /* 215 */ ED_UNASSIGNED, /* M-W */ /* 216 */ ED_UNASSIGNED, /* M-X */ /* 217 */ ED_UNASSIGNED, /* M-Y */ /* 218 */ ED_UNASSIGNED, /* M-Z */ /* 219 */ ED_SEQUENCE_LEAD_IN, /* M-[ */ /* 220 */ ED_UNASSIGNED, /* M-\ */ /* 221 */ ED_UNASSIGNED, /* M-] */ /* 222 */ ED_UNASSIGNED, /* M-^ */ /* 223 */ ED_UNASSIGNED, /* M-_ */ /* 224 */ ED_UNASSIGNED, /* M-` */ /* 225 */ ED_UNASSIGNED, /* M-a */ /* 226 */ ED_UNASSIGNED, /* M-b */ /* 227 */ ED_UNASSIGNED, /* M-c */ /* 228 */ ED_UNASSIGNED, /* M-d */ /* 229 */ ED_UNASSIGNED, /* M-e */ /* 230 */ ED_UNASSIGNED, /* M-f */ /* 231 */ ED_UNASSIGNED, /* M-g */ /* 232 */ ED_UNASSIGNED, /* M-h */ /* 233 */ ED_UNASSIGNED, /* M-i */ /* 234 */ ED_UNASSIGNED, /* M-j */ /* 235 */ ED_UNASSIGNED, /* M-k */ /* 236 */ ED_UNASSIGNED, /* M-l */ /* 237 */ ED_UNASSIGNED, /* M-m */ /* 238 */ ED_UNASSIGNED, /* M-n */ /* 239 */ ED_UNASSIGNED, /* M-o */ /* 240 */ ED_UNASSIGNED, /* M-p */ /* 241 */ ED_UNASSIGNED, /* M-q */ /* 242 */ ED_UNASSIGNED, /* M-r */ /* 243 */ ED_UNASSIGNED, /* M-s */ /* 244 */ ED_UNASSIGNED, /* M-t */ /* 245 */ ED_UNASSIGNED, /* M-u */ /* 246 */ ED_UNASSIGNED, /* M-v */ /* 247 */ ED_UNASSIGNED, /* M-w */ /* 248 */ ED_UNASSIGNED, /* M-x */ /* 249 */ ED_UNASSIGNED, /* M-y */ /* 250 */ ED_UNASSIGNED, /* M-z */ /* 251 */ ED_UNASSIGNED, /* M-{ */ /* 252 */ ED_UNASSIGNED, /* M-| */ /* 253 */ ED_UNASSIGNED, /* M-} */ /* 254 */ ED_UNASSIGNED, /* M-~ */ /* 255 */ ED_UNASSIGNED /* M-^? */ }; /* map_init(): * Initialize and allocate the maps */ protected int map_init(EditLine *el) { /* * Make sure those are correct before starting. 
*/ #ifdef MAP_DEBUG if (sizeof(el_map_emacs) != N_KEYS * sizeof(el_action_t)) EL_ABORT((el->errfile, "Emacs map incorrect\n")); if (sizeof(el_map_vi_command) != N_KEYS * sizeof(el_action_t)) EL_ABORT((el->errfile, "Vi command map incorrect\n")); if (sizeof(el_map_vi_insert) != N_KEYS * sizeof(el_action_t)) EL_ABORT((el->errfile, "Vi insert map incorrect\n")); #endif el->el_map.alt = el_malloc(sizeof(*el->el_map.alt) * N_KEYS); if (el->el_map.alt == NULL) return -1; el->el_map.key = el_malloc(sizeof(*el->el_map.key) * N_KEYS); if (el->el_map.key == NULL) return -1; el->el_map.emacs = el_map_emacs; el->el_map.vic = el_map_vi_command; el->el_map.vii = el_map_vi_insert; el->el_map.help = el_malloc(sizeof(*el->el_map.help) * EL_NUM_FCNS); if (el->el_map.help == NULL) return -1; (void) memcpy(el->el_map.help, help__get(), sizeof(*el->el_map.help) * EL_NUM_FCNS); el->el_map.func = el_malloc(sizeof(*el->el_map.func) * EL_NUM_FCNS); if (el->el_map.func == NULL) return -1; memcpy(el->el_map.func, func__get(), sizeof(*el->el_map.func) * EL_NUM_FCNS); el->el_map.nfunc = EL_NUM_FCNS; #ifdef VIDEFAULT map_init_vi(el); #else map_init_emacs(el); #endif /* VIDEFAULT */ return 0; } /* map_end(): * Free the space taken by the editor maps */ protected void map_end(EditLine *el) { el_free(el->el_map.alt); el->el_map.alt = NULL; el_free(el->el_map.key); el->el_map.key = NULL; el->el_map.emacs = NULL; el->el_map.vic = NULL; el->el_map.vii = NULL; el_free(el->el_map.help); el->el_map.help = NULL; el_free(el->el_map.func); el->el_map.func = NULL; } /* map_init_nls(): * Find all the printable keys and bind them to self insert */ private void map_init_nls(EditLine *el) { int i; el_action_t *map = el->el_map.key; for (i = 0200; i <= 0377; i++) if (Isprint(i)) map[i] = ED_INSERT; } /* map_init_meta(): * Bind all the meta keys to the appropriate ESC- sequence */ private void map_init_meta(EditLine *el) { Char buf[3]; int i; el_action_t *map = el->el_map.key; el_action_t *alt = el->el_map.alt; for (i = 0; i <= 0377 && map[i] != EM_META_NEXT; i++) continue; if (i > 0377) { for (i = 0; i <= 0377 && alt[i] != EM_META_NEXT; i++) continue; if (i > 0377) { i = 033; if (el->el_map.type == MAP_VI) map = alt; } else map = alt; } buf[0] = (Char) i; buf[2] = 0; for (i = 0200; i <= 0377; i++) switch (map[i]) { case ED_INSERT: case ED_UNASSIGNED: case ED_SEQUENCE_LEAD_IN: break; default: buf[1] = i & 0177; keymacro_add(el, buf, keymacro_map_cmd(el, (int) map[i]), XK_CMD); break; } map[(int) buf[0]] = ED_SEQUENCE_LEAD_IN; } /* map_init_vi(): * Initialize the vi bindings */ protected void map_init_vi(EditLine *el) { int i; el_action_t *key = el->el_map.key; el_action_t *alt = el->el_map.alt; const el_action_t *vii = el->el_map.vii; const el_action_t *vic = el->el_map.vic; el->el_map.type = MAP_VI; el->el_map.current = el->el_map.key; keymacro_reset(el); for (i = 0; i < N_KEYS; i++) { key[i] = vii[i]; alt[i] = vic[i]; } map_init_meta(el); map_init_nls(el); tty_bind_char(el, 1); terminal_bind_arrow(el); } /* map_init_emacs(): * Initialize the emacs bindings */ protected void map_init_emacs(EditLine *el) { int i; Char buf[3]; el_action_t *key = el->el_map.key; el_action_t *alt = el->el_map.alt; const el_action_t *emacs = el->el_map.emacs; el->el_map.type = MAP_EMACS; el->el_map.current = el->el_map.key; keymacro_reset(el); for (i = 0; i < N_KEYS; i++) { key[i] = emacs[i]; alt[i] = ED_UNASSIGNED; } map_init_meta(el); map_init_nls(el); buf[0] = CONTROL('X'); buf[1] = CONTROL('X'); buf[2] = 0; keymacro_add(el, buf, keymacro_map_cmd(el, 
EM_EXCHANGE_MARK), XK_CMD); tty_bind_char(el, 1); terminal_bind_arrow(el); } /* map_set_editor(): * Set the editor */ protected int map_set_editor(EditLine *el, Char *editor) { if (Strcmp(editor, STR("emacs")) == 0) { map_init_emacs(el); return 0; } if (Strcmp(editor, STR("vi")) == 0) { map_init_vi(el); return 0; } return -1; } /* map_get_editor(): * Retrieve the editor */ protected int map_get_editor(EditLine *el, const Char **editor) { if (editor == NULL) return -1; switch (el->el_map.type) { case MAP_EMACS: *editor = STR("emacs"); return 0; case MAP_VI: *editor = STR("vi"); return 0; } return -1; } /* map_print_key(): * Print the function description for 1 key */ private void map_print_key(EditLine *el, el_action_t *map, const Char *in) { char outbuf[EL_BUFSIZ]; el_bindings_t *bp, *ep; if (in[0] == '\0' || in[1] == '\0') { (void) keymacro__decode_str(in, outbuf, sizeof(outbuf), ""); ep = &el->el_map.help[el->el_map.nfunc]; for (bp = el->el_map.help; bp < ep; bp++) if (bp->func == map[(unsigned char) *in]) { (void) fprintf(el->el_outfile, "%s\t->\t" FSTR "\n", outbuf, bp->name); return; } } else keymacro_print(el, in); } /* map_print_some_keys(): * Print keys from first to last */ private void map_print_some_keys(EditLine *el, el_action_t *map, wint_t first, wint_t last) { el_bindings_t *bp, *ep; Char firstbuf[2], lastbuf[2]; char unparsbuf[EL_BUFSIZ], extrabuf[EL_BUFSIZ]; firstbuf[0] = (Char)first; firstbuf[1] = 0; lastbuf[0] = (Char)last; lastbuf[1] = 0; if (map[first] == ED_UNASSIGNED) { if (first == last) { (void) keymacro__decode_str(firstbuf, unparsbuf, sizeof(unparsbuf), STRQQ); (void) fprintf(el->el_outfile, "%-15s-> is undefined\n", unparsbuf); } return; } ep = &el->el_map.help[el->el_map.nfunc]; for (bp = el->el_map.help; bp < ep; bp++) { if (bp->func == map[first]) { if (first == last) { (void) keymacro__decode_str(firstbuf, unparsbuf, sizeof(unparsbuf), STRQQ); (void) fprintf(el->el_outfile, "%-15s-> " FSTR "\n", unparsbuf, bp->name); } else { (void) keymacro__decode_str(firstbuf, unparsbuf, sizeof(unparsbuf), STRQQ); (void) keymacro__decode_str(lastbuf, extrabuf, sizeof(extrabuf), STRQQ); (void) fprintf(el->el_outfile, "%-4s to %-7s-> " FSTR "\n", unparsbuf, extrabuf, bp->name); } return; } } #ifdef MAP_DEBUG if (map == el->el_map.key) { (void) keymacro__decode_str(firstbuf, unparsbuf, sizeof(unparsbuf), STRQQ); (void) fprintf(el->el_outfile, "BUG!!! %s isn't bound to anything.\n", unparsbuf); (void) fprintf(el->el_outfile, "el->el_map.key[%d] == %d\n", first, el->el_map.key[first]); } else { (void) keymacro__decode_str(firstbuf, unparsbuf, sizeof(unparsbuf), STRQQ); (void) fprintf(el->el_outfile, "BUG!!! %s isn't bound to anything.\n", unparsbuf); (void) fprintf(el->el_outfile, "el->el_map.alt[%d] == %d\n", first, el->el_map.alt[first]); } #endif EL_ABORT((el->el_errfile, "Error printing keys\n")); } /* map_print_all_keys(): * Print the function description for all keys. 
*/ private void map_print_all_keys(EditLine *el) { int prev, i; (void) fprintf(el->el_outfile, "Standard key bindings\n"); prev = 0; for (i = 0; i < N_KEYS; i++) { if (el->el_map.key[prev] == el->el_map.key[i]) continue; map_print_some_keys(el, el->el_map.key, prev, i - 1); prev = i; } map_print_some_keys(el, el->el_map.key, prev, i - 1); (void) fprintf(el->el_outfile, "Alternative key bindings\n"); prev = 0; for (i = 0; i < N_KEYS; i++) { if (el->el_map.alt[prev] == el->el_map.alt[i]) continue; map_print_some_keys(el, el->el_map.alt, prev, i - 1); prev = i; } map_print_some_keys(el, el->el_map.alt, prev, i - 1); (void) fprintf(el->el_outfile, "Multi-character bindings\n"); keymacro_print(el, STR("")); (void) fprintf(el->el_outfile, "Arrow key bindings\n"); terminal_print_arrow(el, STR("")); } /* map_bind(): * Add/remove/change bindings */ protected int map_bind(EditLine *el, int argc, const Char **argv) { el_action_t *map; int ntype, rem; const Char *p; Char inbuf[EL_BUFSIZ]; Char outbuf[EL_BUFSIZ]; const Char *in = NULL; Char *out; el_bindings_t *bp, *ep; int cmd; int key; if (argv == NULL) return -1; map = el->el_map.key; ntype = XK_CMD; key = rem = 0; for (argc = 1; (p = argv[argc]) != NULL; argc++) if (p[0] == '-') switch (p[1]) { case 'a': map = el->el_map.alt; break; case 's': ntype = XK_STR; break; #ifdef notyet case 'c': ntype = XK_EXE; break; #endif case 'k': key = 1; break; case 'r': rem = 1; break; case 'v': map_init_vi(el); return 0; case 'e': map_init_emacs(el); return 0; case 'l': ep = &el->el_map.help[el->el_map.nfunc]; for (bp = el->el_map.help; bp < ep; bp++) (void) fprintf(el->el_outfile, "" FSTR "\n\t" FSTR "\n", bp->name, bp->description); return 0; default: (void) fprintf(el->el_errfile, "" FSTR ": Invalid switch `%lc'.\n", argv[0], (wint_t)p[1]); } else break; if (argv[argc] == NULL) { map_print_all_keys(el); return 0; } if (key) in = argv[argc++]; else if ((in = parse__string(inbuf, argv[argc++])) == NULL) { (void) fprintf(el->el_errfile, "" FSTR ": Invalid \\ or ^ in instring.\n", argv[0]); return -1; } if (rem) { if (key) { (void) terminal_clear_arrow(el, in); return -1; } if (in[1]) (void) keymacro_delete(el, in); else if (map[(unsigned char) *in] == ED_SEQUENCE_LEAD_IN) (void) keymacro_delete(el, in); else map[(unsigned char) *in] = ED_UNASSIGNED; return 0; } if (argv[argc] == NULL) { if (key) terminal_print_arrow(el, in); else map_print_key(el, map, in); return 0; } #ifdef notyet if (argv[argc + 1] != NULL) { bindkeymacro_usage(); return -1; } #endif switch (ntype) { case XK_STR: case XK_EXE: if ((out = parse__string(outbuf, argv[argc])) == NULL) { (void) fprintf(el->el_errfile, "" FSTR ": Invalid \\ or ^ in outstring.\n", argv[0]); return -1; } if (key) terminal_set_arrow(el, in, keymacro_map_str(el, out), ntype); else keymacro_add(el, in, keymacro_map_str(el, out), ntype); map[(unsigned char) *in] = ED_SEQUENCE_LEAD_IN; break; case XK_CMD: if ((cmd = parse_cmd(el, argv[argc])) == -1) { (void) fprintf(el->el_errfile, "" FSTR ": Invalid command `" FSTR "'.\n", argv[0], argv[argc]); return -1; } if (key) terminal_set_arrow(el, in, keymacro_map_cmd(el, cmd), ntype); else { if (in[1]) { keymacro_add(el, in, keymacro_map_cmd(el, cmd), ntype); map[(unsigned char) *in] = ED_SEQUENCE_LEAD_IN; } else { keymacro_clear(el, map, in); map[(unsigned char) *in] = (el_action_t)cmd; } } break; /* coverity[dead_error_begin] */ default: EL_ABORT((el->el_errfile, "Bad XK_ type %d\n", ntype)); break; } return 0; } /* map_addfunc(): * add a user defined function */ protected int 
map_addfunc(EditLine *el, const Char *name, const Char *help, el_func_t func) { void *p; size_t nf = el->el_map.nfunc + 1; if (name == NULL || help == NULL || func == NULL) return -1; if ((p = el_realloc(el->el_map.func, nf * sizeof(*el->el_map.func))) == NULL) return -1; el->el_map.func = p; if ((p = el_realloc(el->el_map.help, nf * sizeof(*el->el_map.help))) == NULL) return -1; el->el_map.help = p; nf = (size_t)el->el_map.nfunc; el->el_map.func[nf] = func; el->el_map.help[nf].name = name; el->el_map.help[nf].func = (int)nf; el->el_map.help[nf].description = help; el->el_map.nfunc++; return 0; } Index: stable/11/lib/libedit/refresh.c =================================================================== --- stable/11/lib/libedit/refresh.c (revision 330445) +++ stable/11/lib/libedit/refresh.c (revision 330446) @@ -1,1187 +1,1187 @@ /* $NetBSD: refresh.c,v 1.45 2016/03/02 19:24:20 christos Exp $ */ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Christos Zoulas of Cornell University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "config.h" #if !defined(lint) && !defined(SCCSID) #if 0 static char sccsid[] = "@(#)refresh.c 8.1 (Berkeley) 6/4/93"; #else __RCSID("$NetBSD: refresh.c,v 1.45 2016/03/02 19:24:20 christos Exp $"); #endif #endif /* not lint && not SCCSID */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); /* * refresh.c: Lower level screen refreshing functions */ #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "el.h" private void re_nextline(EditLine *); private void re_addc(EditLine *, wint_t); private void re_update_line(EditLine *, Char *, Char *, int); private void re_insert (EditLine *, Char *, int, int, Char *, int); private void re_delete(EditLine *, Char *, int, int, int); private void re_fastputc(EditLine *, wint_t); private void re_clear_eol(EditLine *, int, int, int); private void re__strncopy(Char *, Char *, size_t); private void re__copy_and_pad(Char *, const Char *, size_t); #ifdef DEBUG_REFRESH private void re_printstr(EditLine *, const char *, Char *, Char *); #define __F el->el_errfile #define ELRE_ASSERT(a, b, c) do \ if (/*CONSTCOND*/ a) { \ (void) fprintf b; \ c; \ } \ while (/*CONSTCOND*/0) #define ELRE_DEBUG(a, b) ELRE_ASSERT(a,b,;) /* re_printstr(): * Print a string on the debugging pty */ private void re_printstr(EditLine *el, const char *str, Char *f, Char *t) { ELRE_DEBUG(1, (__F, "%s:\"", str)); while (f < t) ELRE_DEBUG(1, (__F, "%c", *f++ & 0177)); ELRE_DEBUG(1, (__F, "\"\r\n")); } #else #define ELRE_ASSERT(a, b, c) #define ELRE_DEBUG(a, b) #endif /* re_nextline(): * Move to the next line or scroll */ private void re_nextline(EditLine *el) { el->el_refresh.r_cursor.h = 0; /* reset it. */ /* * If we would overflow (input is longer than terminal size), * emulate scroll by dropping first line and shuffling the rest. * We do this via pointer shuffling - it's safe in this case * and we avoid memcpy(). */ if (el->el_refresh.r_cursor.v + 1 >= el->el_terminal.t_size.v) { int i, lins = el->el_terminal.t_size.v; Char *firstline = el->el_vdisplay[0]; for(i = 1; i < lins; i++) el->el_vdisplay[i - 1] = el->el_vdisplay[i]; firstline[0] = '\0'; /* empty the string */ el->el_vdisplay[i - 1] = firstline; } else el->el_refresh.r_cursor.v++; ELRE_ASSERT(el->el_refresh.r_cursor.v >= el->el_terminal.t_size.v, (__F, "\r\nre_putc: overflow! r_cursor.v == %d > %d\r\n", el->el_refresh.r_cursor.v, el->el_terminal.t_size.v), abort()); } /* re_addc(): * Draw c, expanding tabs, control chars etc. 
*/ private void re_addc(EditLine *el, wint_t c) { switch (ct_chr_class((Char)c)) { case CHTYPE_TAB: /* expand the tab */ for (;;) { re_putc(el, ' ', 1); if ((el->el_refresh.r_cursor.h & 07) == 0) break; /* go until tab stop */ } break; case CHTYPE_NL: { int oldv = el->el_refresh.r_cursor.v; re_putc(el, '\0', 0); /* assure end of line */ if (oldv == el->el_refresh.r_cursor.v) /* XXX */ re_nextline(el); break; } case CHTYPE_PRINT: re_putc(el, c, 1); break; default: { Char visbuf[VISUAL_WIDTH_MAX]; ssize_t i, n = ct_visual_char(visbuf, VISUAL_WIDTH_MAX, (Char)c); for (i = 0; n-- > 0; ++i) re_putc(el, visbuf[i], 1); break; } } } /* re_putc(): * Draw the character given */ protected void re_putc(EditLine *el, wint_t c, int shift) { int i, w = Width(c); ELRE_DEBUG(1, (__F, "printing %5x '%lc'\r\n", c, c)); while (shift && (el->el_refresh.r_cursor.h + w > el->el_terminal.t_size.h)) re_putc(el, ' ', 1); el->el_vdisplay[el->el_refresh.r_cursor.v] [el->el_refresh.r_cursor.h] = (Char)c; /* assumes !shift is only used for single-column chars */ i = w; while (--i > 0) el->el_vdisplay[el->el_refresh.r_cursor.v] [el->el_refresh.r_cursor.h + i] = MB_FILL_CHAR; if (!shift) return; el->el_refresh.r_cursor.h += w; /* advance to next place */ if (el->el_refresh.r_cursor.h >= el->el_terminal.t_size.h) { /* assure end of line */ el->el_vdisplay[el->el_refresh.r_cursor.v][el->el_terminal.t_size.h] = '\0'; re_nextline(el); } } /* re_refresh(): * draws the new virtual screen image from the current input * line, then goes line-by-line changing the real image to the new * virtual image. The routine to re-draw a line can be replaced * easily in hopes of a smarter one being placed there. */ protected void re_refresh(EditLine *el) { int i, rhdiff; Char *cp, *st; coord_t cur; #ifdef notyet size_t termsz; #endif ELRE_DEBUG(1, (__F, "el->el_line.buffer = :" FSTR ":\r\n", el->el_line.buffer)); /* reset the Drawing cursor */ el->el_refresh.r_cursor.h = 0; el->el_refresh.r_cursor.v = 0; /* temporarily draw rprompt to calculate its size */ prompt_print(el, EL_RPROMPT); /* reset the Drawing cursor */ el->el_refresh.r_cursor.h = 0; el->el_refresh.r_cursor.v = 0; if (el->el_line.cursor >= el->el_line.lastchar) { if (el->el_map.current == el->el_map.alt && el->el_line.lastchar != el->el_line.buffer) el->el_line.cursor = el->el_line.lastchar - 1; else el->el_line.cursor = el->el_line.lastchar; } cur.h = -1; /* set flag in case I'm not set */ cur.v = 0; prompt_print(el, EL_PROMPT); /* draw the current input buffer */ #if notyet termsz = el->el_terminal.t_size.h * el->el_terminal.t_size.v; if (el->el_line.lastchar - el->el_line.buffer > termsz) { /* * If line is longer than terminal, process only part * of line which would influence display. 
*/ size_t rem = (el->el_line.lastchar-el->el_line.buffer)%termsz; st = el->el_line.lastchar - rem - (termsz - (((rem / el->el_terminal.t_size.v) - 1) * el->el_terminal.t_size.v)); } else #endif st = el->el_line.buffer; for (cp = st; cp < el->el_line.lastchar; cp++) { if (cp == el->el_line.cursor) { int w = Width(*cp); /* save for later */ cur.h = el->el_refresh.r_cursor.h; cur.v = el->el_refresh.r_cursor.v; /* handle being at a linebroken doublewidth char */ if (w > 1 && el->el_refresh.r_cursor.h + w > el->el_terminal.t_size.h) { cur.h = 0; cur.v++; } } re_addc(el, *cp); } if (cur.h == -1) { /* if I haven't been set yet, I'm at the end */ cur.h = el->el_refresh.r_cursor.h; cur.v = el->el_refresh.r_cursor.v; } rhdiff = el->el_terminal.t_size.h - el->el_refresh.r_cursor.h - el->el_rprompt.p_pos.h; if (el->el_rprompt.p_pos.h && !el->el_rprompt.p_pos.v && !el->el_refresh.r_cursor.v && rhdiff > 1) { /* * have a right-hand side prompt that will fit * on the end of the first line with at least * one character gap to the input buffer. */ while (--rhdiff > 0) /* pad out with spaces */ re_putc(el, ' ', 1); prompt_print(el, EL_RPROMPT); } else { el->el_rprompt.p_pos.h = 0; /* flag "not using rprompt" */ el->el_rprompt.p_pos.v = 0; } re_putc(el, '\0', 0); /* make line ended with NUL, no cursor shift */ el->el_refresh.r_newcv = el->el_refresh.r_cursor.v; ELRE_DEBUG(1, (__F, "term.h=%d vcur.h=%d vcur.v=%d vdisplay[0]=\r\n:%80.80s:\r\n", el->el_terminal.t_size.h, el->el_refresh.r_cursor.h, el->el_refresh.r_cursor.v, ct_encode_string(el->el_vdisplay[0], &el->el_scratch))); ELRE_DEBUG(1, (__F, "updating %d lines.\r\n", el->el_refresh.r_newcv)); for (i = 0; i <= el->el_refresh.r_newcv; i++) { /* NOTE THAT re_update_line MAY CHANGE el_display[i] */ re_update_line(el, el->el_display[i], el->el_vdisplay[i], i); /* * Copy the new line to be the current one, and pad out with * spaces to the full width of the terminal so that if we try * moving the cursor by writing the character that is at the * end of the screen line, it won't be a NUL or some old * leftover stuff. 
*/ re__copy_and_pad(el->el_display[i], el->el_vdisplay[i], (size_t) el->el_terminal.t_size.h); } ELRE_DEBUG(1, (__F, "\r\nel->el_refresh.r_cursor.v=%d,el->el_refresh.r_oldcv=%d i=%d\r\n", el->el_refresh.r_cursor.v, el->el_refresh.r_oldcv, i)); if (el->el_refresh.r_oldcv > el->el_refresh.r_newcv) for (; i <= el->el_refresh.r_oldcv; i++) { terminal_move_to_line(el, i); terminal_move_to_char(el, 0); /* This Strlen should be safe even with MB_FILL_CHARs */ terminal_clear_EOL(el, (int) Strlen(el->el_display[i])); #ifdef DEBUG_REFRESH terminal_overwrite(el, STR("C\b"), 2); #endif /* DEBUG_REFRESH */ el->el_display[i][0] = '\0'; } el->el_refresh.r_oldcv = el->el_refresh.r_newcv; /* set for next time */ ELRE_DEBUG(1, (__F, "\r\ncursor.h = %d, cursor.v = %d, cur.h = %d, cur.v = %d\r\n", el->el_refresh.r_cursor.h, el->el_refresh.r_cursor.v, cur.h, cur.v)); terminal_move_to_line(el, cur.v); /* go to where the cursor is */ terminal_move_to_char(el, cur.h); } /* re_goto_bottom(): * used to go to last used screen line */ protected void re_goto_bottom(EditLine *el) { terminal_move_to_line(el, el->el_refresh.r_oldcv); terminal__putc(el, '\n'); re_clear_display(el); terminal__flush(el); } /* re_insert(): * insert num characters of s into d (in front of the character) * at dat, maximum length of d is dlen */ private void /*ARGSUSED*/ re_insert(EditLine *el __attribute__((__unused__)), Char *d, int dat, int dlen, Char *s, int num) { Char *a, *b; if (num <= 0) return; if (num > dlen - dat) num = dlen - dat; ELRE_DEBUG(1, (__F, "re_insert() starting: %d at %d max %d, d == \"%s\"\n", num, dat, dlen, ct_encode_string(d, &el->el_scratch))); ELRE_DEBUG(1, (__F, "s == \"%s\"\n", ct_encode_string(s, &el->el_scratch))); /* open up the space for num chars */ if (num > 0) { b = d + dlen - 1; a = b - num; while (a >= &d[dat]) *b-- = *a--; d[dlen] = '\0'; /* just in case */ } ELRE_DEBUG(1, (__F, "re_insert() after insert: %d at %d max %d, d == \"%s\"\n", num, dat, dlen, ct_encode_string(d, &el->el_scratch))); ELRE_DEBUG(1, (__F, "s == \"%s\"\n", ct_encode_string(s, &el->el_scratch))); /* copy the characters */ for (a = d + dat; (a < d + dlen) && (num > 0); num--) *a++ = *s++; #ifdef notyet /* ct_encode_string() uses a static buffer, so we can't conveniently * encode both d & s here */ ELRE_DEBUG(1, (__F, "re_insert() after copy: %d at %d max %d, %s == \"%s\"\n", num, dat, dlen, d, s)); ELRE_DEBUG(1, (__F, "s == \"%s\"\n", s)); #endif } /* re_delete(): * delete num characters d at dat, maximum length of d is dlen */ private void /*ARGSUSED*/ re_delete(EditLine *el __attribute__((__unused__)), Char *d, int dat, int dlen, int num) { Char *a, *b; if (num <= 0) return; if (dat + num >= dlen) { d[dat] = '\0'; return; } ELRE_DEBUG(1, (__F, "re_delete() starting: %d at %d max %d, d == \"%s\"\n", num, dat, dlen, ct_encode_string(d, &el->el_scratch))); /* open up the space for num chars */ if (num > 0) { b = d + dat; a = b + num; while (a < &d[dlen]) *b++ = *a++; d[dlen] = '\0'; /* just in case */ } ELRE_DEBUG(1, (__F, "re_delete() after delete: %d at %d max %d, d == \"%s\"\n", num, dat, dlen, ct_encode_string(d, &el->el_scratch))); } /* re__strncopy(): * Like strncpy without padding. */ private void re__strncopy(Char *a, Char *b, size_t n) { while (n-- && *b) *a++ = *b++; } /* re_clear_eol(): * Find the number of characters we need to clear till the end of line * in order to make sure that we have cleared the previous contents of * the line. 
fx and sx is the number of characters inserted or deleted * in the first or second diff, diff is the difference between the * number of characters between the new and old line. */ private void re_clear_eol(EditLine *el, int fx, int sx, int diff) { ELRE_DEBUG(1, (__F, "re_clear_eol sx %d, fx %d, diff %d\n", sx, fx, diff)); if (fx < 0) fx = -fx; if (sx < 0) sx = -sx; if (fx > diff) diff = fx; if (sx > diff) diff = sx; ELRE_DEBUG(1, (__F, "re_clear_eol %d\n", diff)); terminal_clear_EOL(el, diff); } /***************************************************************** re_update_line() is based on finding the middle difference of each line on the screen; vis: /old first difference /beginning of line | /old last same /old EOL v v v v old: eddie> Oh, my little gruntle-buggy is to me, as lurgid as new: eddie> Oh, my little buggy says to me, as lurgid as ^ ^ ^ ^ \beginning of line | \new last same \new end of line \new first difference all are character pointers for the sake of speed. Special cases for no differences, as well as for end of line additions must be handled. **************************************************************** */ /* Minimum at which doing an insert it "worth it". This should be about * half the "cost" of going into insert mode, inserting a character, and * going back out. This should really be calculated from the termcap * data... For the moment, a good number for ANSI terminals. */ #define MIN_END_KEEP 4 private void re_update_line(EditLine *el, Char *old, Char *new, int i) { Char *o, *n, *p, c; Char *ofd, *ols, *oe, *nfd, *nls, *ne; Char *osb, *ose, *nsb, *nse; int fx, sx; size_t len; /* * find first diff */ for (o = old, n = new; *o && (*o == *n); o++, n++) continue; ofd = o; nfd = n; /* * Find the end of both old and new */ while (*o) o++; /* * Remove any trailing blanks off of the end, being careful not to * back up past the beginning. */ while (ofd < o) { if (o[-1] != ' ') break; o--; } oe = o; *oe = '\0'; while (*n) n++; /* remove blanks from end of new */ while (nfd < n) { if (n[-1] != ' ') break; n--; } ne = n; *ne = '\0'; /* * if no diff, continue to next line of redraw */ if (*ofd == '\0' && *nfd == '\0') { ELRE_DEBUG(1, (__F, "no difference.\r\n")); return; } /* * find last same pointer */ while ((o > ofd) && (n > nfd) && (*--o == *--n)) continue; ols = ++o; nls = ++n; /* * find same beginning and same end */ osb = ols; nsb = nls; ose = ols; nse = nls; /* * case 1: insert: scan from nfd to nls looking for *ofd */ if (*ofd) { for (c = *ofd, n = nfd; n < nls; n++) { if (c == *n) { for (o = ofd, p = n; p < nls && o < ols && *o == *p; o++, p++) continue; /* * if the new match is longer and it's worth * keeping, then we take it */ if (((nse - nsb) < (p - n)) && (2 * (p - n) > n - nfd)) { nsb = n; nse = p; osb = ofd; ose = o; } } } } /* * case 2: delete: scan from ofd to ols looking for *nfd */ if (*nfd) { for (c = *nfd, o = ofd; o < ols; o++) { if (c == *o) { for (n = nfd, p = o; p < ols && n < nls && *p == *n; p++, n++) continue; /* * if the new match is longer and it's worth * keeping, then we take it */ if (((ose - osb) < (p - o)) && (2 * (p - o) > o - ofd)) { nsb = nfd; nse = n; osb = o; ose = p; } } } } /* * Pragmatics I: If old trailing whitespace or not enough characters to * save to be worth it, then don't save the last same info. 
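 * (Illustration: with MIN_END_KEEP at 4, a common tail of only three
 * characters is not worth preserving; simply overwriting it costs
 * less than the insert/delete sequences needed to keep it.)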
*/ if ((oe - ols) < MIN_END_KEEP) { ols = oe; nls = ne; } /* * Pragmatics II: if the terminal isn't smart enough, make the data * dumber so the smart update doesn't try anything fancy */ /* * fx is the number of characters we need to insert/delete: in the * beginning to bring the two same begins together */ fx = (int)((nsb - nfd) - (osb - ofd)); /* * sx is the number of characters we need to insert/delete: in the * end to bring the two same last parts together */ sx = (int)((nls - nse) - (ols - ose)); if (!EL_CAN_INSERT) { if (fx > 0) { osb = ols; ose = ols; nsb = nls; nse = nls; } if (sx > 0) { ols = oe; nls = ne; } if ((ols - ofd) < (nls - nfd)) { ols = oe; nls = ne; } } if (!EL_CAN_DELETE) { if (fx < 0) { osb = ols; ose = ols; nsb = nls; nse = nls; } if (sx < 0) { ols = oe; nls = ne; } if ((ols - ofd) > (nls - nfd)) { ols = oe; nls = ne; } } /* * Pragmatics III: make sure the middle shifted pointers are correct if * they don't point to anything (we may have moved ols or nls). */ /* if the change isn't worth it, don't bother */ /* was: if (osb == ose) */ if ((ose - osb) < MIN_END_KEEP) { osb = ols; ose = ols; nsb = nls; nse = nls; } /* * Now that we are done with pragmatics we recompute fx, sx */ fx = (int)((nsb - nfd) - (osb - ofd)); sx = (int)((nls - nse) - (ols - ose)); ELRE_DEBUG(1, (__F, "fx %d, sx %d\n", fx, sx)); ELRE_DEBUG(1, (__F, "ofd %td, osb %td, ose %td, ols %td, oe %td\n", ofd - old, osb - old, ose - old, ols - old, oe - old)); ELRE_DEBUG(1, (__F, "nfd %td, nsb %td, nse %td, nls %td, ne %td\n", nfd - new, nsb - new, nse - new, nls - new, ne - new)); ELRE_DEBUG(1, (__F, "xxx-xxx:\"00000000001111111111222222222233333333334\"\r\n")); ELRE_DEBUG(1, (__F, "xxx-xxx:\"01234567890123456789012345678901234567890\"\r\n")); #ifdef DEBUG_REFRESH re_printstr(el, "old- oe", old, oe); re_printstr(el, "new- ne", new, ne); re_printstr(el, "old-ofd", old, ofd); re_printstr(el, "new-nfd", new, nfd); re_printstr(el, "ofd-osb", ofd, osb); re_printstr(el, "nfd-nsb", nfd, nsb); re_printstr(el, "osb-ose", osb, ose); re_printstr(el, "nsb-nse", nsb, nse); re_printstr(el, "ose-ols", ose, ols); re_printstr(el, "nse-nls", nse, nls); re_printstr(el, "ols- oe", ols, oe); re_printstr(el, "nls- ne", nls, ne); #endif /* DEBUG_REFRESH */ /* * el_cursor.v to this line i MUST be in this routine so that if we * don't have to change the line, we don't move to it. el_cursor.h to * first diff char */ terminal_move_to_line(el, i); /* * at this point we have something like this: * * /old /ofd /osb /ose /ols /oe * v.....................v v..................v v........v * eddie> Oh, my fredded gruntle-buggy is to me, as foo var lurgid as * eddie> Oh, my fredded quiux buggy is to me, as gruntle-lurgid as * ^.....................^ ^..................^ ^........^ * \new \nfd \nsb \nse \nls \ne * * fx is the difference in length between the chars between nfd and * nsb, and the chars between ofd and osb, and is thus the number of * characters to delete if < 0 (new is shorter than old, as above), * or insert (new is longer than short). * * sx is the same for the second differences. */ /* * if we have a net insert on the first difference, AND inserting the * net amount ((nsb-nfd) - (osb-ofd)) won't push the last useful * character (which is ne if nls != ne, otherwise is nse) off the edge * of the screen (el->el_terminal.t_size.h) else we do the deletes first * so that we keep everything we need to. 
*/ /* * if the last same is the same like the end, there is no last same * part, otherwise we want to keep the last same part set p to the * last useful old character */ p = (ols != oe) ? oe : ose; /* * if (There is a diffence in the beginning) && (we need to insert * characters) && (the number of characters to insert is less than * the term width) * We need to do an insert! * else if (we need to delete characters) * We need to delete characters! * else * No insert or delete */ if ((nsb != nfd) && fx > 0 && ((p - old) + fx <= el->el_terminal.t_size.h)) { ELRE_DEBUG(1, (__F, "first diff insert at %td...\r\n", nfd - new)); /* * Move to the first char to insert, where the first diff is. */ terminal_move_to_char(el, (int)(nfd - new)); /* * Check if we have stuff to keep at end */ if (nsb != ne) { ELRE_DEBUG(1, (__F, "with stuff to keep at end\r\n")); /* * insert fx chars of new starting at nfd */ if (fx > 0) { ELRE_DEBUG(!EL_CAN_INSERT, (__F, "ERROR: cannot insert in early first diff\n")); terminal_insertwrite(el, nfd, fx); re_insert(el, old, (int)(ofd - old), el->el_terminal.t_size.h, nfd, fx); } /* * write (nsb-nfd) - fx chars of new starting at * (nfd + fx) */ len = (size_t) ((nsb - nfd) - fx); terminal_overwrite(el, (nfd + fx), len); re__strncopy(ofd + fx, nfd + fx, len); } else { ELRE_DEBUG(1, (__F, "without anything to save\r\n")); len = (size_t)(nsb - nfd); terminal_overwrite(el, nfd, len); re__strncopy(ofd, nfd, len); /* * Done */ return; } } else if (fx < 0) { ELRE_DEBUG(1, (__F, "first diff delete at %td...\r\n", ofd - old)); /* * move to the first char to delete where the first diff is */ terminal_move_to_char(el, (int)(ofd - old)); /* * Check if we have stuff to save */ if (osb != oe) { ELRE_DEBUG(1, (__F, "with stuff to save at end\r\n")); /* * fx is less than zero *always* here but we check * for code symmetry */ if (fx < 0) { ELRE_DEBUG(!EL_CAN_DELETE, (__F, "ERROR: cannot delete in first diff\n")); terminal_deletechars(el, -fx); re_delete(el, old, (int)(ofd - old), el->el_terminal.t_size.h, -fx); } /* * write (nsb-nfd) chars of new starting at nfd */ len = (size_t) (nsb - nfd); terminal_overwrite(el, nfd, len); re__strncopy(ofd, nfd, len); } else { ELRE_DEBUG(1, (__F, "but with nothing left to save\r\n")); /* * write (nsb-nfd) chars of new starting at nfd */ terminal_overwrite(el, nfd, (size_t)(nsb - nfd)); re_clear_eol(el, fx, sx, (int)((oe - old) - (ne - new))); /* * Done */ return; } } else fx = 0; if (sx < 0 && (ose - old) + fx < el->el_terminal.t_size.h) { ELRE_DEBUG(1, (__F, "second diff delete at %td...\r\n", (ose - old) + fx)); /* * Check if we have stuff to delete */ /* * fx is the number of characters inserted (+) or deleted (-) */ terminal_move_to_char(el, (int)((ose - old) + fx)); /* * Check if we have stuff to save */ if (ols != oe) { ELRE_DEBUG(1, (__F, "with stuff to save at end\r\n")); /* * Again a duplicate test. */ if (sx < 0) { ELRE_DEBUG(!EL_CAN_DELETE, (__F, "ERROR: cannot delete in second diff\n")); terminal_deletechars(el, -sx); } /* * write (nls-nse) chars of new starting at nse */ terminal_overwrite(el, nse, (size_t)(nls - nse)); } else { ELRE_DEBUG(1, (__F, "but with nothing left to save\r\n")); terminal_overwrite(el, nse, (size_t)(nls - nse)); re_clear_eol(el, fx, sx, (int)((oe - old) - (ne - new))); } } /* * if we have a first insert AND WE HAVEN'T ALREADY DONE IT... 
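 * (A hypothetical instance of this arithmetic: if the middle segment
 * between nfd and nsb is five characters while the one between ofd
 * and osb is eight, then fx = 5 - 8 = -3, i.e. three characters must
 * be deleted at the first difference; a positive fx would call for
 * an insert instead. sx works the same way at the second difference.)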
*/ if ((nsb != nfd) && (osb - ofd) <= (nsb - nfd) && (fx == 0)) { ELRE_DEBUG(1, (__F, "late first diff insert at %td...\r\n", nfd - new)); terminal_move_to_char(el, (int)(nfd - new)); /* * Check if we have stuff to keep at the end */ if (nsb != ne) { ELRE_DEBUG(1, (__F, "with stuff to keep at end\r\n")); /* * We have to recalculate fx here because we set it * to zero above as a flag saying that we hadn't done * an early first insert. */ fx = (int)((nsb - nfd) - (osb - ofd)); if (fx > 0) { /* * insert fx chars of new starting at nfd */ ELRE_DEBUG(!EL_CAN_INSERT, (__F, "ERROR: cannot insert in late first diff\n")); terminal_insertwrite(el, nfd, fx); re_insert(el, old, (int)(ofd - old), el->el_terminal.t_size.h, nfd, fx); } /* * write (nsb-nfd) - fx chars of new starting at * (nfd + fx) */ len = (size_t) ((nsb - nfd) - fx); terminal_overwrite(el, (nfd + fx), len); re__strncopy(ofd + fx, nfd + fx, len); } else { ELRE_DEBUG(1, (__F, "without anything to save\r\n")); len = (size_t) (nsb - nfd); terminal_overwrite(el, nfd, len); re__strncopy(ofd, nfd, len); } } /* * line is now NEW up to nse */ if (sx >= 0) { ELRE_DEBUG(1, (__F, "second diff insert at %d...\r\n", (int)(nse - new))); terminal_move_to_char(el, (int)(nse - new)); if (ols != oe) { ELRE_DEBUG(1, (__F, "with stuff to keep at end\r\n")); if (sx > 0) { /* insert sx chars of new starting at nse */ ELRE_DEBUG(!EL_CAN_INSERT, (__F, "ERROR: cannot insert in second diff\n")); terminal_insertwrite(el, nse, sx); } /* * write (nls-nse) - sx chars of new starting at * (nse + sx) */ terminal_overwrite(el, (nse + sx), (size_t)((nls - nse) - sx)); } else { ELRE_DEBUG(1, (__F, "without anything to save\r\n")); terminal_overwrite(el, nse, (size_t)(nls - nse)); /* * No need to do a clear-to-end here because we were * doing a second insert, so we will have over * written all of the old string. */ } } ELRE_DEBUG(1, (__F, "done.\r\n")); } /* re__copy_and_pad(): * Copy string and pad with spaces */ private void re__copy_and_pad(Char *dst, const Char *src, size_t width) { size_t i; for (i = 0; i < width; i++) { if (*src == '\0') break; *dst++ = *src++; } for (; i < width; i++) *dst++ = ' '; *dst = '\0'; } /* re_refresh_cursor(): * Move to the new cursor position */ protected void re_refresh_cursor(EditLine *el) { Char *cp; int h, v, th, w; if (el->el_line.cursor >= el->el_line.lastchar) { if (el->el_map.current == el->el_map.alt && el->el_line.lastchar != el->el_line.buffer) el->el_line.cursor = el->el_line.lastchar - 1; else el->el_line.cursor = el->el_line.lastchar; } /* first we must find where the cursor is... 
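 * (Note: fx was forced to zero in the else branch above precisely
 * when no early first-diff insert was performed, so the (fx == 0)
 * test below is what distinguishes the "haven't already done it"
 * case; fx is then recomputed inside the branch before use.)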
*/ h = el->el_prompt.p_pos.h; v = el->el_prompt.p_pos.v; th = el->el_terminal.t_size.h; /* optimize for speed */ /* do input buffer to el->el_line.cursor */ for (cp = el->el_line.buffer; cp < el->el_line.cursor; cp++) { switch (ct_chr_class(*cp)) { case CHTYPE_NL: /* handle newline in data part too */ h = 0; v++; break; case CHTYPE_TAB: /* if a tab, to next tab stop */ while (++h & 07) continue; break; default: w = Width(*cp); if (w > 1 && h + w > th) { /* won't fit on line */ h = 0; v++; } h += ct_visual_width(*cp); break; } if (h >= th) { /* check, extra long tabs picked up here also */ h -= th; v++; } } /* if we have a next character, and it's a doublewidth one, we need to * check whether we need to linebreak for it to fit */ if (cp < el->el_line.lastchar && (w = Width(*cp)) > 1) if (h + w > th) { h = 0; v++; } /* now go there */ terminal_move_to_line(el, v); terminal_move_to_char(el, h); terminal__flush(el); } /* re_fastputc(): * Add a character fast. */ private void re_fastputc(EditLine *el, wint_t c) { int w = Width((Char)c); while (w > 1 && el->el_cursor.h + w > el->el_terminal.t_size.h) re_fastputc(el, ' '); terminal__putc(el, c); el->el_display[el->el_cursor.v][el->el_cursor.h++] = (Char)c; while (--w > 0) el->el_display[el->el_cursor.v][el->el_cursor.h++] = MB_FILL_CHAR; if (el->el_cursor.h >= el->el_terminal.t_size.h) { /* if we must overflow */ el->el_cursor.h = 0; /* * If we would overflow (input is longer than terminal size), * emulate scroll by dropping the first line and shuffling the * rest. We do this via pointer shuffling - it's safe in this * case and we avoid memcpy(). */ if (el->el_cursor.v + 1 >= el->el_terminal.t_size.v) { int i, lins = el->el_terminal.t_size.v; Char *firstline = el->el_display[0]; for(i = 1; i < lins; i++) el->el_display[i - 1] = el->el_display[i]; re__copy_and_pad(firstline, STR(""), (size_t)0); el->el_display[i - 1] = firstline; } else { el->el_cursor.v++; el->el_refresh.r_oldcv++; } if (EL_HAS_AUTO_MARGINS) { if (EL_HAS_MAGIC_MARGINS) { terminal__putc(el, ' '); terminal__putc(el, '\b'); } } else { terminal__putc(el, '\r'); terminal__putc(el, '\n'); } } } /* re_fastaddc(): * we added just one char, handle it fast. * Assumes that screen cursor == real cursor */ protected void re_fastaddc(EditLine *el) { Char c; int rhdiff; c = el->el_line.cursor[-1]; if (c == '\t' || el->el_line.cursor != el->el_line.lastchar) { re_refresh(el); /* too hard to handle */ return; } rhdiff = el->el_terminal.t_size.h - el->el_cursor.h - el->el_rprompt.p_pos.h; if (el->el_rprompt.p_pos.h && rhdiff < 3) { re_refresh(el); /* clear out rprompt if less than 1 char gap */ return; } /* else (only do at end of line, no TAB) */ switch (ct_chr_class(c)) { case CHTYPE_TAB: /* already handled, should never happen here */ break; case CHTYPE_NL: case CHTYPE_PRINT: re_fastputc(el, c); break; case CHTYPE_ASCIICTL: case CHTYPE_NONPRINT: { Char visbuf[VISUAL_WIDTH_MAX]; ssize_t i, n = ct_visual_char(visbuf, VISUAL_WIDTH_MAX, (Char)c); for (i = 0; n-- > 0; ++i) re_fastputc(el, visbuf[i]); break; } } terminal__flush(el); } /* re_clear_display(): - * clear the screen buffers so that new new prompt starts fresh. + * clear the screen buffers so that new prompt starts fresh.
*/ protected void re_clear_display(EditLine *el) { int i; el->el_cursor.v = 0; el->el_cursor.h = 0; for (i = 0; i < el->el_terminal.t_size.v; i++) el->el_display[i][0] = '\0'; el->el_refresh.r_oldcv = 0; } /* re_clear_lines(): * Make sure all lines are *really* blank */ protected void re_clear_lines(EditLine *el) { if (EL_CAN_CEOL) { int i; for (i = el->el_refresh.r_oldcv; i >= 0; i--) { /* for each line on the screen */ terminal_move_to_line(el, i); terminal_move_to_char(el, 0); terminal_clear_EOL(el, el->el_terminal.t_size.h); } } else { terminal_move_to_line(el, el->el_refresh.r_oldcv); /* go to last line */ terminal__putc(el, '\r'); /* go to BOL */ terminal__putc(el, '\n'); /* go to new line */ } } Index: stable/11/lib/libefivar/uefi-dputil.c =================================================================== --- stable/11/lib/libefivar/uefi-dputil.c (revision 330445) +++ stable/11/lib/libefivar/uefi-dputil.c (revision 330446) @@ -1,636 +1,636 @@ /*- * Copyright (c) 2017 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Routines to format EFI_DEVICE_PATHs from the UEFI standard. Much of * this file is taken from EDK2 and rototilled. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "efi-osdep.h" #include "uefi-dplib.h" /* XXX maybe I sould include the entire DevicePathUtiltiies.c and ifdef out what we don't use */ /* * Taken from MdePkg/Library/UefiDevicePathLib/DevicePathUtilities.c * hash a11928f3310518ab1c6fd34e8d0fdbb72de9602c 2017-Mar-01 */ /** @file Device Path services. The thing to remember is device paths are built out of nodes. The device path is terminated by an end node that is length sizeof(EFI_DEVICE_PATH_PROTOCOL). That would be why there is sizeof(EFI_DEVICE_PATH_PROTOCOL) all over this file. The only place where multi-instance device paths are supported is in environment varibles. Multi-instance device paths should never be placed on a Handle. Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.
This program and the accompanying materials are licensed and made available under the terms and conditions of the BSD License which accompanies this distribution. The full text of the license may be found at http://opensource.org/licenses/bsd-license.php. THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. **/ // // Template for an end-of-device path node. // static CONST EFI_DEVICE_PATH_PROTOCOL mUefiDevicePathLibEndDevicePath = { END_DEVICE_PATH_TYPE, END_ENTIRE_DEVICE_PATH_SUBTYPE, { END_DEVICE_PATH_LENGTH, 0 } }; /** Returns the size of a device path in bytes. This function returns the size, in bytes, of the device path data structure specified by DevicePath including the end of device path node. If DevicePath is NULL or invalid, then 0 is returned. @param DevicePath A pointer to a device path data structure. @retval 0 If DevicePath is NULL or invalid. @retval Others The size of a device path in bytes. **/ UINTN EFIAPI GetDevicePathSize ( IN CONST EFI_DEVICE_PATH_PROTOCOL *DevicePath ) { CONST EFI_DEVICE_PATH_PROTOCOL *Start; if (DevicePath == NULL) { return 0; } if (!IsDevicePathValid (DevicePath, 0)) { return 0; } // // Search for the end of the device path structure // Start = DevicePath; while (!IsDevicePathEnd (DevicePath)) { DevicePath = NextDevicePathNode (DevicePath); } // // Compute the size and add back in the size of the end device path structure // return ((UINTN) DevicePath - (UINTN) Start) + DevicePathNodeLength (DevicePath); } /** Determine whether a given device path is valid. If DevicePath is NULL, then ASSERT(). @param DevicePath A pointer to a device path data structure. @param MaxSize The maximum size of the device path data structure. @retval TRUE DevicePath is valid. - @retval FALSE The length of any node node in the DevicePath is less + @retval FALSE The length of any node in the DevicePath is less than sizeof (EFI_DEVICE_PATH_PROTOCOL). @retval FALSE If MaxSize is not zero, the size of the DevicePath exceeds MaxSize. @retval FALSE If PcdMaximumDevicePathNodeCount is not zero, the node count of the DevicePath exceeds PcdMaximumDevicePathNodeCount. **/ BOOLEAN EFIAPI IsDevicePathValid ( IN CONST EFI_DEVICE_PATH_PROTOCOL *DevicePath, IN UINTN MaxSize ) { UINTN Count; UINTN Size; UINTN NodeLength; ASSERT (DevicePath != NULL); if (MaxSize == 0) { MaxSize = MAX_UINTN; } // // Validate the input size big enough to touch the first node. // if (MaxSize < sizeof (EFI_DEVICE_PATH_PROTOCOL)) { return FALSE; } for (Count = 0, Size = 0; !IsDevicePathEnd (DevicePath); DevicePath = NextDevicePathNode (DevicePath)) { NodeLength = DevicePathNodeLength (DevicePath); if (NodeLength < sizeof (EFI_DEVICE_PATH_PROTOCOL)) { return FALSE; } if (NodeLength > MAX_UINTN - Size) { return FALSE; } Size += NodeLength; // // Validate next node before touch it. // if (Size > MaxSize - END_DEVICE_PATH_LENGTH ) { return FALSE; } if (PcdGet32 (PcdMaximumDevicePathNodeCount) > 0) { Count++; if (Count >= PcdGet32 (PcdMaximumDevicePathNodeCount)) { return FALSE; } } } // // Only return TRUE when the End Device Path node is valid. // return (BOOLEAN) (DevicePathNodeLength (DevicePath) == END_DEVICE_PATH_LENGTH); } /** Returns the Type field of a device path node. Returns the Type field of the device path node specified by Node. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. @return The Type field of the device path node specified by Node. 
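  (Illustrative note: every node starts with the common 4-byte header
  of Type, SubType and two little-endian Length bytes, so these
  accessor functions merely read that header; e.g. the end-of-path
  template above carries Type END_DEVICE_PATH_TYPE and SubType
  END_ENTIRE_DEVICE_PATH_SUBTYPE.)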
**/ UINT8 EFIAPI DevicePathType ( IN CONST VOID *Node ) { ASSERT (Node != NULL); return ((const EFI_DEVICE_PATH_PROTOCOL *)(Node))->Type; } /** Returns the SubType field of a device path node. Returns the SubType field of the device path node specified by Node. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. @return The SubType field of the device path node specified by Node. **/ UINT8 EFIAPI DevicePathSubType ( IN CONST VOID *Node ) { ASSERT (Node != NULL); return ((const EFI_DEVICE_PATH_PROTOCOL *)(Node))->SubType; } /** Returns the 16-bit Length field of a device path node. Returns the 16-bit Length field of the device path node specified by Node. Node is not required to be aligned on a 16-bit boundary, so it is recommended that a function such as ReadUnaligned16() be used to extract the contents of the Length field. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. @return The 16-bit Length field of the device path node specified by Node. **/ UINTN EFIAPI DevicePathNodeLength ( IN CONST VOID *Node ) { ASSERT (Node != NULL); return ((const EFI_DEVICE_PATH_PROTOCOL *)Node)->Length[0] | (((const EFI_DEVICE_PATH_PROTOCOL *)Node)->Length[1] << 8); } /** Returns a pointer to the next node in a device path. Returns a pointer to the device path node that follows the device path node specified by Node. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. @return a pointer to the device path node that follows the device path node specified by Node. **/ EFI_DEVICE_PATH_PROTOCOL * EFIAPI NextDevicePathNode ( IN CONST VOID *Node ) { ASSERT (Node != NULL); return ((EFI_DEVICE_PATH_PROTOCOL *)(__DECONST(UINT8 *, Node) + DevicePathNodeLength(Node))); } /** Determines if a device path node is an end node of a device path. This includes nodes that are the end of a device path instance and nodes that are the end of an entire device path. Determines if the device path node specified by Node is an end node of a device path. This includes nodes that are the end of a device path instance and nodes that are the end of an entire device path. If Node represents an end node of a device path, then TRUE is returned. Otherwise, FALSE is returned. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. @retval TRUE The device path node specified by Node is an end node of a device path. @retval FALSE The device path node specified by Node is not an end node of a device path. **/ BOOLEAN EFIAPI IsDevicePathEndType ( IN CONST VOID *Node ) { ASSERT (Node != NULL); return (BOOLEAN) (DevicePathType (Node) == END_DEVICE_PATH_TYPE); } /** Determines if a device path node is an end node of an entire device path. Determines if a device path node specified by Node is an end node of an entire device path. If Node represents the end of an entire device path, then TRUE is returned. Otherwise, FALSE is returned. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. @retval TRUE The device path node specified by Node is the end of an entire device path. @retval FALSE The device path node specified by Node is not the end of an entire device path. **/ BOOLEAN EFIAPI IsDevicePathEnd ( IN CONST VOID *Node ) { ASSERT (Node != NULL); return (BOOLEAN) (IsDevicePathEndType (Node) && DevicePathSubType(Node) == END_ENTIRE_DEVICE_PATH_SUBTYPE); } /** Fills in all the fields of a device path node that is the end of an entire device path. 
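  (Concretely, with the usual EDK2 constants the node written here is
  just the four bytes 0x7F 0xFF 0x04 0x00: END_DEVICE_PATH_TYPE,
  END_ENTIRE_DEVICE_PATH_SUBTYPE, and the 16-bit little-endian length
  END_DEVICE_PATH_LENGTH.)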
Fills in all the fields of a device path node specified by Node so Node represents the end of an entire device path. The Type field of Node is set to END_DEVICE_PATH_TYPE, the SubType field of Node is set to END_ENTIRE_DEVICE_PATH_SUBTYPE, and the Length field of Node is set to END_DEVICE_PATH_LENGTH. Node is not required to be aligned on a 16-bit boundary, so it is recommended that a function such as WriteUnaligned16() be used to set the contents of the Length field. If Node is NULL, then ASSERT(). @param Node A pointer to a device path node data structure. **/ VOID EFIAPI SetDevicePathEndNode ( OUT VOID *Node ) { ASSERT (Node != NULL); memcpy (Node, &mUefiDevicePathLibEndDevicePath, sizeof (mUefiDevicePathLibEndDevicePath)); } /** Sets the length, in bytes, of a device path node. Sets the length of the device path node specified by Node to the value specified by NodeLength. NodeLength is returned. Node is not required to be aligned on a 16-bit boundary, so it is recommended that a function such as WriteUnaligned16() be used to set the contents of the Length field. If Node is NULL, then ASSERT(). If NodeLength >= SIZE_64KB, then ASSERT(). If NodeLength < sizeof (EFI_DEVICE_PATH_PROTOCOL), then ASSERT(). @param Node A pointer to a device path node data structure. @param Length The length, in bytes, of the device path node. @return Length **/ UINT16 EFIAPI SetDevicePathNodeLength ( IN OUT VOID *Node, IN UINTN Length ) { ASSERT (Node != NULL); ASSERT ((Length >= sizeof (EFI_DEVICE_PATH_PROTOCOL)) && (Length < SIZE_64KB)); // return WriteUnaligned16 ((UINT16 *)&((EFI_DEVICE_PATH_PROTOCOL *)(Node))->Length[0], (UINT16)(Length)); le16enc(&((EFI_DEVICE_PATH_PROTOCOL *)(Node))->Length[0], (UINT16)(Length)); return Length; } /** Creates a device node. This function creates a new device node in a newly allocated buffer of size NodeLength and initializes the device path node header with NodeType and NodeSubType. The new device path node is returned. If NodeLength is smaller than a device path header, then NULL is returned. If there is not enough memory to allocate space for the new device path, then NULL is returned. The memory is allocated from EFI boot services memory. It is the responsibility of the caller to free the memory allocated. @param NodeType The device node type for the new device node. @param NodeSubType The device node sub-type for the new device node. @param NodeLength The length of the new device node. @return The new device path. **/ EFI_DEVICE_PATH_PROTOCOL * EFIAPI CreateDeviceNode ( IN UINT8 NodeType, IN UINT8 NodeSubType, IN UINT16 NodeLength ) { EFI_DEVICE_PATH_PROTOCOL *DevicePath; if (NodeLength < sizeof (EFI_DEVICE_PATH_PROTOCOL)) { // // NodeLength is less than the size of the header. // return NULL; } DevicePath = AllocateZeroPool (NodeLength); if (DevicePath != NULL) { DevicePath->Type = NodeType; DevicePath->SubType = NodeSubType; SetDevicePathNodeLength (DevicePath, NodeLength); } return DevicePath; } /** Creates a new copy of an existing device path. This function allocates space for a new copy of the device path specified by DevicePath. If DevicePath is NULL, then NULL is returned. If the memory is successfully allocated, then the contents of DevicePath are copied to the newly allocated buffer, and a pointer to that buffer is returned. Otherwise, NULL is returned. The memory for the new device path is allocated from EFI boot services memory. It is the responsibility of the caller to free the memory allocated. 
@param DevicePath A pointer to a device path data structure. @retval NULL DevicePath is NULL or invalid. @retval Others A pointer to the duplicated device path. **/ EFI_DEVICE_PATH_PROTOCOL * EFIAPI DuplicateDevicePath ( IN CONST EFI_DEVICE_PATH_PROTOCOL *DevicePath ) { UINTN Size; // // Compute the size // Size = GetDevicePathSize (DevicePath); if (Size == 0) { return NULL; } // // Allocate space for duplicate device path // return AllocateCopyPool (Size, DevicePath); } /** Creates a new device path by appending a second device path to a first device path. This function creates a new device path by appending a copy of SecondDevicePath to a copy of FirstDevicePath in a newly allocated buffer. Only the end-of-device-path device node from SecondDevicePath is retained. The newly created device path is returned. If FirstDevicePath is NULL, then it is ignored, and a duplicate of SecondDevicePath is returned. If SecondDevicePath is NULL, then it is ignored, and a duplicate of FirstDevicePath is returned. If both FirstDevicePath and SecondDevicePath are NULL, then a copy of an end-of-device-path is returned. If there is not enough memory for the newly allocated buffer, then NULL is returned. The memory for the new device path is allocated from EFI boot services memory. It is the responsibility of the caller to free the memory allocated. @param FirstDevicePath A pointer to a device path data structure. @param SecondDevicePath A pointer to a device path data structure. @retval NULL If there is not enough memory for the newly allocated buffer. @retval NULL If FirstDevicePath or SecondDevicePath is invalid. @retval Others A pointer to the new device path if success. Or a copy an end-of-device-path if both FirstDevicePath and SecondDevicePath are NULL. **/ EFI_DEVICE_PATH_PROTOCOL * EFIAPI AppendDevicePath ( IN CONST EFI_DEVICE_PATH_PROTOCOL *FirstDevicePath, OPTIONAL IN CONST EFI_DEVICE_PATH_PROTOCOL *SecondDevicePath OPTIONAL ) { UINTN Size; UINTN Size1; UINTN Size2; EFI_DEVICE_PATH_PROTOCOL *NewDevicePath; EFI_DEVICE_PATH_PROTOCOL *DevicePath2; // // If there's only 1 path, just duplicate it. // if (FirstDevicePath == NULL) { return DuplicateDevicePath ((SecondDevicePath != NULL) ? SecondDevicePath : &mUefiDevicePathLibEndDevicePath); } if (SecondDevicePath == NULL) { return DuplicateDevicePath (FirstDevicePath); } if (!IsDevicePathValid (FirstDevicePath, 0) || !IsDevicePathValid (SecondDevicePath, 0)) { return NULL; } // // Allocate space for the combined device path. It only has one end node of // length EFI_DEVICE_PATH_PROTOCOL. // Size1 = GetDevicePathSize (FirstDevicePath); Size2 = GetDevicePathSize (SecondDevicePath); Size = Size1 + Size2 - END_DEVICE_PATH_LENGTH; NewDevicePath = AllocatePool (Size); if (NewDevicePath != NULL) { NewDevicePath = CopyMem (NewDevicePath, FirstDevicePath, Size1); // // Over write FirstDevicePath EndNode and do the copy // DevicePath2 = (EFI_DEVICE_PATH_PROTOCOL *) ((CHAR8 *) NewDevicePath + (Size1 - END_DEVICE_PATH_LENGTH)); CopyMem (DevicePath2, SecondDevicePath, Size2); } return NewDevicePath; } /** Creates a new path by appending the device node to the device path. This function creates a new device path by appending a copy of the device node specified by DevicePathNode to a copy of the device path specified by DevicePath in an allocated buffer. The end-of-device-path device node is moved after the end of the appended device node. If DevicePathNode is NULL then a copy of DevicePath is returned. 
If DevicePath is NULL then a copy of DevicePathNode, followed by an end-of-device path device node is returned. If both DevicePathNode and DevicePath are NULL then a copy of an end-of-device-path device node is returned. If there is not enough memory to allocate space for the new device path, then NULL is returned. The memory is allocated from EFI boot services memory. It is the responsibility of the caller to free the memory allocated. @param DevicePath A pointer to a device path data structure. @param DevicePathNode A pointer to a single device path node. @retval NULL If there is not enough memory for the new device path. @retval Others A pointer to the new device path if success. A copy of DevicePathNode followed by an end-of-device-path node if both FirstDevicePath and SecondDevicePath are NULL. A copy of an end-of-device-path node if both FirstDevicePath and SecondDevicePath are NULL. **/ EFI_DEVICE_PATH_PROTOCOL * EFIAPI AppendDevicePathNode ( IN CONST EFI_DEVICE_PATH_PROTOCOL *DevicePath, OPTIONAL IN CONST EFI_DEVICE_PATH_PROTOCOL *DevicePathNode OPTIONAL ) { EFI_DEVICE_PATH_PROTOCOL *TempDevicePath; EFI_DEVICE_PATH_PROTOCOL *NextNode; EFI_DEVICE_PATH_PROTOCOL *NewDevicePath; UINTN NodeLength; if (DevicePathNode == NULL) { return DuplicateDevicePath ((DevicePath != NULL) ? DevicePath : &mUefiDevicePathLibEndDevicePath); } // // Build a Node that has a terminator on it // NodeLength = DevicePathNodeLength (DevicePathNode); TempDevicePath = AllocatePool (NodeLength + END_DEVICE_PATH_LENGTH); if (TempDevicePath == NULL) { return NULL; } TempDevicePath = CopyMem (TempDevicePath, DevicePathNode, NodeLength); // // Add and end device path node to convert Node to device path // NextNode = NextDevicePathNode (TempDevicePath); SetDevicePathEndNode (NextNode); // // Append device paths // NewDevicePath = AppendDevicePath (DevicePath, TempDevicePath); FreePool (TempDevicePath); return NewDevicePath; } Index: stable/11/lib/msun/src/catrig.c =================================================================== --- stable/11/lib/msun/src/catrig.c (revision 330445) +++ stable/11/lib/msun/src/catrig.c (revision 330446) @@ -1,653 +1,653 @@ /*- * Copyright (c) 2012 Stephen Montgomery-Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include "math.h" #include "math_private.h" #undef isinf #define isinf(x) (fabs(x) == INFINITY) #undef isnan #define isnan(x) ((x) != (x)) #define raise_inexact() do { volatile float junk __unused = 1 + tiny; } while(0) #undef signbit #define signbit(x) (__builtin_signbit(x)) /* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */ static const double A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better */ B_crossover = 0.6417, /* suggested by Hull et al */ FOUR_SQRT_MIN = 0x1p-509, /* >= 4 * sqrt(DBL_MIN) */ QUARTER_SQRT_MAX = 0x1p509, /* <= sqrt(DBL_MAX) / 4 */ m_e = 2.7182818284590452e0, /* 0x15bf0a8b145769.0p-51 */ m_ln2 = 6.9314718055994531e-1, /* 0x162e42fefa39ef.0p-53 */ pio2_hi = 1.5707963267948966e0, /* 0x1921fb54442d18.0p-52 */ RECIP_EPSILON = 1 / DBL_EPSILON, SQRT_3_EPSILON = 2.5809568279517849e-8, /* 0x1bb67ae8584caa.0p-78 */ SQRT_6_EPSILON = 3.6500241499888571e-8, /* 0x13988e1409212e.0p-77 */ SQRT_MIN = 0x1p-511; /* >= sqrt(DBL_MIN) */ static const volatile double pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */ static const volatile float tiny = 0x1p-100; static double complex clog_for_large_values(double complex z); /* * Testing indicates that all these functions are accurate up to 4 ULP. * The functions casin(h) and cacos(h) are about 2.5 times slower than asinh. * The functions catan(h) are a little under 2 times slower than atanh. * * The code for casinh, casin, cacos, and cacosh comes first. The code is * rather complicated, and the four functions are highly interdependent. * * The code for catanh and catan comes at the end. It is much simpler than * the other functions, and the code for these can be disconnected from the * rest of the code. */ /* * ================================ * | casinh, casin, cacos, cacosh | * ================================ */ /* * The algorithm is very close to that in "Implementing the complex arcsine * and arccosine functions using exception handling" by T. E. Hull, Thomas F. * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, * http://dl.acm.org/citation.cfm?id=275324. * * Throughout we use the convention z = x + I*y. * * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) * where * A = (|z+I| + |z-I|) / 2 * B = (|z+I| - |z-I|) / 2 = y/A * * These formulas become numerically unstable: * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that * is, Re(casinh(z)) is close to 0); * (b) for Im(casinh(z)) when z is close to either of the intervals * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is * close to PI/2). * * These numerical problems are overcome by defining * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 * Then if A < A_crossover, we use * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) * A-1 = f(x, 1+y) + f(x, 1-y) * and if B > B_crossover, we use * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) * A-y = f(x, y+1) + f(x, y-1) * where without loss of generality we have assumed that x and y are * non-negative. * * Much of the difficulty comes because the intermediate computations may * produce overflows or underflows. This is dealt with in the paper by Hull * et al by using exception handling. We do this by detecting when * computations risk underflow or overflow. The hardest part is handling the * underflows when computing f(a, b). 
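 * (To see that the two forms of f agree, multiply (hypot(a,b) - b)/2
 * by (hypot(a,b) + b)/(hypot(a,b) + b): the numerator becomes
 * hypot(a,b)^2 - b^2 = a^2, giving a*a / (hypot(a,b) + b) / 2. The
 * second form avoids the cancellation in hypot(a,b) - b when b > 0
 * and a is small relative to b.)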
* * Note that the function f(a, b) does not appear explicitly in the paper by * Hull et al, but the idea may be found on pages 308 and 309. Introducing the * function f(a, b) allows us to concentrate many of the clever tricks in this * paper into one function. */ /* * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. * Pass hypot(a, b) as the third argument. */ static inline double f(double a, double b, double hypot_a_b) { if (b < 0) return ((hypot_a_b - b) / 2); if (b == 0) return (a / 2); return (a * a / (hypot_a_b + b) / 2); } /* * All the hard work is contained in this function. * x and y are assumed positive or zero, and less than RECIP_EPSILON. * Upon return: * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). * B_is_usable is set to 1 if the value of B is usable. * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. * If returning sqrt_A2my2 has potential to result in an underflow, it is * rescaled, and new_y is similarly rescaled. */ static inline void do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, double *sqrt_A2my2, double *new_y) { double R, S, A; /* A, B, R, and S are as in Hull et al. */ double Am1, Amy; /* A-1, A-y. */ R = hypot(x, y + 1); /* |z+I| */ S = hypot(x, y - 1); /* |z-I| */ /* A = (|z+I| + |z-I|) / 2 */ A = (R + S) / 2; /* * Mathematically A >= 1. There is a small chance that this will not * be so because of rounding errors. So we will make certain it is * so. */ if (A < 1) A = 1; if (A < A_crossover) { /* * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). * rx = log1p(Am1 + sqrt(Am1*(A+1))) */ if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) { /* * fp is of order x^2, and fm = x/2. * A = 1 (inexactly). */ *rx = sqrt(x); } else if (x >= DBL_EPSILON * fabs(y - 1)) { /* * Underflow will not occur because * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN */ Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); *rx = log1p(Am1 + sqrt(Am1 * (A + 1))); } else if (y < 1) { /* * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and * A = 1 (inexactly). */ *rx = x / sqrt((1 - y) * (1 + y)); } else { /* if (y > 1) */ /* * A-1 = y-1 (inexactly). */ *rx = log1p((y - 1) + sqrt((y - 1) * (y + 1))); } } else { *rx = log(A + sqrt(A * A - 1)); } *new_y = y; if (y < FOUR_SQRT_MIN) { /* * Avoid a possible underflow caused by y/A. For casinh this * would be legitimate, but will be picked up by invoking atan2 * later on. For cacos this would not be legitimate. */ *B_is_usable = 0; *sqrt_A2my2 = A * (2 / DBL_EPSILON); *new_y = y * (2 / DBL_EPSILON); return; } /* B = (|z+I| - |z-I|) / 2 = y/A */ *B = y / A; *B_is_usable = 1; if (*B > B_crossover) { *B_is_usable = 0; /* * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). * sqrt_A2my2 = sqrt(Amy*(A+y)) */ if (y == 1 && x < DBL_EPSILON / 128) { /* * fp is of order x^2, and fm = x/2. * A = 1 (inexactly). */ *sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2); } else if (x >= DBL_EPSILON * fabs(y - 1)) { /* * Underflow will not occur because * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN * and * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN */ Amy = f(x, y + 1, R) + f(x, y - 1, S); *sqrt_A2my2 = sqrt(Amy * (A + y)); } else if (y > 1) { /* * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and * A = y (inexactly). * * y < RECIP_EPSILON. So the following * scaling should avoid any underflow problems. */ *sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y / sqrt((y + 1) * (y - 1)); *new_y = y * (4 / DBL_EPSILON / DBL_EPSILON); } else { /* if (y < 1) */ /* * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and * A = 1 (inexactly). 
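 * (Hence A*A - y*y collapses, up to rounding, to 1 - y*y =
 * (1 - y)*(1 + y), which is exactly the expression used below.)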
*/ *sqrt_A2my2 = sqrt((1 - y) * (1 + y)); } } } /* * casinh(z) = z + O(z^3) as z -> 0 * * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity * The above formula works for the imaginary part as well, because * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) * as z -> infinity, uniformly in y */ double complex casinh(double complex z) { double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; int B_is_usable; double complex w; x = creal(z); y = cimag(z); ax = fabs(x); ay = fabs(y); if (isnan(x) || isnan(y)) { /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ if (isinf(x)) return (CMPLX(x, y + y)); /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ if (isinf(y)) return (CMPLX(y, x + x)); /* casinh(NaN + I*0) = NaN + I*0 */ if (y == 0) return (CMPLX(x + x, y)); /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { /* clog...() will raise inexact unless x or y is infinite. */ if (signbit(x) == 0) w = clog_for_large_values(z) + m_ln2; else w = clog_for_large_values(-z) + m_ln2; return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y))); } /* Avoid spuriously raising inexact for z = 0. */ if (x == 0 && y == 0) return (z); /* All remaining cases are inexact. */ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) return (z); do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); if (B_is_usable) ry = asin(B); else ry = atan2(new_y, sqrt_A2my2); return (CMPLX(copysign(rx, x), copysign(ry, y))); } /* * casin(z) = reverse(casinh(reverse(z))) * where reverse(x + I*y) = y + I*x = I*conj(z). */ double complex casin(double complex z) { double complex w = casinh(CMPLX(cimag(z), creal(z))); return (CMPLX(cimag(w), creal(w))); } /* * cacos(z) = PI/2 - casin(z) * but do the computation carefully so cacos(z) is accurate when z is * close to 1. * * cacos(z) = PI/2 - z + O(z^3) as z -> 0 * * cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity * The above formula works for the real part as well, because * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3) * as z -> infinity, uniformly in y */ double complex cacos(double complex z) { double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; int sx, sy; int B_is_usable; double complex w; x = creal(z); y = cimag(z); sx = signbit(x); sy = signbit(y); ax = fabs(x); ay = fabs(y); if (isnan(x) || isnan(y)) { /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ if (isinf(x)) return (CMPLX(y + y, -INFINITY)); /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ if (isinf(y)) return (CMPLX(x + x, -y)); /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ if (x == 0) return (CMPLX(pio2_hi + pio2_lo, y + y)); /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { /* clog...() will raise inexact unless x or y is infinite. */ w = clog_for_large_values(z); rx = fabs(cimag(w)); ry = creal(w) + m_ln2; if (sy == 0) ry = -ry; return (CMPLX(rx, ry)); } /* Avoid spuriously raising inexact for z = 1. */ if (x == 1 && y == 0) return (CMPLX(0, -y)); /* All remaining cases are inexact. 
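 * (For such tiny z the expansion cacos(z) = PI/2 - z + O(z^3) given
 * above applies; the cubic term is far below the rounding error, so
 * returning PI/2 - z, computed with the split constant
 * pio2_hi + pio2_lo, is sufficient.)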
*/ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) return (CMPLX(pio2_hi - (x - pio2_lo), -y)); do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); if (B_is_usable) { if (sx == 0) rx = acos(B); else rx = acos(-B); } else { if (sx == 0) rx = atan2(sqrt_A2mx2, new_x); else rx = atan2(sqrt_A2mx2, -new_x); } if (sy == 0) ry = -ry; return (CMPLX(rx, ry)); } /* * cacosh(z) = I*cacos(z) or -I*cacos(z) * where the sign is chosen so Re(cacosh(z)) >= 0. */ double complex cacosh(double complex z) { double complex w; double rx, ry; w = cacos(z); rx = creal(w); ry = cimag(w); /* cacosh(NaN + I*NaN) = NaN + I*NaN */ if (isnan(rx) && isnan(ry)) return (CMPLX(ry, rx)); /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ if (isnan(rx)) return (CMPLX(fabs(ry), rx)); /* cacosh(0 + I*NaN) = NaN + I*NaN */ if (isnan(ry)) return (CMPLX(ry, ry)); return (CMPLX(fabs(ry), copysign(rx, cimag(z)))); } /* * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. */ static double complex clog_for_large_values(double complex z) { double x, y; double ax, ay, t; x = creal(z); y = cimag(z); ax = fabs(x); ay = fabs(y); if (ax < ay) { t = ax; ax = ay; ay = t; } /* * Avoid overflow in hypot() when x and y are both very large. * Divide x and y by E, and then add 1 to the logarithm. This * depends on E being larger than sqrt(2), since the return value of * hypot cannot overflow if neither argument is greater in magnitude * than 1/sqrt(2) of the maximum value of the return type. Likewise * this determines the necessary threshold for using this method * (however, actually use 1/2 instead as it is simpler). * * Dividing by E causes an insignificant loss of accuracy; however * this method is still poor since it is uneccessarily slow. */ if (ax > DBL_MAX / 2) return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); /* * Avoid overflow when x or y is large. Avoid underflow when x or * y is small. */ if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) return (CMPLX(log(hypot(x, y)), atan2(y, x))); return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x))); } /* * ================= * | catanh, catan | * ================= */ /* * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). * Assumes x*x and y*y will not overflow. * Assumes x and y are finite. * Assumes y is non-negative. * Assumes fabs(x) >= DBL_EPSILON. */ static inline double sum_squares(double x, double y) { /* Avoid underflow when y is small. */ if (y < SQRT_MIN) return (x * x); return (x * x + y * y); } /* * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). * Assumes x and y are not NaN, and one of x and y is larger than * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use * the code creal(1/z), because the imaginary part may produce an unwanted * underflow. * This is only called in a context where inexact is always raised before * the call, so no effort is made to avoid or force inexact. */ static inline double real_part_reciprocal(double x, double y) { double scale; uint32_t hx, hy; int32_t ix, iy; /* * This code is inspired by the C99 document n1124.pdf, Section G.5.1, * example 2. */ GET_HIGH_WORD(hx, x); ix = hx & 0x7ff00000; GET_HIGH_WORD(hy, y); iy = hy & 0x7ff00000; #define BIAS (DBL_MAX_EXP - 1) /* XXX more guard digits are useful iff there is extra precision. 
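 * (ix and iy hold the biased exponent fields still shifted up by 20
 * bits, so the integer test ix - iy >= CUTOFF << 20 below asks
 * whether ilogb(x) - ilogb(y) >= CUTOFF with no floating-point work;
 * when x dominates y by that margin, y*y cannot affect x*x + y*y and
 * 1/x is returned directly.)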
*/ #define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ if (ix - iy >= CUTOFF << 20 || isinf(x)) return (1 / x); /* +-Inf -> +-0 is special */ if (iy - ix >= CUTOFF << 20) return (x / y / y); /* should avoid double div, but hard */ if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) return (x / (x * x + y * y)); scale = 1; SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ x *= scale; y *= scale; return (x / (x * x + y * y) * scale); } /* * catanh(z) = log((1+z)/(1-z)) / 2 * = log1p(4*x / |z-1|^2) / 4 * + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2 * * catanh(z) = z + O(z^3) as z -> 0 * * catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity * The above formula works for the real part as well, because * Re(catanh(z)) = x/|z|^2 + O(x/z^4) * as z -> infinity, uniformly in x */ double complex catanh(double complex z) { double x, y, ax, ay, rx, ry; x = creal(z); y = cimag(z); ax = fabs(x); ay = fabs(y); /* This helps handle many cases. */ if (y == 0 && ax <= 1) return (CMPLX(atanh(x), y)); /* To ensure the same accuracy as atan(), and to filter out z = 0. */ if (x == 0) return (CMPLX(x, atan(y))); if (isnan(x) || isnan(y)) { /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ if (isinf(x)) return (CMPLX(copysign(0, x), y + y)); /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ if (isinf(y)) return (CMPLX(copysign(0, x), copysign(pio2_hi + pio2_lo, y))); /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return (CMPLX(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) return (CMPLX(real_part_reciprocal(x, y), copysign(pio2_hi + pio2_lo, y))); if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { /* * z = 0 was filtered out above. All other cases must raise - * inexact, but this is the only only that needs to do it + * inexact, but this is the only case that needs to do it * explicitly. */ raise_inexact(); return (z); } if (ax == 1 && ay < DBL_EPSILON) rx = (m_ln2 - log(ay)) / 2; else rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4; if (ax == 1) ry = atan2(2, -ay) / 2; else if (ay < DBL_EPSILON) ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2; else ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; return (CMPLX(copysign(rx, x), copysign(ry, y))); } /* * catan(z) = reverse(catanh(reverse(z))) * where reverse(x + I*y) = y + I*x = I*conj(z). */ double complex catan(double complex z) { double complex w = catanh(CMPLX(cimag(z), creal(z))); return (CMPLX(cimag(w), creal(w))); } #if LDBL_MANT_DIG == 53 __weak_reference(cacosh, cacoshl); __weak_reference(cacos, cacosl); __weak_reference(casinh, casinhl); __weak_reference(casin, casinl); __weak_reference(catanh, catanhl); __weak_reference(catan, catanl); #endif Index: stable/11/sys/amd64/amd64/support.S =================================================================== --- stable/11/sys/amd64/amd64/support.S (revision 330445) +++ stable/11/sys/amd64/amd64/support.S (revision 330446) @@ -1,891 +1,891 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include #include "assym.s" .text /* * bcopy family * void bzero(void *buf, u_int len) */ /* done */ ENTRY(bzero) PUSH_FRAME_POINTER movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx rep stosq movq %rsi,%rcx andq $7,%rcx rep stosb POP_FRAME_POINTER ret END(bzero) /* Address: %rdi */ ENTRY(pagezero) PUSH_FRAME_POINTER movq $PAGE_SIZE/8,%rcx xorl %eax,%eax rep stosq POP_FRAME_POINTER ret END(pagezero) ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx repe cmpsq jne 1f movq %rdx,%rcx andq $7,%rcx repe cmpsb 1: setne %al movsbl %al,%eax POP_FRAME_POINTER ret END(bcmp) /* * bcopy(src, dst, cnt) * rdi, rsi, rdx * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) PUSH_FRAME_POINTER xchgq %rsi,%rdi movq %rdx,%rcx movq %rdi,%rax subq %rsi,%rax cmpq %rcx,%rax /* overlapping && src < dst? */ jb 1f shrq $3,%rcx /* copy by 64-bit words */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ std rep movsb movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? 
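 * (the 0..7 tail bytes that did not fit a full quadword; as a rough
 * C sketch of the whole copy, with d/s as uint64_t pointers over the
 * same buffers and db/sb as byte pointers:
 *	for (i = 0; i < n >> 3; i++)
 *		d[i] = s[i];
 *	for (i = n & ~(size_t)7; i < n; i++)
 *		db[i] = sb[i];
 * illustration only, the two rep-prefixed string instructions above
 * and below do the real work)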
*/ rep movsb POP_FRAME_POINTER ret END(memcpy) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rax movq %rax,%rdx subq %rax,%rdi subq %rax,%rsi 1: prefetchnta (%rdi,%rax) addq $64,%rax jne 1b 2: movq (%rdi,%rdx),%rax movnti %rax,(%rsi,%rdx) movq 8(%rdi,%rdx),%rax movnti %rax,8(%rsi,%rdx) movq 16(%rdi,%rdx),%rax movnti %rax,16(%rsi,%rdx) movq 24(%rdi,%rdx),%rax movnti %rax,24(%rsi,%rdx) addq $32,%rdx jne 2b sfence POP_FRAME_POINTER ret END(pagecopy) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) * %rdi, %rsi, %rdx */ ENTRY(copyout) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. This check is essential * because it prevents usermode from writing into the kernel. We do * not verify anywhere else that the user did not specify a rogue * address. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyout_fault xchgq %rdi,%rsi /* bcopy(%rsi, %rdi, %rdx) */ movq %rdx,%rcx shrq $3,%rcx rep movsq movb %dl,%cl andb $7,%cl rep movsb done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyout_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyout) /* * copyin(from_user, to_kernel, len) * %rdi, %rsi, %rdx */ ENTRY(copyin) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyin /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copyin_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyin_fault xchgq %rdi,%rsi movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ rep movsq movb %al,%cl andb $7,%cl /* copy remaining bytes */ rep movsb done_copyin: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyin_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. 
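 * Seen from C, the sequence so far is roughly
 * "eax = __sync_val_compare_and_swap(dst, old, new)", with the
 * pcb_onfault handler standing in for an error return on a bad
 * user address.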
If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. * addr = %rdi, valp = %rsi */ ALTENTRY(fueword64) ENTRY(fueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword64) END(fueword) ENTRY(fueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movq $-1,%rax ret END(suswintr) END(fuswintr) ENTRY(fuword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16) ENTRY(fubyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte) ALIGN_TEXT fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. 
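 * Each store primes pcb_onfault before touching user space; in
 * outline (illustrative pseudo-C of the assembly below, not a real
 * kernel API):
 *	curpcb->pcb_onfault = fusufault;
 *	if (addr > VM_MAXUSER_ADDRESS - sizeof(val))
 *		goto fusufault;
 *	*(word *)addr = val;
 *	curpcb->pcb_onfault = NULL;
 *	return (0);
 * with fusufault clearing pcb_onfault and returning -1.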
* addr = %rdi, value = %rsi */ ALTENTRY(suword64) ENTRY(suword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword64) END(suword) ENTRY(suword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32) ENTRY(suword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16) ENTRY(subyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx * - * copy a string from from to to, stop when a 0 character is reached. + * copy a string from 'from' to 'to', stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(CURPCB),%rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jae 1f movq %rax,%rdx movq %rax,%r8 1: incq %rdx 2: decq %rdx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x 3: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt 4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 jz 1f subq %rdx,%r8 movq %r8,(%r9) 1: POP_FRAME_POINTER ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ xchgq %rdi,%rsi incq %rdx 1: decq %rdx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp 6f 4: /* rdx is zero -- return ENAMETOOLONG */ movq $ENAMETOOLONG,%rax 6: testq %rcx,%rcx jz 7f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 7: POP_FRAME_POINTER ret END(copystr) /* * Handling of special amd64 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq 
$KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns hi byte in edx, lo in %eax */ salq $32,%rdx /* sign-shift %rdx left */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts hi byte in edx, lo in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret /* * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); * Invalidates address space addressed by ucr3, then returns to kcr3. * Done in assembler to ensure no other memory accesses happen while * on ucr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invalidate) pushfq cli movq %rdi,%cr3 /* to user page table */ movq %rsi,%cr3 /* back to kernel */ popfq retq /* * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va); * Invalidates virtual address va in address space ucr3, then returns to kcr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invlpg) pushfq cli movq %rdi,%cr3 /* to user page table */ invlpg (%rdx) movq %rsi,%cr3 /* back to kernel */ popfq retq /* * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva, * vm_offset_t eva); * Invalidates virtual addresses between sva and eva in address space ucr3, * then returns to kcr3. 
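 * In outline (illustrative pseudo-C; the assembly keeps interrupts
 * disabled and performs no memory accesses while on ucr3):
 *	save_flags(); disable_intr();
 *	load_cr3(ucr3);
 *	for (va = sva; va < eva; va += PAGE_SIZE)
 *		invlpg(va);
 *	load_cr3(kcr3);
 *	restore_flags();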
*/ ALIGN_TEXT ENTRY(pmap_pti_pcid_invlrng) pushfq cli movq %rdi,%cr3 /* to user page table */ 1: invlpg (%rdx) addq $PAGE_SIZE,%rdx cmpq %rdx,%rcx ja 1b movq %rsi,%cr3 /* back to kernel */ popfq retq .altmacro .macro ibrs_seq_label l handle_ibrs_\l: .endm .macro ibrs_call_label l call handle_ibrs_\l .endm .macro ibrs_seq count ll=1 .rept \count ibrs_call_label %(ll) nop ibrs_seq_label %(ll) addq $8,%rsp ll=ll+1 .endr .endm /* all callers already saved %rax, %rdx, and %rcx */ ENTRY(handle_ibrs_entry) cmpb $0,hw_ibrs_active(%rip) je 1f movl $MSR_IA32_SPEC_CTRL,%ecx movl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax movl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx wrmsr movb $1,PCPU(IBPB_SET) testl $CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip) jne 1f ibrs_seq 32 1: ret END(handle_ibrs_entry) ENTRY(handle_ibrs_exit) cmpb $0,PCPU(IBPB_SET) je 1f movl $MSR_IA32_SPEC_CTRL,%ecx xorl %eax,%eax xorl %edx,%edx wrmsr movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit) /* registers-neutral version, but needs stack */ ENTRY(handle_ibrs_exit_rs) cmpb $0,PCPU(IBPB_SET) je 1f pushq %rax pushq %rdx pushq %rcx movl $MSR_IA32_SPEC_CTRL,%ecx xorl %eax,%eax xorl %edx,%edx wrmsr popq %rcx popq %rdx popq %rax movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit_rs) .noaltmacro Index: stable/11/sys/dev/ath/if_ath.c =================================================================== --- stable/11/sys/dev/ath/if_ath.c (revision 330445) +++ stable/11/sys/dev/ath/if_ath.c (revision 330446) @@ -1,6658 +1,6657 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. 
*/ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ATH_TX99_DIAG #include #endif #ifdef ATH_DEBUG_ALQ #include #endif /* * Only enable this if you're working on PS-POLL support. */ #define ATH_SW_PSQ /* * ATH_BCBUF determines the number of vap's that can transmit * beacons and also (currently) the number of vap's that can * have unique mac addresses/bssid. When staggering beacons * 4 is probably a good max as otherwise the beacons become * very closely spaced and there is limited time for cab q traffic * to go out. You can burst beacons instead but that is not good * for stations in power save and at some point you really want * another radio (and channel). * * The limit on the number of mac addresses is tied to our use of * the U/L bit and tracking addresses in a byte; it would be * worthwhile to allow more for applications like proxy sta. */ CTASSERT(ATH_BCBUF <= 8); static struct ieee80211vap *ath_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ath_vap_delete(struct ieee80211vap *); static int ath_init(struct ath_softc *); static void ath_stop(struct ath_softc *); static int ath_reset_vap(struct ieee80211vap *, u_long); static int ath_transmit(struct ieee80211com *, struct mbuf *); static int ath_media_change(struct ifnet *); static void ath_watchdog(void *); static void ath_parent(struct ieee80211com *); static void ath_fatal_proc(void *, int); static void ath_bmiss_vap(struct ieee80211vap *); static void ath_bmiss_proc(void *, int); static void ath_key_update_begin(struct ieee80211vap *); static void ath_key_update_end(struct ieee80211vap *); static void ath_update_mcast_hw(struct ath_softc *); static void ath_update_mcast(struct ieee80211com *); static void ath_update_promisc(struct ieee80211com *); static void ath_updateslot(struct ieee80211com *); static void ath_bstuck_proc(void *, int); static void ath_reset_proc(void *, int); static int ath_desc_alloc(struct ath_softc *); static void ath_desc_free(struct ath_softc *); static struct ieee80211_node *ath_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void ath_node_cleanup(struct ieee80211_node *); static void ath_node_free(struct ieee80211_node *); static void ath_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void ath_txq_init(struct ath_softc *sc, struct ath_txq *, int); static struct ath_txq *ath_txq_setup(struct ath_softc*, int qtype, int subtype); static int ath_tx_setup(struct ath_softc *, int, int); static void ath_tx_cleanupq(struct ath_softc *, struct ath_txq *); static void ath_tx_cleanup(struct ath_softc *); static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched); static void ath_tx_proc_q0(void *, int); static void ath_tx_proc_q0123(void *, int); static void ath_tx_proc(void 
*, int); static void ath_txq_sched_tasklet(void *, int); static int ath_chan_set(struct ath_softc *, struct ieee80211_channel *); static void ath_chan_change(struct ath_softc *, struct ieee80211_channel *); static void ath_scan_start(struct ieee80211com *); static void ath_scan_end(struct ieee80211com *); static void ath_set_channel(struct ieee80211com *); #ifdef ATH_ENABLE_11N static void ath_update_chw(struct ieee80211com *); #endif /* ATH_ENABLE_11N */ static void ath_calibrate(void *); static int ath_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void ath_setup_stationkey(struct ieee80211_node *); static void ath_newassoc(struct ieee80211_node *, int); static int ath_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void ath_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int ath_getchannels(struct ath_softc *); static int ath_rate_setup(struct ath_softc *, u_int mode); static void ath_setcurmode(struct ath_softc *, enum ieee80211_phymode); static void ath_announce(struct ath_softc *); static void ath_dfs_tasklet(void *, int); static void ath_node_powersave(struct ieee80211_node *, int); static int ath_node_set_tim(struct ieee80211_node *, int); static void ath_node_recv_pspoll(struct ieee80211_node *, struct mbuf *); #ifdef IEEE80211_SUPPORT_TDMA #include #endif SYSCTL_DECL(_hw_ath); /* XXX validate sysctl values */ static int ath_longcalinterval = 30; /* long cals every 30 secs */ SYSCTL_INT(_hw_ath, OID_AUTO, longcal, CTLFLAG_RW, &ath_longcalinterval, 0, "long chip calibration interval (secs)"); static int ath_shortcalinterval = 100; /* short cals every 100 ms */ SYSCTL_INT(_hw_ath, OID_AUTO, shortcal, CTLFLAG_RW, &ath_shortcalinterval, 0, "short chip calibration interval (msecs)"); static int ath_resetcalinterval = 20*60; /* reset cal state 20 mins */ SYSCTL_INT(_hw_ath, OID_AUTO, resetcal, CTLFLAG_RW, &ath_resetcalinterval, 0, "reset chip calibration results (secs)"); static int ath_anicalinterval = 100; /* ANI calibration - 100 msec */ SYSCTL_INT(_hw_ath, OID_AUTO, anical, CTLFLAG_RW, &ath_anicalinterval, 0, "ANI calibration (msecs)"); int ath_rxbuf = ATH_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &ath_rxbuf, 0, "rx buffers allocated"); int ath_txbuf = ATH_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, txbuf, CTLFLAG_RWTUN, &ath_txbuf, 0, "tx buffers allocated"); int ath_txbuf_mgmt = ATH_MGMT_TXBUF; /* # mgmt tx buffers to allocate */ SYSCTL_INT(_hw_ath, OID_AUTO, txbuf_mgmt, CTLFLAG_RWTUN, &ath_txbuf_mgmt, 0, "tx (mgmt) buffers allocated"); int ath_bstuck_threshold = 4; /* max missed beacons */ SYSCTL_INT(_hw_ath, OID_AUTO, bstuck, CTLFLAG_RW, &ath_bstuck_threshold, 0, "max missed beacon xmits before chip reset"); MALLOC_DEFINE(M_ATHDEV, "athdev", "ath driver dma buffers"); void ath_legacy_attach_comp_func(struct ath_softc *sc) { /* * Special case certain configurations. Note the * CAB queue is handled by these specially so don't * include them when checking the txq setup mask. */ switch (sc->sc_txqsetup &~ (1<sc_cabq->axq_qnum)) { case 0x01: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc_q0, sc); break; case 0x0f: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc_q0123, sc); break; default: TASK_INIT(&sc->sc_txtask, 0, ath_tx_proc, sc); break; } } /* * Set the target power mode. 
* * If this is called during a point in time where * the hardware is being programmed elsewhere, it will * simply store it away and update it when all current * uses of the hardware are completed. */ void _ath_power_setpower(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); sc->sc_target_powerstate = power_state; DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_powersave_refcnt); if (sc->sc_powersave_refcnt == 0 && power_state != sc->sc_cur_powerstate) { sc->sc_cur_powerstate = power_state; ath_hal_setpower(sc->sc_ah, power_state); /* * If the NIC is force-awake, then set the * self-gen frame state appropriately. * * If the nic is in network sleep or full-sleep, * we let the above call leave the self-gen * state as "sleep". */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } } /* * Set the current self-generated frames state. * * This is separate from the target power mode. The chip may be * awake but the desired state is "sleep", so frames sent to the * destination has PWRMGT=1 in the 802.11 header. The NIC also * needs to know to set PWRMGT=1 in self-generated frames. */ void _ath_power_set_selfgen(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_target_selfgen_state); sc->sc_target_selfgen_state = power_state; /* * If the NIC is force-awake, then set the power state. * Network-state and full-sleep will already transition it to * mark self-gen frames as sleeping - and we can't * guarantee the NIC is awake to program the self-gen frame * setting anyway. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, power_state); } } /* * Set the hardware power mode and take a reference. * * This doesn't update the target power mode in the driver; * it just updates the hardware power state. * * XXX it should only ever force the hardware awake; it should * never be called to set it asleep. */ void _ath_power_set_power_state(struct ath_softc *sc, int power_state, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n", __func__, file, line, power_state, sc->sc_powersave_refcnt); sc->sc_powersave_refcnt++; if (power_state != sc->sc_cur_powerstate) { ath_hal_setpower(sc->sc_ah, power_state); sc->sc_cur_powerstate = power_state; /* * Adjust the self-gen powerstate if appropriate. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } } /* * Restore the power save mode to what it once was. * * This will decrement the reference counter and once it hits * zero, it'll restore the powersave state. 
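 * The usual calling pattern is:
 *	ATH_LOCK(sc);
 *	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 *	... program the hardware ...
 *	ath_power_restore_power_state(sc);
 *	ATH_UNLOCK(sc);
 * so nested callers merely bump the reference count and only the
 * outermost restore re-programs the chip.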
*/ void _ath_power_restore_power_state(struct ath_softc *sc, const char *file, int line) { ATH_LOCK_ASSERT(sc); DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) refcnt=%d, target state=%d\n", __func__, file, line, sc->sc_powersave_refcnt, sc->sc_target_powerstate); if (sc->sc_powersave_refcnt == 0) device_printf(sc->sc_dev, "%s: refcnt=0?\n", __func__); else sc->sc_powersave_refcnt--; if (sc->sc_powersave_refcnt == 0 && sc->sc_target_powerstate != sc->sc_cur_powerstate) { sc->sc_cur_powerstate = sc->sc_target_powerstate; ath_hal_setpower(sc->sc_ah, sc->sc_target_powerstate); } /* * Adjust the self-gen powerstate if appropriate. */ if (sc->sc_cur_powerstate == HAL_PM_AWAKE && sc->sc_target_selfgen_state != HAL_PM_AWAKE) { ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state); } } /* * Configure the initial HAL configuration values based on bus * specific parameters. * * Some PCI IDs and other information may need tweaking. * * XXX TODO: ath9k and the Atheros HAL only program comm2g_switch_enable * if BT antenna diversity isn't enabled. * * So, let's also figure out how to enable BT diversity for AR9485. */ static void ath_setup_hal_config(struct ath_softc *sc, HAL_OPS_CONFIG *ah_config) { /* XXX TODO: only for PCI devices? */ if (sc->sc_pci_devinfo & (ATH_PCI_CUS198 | ATH_PCI_CUS230)) { ah_config->ath_hal_ext_lna_ctl_gpio = 0x200; /* bit 9 */ ah_config->ath_hal_ext_atten_margin_cfg = AH_TRUE; ah_config->ath_hal_min_gainidx = AH_TRUE; ah_config->ath_hal_ant_ctrl_comm2g_switch_enable = 0x000bbb88; /* XXX low_rssi_thresh */ /* XXX fast_div_bias */ device_printf(sc->sc_dev, "configuring for %s\n", (sc->sc_pci_devinfo & ATH_PCI_CUS198) ? "CUS198" : "CUS230"); } if (sc->sc_pci_devinfo & ATH_PCI_CUS217) device_printf(sc->sc_dev, "CUS217 card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_CUS252) device_printf(sc->sc_dev, "CUS252 card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_AR9565_1ANT) device_printf(sc->sc_dev, "WB335 1-ANT card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_AR9565_2ANT) device_printf(sc->sc_dev, "WB335 2-ANT card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_BT_ANT_DIV) device_printf(sc->sc_dev, "Bluetooth Antenna Diversity card detected\n"); if (sc->sc_pci_devinfo & ATH_PCI_KILLER) device_printf(sc->sc_dev, "Killer Wireless card detected\n"); #if 0 /* * Some WB335 cards do not support antenna diversity. Since * we use a hardcoded value for AR9565 instead of using the * EEPROM/OTP data, remove the combining feature from * the HW capabilities bitmap. */ if (sc->sc_pci_devinfo & (ATH9K_PCI_AR9565_1ANT | ATH9K_PCI_AR9565_2ANT)) { if (!(sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV)) pCap->hw_caps &= ~ATH9K_HW_CAP_ANT_DIV_COMB; } if (sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV) { pCap->hw_caps |= ATH9K_HW_CAP_BT_ANT_DIV; device_printf(sc->sc_dev, "Set BT/WLAN RX diversity capability\n"); } #endif if (sc->sc_pci_devinfo & ATH_PCI_D3_L1_WAR) { ah_config->ath_hal_pcie_waen = 0x0040473b; device_printf(sc->sc_dev, "Enable WAR for ASPM D3/L1\n"); } #if 0 if (sc->sc_pci_devinfo & ATH9K_PCI_NO_PLL_PWRSAVE) { ah->config.no_pll_pwrsave = true; device_printf(sc->sc_dev, "Disable PLL PowerSave\n"); } #endif } /* * Attempt to fetch the MAC address from the kernel environment. * * Returns 0, macaddr in macaddr if successful; -1 otherwise. */ static int ath_fetch_mac_kenv(struct ath_softc *sc, uint8_t *macaddr) { char devid_str[32]; int local_mac = 0; char *local_macstr; /* * Fetch from the kenv rather than using hints. 
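 * For example, a permanent override can be set from loader.conf
 * (which lands in the kenv before the driver attaches):
 *	hint.ath.0.macaddr="00:11:22:33:44:55"
 * The sscanf format used below accepts any single character as the
 * byte separator; the address shown is purely illustrative.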
* * Hints would be nice but the transition to dynamic * hints/kenv doesn't happen early enough for this * to work reliably (eg on anything embedded.) */ snprintf(devid_str, 32, "hint.%s.%d.macaddr", device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); if ((local_macstr = kern_getenv(devid_str)) != NULL) { uint32_t tmpmac[ETHER_ADDR_LEN]; int count; int i; /* Have a MAC address; should use it */ device_printf(sc->sc_dev, "Overriding MAC address from environment: '%s'\n", local_macstr); /* Extract out the MAC address */ count = sscanf(local_macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &tmpmac[0], &tmpmac[1], &tmpmac[2], &tmpmac[3], &tmpmac[4], &tmpmac[5]); if (count == 6) { /* Valid! */ local_mac = 1; for (i = 0; i < ETHER_ADDR_LEN; i++) macaddr[i] = tmpmac[i]; } /* Done! */ freeenv(local_macstr); local_macstr = NULL; } if (local_mac) return (0); return (-1); } #define HAL_MODE_HT20 (HAL_MODE_11NG_HT20 | HAL_MODE_11NA_HT20) #define HAL_MODE_HT40 \ (HAL_MODE_11NG_HT40PLUS | HAL_MODE_11NG_HT40MINUS | \ HAL_MODE_11NA_HT40PLUS | HAL_MODE_11NA_HT40MINUS) int ath_attach(u_int16_t devid, struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = NULL; HAL_STATUS status; int error = 0, i; u_int wmodes; int rx_chainmask, tx_chainmask; HAL_OPS_CONFIG ah_config; DPRINTF(sc, ATH_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* * Configure the initial configuration data. * * This is stuff that may be needed early during attach * rather than done via configuration calls later. */ bzero(&ah_config, sizeof(ah_config)); ath_setup_hal_config(sc, &ah_config); ah = ath_hal_attach(devid, sc, sc->sc_st, sc->sc_sh, sc->sc_eepromdata, &ah_config, &status); if (ah == NULL) { device_printf(sc->sc_dev, "unable to attach hardware; HAL status %u\n", status); error = ENXIO; goto bad; } sc->sc_ah = ah; sc->sc_invalid = 0; /* ready to go, enable interrupt handling */ #ifdef ATH_DEBUG sc->sc_debug = ath_debug; #endif /* * Setup the DMA/EDMA functions based on the current * hardware support. * * This is required before the descriptors are allocated. */ if (ath_hal_hasedma(sc->sc_ah)) { sc->sc_isedma = 1; ath_recv_setup_edma(sc); ath_xmit_setup_edma(sc); } else { ath_recv_setup_legacy(sc); ath_xmit_setup_legacy(sc); } if (ath_hal_hasmybeacon(sc->sc_ah)) { sc->sc_do_mybeacon = 1; } /* * Check if the MAC has multi-rate retry support. * We do this by trying to setup a fake extended * descriptor. MAC's that don't have support will * return false w/o doing anything. MAC's that do * support it will return true w/o doing anything. */ sc->sc_mrretry = ath_hal_setupxtxdesc(ah, NULL, 0,0, 0,0, 0,0); /* * Check if the device has hardware counters for PHY * errors. If so we need to enable the MIB interrupt * so we can act on stat triggers. */ if (ath_hal_hwphycounters(ah)) sc->sc_needmib = 1; /* * Get the hardware key cache size. */ sc->sc_keymax = ath_hal_keycachesize(ah); if (sc->sc_keymax > ATH_KEYMAX) { device_printf(sc->sc_dev, "Warning, using only %u of %u key cache slots\n", ATH_KEYMAX, sc->sc_keymax); sc->sc_keymax = ATH_KEYMAX; } /* * Reset the key cache since some parts do not * reset the contents on initial power up. */ for (i = 0; i < sc->sc_keymax; i++) ath_hal_keyreset(ah, i); /* * Collect the default channel list. */ error = ath_getchannels(sc); if (error != 0) goto bad; /* * Setup rate tables for all potential media types. 
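 * Each ath_rate_setup() call below fetches the HAL rate table for
 * one PHY mode and caches it for later use, in effect (sketch,
 * assuming the usual HAL accessor):
 *	sc->sc_rates[mode] = ath_hal_getratetable(ah, halmode);
 * Modes the hardware cannot support simply leave their entry unset,
 * and ath_setcurmode() later switches between the cached tables.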
*/ ath_rate_setup(sc, IEEE80211_MODE_11A); ath_rate_setup(sc, IEEE80211_MODE_11B); ath_rate_setup(sc, IEEE80211_MODE_11G); ath_rate_setup(sc, IEEE80211_MODE_TURBO_A); ath_rate_setup(sc, IEEE80211_MODE_TURBO_G); ath_rate_setup(sc, IEEE80211_MODE_STURBO_A); ath_rate_setup(sc, IEEE80211_MODE_11NA); ath_rate_setup(sc, IEEE80211_MODE_11NG); ath_rate_setup(sc, IEEE80211_MODE_HALF); ath_rate_setup(sc, IEEE80211_MODE_QUARTER); /* NB: setup here so ath_rate_update is happy */ ath_setcurmode(sc, IEEE80211_MODE_11A); /* * Allocate TX descriptors and populate the lists. */ error = ath_desc_alloc(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate TX descriptors: %d\n", error); goto bad; } error = ath_txdma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate TX descriptors: %d\n", error); goto bad; } /* * Allocate RX descriptors and populate the lists. */ error = ath_rxdma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to allocate RX descriptors: %d\n", error); goto bad; } callout_init_mtx(&sc->sc_cal_ch, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_wd_ch, &sc->sc_mtx, 0); ATH_TXBUF_LOCK_INIT(sc); sc->sc_tq = taskqueue_create("ath_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, sc->sc_rx.recv_tasklet, sc); TASK_INIT(&sc->sc_bmisstask, 0, ath_bmiss_proc, sc); TASK_INIT(&sc->sc_bstucktask,0, ath_bstuck_proc, sc); TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc); TASK_INIT(&sc->sc_txqtask, 0, ath_txq_sched_tasklet, sc); TASK_INIT(&sc->sc_fataltask, 0, ath_fatal_proc, sc); /* * Allocate hardware transmit queues: one queue for * beacon frames and one data queue for each QoS * priority. Note that the hal handles resetting * these queues at the needed time. * * XXX PS-Poll */ sc->sc_bhalq = ath_beaconq_setup(sc); if (sc->sc_bhalq == (u_int) -1) { device_printf(sc->sc_dev, "unable to setup a beacon xmit queue!\n"); error = EIO; goto bad2; } sc->sc_cabq = ath_txq_setup(sc, HAL_TX_QUEUE_CAB, 0); if (sc->sc_cabq == NULL) { device_printf(sc->sc_dev, "unable to setup CAB xmit queue!\n"); error = EIO; goto bad2; } /* NB: insure BK queue is the lowest priority h/w queue */ if (!ath_tx_setup(sc, WME_AC_BK, HAL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!ath_tx_setup(sc, WME_AC_BE, HAL_WME_AC_BE) || !ath_tx_setup(sc, WME_AC_VI, HAL_WME_AC_VI) || !ath_tx_setup(sc, WME_AC_VO, HAL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. * We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. */ if (sc->sc_ac2q[WME_AC_VI] != NULL) ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } /* * Attach the TX completion function. * * The non-EDMA chips may have some special case optimisations; * this method gives everyone a chance to attach cleanly. */ sc->sc_tx.xmit_attach_comp_func(sc); /* * Setup rate control. Some rate control modules * call back to change the anntena state so expose * the necessary entry points. * XXX maybe belongs in struct ath_ratectrl? 
*/ sc->sc_setdefantenna = ath_setdefantenna; sc->sc_rc = ath_rate_attach(sc); if (sc->sc_rc == NULL) { error = EIO; goto bad2; } /* Attach DFS module */ if (! ath_dfs_attach(sc)) { device_printf(sc->sc_dev, "%s: unable to attach DFS\n", __func__); error = EIO; goto bad2; } /* Attach spectral module */ if (ath_spectral_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach spectral\n", __func__); error = EIO; goto bad2; } /* Attach bluetooth coexistence module */ if (ath_btcoex_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach bluetooth coexistence\n", __func__); error = EIO; goto bad2; } /* Attach LNA diversity module */ if (ath_lna_div_attach(sc) < 0) { device_printf(sc->sc_dev, "%s: unable to attach LNA diversity\n", __func__); error = EIO; goto bad2; } /* Start DFS processing tasklet */ TASK_INIT(&sc->sc_dfstask, 0, ath_dfs_tasklet, sc); /* Configure LED state */ sc->sc_blinking = 0; sc->sc_ledstate = 1; sc->sc_ledon = 0; /* low true */ sc->sc_ledidle = (2700*hz)/1000; /* 2.7sec */ callout_init(&sc->sc_ledtimer, 1); /* * Don't setup hardware-based blinking. * * Although some NICs may have this configured in the * default reset register values, the user may wish * to alter which pins have which function. * * The reference driver attaches the MAC network LED to GPIO1 and * the MAC power LED to GPIO2. However, the DWA-552 cardbus * NIC has these reversed. */ sc->sc_hardled = (1 == 0); sc->sc_led_net_pin = -1; sc->sc_led_pwr_pin = -1; /* * Auto-enable soft led processing for IBM cards and for * 5211 minipci cards. Users can also manually enable/disable * support with a sysctl. */ sc->sc_softled = (devid == AR5212_DEVID_IBM || devid == AR5211_DEVID); ath_led_config(sc); ath_hal_setledstate(ah, HAL_LED_INIT); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode */ | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ | IEEE80211_C_WDS /* 4-address traffic works */ | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ #ifndef ATH_ENABLE_11N | IEEE80211_C_BGSCAN /* capable of bg scanning */ #endif | IEEE80211_C_TXFRAG /* handle tx frags */ #ifdef ATH_ENABLE_DFS | IEEE80211_C_DFS /* Enable radar detection */ #endif | IEEE80211_C_PMGT /* Station side power mgmt */ | IEEE80211_C_SWSLEEP ; /* * Query the hal to figure out h/w crypto support. */ if (ath_hal_ciphersupported(ah, HAL_CIPHER_WEP)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_OCB)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_OCB; if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_CCM)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_CCM; if (ath_hal_ciphersupported(ah, HAL_CIPHER_CKIP)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_CKIP; if (ath_hal_ciphersupported(ah, HAL_CIPHER_TKIP)) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIP; /* * Check if h/w does the MIC and/or whether the * separate key cache entries are required to * handle both tx+rx MIC keys. */ if (ath_hal_ciphersupported(ah, HAL_CIPHER_MIC)) ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC; /* * If the h/w supports storing tx+rx MIC keys * in one cache slot automatically enable use. 
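 * When it cannot, sc_splitmic is set below and each TKIP key needs
 * companion cache slots: the keymap setup later in attach reserves
 * slots i and i+64 for every global key i, plus i+32 and i+32+64 on
 * split-mic parts, so e.g. global key 0 claims slots 0, 32, 64
 * and 96.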
*/ if (ath_hal_hastkipsplit(ah) || !ath_hal_settkipsplit(ah, AH_FALSE)) sc->sc_splitmic = 1; /* * If the h/w can do TKIP MIC together with WME then * we use it; otherwise we force the MIC to be done * in software by the net80211 layer. */ if (ath_hal_haswmetkipmic(ah)) sc->sc_wmetkipmic = 1; } sc->sc_hasclrkey = ath_hal_ciphersupported(ah, HAL_CIPHER_CLR); /* * Check for multicast key search support. */ if (ath_hal_hasmcastkeysearch(sc->sc_ah) && !ath_hal_getmcastkeysearch(sc->sc_ah)) { ath_hal_setmcastkeysearch(sc->sc_ah, 1); } sc->sc_mcastkey = ath_hal_getmcastkeysearch(ah); /* * Mark key cache slots associated with global keys * as in use. If we knew TKIP was not to be used we * could leave the +32, +64, and +32+64 slots free. */ for (i = 0; i < IEEE80211_WEP_NKID; i++) { setbit(sc->sc_keymap, i); setbit(sc->sc_keymap, i+64); if (sc->sc_splitmic) { setbit(sc->sc_keymap, i+32); setbit(sc->sc_keymap, i+32+64); } } /* * TPC support can be done either with a global cap or * per-packet support. The latter is not available on * all parts. We're a bit pedantic here as all parts * support a global cap. */ if (ath_hal_hastpc(ah) || ath_hal_hastxpowlimit(ah)) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Mark WME capability only if we have sufficient * hardware queues to do proper priority scheduling. */ if (sc->sc_ac2q[WME_AC_BE] != sc->sc_ac2q[WME_AC_BK]) ic->ic_caps |= IEEE80211_C_WME; /* * Check for misc other capabilities. */ if (ath_hal_hasbursting(ah)) ic->ic_caps |= IEEE80211_C_BURST; sc->sc_hasbmask = ath_hal_hasbssidmask(ah); sc->sc_hasbmatch = ath_hal_hasbssidmatch(ah); sc->sc_hastsfadd = ath_hal_hastsfadjust(ah); sc->sc_rxslink = ath_hal_self_linked_final_rxdesc(ah); sc->sc_rxtsf32 = ath_hal_has_long_rxdesc_tsf(ah); sc->sc_hasenforcetxop = ath_hal_hasenforcetxop(ah); sc->sc_rx_lnamixer = ath_hal_hasrxlnamixer(ah); sc->sc_hasdivcomb = ath_hal_hasdivantcomb(ah); if (ath_hal_hasfastframes(ah)) ic->ic_caps |= IEEE80211_C_FF; wmodes = ath_hal_getwirelessmodes(ah); if (wmodes & (HAL_MODE_108G|HAL_MODE_TURBO)) ic->ic_caps |= IEEE80211_C_TURBOP; #ifdef IEEE80211_SUPPORT_TDMA if (ath_hal_macversion(ah) > 0x78) { ic->ic_caps |= IEEE80211_C_TDMA; /* capable of TDMA */ ic->ic_tdma_update = ath_tdma_update; } #endif /* * TODO: enforce that at least this many frames are available * in the txbuf list before allowing data frames (raw or * otherwise) to be transmitted. */ sc->sc_txq_data_minfree = 10; /* * Leave this as default to maintain legacy behaviour. * Shortening the cabq/mcastq may end up causing some * undesirable behaviour. */ sc->sc_txq_mcastq_maxdepth = ath_txbuf; /* * How deep can the node software TX queue get whilst it's asleep. */ sc->sc_txq_node_psq_maxdepth = 16; /* - * Default the maximum queue depth for a given node - * to 1/4'th the TX buffers, or 64, whichever - * is larger. + * Default the maximum queue to to 1/4'th the TX buffers, or + * 64, whichever is smaller. */ sc->sc_txq_node_maxdepth = MAX(64, ath_txbuf / 4); /* Enable CABQ by default */ sc->sc_cabq_enable = 1; /* * Allow the TX and RX chainmasks to be overridden by * environment variables and/or device.hints. * * This must be done early - before the hardware is * calibrated or before the 802.11n stream calculation * is done. 
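 * For example, a 3x3 NIC can be restricted to a single chain via
 * device.hints or loader.conf entries such as:
 *	hint.ath.0.rx_chainmask="0x1"
 *	hint.ath.0.tx_chainmask="0x1"
 * which the resource_int_value() lookups below pick up.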
*/ if (resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "rx_chainmask", &rx_chainmask) == 0) { device_printf(sc->sc_dev, "Setting RX chainmask to 0x%x\n", rx_chainmask); (void) ath_hal_setrxchainmask(sc->sc_ah, rx_chainmask); } if (resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "tx_chainmask", &tx_chainmask) == 0) { device_printf(sc->sc_dev, "Setting TX chainmask to 0x%x\n", tx_chainmask); (void) ath_hal_settxchainmask(sc->sc_ah, tx_chainmask); } /* * Query the TX/RX chainmask configuration. * * This is only relevant for 11n devices. */ ath_hal_getrxchainmask(ah, &sc->sc_rxchainmask); ath_hal_gettxchainmask(ah, &sc->sc_txchainmask); /* * Disable MRR with protected frames by default. * Only 802.11n series NICs can handle this. */ sc->sc_mrrprot = 0; /* XXX should be a capability */ /* * Query the enterprise mode information the HAL. */ if (ath_hal_getcapability(ah, HAL_CAP_ENTERPRISE_MODE, 0, &sc->sc_ent_cfg) == HAL_OK) sc->sc_use_ent = 1; #ifdef ATH_ENABLE_11N /* * Query HT capabilities */ if (ath_hal_getcapability(ah, HAL_CAP_HT, 0, NULL) == HAL_OK && (wmodes & (HAL_MODE_HT20 | HAL_MODE_HT40))) { uint32_t rxs, txs; uint32_t ldpc; device_printf(sc->sc_dev, "[HT] enabling HT modes\n"); sc->sc_mrrprot = 1; /* XXX should be a capability */ ic->ic_htcaps = IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* A-MPDU tx/rx */ | IEEE80211_HTC_AMSDU /* A-MSDU tx/rx */ | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ | IEEE80211_HTCAP_SMPS_OFF; /* SM power save off */ /* * Enable short-GI for HT20 only if the hardware * advertises support. * Notably, anything earlier than the AR9287 doesn't. */ if ((ath_hal_getcapability(ah, HAL_CAP_HT20_SGI, 0, NULL) == HAL_OK) && (wmodes & HAL_MODE_HT20)) { device_printf(sc->sc_dev, "[HT] enabling short-GI in 20MHz mode\n"); ic->ic_htcaps |= IEEE80211_HTCAP_SHORTGI20; } if (wmodes & HAL_MODE_HT40) ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40 | IEEE80211_HTCAP_SHORTGI40; /* * TX/RX streams need to be taken into account when * negotiating which MCS rates it'll receive and * what MCS rates are available for TX. */ (void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 0, &txs); (void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 1, &rxs); ic->ic_txstream = txs; ic->ic_rxstream = rxs; /* * Setup TX and RX STBC based on what the HAL allows and * the currently configured chainmask set. * Ie - don't enable STBC TX if only one chain is enabled. * STBC RX is fine on a single RX chain; it just won't * provide any real benefit. */ if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0, NULL) == HAL_OK) { sc->sc_rx_stbc = 1; device_printf(sc->sc_dev, "[HT] 1 stream STBC receive enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM; } if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0, NULL) == HAL_OK) { sc->sc_tx_stbc = 1; device_printf(sc->sc_dev, "[HT] 1 stream STBC transmit enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC; } (void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1, &sc->sc_rts_aggr_limit); if (sc->sc_rts_aggr_limit != (64 * 1024)) device_printf(sc->sc_dev, "[HT] RTS aggregates limited to %d KiB\n", sc->sc_rts_aggr_limit / 1024); /* * LDPC */ if ((ath_hal_getcapability(ah, HAL_CAP_LDPC, 0, &ldpc)) == HAL_OK && (ldpc == 1)) { sc->sc_has_ldpc = 1; device_printf(sc->sc_dev, "[HT] LDPC transmit/receive enabled\n"); ic->ic_htcaps |= IEEE80211_HTCAP_LDPC; } device_printf(sc->sc_dev, "[HT] %d RX streams; %d TX streams\n", rxs, txs); } #endif /* * Initial aggregation settings. 
*/ sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH; sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH; sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW; sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH; sc->sc_aggr_limit = ATH_AGGR_MAXSIZE; sc->sc_delim_min_pad = 0; /* * Check if the hardware requires PCI register serialisation. * Some of the Owl based MACs require this. */ if (mp_ncpus > 1 && ath_hal_getcapability(ah, HAL_CAP_SERIALISE_WAR, 0, NULL) == HAL_OK) { sc->sc_ah->ah_config.ah_serialise_reg_war = 1; device_printf(sc->sc_dev, "Enabling register serialisation\n"); } /* * Initialise the deferred completed RX buffer list. */ TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_HP]); TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_LP]); /* * Indicate we need the 802.11 header padded to a * 32-bit boundary for 4-address and QoS frames. */ ic->ic_flags |= IEEE80211_F_DATAPAD; /* * Query the hal about antenna support. */ sc->sc_defant = ath_hal_getdefantenna(ah); /* * Not all chips have the VEOL support we want to * use with IBSS beacons; check here for it. */ sc->sc_hasveol = ath_hal_hasveol(ah); /* get mac address from kenv first, then hardware */ if (ath_fetch_mac_kenv(sc, ic->ic_macaddr) == 0) { /* Tell the HAL now about the new MAC */ ath_hal_setmac(ah, ic->ic_macaddr); } else { ath_hal_getmac(ah, ic->ic_macaddr); } if (sc->sc_hasbmask) ath_hal_getbssidmask(ah, sc->sc_hwbssidmask); /* NB: used to size node table key mapping array */ ic->ic_max_keyix = sc->sc_keymax; /* call MI attach routine. */ ieee80211_ifattach(ic); ic->ic_setregdomain = ath_setregdomain; ic->ic_getradiocaps = ath_getradiocaps; sc->sc_opmode = HAL_M_STA; /* override default methods */ ic->ic_ioctl = ath_ioctl; ic->ic_parent = ath_parent; ic->ic_transmit = ath_transmit; ic->ic_newassoc = ath_newassoc; ic->ic_updateslot = ath_updateslot; ic->ic_wme.wme_update = ath_wme_update; ic->ic_vap_create = ath_vap_create; ic->ic_vap_delete = ath_vap_delete; ic->ic_raw_xmit = ath_raw_xmit; ic->ic_update_mcast = ath_update_mcast; ic->ic_update_promisc = ath_update_promisc; ic->ic_node_alloc = ath_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = ath_node_free; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = ath_node_cleanup; ic->ic_node_getsignal = ath_node_getsignal; ic->ic_scan_start = ath_scan_start; ic->ic_scan_end = ath_scan_end; ic->ic_set_channel = ath_set_channel; #ifdef ATH_ENABLE_11N /* 802.11n specific - but just override anyway */ sc->sc_addba_request = ic->ic_addba_request; sc->sc_addba_response = ic->ic_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; sc->sc_bar_response = ic->ic_bar_response; sc->sc_addba_response_timeout = ic->ic_addba_response_timeout; ic->ic_addba_request = ath_addba_request; ic->ic_addba_response = ath_addba_response; ic->ic_addba_response_timeout = ath_addba_response_timeout; ic->ic_addba_stop = ath_addba_stop; ic->ic_bar_response = ath_bar_response; ic->ic_update_chw = ath_update_chw; #endif /* ATH_ENABLE_11N */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * There's one vendor bitmap entry in the RX radiotap * header; make sure that's taken into account. */ ieee80211_radiotap_attachv(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), 0, ATH_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), 1, ATH_RX_RADIOTAP_PRESENT); #else /* * No vendor bitmap/extensions are present. 
*/ ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), ATH_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), ATH_RX_RADIOTAP_PRESENT); #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* * Setup the ALQ logging if required */ #ifdef ATH_DEBUG_ALQ if_ath_alq_init(&sc->sc_alq, device_get_nameunit(sc->sc_dev)); if_ath_alq_setcfg(&sc->sc_alq, sc->sc_ah->ah_macVersion, sc->sc_ah->ah_macRev, sc->sc_ah->ah_phyRev, sc->sc_ah->ah_magic); #endif /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ ath_sysctlattach(sc); ath_sysctl_stats_attach(sc); ath_sysctl_hal_attach(sc); if (bootverbose) ieee80211_announce(ic); ath_announce(sc); /* * Put it to sleep for now. */ ATH_LOCK(sc); ath_power_setpower(sc, HAL_PM_FULL_SLEEP); ATH_UNLOCK(sc); return 0; bad2: ath_tx_cleanup(sc); ath_desc_free(sc); ath_txdma_teardown(sc); ath_rxdma_teardown(sc); bad: if (ah) ath_hal_detach(ah); sc->sc_invalid = 1; return error; } int ath_detach(struct ath_softc *sc) { /* * NB: the order of these is important: * o stop the chip so no more interrupts will fire * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o free the taskqueue which drains any pending tasks * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ /* * XXX Wake the hardware up first. ath_stop() will still * wake it up first, but I'd rather do it here just to * ensure it's awake. */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_power_setpower(sc, HAL_PM_AWAKE); /* * Stop things cleanly. */ ath_stop(sc); ATH_UNLOCK(sc); ieee80211_ifdetach(&sc->sc_ic); taskqueue_free(sc->sc_tq); #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->detach(sc->sc_tx99); #endif ath_rate_detach(sc->sc_rc); #ifdef ATH_DEBUG_ALQ if_ath_alq_tidyup(&sc->sc_alq); #endif ath_lna_div_detach(sc); ath_btcoex_detach(sc); ath_spectral_detach(sc); ath_dfs_detach(sc); ath_desc_free(sc); ath_txdma_teardown(sc); ath_rxdma_teardown(sc); ath_tx_cleanup(sc); ath_hal_detach(sc->sc_ah); /* NB: sets chip in full sleep */ return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. */ static void assign_address(struct ath_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone) { int i; if (clone && sc->sc_hasbmask) { /* NB: we only do this if h/w supports multiple bssid */ for (i = 0; i < 8; i++) if ((sc->sc_bssidmask & (1<sc_bssidmask |= 1<sc_hwbssidmask[0] &= ~mac[0]; if (i == 0) sc->sc_nbssid0++; } static void reclaim_address(struct ath_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN]) { int i = mac[0] >> 2; uint8_t mask; if (i != 0 || --sc->sc_nbssid0 == 0) { sc->sc_bssidmask &= ~(1<sc_bssidmask & (1<sc_hwbssidmask[0] |= mask; } } /* * Assign a beacon xmit slot. We try to space out * assignments so when beacons are staggered the * traffic coming out of the cab q has maximal time * to go out before the next beacon is scheduled. 
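 * E.g. with four slots and slot 0 occupied, slot 2 is preferred over
 * slot 1 since both of its neighbours are still free; the loop below
 * returns the first such spaced slot and only falls back to a plain
 * free slot when no spaced one exists.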
*/ static int assign_bslot(struct ath_softc *sc) { u_int slot, free; free = 0; for (slot = 0; slot < ATH_BCBUF; slot++) if (sc->sc_bslot[slot] == NULL) { if (sc->sc_bslot[(slot+1)%ATH_BCBUF] == NULL && sc->sc_bslot[(slot-1)%ATH_BCBUF] == NULL) return slot; free = slot; /* NB: keep looking for a double slot */ } return free; } static struct ieee80211vap * ath_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac0[IEEE80211_ADDR_LEN]) { struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp; struct ieee80211vap *vap; uint8_t mac[IEEE80211_ADDR_LEN]; int needbeacon, error; enum ieee80211_opmode ic_opmode; avp = malloc(sizeof(struct ath_vap), M_80211_VAP, M_WAITOK | M_ZERO); needbeacon = 0; IEEE80211_ADDR_COPY(mac, mac0); ATH_LOCK(sc); ic_opmode = opmode; /* default to opmode of new vap */ switch (opmode) { case IEEE80211_M_STA: if (sc->sc_nstavaps != 0) { /* XXX only 1 for now */ device_printf(sc->sc_dev, "only 1 sta vap supported\n"); goto bad; } if (sc->sc_nvaps) { /* * With multiple vaps we must fall back * to s/w beacon miss handling. */ flags |= IEEE80211_CLONE_NOBEACONS; } if (flags & IEEE80211_CLONE_NOBEACONS) { /* * Station mode w/o beacons are implemented w/ AP mode. */ ic_opmode = IEEE80211_M_HOSTAP; } break; case IEEE80211_M_IBSS: if (sc->sc_nvaps != 0) { /* XXX only 1 for now */ device_printf(sc->sc_dev, "only 1 ibss vap supported\n"); goto bad; } needbeacon = 1; break; case IEEE80211_M_AHDEMO: #ifdef IEEE80211_SUPPORT_TDMA if (flags & IEEE80211_CLONE_TDMA) { if (sc->sc_nvaps != 0) { device_printf(sc->sc_dev, "only 1 tdma vap supported\n"); goto bad; } needbeacon = 1; flags |= IEEE80211_CLONE_NOBEACONS; } /* fall thru... */ #endif case IEEE80211_M_MONITOR: if (sc->sc_nvaps != 0 && ic->ic_opmode != opmode) { /* * Adopt existing mode. Adding a monitor or ahdemo * vap to an existing configuration is of dubious * value but should be ok. */ /* XXX not right for monitor mode */ ic_opmode = ic->ic_opmode; } break; case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: needbeacon = 1; break; case IEEE80211_M_WDS: if (sc->sc_nvaps != 0 && ic->ic_opmode == IEEE80211_M_STA) { device_printf(sc->sc_dev, "wds not supported in sta mode\n"); goto bad; } /* * Silently remove any request for a unique * bssid; WDS vap's always share the local * mac address. */ flags &= ~IEEE80211_CLONE_BSSID; if (sc->sc_nvaps == 0) ic_opmode = IEEE80211_M_HOSTAP; else ic_opmode = ic->ic_opmode; break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", opmode); goto bad; } /* * Check that a beacon buffer is available; the code below assumes it. */ if (needbeacon & TAILQ_EMPTY(&sc->sc_bbuf)) { device_printf(sc->sc_dev, "no beacon buffer available\n"); goto bad; } /* STA, AHDEMO? 
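 * (That is, should those modes also get a locally administered
 * address?  Today only hostap and mbss vaps do.)  When cloning,
 * assign_address() sets the U/L bit and encodes the bssid index in
 * the following bits of the first octet (reclaim_address() above
 * recovers it as mac[0] >> 2), so an EEPROM address of
 * 00:11:22:33:44:55 would yield 06:11:22:33:44:55 for index 1.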
 */
    if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) {
        assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID);
        ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask);
    }

    vap = &avp->av_vap;
    /* XXX can't hold mutex across if_alloc */
    ATH_UNLOCK(sc);
    error = ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid);
    ATH_LOCK(sc);
    if (error != 0) {
        device_printf(sc->sc_dev, "%s: error %d creating vap\n",
            __func__, error);
        goto bad2;
    }

    /* h/w crypto support */
    vap->iv_key_alloc = ath_key_alloc;
    vap->iv_key_delete = ath_key_delete;
    vap->iv_key_set = ath_key_set;
    vap->iv_key_update_begin = ath_key_update_begin;
    vap->iv_key_update_end = ath_key_update_end;

    /* override various methods */
    avp->av_recv_mgmt = vap->iv_recv_mgmt;
    vap->iv_recv_mgmt = ath_recv_mgmt;
    vap->iv_reset = ath_reset_vap;
    vap->iv_update_beacon = ath_beacon_update;
    avp->av_newstate = vap->iv_newstate;
    vap->iv_newstate = ath_newstate;
    avp->av_bmiss = vap->iv_bmiss;
    vap->iv_bmiss = ath_bmiss_vap;

    avp->av_node_ps = vap->iv_node_ps;
    vap->iv_node_ps = ath_node_powersave;

    avp->av_set_tim = vap->iv_set_tim;
    vap->iv_set_tim = ath_node_set_tim;

    avp->av_recv_pspoll = vap->iv_recv_pspoll;
    vap->iv_recv_pspoll = ath_node_recv_pspoll;

    /* Set default parameters */

    /*
     * Anything earlier than some of the AR9300-series MACs doesn't
     * support a smaller MPDU density.
     */
    vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_8;
    /*
     * All NICs can handle the maximum size, however
     * AR5416 based MACs can only TX aggregates w/ RTS
     * protection when the total aggregate size is <= 8k.
     * However, for now that's enforced by the TX path.
     */
    vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K;

    avp->av_bslot = -1;
    if (needbeacon) {
        /*
         * Allocate beacon state and setup the q for buffered
         * multicast frames.  We know a beacon buffer is
         * available because we checked above.
         */
        avp->av_bcbuf = TAILQ_FIRST(&sc->sc_bbuf);
        TAILQ_REMOVE(&sc->sc_bbuf, avp->av_bcbuf, bf_list);
        if (opmode != IEEE80211_M_IBSS || !sc->sc_hasveol) {
            /*
             * Assign the vap to a beacon xmit slot.  As above
             * this cannot fail to find a free one.
             */
            avp->av_bslot = assign_bslot(sc);
            KASSERT(sc->sc_bslot[avp->av_bslot] == NULL,
                ("beacon slot %u not empty", avp->av_bslot));
            sc->sc_bslot[avp->av_bslot] = vap;
            sc->sc_nbcnvaps++;
        }
        if (sc->sc_hastsfadd && sc->sc_nbcnvaps > 0) {
            /*
             * Multiple vaps are to transmit beacons and we
             * have h/w support for TSF adjusting; enable
             * use of staggered beacons.
             */
            sc->sc_stagbeacons = 1;
        }
        ath_txq_init(sc, &avp->av_mcastq, ATH_TXQ_SWQ);
    }

    ic->ic_opmode = ic_opmode;
    if (opmode != IEEE80211_M_WDS) {
        sc->sc_nvaps++;
        if (opmode == IEEE80211_M_STA)
            sc->sc_nstavaps++;
        if (opmode == IEEE80211_M_MBSS)
            sc->sc_nmeshvaps++;
    }
    switch (ic_opmode) {
    case IEEE80211_M_IBSS:
        sc->sc_opmode = HAL_M_IBSS;
        break;
    case IEEE80211_M_STA:
        sc->sc_opmode = HAL_M_STA;
        break;
    case IEEE80211_M_AHDEMO:
#ifdef IEEE80211_SUPPORT_TDMA
        if (vap->iv_caps & IEEE80211_C_TDMA) {
            sc->sc_tdma = 1;
            /* NB: disable tsf adjust */
            sc->sc_stagbeacons = 0;
        }
        /*
         * NB: adhoc demo mode is a pseudo mode; to the hal it's
         * just ap mode.
         */
        /* fall thru... */
#endif
    case IEEE80211_M_HOSTAP:
    case IEEE80211_M_MBSS:
        sc->sc_opmode = HAL_M_HOSTAP;
        break;
    case IEEE80211_M_MONITOR:
        sc->sc_opmode = HAL_M_MONITOR;
        break;
    default:
        /* XXX should not happen */
        break;
    }
    if (sc->sc_hastsfadd) {
        /*
         * Configure whether or not TSF adjust should be done.
         */
        ath_hal_settsfadjust(sc->sc_ah, sc->sc_stagbeacons);
    }
    if (flags & IEEE80211_CLONE_NOBEACONS) {
        /*
         * Enable s/w beacon miss handling.
         */
        sc->sc_swbmiss = 1;
    }
    ATH_UNLOCK(sc);

    /* complete setup */
    ieee80211_vap_attach(vap, ath_media_change, ieee80211_media_status,
        mac);
    return vap;
bad2:
    reclaim_address(sc, mac);
    ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask);
bad:
    free(avp, M_80211_VAP);
    ATH_UNLOCK(sc);
    return NULL;
}
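/*
 * A minimal sketch (not driver code) of the net80211-opmode to
 * HAL-opmode mapping performed by the switch in ath_vap_create()
 * above: TDMA/adhoc-demo is a pseudo mode the hal treats as host-AP,
 * and WDS rides on whatever mode the first vap established.
 * example_hal_opmode() is a hypothetical helper.
 */
static HAL_OPMODE
example_hal_opmode(enum ieee80211_opmode m)
{
    switch (m) {
    case IEEE80211_M_IBSS:
        return (HAL_M_IBSS);
    case IEEE80211_M_STA:
        return (HAL_M_STA);
    case IEEE80211_M_MONITOR:
        return (HAL_M_MONITOR);
    case IEEE80211_M_AHDEMO:   /* pseudo mode; the hal sees ap */
    case IEEE80211_M_HOSTAP:
    case IEEE80211_M_MBSS:
    default:
        return (HAL_M_HOSTAP);
    }
}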
static void
ath_vap_delete(struct ieee80211vap *vap)
{
    struct ieee80211com *ic = vap->iv_ic;
    struct ath_softc *sc = ic->ic_softc;
    struct ath_hal *ah = sc->sc_ah;
    struct ath_vap *avp = ATH_VAP(vap);

    ATH_LOCK(sc);
    ath_power_set_power_state(sc, HAL_PM_AWAKE);
    ATH_UNLOCK(sc);

    DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
    if (sc->sc_running) {
        /*
         * Quiesce the hardware while we remove the vap.  In
         * particular we need to reclaim all references to
         * the vap state by any frames pending on the tx queues.
         */
        ath_hal_intrset(ah, 0);     /* disable interrupts */
        /* XXX Do all frames from all vaps/nodes need draining here? */
        ath_stoprecv(sc, 1);        /* stop recv side */
        ath_draintxq(sc, ATH_RESET_DEFAULT);    /* stop hw xmit side */
    }

    /* .. leave the hardware awake for now. */

    ieee80211_vap_detach(vap);

    /*
     * XXX Danger Will Robinson! Danger!
     *
     * Because ieee80211_vap_detach() can queue a frame (the station
     * disassociate message?) after we've drained the TXQ and
     * flushed the software TXQ, we will end up with a frame queued
     * to a node whose vap is about to be freed.
     *
     * To work around this, flush the hardware/software again.
     * This may be racy - the ath task may be running and the packet
     * may be being scheduled between sw->hw txq. Tsk.
     *
     * TODO: figure out why a new node gets allocated somewhere around
     * here (after the ath_tx_swq() call; and after an ath_stop()
     * call!)
     */

    ath_draintxq(sc, ATH_RESET_DEFAULT);

    ATH_LOCK(sc);
    /*
     * Reclaim beacon state.  Note this must be done before
     * the vap instance is reclaimed as we may have a reference
     * to it in the buffer for the beacon frame.
     */
    if (avp->av_bcbuf != NULL) {
        if (avp->av_bslot != -1) {
            sc->sc_bslot[avp->av_bslot] = NULL;
            sc->sc_nbcnvaps--;
        }
        ath_beacon_return(sc, avp->av_bcbuf);
        avp->av_bcbuf = NULL;
        if (sc->sc_nbcnvaps == 0) {
            sc->sc_stagbeacons = 0;
            if (sc->sc_hastsfadd)
                ath_hal_settsfadjust(sc->sc_ah, 0);
        }
        /*
         * Reclaim any pending mcast frames for the vap.
         */
        ath_tx_draintxq(sc, &avp->av_mcastq);
    }
    /*
     * Update bookkeeping.
     */
    if (vap->iv_opmode == IEEE80211_M_STA) {
        sc->sc_nstavaps--;
        if (sc->sc_nstavaps == 0 && sc->sc_swbmiss)
            sc->sc_swbmiss = 0;
    } else if (vap->iv_opmode == IEEE80211_M_HOSTAP ||
        vap->iv_opmode == IEEE80211_M_MBSS) {
        reclaim_address(sc, vap->iv_myaddr);
        ath_hal_setbssidmask(ah, sc->sc_hwbssidmask);
        if (vap->iv_opmode == IEEE80211_M_MBSS)
            sc->sc_nmeshvaps--;
    }
    if (vap->iv_opmode != IEEE80211_M_WDS)
        sc->sc_nvaps--;
#ifdef IEEE80211_SUPPORT_TDMA
    /* TDMA operation ceases when the last vap is destroyed */
    if (sc->sc_tdma && sc->sc_nvaps == 0) {
        sc->sc_tdma = 0;
        sc->sc_swbmiss = 0;
    }
#endif
    free(avp, M_80211_VAP);

    if (sc->sc_running) {
        /*
         * Restart rx+tx machines if still running (RUNNING will
         * be reset if we just destroyed the last vap).
         */
        if (ath_startrecv(sc) != 0)
            device_printf(sc->sc_dev,
                "%s: unable to restart recv logic\n", __func__);
        if (sc->sc_beacons) {       /* restart beacons */
#ifdef IEEE80211_SUPPORT_TDMA
            if (sc->sc_tdma)
                ath_tdma_config(sc, NULL);
            else
#endif
                ath_beacon_config(sc, NULL);
        }
        ath_hal_intrset(ah, sc->sc_imask);
    }

    /* Ok, let the hardware go back to sleep.
*/ ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } void ath_suspend(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; sc->sc_resume_up = ic->ic_nrunning != 0; ieee80211_suspend_all(ic); /* * NB: don't worry about putting the chip in low power * mode; pci will power off our socket on suspend and * CardBus detaches the device. * * XXX TODO: well, that's great, except for non-cardbus * devices! */ /* * XXX This doesn't wait until all pending taskqueue * items and parallel transmit/receive/other threads * are running! */ ath_hal_intrset(sc->sc_ah, 0); taskqueue_block(sc->sc_tq); ATH_LOCK(sc); callout_stop(&sc->sc_cal_ch); ATH_UNLOCK(sc); /* * XXX ensure sc_invalid is 1 */ /* Disable the PCIe PHY, complete with workarounds */ ath_hal_enablepcie(sc->sc_ah, 1, 1); } /* * Reset the key cache since some parts do not reset the * contents on resume. First we clear all entries, then * re-load keys that the 802.11 layer assumes are setup * in h/w. */ static void ath_reset_keycache(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; int i; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); for (i = 0; i < sc->sc_keymax; i++) ath_hal_keyreset(ah, i); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ieee80211_crypto_reload_keys(ic); } /* * Fetch the current chainmask configuration based on the current * operating channel and options. */ static void ath_update_chainmasks(struct ath_softc *sc, struct ieee80211_channel *chan) { /* * Set TX chainmask to the currently configured chainmask; * the TX chainmask depends upon the current operating mode. */ sc->sc_cur_rxchainmask = sc->sc_rxchainmask; if (IEEE80211_IS_CHAN_HT(chan)) { sc->sc_cur_txchainmask = sc->sc_txchainmask; } else { sc->sc_cur_txchainmask = 1; } DPRINTF(sc, ATH_DEBUG_RESET, "%s: TX chainmask is now 0x%x, RX is now 0x%x\n", __func__, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); } void ath_resume(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; ath_hal_enablepcie(ah, 0, 0); /* * Must reset the chip before we reload the * keycache as we were powered down on suspend. */ ath_update_chainmasks(sc, sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); /* Ensure we set the current power state to on */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_power_setpower(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_hal_reset(ah, sc->sc_opmode, sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan, AH_FALSE, HAL_RESET_NORMAL, &status); ath_reset_keycache(sc); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* Restore the LED configuration */ ath_led_config(sc); ath_hal_setledstate(ah, HAL_LED_INIT); if (sc->sc_resume_up) ieee80211_resume_all(ic); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); /* XXX beacons ? 
 */
}

void
ath_shutdown(struct ath_softc *sc)
{

    ATH_LOCK(sc);
    ath_stop(sc);
    ATH_UNLOCK(sc);

    /* NB: no point powering down chip as we're about to reboot */
}

/*
 * Interrupt handler.  Most of the actual processing is deferred.
 */
void
ath_intr(void *arg)
{
    struct ath_softc *sc = arg;
    struct ath_hal *ah = sc->sc_ah;
    HAL_INT status = 0;
    uint32_t txqs;

    /*
     * If we're inside a reset path, just print a warning and
     * clear the ISR. The reset routine will finish it for us.
     */
    ATH_PCU_LOCK(sc);
    if (sc->sc_inreset_cnt) {
        HAL_INT status;

        ath_hal_getisr(ah, &status);    /* clear ISR */
        ath_hal_intrset(ah, 0);         /* disable further intr's */
        DPRINTF(sc, ATH_DEBUG_ANY,
            "%s: in reset, ignoring: status=0x%x\n",
            __func__, status);
        ATH_PCU_UNLOCK(sc);
        return;
    }

    if (sc->sc_invalid) {
        /*
         * The hardware is not ready/present, don't touch anything.
         * Note this can happen early on if the IRQ is shared.
         */
        DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid; ignored\n", __func__);
        ATH_PCU_UNLOCK(sc);
        return;
    }
    if (!ath_hal_intrpend(ah)) {    /* shared irq, not for us */
        ATH_PCU_UNLOCK(sc);
        return;
    }

    ATH_LOCK(sc);
    ath_power_set_power_state(sc, HAL_PM_AWAKE);
    ATH_UNLOCK(sc);

    if (sc->sc_ic.ic_nrunning == 0 && sc->sc_running == 0) {
        HAL_INT status;

        DPRINTF(sc, ATH_DEBUG_ANY, "%s: ic_nrunning %d sc_running %d\n",
            __func__, sc->sc_ic.ic_nrunning, sc->sc_running);
        ath_hal_getisr(ah, &status);    /* clear ISR */
        ath_hal_intrset(ah, 0);         /* disable further intr's */
        ATH_PCU_UNLOCK(sc);

        ATH_LOCK(sc);
        ath_power_restore_power_state(sc);
        ATH_UNLOCK(sc);
        return;
    }

    /*
     * Figure out the reason(s) for the interrupt.  Note
     * that the hal returns a pseudo-ISR that may include
     * bits we haven't explicitly enabled so we mask the
     * value to ensure we only process bits we requested.
     */
    ath_hal_getisr(ah, &status);        /* NB: clears ISR too */
    DPRINTF(sc, ATH_DEBUG_INTR, "%s: status 0x%x\n", __func__, status);
    ATH_KTR(sc, ATH_KTR_INTERRUPTS, 1, "ath_intr: mask=0x%.8x", status);
#ifdef ATH_DEBUG_ALQ
    if_ath_alq_post_intr(&sc->sc_alq, status, ah->ah_intrstate,
        ah->ah_syncstate);
#endif  /* ATH_DEBUG_ALQ */
#ifdef ATH_KTR_INTR_DEBUG
    ATH_KTR(sc, ATH_KTR_INTERRUPTS, 5,
        "ath_intr: ISR=0x%.8x, ISR_S0=0x%.8x, ISR_S1=0x%.8x, ISR_S2=0x%.8x, ISR_S5=0x%.8x",
        ah->ah_intrstate[0],
        ah->ah_intrstate[1],
        ah->ah_intrstate[2],
        ah->ah_intrstate[3],
        ah->ah_intrstate[6]);
#endif

    /* Squirrel away SYNC interrupt debugging */
    if (ah->ah_syncstate != 0) {
        int i;

        for (i = 0; i < 32; i++)
            if (ah->ah_syncstate & (1 << i))
                sc->sc_intr_stats.sync_intr[i]++;
    }

    status &= sc->sc_imask;         /* discard unasked for bits */

    /* Short-circuit un-handled interrupts */
    if (status == 0x0) {
        ATH_PCU_UNLOCK(sc);

        ATH_LOCK(sc);
        ath_power_restore_power_state(sc);
        ATH_UNLOCK(sc);

        return;
    }

    /*
     * Take a note that we're inside the interrupt handler, so
     * the reset routines know to wait.
     */
    sc->sc_intr_cnt++;
    ATH_PCU_UNLOCK(sc);

    /*
     * Handle the interrupt.  We won't run concurrent with the reset
     * or channel change routines as they'll wait for sc_intr_cnt
     * to be 0 before continuing.
     */
    if (status & HAL_INT_FATAL) {
        sc->sc_stats.ast_hardware++;
        ath_hal_intrset(ah, 0);     /* disable intr's until reset */
        taskqueue_enqueue(sc->sc_tq, &sc->sc_fataltask);
    } else {
        if (status & HAL_INT_SWBA) {
            /*
             * Software beacon alert--time to send a beacon.
             * Handle beacon transmission directly; deferring
             * this is too slow to meet timing constraints
             * under load.
*/ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) { if (sc->sc_tdmaswba == 0) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ath_tdma_beacon_send(sc, vap); sc->sc_tdmaswba = vap->iv_tdma->tdma_bintval; } else sc->sc_tdmaswba--; } else #endif { ath_beacon_proc(sc, 0); #ifdef IEEE80211_SUPPORT_SUPERG /* * Schedule the rx taskq in case there's no * traffic so any frames held on the staging * queue are aged and potentially flushed. */ sc->sc_rx.recv_sched(sc, 1); #endif } } if (status & HAL_INT_RXEOL) { int imask; ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXEOL"); if (! sc->sc_isedma) { ATH_PCU_LOCK(sc); /* * NB: the hardware should re-read the link when * RXE bit is written, but it doesn't work at * least on older hardware revs. */ sc->sc_stats.ast_rxeol++; /* * Disable RXEOL/RXORN - prevent an interrupt * storm until the PCU logic can be reset. * In case the interface is reset some other * way before "sc_kickpcu" is called, don't * modify sc_imask - that way if it is reset * by a call to ath_reset() somehow, the * interrupt mask will be correctly reprogrammed. */ imask = sc->sc_imask; imask &= ~(HAL_INT_RXEOL | HAL_INT_RXORN); ath_hal_intrset(ah, imask); /* * Only blank sc_rxlink if we've not yet kicked * the PCU. * * This isn't entirely correct - the correct solution * would be to have a PCU lock and engage that for * the duration of the PCU fiddling; which would include * running the RX process. Otherwise we could end up * messing up the RX descriptor chain and making the * RX desc list much shorter. */ if (! sc->sc_kickpcu) sc->sc_rxlink = NULL; sc->sc_kickpcu = 1; ATH_PCU_UNLOCK(sc); } /* * Enqueue an RX proc to handle whatever * is in the RX queue. * This will then kick the PCU if required. */ sc->sc_rx.recv_sched(sc, 1); } if (status & HAL_INT_TXURN) { sc->sc_stats.ast_txurn++; /* bump tx trigger level */ ath_hal_updatetxtriglevel(ah, AH_TRUE); } /* * Handle both the legacy and RX EDMA interrupt bits. * Note that HAL_INT_RXLP is also HAL_INT_RXDESC. */ if (status & (HAL_INT_RX | HAL_INT_RXHP | HAL_INT_RXLP)) { sc->sc_stats.ast_rx_intr++; sc->sc_rx.recv_sched(sc, 1); } if (status & HAL_INT_TX) { sc->sc_stats.ast_tx_intr++; /* * Grab all the currently set bits in the HAL txq bitmap * and blank them. This is the only place we should be * doing this. */ if (! sc->sc_isedma) { ATH_PCU_LOCK(sc); txqs = 0xffffffff; ath_hal_gettxintrtxqs(sc->sc_ah, &txqs); ATH_KTR(sc, ATH_KTR_INTERRUPTS, 3, "ath_intr: TX; txqs=0x%08x, txq_active was 0x%08x, now 0x%08x", txqs, sc->sc_txq_active, sc->sc_txq_active | txqs); sc->sc_txq_active |= txqs; ATH_PCU_UNLOCK(sc); } taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask); } if (status & HAL_INT_BMISS) { sc->sc_stats.ast_bmiss++; taskqueue_enqueue(sc->sc_tq, &sc->sc_bmisstask); } if (status & HAL_INT_GTT) sc->sc_stats.ast_tx_timeout++; if (status & HAL_INT_CST) sc->sc_stats.ast_tx_cst++; if (status & HAL_INT_MIB) { sc->sc_stats.ast_mib++; ATH_PCU_LOCK(sc); /* * Disable interrupts until we service the MIB * interrupt; otherwise it will continue to fire. */ ath_hal_intrset(ah, 0); /* * Let the hal handle the event. We assume it will * clear whatever condition caused the interrupt. */ ath_hal_mibevent(ah, &sc->sc_halstats); /* * Don't reset the interrupt if we've just * kicked the PCU, or we may get a nested * RXEOL before the rxproc has had a chance * to run. 
 */
            if (sc->sc_kickpcu == 0)
                ath_hal_intrset(ah, sc->sc_imask);
            ATH_PCU_UNLOCK(sc);
        }
        if (status & HAL_INT_RXORN) {
            /* NB: hal marks HAL_INT_FATAL when RXORN is fatal */
            ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXORN");
            sc->sc_stats.ast_rxorn++;
        }
        if (status & HAL_INT_TSFOOR) {
            device_printf(sc->sc_dev, "%s: TSFOOR\n", __func__);
            sc->sc_syncbeacon = 1;
        }
        if (status & HAL_INT_MCI) {
            ath_btcoex_mci_intr(sc);
        }
    }
    ATH_PCU_LOCK(sc);
    sc->sc_intr_cnt--;
    ATH_PCU_UNLOCK(sc);

    ATH_LOCK(sc);
    ath_power_restore_power_state(sc);
    ATH_UNLOCK(sc);
}

static void
ath_fatal_proc(void *arg, int pending)
{
    struct ath_softc *sc = arg;
    u_int32_t *state;
    u_int32_t len;
    void *sp;

    if (sc->sc_invalid)
        return;

    device_printf(sc->sc_dev, "hardware error; resetting\n");
    /*
     * Fatal errors are unrecoverable.  Typically these
     * are caused by DMA errors.  Collect h/w state from
     * the hal so we can diagnose what's going on.
     */
    if (ath_hal_getfatalstate(sc->sc_ah, &sp, &len)) {
        KASSERT(len >= 6*sizeof(u_int32_t), ("len %u bytes", len));
        state = sp;
        device_printf(sc->sc_dev,
            "0x%08x 0x%08x 0x%08x, 0x%08x 0x%08x 0x%08x\n",
            state[0], state[1], state[2], state[3], state[4], state[5]);
    }
    ath_reset(sc, ATH_RESET_NOLOSS);
}

static void
ath_bmiss_vap(struct ieee80211vap *vap)
{
    struct ath_softc *sc = vap->iv_ic->ic_softc;

    /*
     * Workaround phantom bmiss interrupts by sanity-checking
     * the time of our last rx'd frame.  If it is within the
     * beacon miss interval then ignore the interrupt.  If it's
     * truly a bmiss we'll get another interrupt soon and that'll
     * be dispatched up for processing.  Note this applies only
     * for h/w beacon miss events.
     */

    /*
     * XXX TODO: Just read the TSF during the interrupt path;
     * that way we don't have to wake up again just to read it
     * again.
     */
    ATH_LOCK(sc);
    ath_power_set_power_state(sc, HAL_PM_AWAKE);
    ATH_UNLOCK(sc);

    if ((vap->iv_flags_ext & IEEE80211_FEXT_SWBMISS) == 0) {
        u_int64_t lastrx = sc->sc_lastrx;
        u_int64_t tsf = ath_hal_gettsf64(sc->sc_ah);
        /* XXX should take a locked ref to iv_bss */
        u_int bmisstimeout =
            vap->iv_bmissthreshold * vap->iv_bss->ni_intval * 1024;

        DPRINTF(sc, ATH_DEBUG_BEACON,
            "%s: tsf %llu lastrx %lld (%llu) bmiss %u\n",
            __func__, (unsigned long long) tsf,
            (unsigned long long)(tsf - lastrx),
            (unsigned long long) lastrx, bmisstimeout);

        if (tsf - lastrx <= bmisstimeout) {
            sc->sc_stats.ast_bmiss_phantom++;
            ATH_LOCK(sc);
            ath_power_restore_power_state(sc);
            ATH_UNLOCK(sc);
            return;
        }
    }

    /*
     * There's no need to keep the hardware awake during the call
     * to av_bmiss().
     */
    ATH_LOCK(sc);
    ath_power_restore_power_state(sc);
    ATH_UNLOCK(sc);

    /*
     * Attempt to force a beacon resync.
     */
    sc->sc_syncbeacon = 1;

    ATH_VAP(vap)->av_bmiss(vap);
}

/* XXX this needs a force wakeup! */
int
ath_hal_gethangstate(struct ath_hal *ah, uint32_t mask, uint32_t *hangs)
{
    uint32_t rsize;
    void *sp;

    if (!ath_hal_getdiagstate(ah, HAL_DIAG_CHECK_HANGS, &mask, sizeof(mask),
        &sp, &rsize))
        return 0;
    KASSERT(rsize == sizeof(uint32_t), ("resultsize %u", rsize));
    *hangs = *(uint32_t *)sp;
    return 1;
}

static void
ath_bmiss_proc(void *arg, int pending)
{
    struct ath_softc *sc = arg;
    uint32_t hangs;

    DPRINTF(sc, ATH_DEBUG_ANY, "%s: pending %u\n", __func__, pending);

    ATH_LOCK(sc);
    ath_power_set_power_state(sc, HAL_PM_AWAKE);
    ATH_UNLOCK(sc);

    ath_beacon_miss(sc);

    /*
     * Do a reset upon any beacon miss event.
     *
     * It may be a non-recognised RX clear hang which needs a reset
     * to clear.
     */
    if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) {
        ath_reset(sc, ATH_RESET_NOLOSS);
        device_printf(sc->sc_dev,
            "bb hang detected (0x%x), resetting\n", hangs);
    } else {
        ath_reset(sc, ATH_RESET_NOLOSS);
        ieee80211_beacon_miss(&sc->sc_ic);
    }

    /* Force a beacon resync, in case they've drifted */
    sc->sc_syncbeacon = 1;

    ATH_LOCK(sc);
    ath_power_restore_power_state(sc);
    ATH_UNLOCK(sc);
}
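/*
 * Sketch (not driver code) of the phantom-bmiss arithmetic used by
 * ath_bmiss_vap() above: the miss threshold (in beacon intervals)
 * times the beacon interval (in TU; 1 TU = 1024 microseconds) gives a
 * timeout that is compared against the TSF delta since the last
 * received frame.  Hypothetical helper, for illustration only.
 */
static int
example_is_phantom_bmiss(uint64_t tsf, uint64_t lastrx,
    u_int bmissthreshold, u_int bintval_tu)
{
    u_int timeout_us = bmissthreshold * bintval_tu * 1024;

    /* a frame arrived within the window: the bmiss was phantom */
    return ((tsf - lastrx) <= timeout_us);
}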
/*
 * Handle TKIP MIC setup to deal with hardware that doesn't do MIC
 * calcs together with WME.  If necessary disable the crypto
 * hardware and mark the 802.11 state so keys will be setup
 * with the MIC work done in software.
 */
static void
ath_settkipmic(struct ath_softc *sc)
{
    struct ieee80211com *ic = &sc->sc_ic;

    if ((ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP) && !sc->sc_wmetkipmic) {
        if (ic->ic_flags & IEEE80211_F_WME) {
            ath_hal_settkipmic(sc->sc_ah, AH_FALSE);
            ic->ic_cryptocaps &= ~IEEE80211_CRYPTO_TKIPMIC;
        } else {
            ath_hal_settkipmic(sc->sc_ah, AH_TRUE);
            ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC;
        }
    }
}

static int
ath_init(struct ath_softc *sc)
{
    struct ieee80211com *ic = &sc->sc_ic;
    struct ath_hal *ah = sc->sc_ah;
    HAL_STATUS status;

    ATH_LOCK_ASSERT(sc);

    /*
     * Force the sleep state awake.
     */
    ath_power_setselfgen(sc, HAL_PM_AWAKE);
    ath_power_set_power_state(sc, HAL_PM_AWAKE);
    ath_power_setpower(sc, HAL_PM_AWAKE);

    /*
     * Stop anything previously setup.  This is safe
     * whether this is the first time through or not.
     */
    ath_stop(sc);

    /*
     * The basic interface to setting the hardware in a good
     * state is ``reset''.  On return the hardware is known to
     * be powered up and with interrupts disabled.  This must
     * be followed by initialization of the appropriate bits
     * and then setup of the interrupt mask.
     */
    ath_settkipmic(sc);
    ath_update_chainmasks(sc, ic->ic_curchan);
    ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask,
        sc->sc_cur_rxchainmask);

    if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_FALSE,
        HAL_RESET_NORMAL, &status)) {
        device_printf(sc->sc_dev,
            "unable to reset hardware; hal status %u\n", status);
        return (ENODEV);
    }

    ATH_RX_LOCK(sc);
    sc->sc_rx_stopped = 1;
    sc->sc_rx_resetted = 1;
    ATH_RX_UNLOCK(sc);

    ath_chan_change(sc, ic->ic_curchan);

    /* Let DFS at it in case it's a DFS channel */
    ath_dfs_radar_enable(sc, ic->ic_curchan);

    /* Let spectral at in case spectral is enabled */
    ath_spectral_enable(sc, ic->ic_curchan);

    /*
     * Let bluetooth coexistence at in case it's needed for this channel
     */
    ath_btcoex_enable(sc, ic->ic_curchan);

    /*
     * If we're doing TDMA, enforce the TXOP limitation for chips that
     * support it.
     */
    if (sc->sc_hasenforcetxop && sc->sc_tdma)
        ath_hal_setenforcetxop(sc->sc_ah, 1);
    else
        ath_hal_setenforcetxop(sc->sc_ah, 0);

    /*
     * Likewise this is set during reset so update
     * state cached in the driver.
     */
    sc->sc_diversity = ath_hal_getdiversity(ah);
    sc->sc_lastlongcal = ticks;
    sc->sc_resetcal = 1;
    sc->sc_lastcalreset = 0;
    sc->sc_lastani = ticks;
    sc->sc_lastshortcal = ticks;
    sc->sc_doresetcal = AH_FALSE;
    /*
     * Beacon timers were cleared here; give ath_newstate()
     * a hint that the beacon timers should be poked when
     * things transition to the RUN state.
     */
    sc->sc_beacons = 0;

    /*
     * Setup the hardware after reset: the key cache
     * is filled as needed and the receive engine is
     * set going.  Frame transmit is handled entirely
     * in the frame output path; there's nothing to do
     * here except setup the interrupt mask.
     */
    if (ath_startrecv(sc) != 0) {
        device_printf(sc->sc_dev, "unable to start recv logic\n");
        ath_power_restore_power_state(sc);
        return (ENODEV);
    }

    /*
     * Enable interrupts.
*/ sc->sc_imask = HAL_INT_RX | HAL_INT_TX | HAL_INT_RXORN | HAL_INT_TXURN | HAL_INT_FATAL | HAL_INT_GLOBAL; /* * Enable RX EDMA bits. Note these overlap with * HAL_INT_RX and HAL_INT_RXDESC respectively. */ if (sc->sc_isedma) sc->sc_imask |= (HAL_INT_RXHP | HAL_INT_RXLP); /* * If we're an EDMA NIC, we don't care about RXEOL. * Writing a new descriptor in will simply restart * RX DMA. */ if (! sc->sc_isedma) sc->sc_imask |= HAL_INT_RXEOL; /* * Enable MCI interrupt for MCI devices. */ if (sc->sc_btcoex_mci) sc->sc_imask |= HAL_INT_MCI; /* * Enable MIB interrupts when there are hardware phy counters. * Note we only do this (at the moment) for station mode. */ if (sc->sc_needmib && ic->ic_opmode == IEEE80211_M_STA) sc->sc_imask |= HAL_INT_MIB; /* * XXX add capability for this. * * If we're in STA mode (and maybe IBSS?) then register for * TSFOOR interrupts. */ if (ic->ic_opmode == IEEE80211_M_STA) sc->sc_imask |= HAL_INT_TSFOOR; /* Enable global TX timeout and carrier sense timeout if available */ if (ath_hal_gtxto_supported(ah)) sc->sc_imask |= HAL_INT_GTT; DPRINTF(sc, ATH_DEBUG_RESET, "%s: imask=0x%x\n", __func__, sc->sc_imask); sc->sc_running = 1; callout_reset(&sc->sc_wd_ch, hz, ath_watchdog, sc); ath_hal_intrset(ah, sc->sc_imask); ath_power_restore_power_state(sc); return (0); } static void ath_stop(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; ATH_LOCK_ASSERT(sc); /* * Wake the hardware up before fiddling with it. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); if (sc->sc_running) { /* * Shutdown the hardware and driver: * reset 802.11 state machine * turn off timers * disable interrupts * turn off the radio * clear transmit machinery * clear receive machinery * drain and release tx queues * reclaim beacon resources * power down hardware * * Note that some of this work is not possible if the * hardware is gone (invalid). */ #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->stop(sc->sc_tx99); #endif callout_stop(&sc->sc_wd_ch); sc->sc_wd_timer = 0; sc->sc_running = 0; if (!sc->sc_invalid) { if (sc->sc_softled) { callout_stop(&sc->sc_ledtimer); ath_hal_gpioset(ah, sc->sc_ledpin, !sc->sc_ledon); sc->sc_blinking = 0; } ath_hal_intrset(ah, 0); } /* XXX we should stop RX regardless of whether it's valid */ if (!sc->sc_invalid) { ath_stoprecv(sc, 1); ath_hal_phydisable(ah); } else sc->sc_rxlink = NULL; ath_draintxq(sc, ATH_RESET_DEFAULT); ath_beacon_free(sc); /* XXX not needed */ } /* And now, restore the current power state */ ath_power_restore_power_state(sc); } /* * Wait until all pending TX/RX has completed. * * This waits until all existing transmit, receive and interrupts * have completed. It's assumed that the caller has first * grabbed the reset lock so it doesn't try to do overlapping * chip resets. */ #define MAX_TXRX_ITERATIONS 100 static void ath_txrx_stop_locked(struct ath_softc *sc) { int i = MAX_TXRX_ITERATIONS; ATH_UNLOCK_ASSERT(sc); ATH_PCU_LOCK_ASSERT(sc); /* * Sleep until all the pending operations have completed. * * The caller must ensure that reset has been incremented * or the pending operations may continue being queued. 
 */
    while (sc->sc_rxproc_cnt || sc->sc_txproc_cnt ||
        sc->sc_txstart_cnt || sc->sc_intr_cnt) {
        if (i <= 0)
            break;
        msleep(sc, &sc->sc_pcu_mtx, 0, "ath_txrx_stop",
            msecs_to_ticks(10));
        i--;
    }

    if (i <= 0)
        device_printf(sc->sc_dev,
            "%s: didn't finish after %d iterations\n",
            __func__, MAX_TXRX_ITERATIONS);
}
#undef MAX_TXRX_ITERATIONS

#if 0
static void
ath_txrx_stop(struct ath_softc *sc)
{
    ATH_UNLOCK_ASSERT(sc);
    ATH_PCU_UNLOCK_ASSERT(sc);

    ATH_PCU_LOCK(sc);
    ath_txrx_stop_locked(sc);
    ATH_PCU_UNLOCK(sc);
}
#endif

static void
ath_txrx_start(struct ath_softc *sc)
{

    taskqueue_unblock(sc->sc_tq);
}

/*
 * Grab the reset lock, and wait around until no one else
 * is trying to do anything with it.
 *
 * This is totally horrible but we can't hold this lock for
 * long enough to do TX/RX or we end up with net80211/ip stack
 * LORs and eventual deadlock.
 *
 * "dowait" signals whether to spin, waiting for the reset
 * lock count to reach 0. This should (for now) only be used
 * during the reset path, as the rest of the code may not
 * be locking-reentrant enough to behave correctly.
 *
 * Another, cleaner way should be found to serialise all of
 * these operations.
 */
#define MAX_RESET_ITERATIONS 25
static int
ath_reset_grablock(struct ath_softc *sc, int dowait)
{
    int w = 0;
    int i = MAX_RESET_ITERATIONS;

    ATH_PCU_LOCK_ASSERT(sc);
    do {
        if (sc->sc_inreset_cnt == 0) {
            w = 1;
            break;
        }
        if (dowait == 0) {
            w = 0;
            break;
        }
        ATH_PCU_UNLOCK(sc);
        /*
         * 1 tick is likely not enough time for long calibrations
         * to complete.  So we should wait quite a while.
         */
        pause("ath_reset_grablock", msecs_to_ticks(100));
        i--;
        ATH_PCU_LOCK(sc);
    } while (i > 0);

    /*
     * We always increment the refcounter, regardless
     * of whether we succeeded to get it in an exclusive
     * way.
     */
    sc->sc_inreset_cnt++;

    if (i <= 0)
        device_printf(sc->sc_dev,
            "%s: didn't finish after %d iterations\n",
            __func__, MAX_RESET_ITERATIONS);

    if (w == 0)
        device_printf(sc->sc_dev,
            "%s: warning, recursive reset path!\n",
            __func__);

    return w;
}
#undef MAX_RESET_ITERATIONS

/*
 * Reset the hardware w/o losing operational state.  This is
 * basically a more efficient way of doing ath_stop, ath_init,
 * followed by state transitions to the current 802.11
 * operational state.  Used to recover from various errors and
 * to reset or reload hardware state.
 */
int
ath_reset(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
{
    struct ieee80211com *ic = &sc->sc_ic;
    struct ath_hal *ah = sc->sc_ah;
    HAL_STATUS status;
    int i;

    DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);

    /* Ensure ATH_LOCK isn't held; ath_rx_proc can't be locked */
    ATH_PCU_UNLOCK_ASSERT(sc);
    ATH_UNLOCK_ASSERT(sc);

    /* Try to stop any further TX/RX from occurring */
    taskqueue_block(sc->sc_tq);

    /*
     * Wake the hardware up.
     */
    ATH_LOCK(sc);
    ath_power_set_power_state(sc, HAL_PM_AWAKE);
    ATH_UNLOCK(sc);

    ATH_PCU_LOCK(sc);

    /*
     * Grab the reset lock before TX/RX is stopped.
     *
     * This is needed to ensure that when the TX/RX actually does finish,
     * no further TX/RX/reset runs in parallel with this.
     */
    if (ath_reset_grablock(sc, 1) == 0) {
        device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n",
            __func__);
    }

    /* disable interrupts */
    ath_hal_intrset(ah, 0);

    /*
     * Now, ensure that any in progress TX/RX completes before we
     * continue.
     */
    ath_txrx_stop_locked(sc);

    ATH_PCU_UNLOCK(sc);

    /*
     * Regardless of whether we're doing a no-loss flush or
     * not, stop the PCU and handle what's in the RX queue.
     * That way frames aren't dropped which shouldn't be.
*/ ath_stoprecv(sc, (reset_type != ATH_RESET_NOLOSS)); ath_rx_flush(sc); /* * Should now wait for pending TX/RX to complete * and block future ones from occurring. This needs to be * done before the TX queue is drained. */ ath_draintxq(sc, reset_type); /* stop xmit side */ ath_settkipmic(sc); /* configure TKIP MIC handling */ /* NB: indicate channel change so we do a full reset */ ath_update_chainmasks(sc, ic->ic_curchan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_TRUE, HAL_RESET_NORMAL, &status)) device_printf(sc->sc_dev, "%s: unable to reset hardware; hal status %u\n", __func__, status); sc->sc_diversity = ath_hal_getdiversity(ah); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, ic->ic_curchan); /* Let spectral at in case spectral is enabled */ ath_spectral_enable(sc, ic->ic_curchan); /* * Let bluetooth coexistence at in case it's needed for this channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips that * support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); if (ath_startrecv(sc) != 0) /* restart recv */ device_printf(sc->sc_dev, "%s: unable to start recv logic\n", __func__); /* * We may be doing a reset in response to an ioctl * that changes the channel so update any state that * might change as a result. */ ath_chan_change(sc, ic->ic_curchan); if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } /* * Release the reset lock and re-enable interrupts here. * If an interrupt was being processed in ath_intr(), * it would disable interrupts at this point. So we have * to atomically enable interrupts and decrement the * reset counter - this way ath_intr() doesn't end up * disabling interrupts without a corresponding enable * in the rest or channel change path. * * Grab the TX reference in case we need to transmit. * That way a parallel transmit doesn't. */ ATH_PCU_LOCK(sc); sc->sc_inreset_cnt--; sc->sc_txstart_cnt++; /* XXX only do this if sc_inreset_cnt == 0? */ ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); /* * TX and RX can be started here. If it were started with * sc_inreset_cnt > 0, the TX and RX path would abort. * Thus if this is a nested call through the reset or * channel change code, TX completion will occur but * RX completion and ath_start / ath_tx_start will not * run. */ /* Restart TX/RX as needed */ ath_txrx_start(sc); /* XXX TODO: we need to hold the tx refcount here! */ /* Restart TX completion and pending TX */ if (reset_type == ATH_RESET_NOLOSS) { for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_txq_restart_dma(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); ATH_TX_LOCK(sc); ath_txq_sched(sc, &sc->sc_txq[i]); ATH_TX_UNLOCK(sc); } } } ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); sc->sc_txstart_cnt--; ATH_PCU_UNLOCK(sc); /* Handle any frames in the TX queue */ /* * XXX should this be done by the caller, rather than * ath_reset() ? 
 */
    ath_tx_kick(sc);        /* restart xmit */
    return 0;
}

static int
ath_reset_vap(struct ieee80211vap *vap, u_long cmd)
{
    struct ieee80211com *ic = vap->iv_ic;
    struct ath_softc *sc = ic->ic_softc;
    struct ath_hal *ah = sc->sc_ah;

    switch (cmd) {
    case IEEE80211_IOC_TXPOWER:
        /*
         * If per-packet TPC is enabled, then we have nothing
         * to do; otherwise we need to force the global limit.
         * All this can happen directly; no need to reset.
         */
        if (!ath_hal_gettpc(ah))
            ath_hal_settxpowlimit(ah, ic->ic_txpowlimit);
        return 0;
    }
    /* XXX? Full or NOLOSS? */
    return ath_reset(sc, ATH_RESET_FULL);
}

struct ath_buf *
_ath_getbuf_locked(struct ath_softc *sc, ath_buf_type_t btype)
{
    struct ath_buf *bf;

    ATH_TXBUF_LOCK_ASSERT(sc);

    if (btype == ATH_BUFTYPE_MGMT)
        bf = TAILQ_FIRST(&sc->sc_txbuf_mgmt);
    else
        bf = TAILQ_FIRST(&sc->sc_txbuf);

    if (bf == NULL) {
        sc->sc_stats.ast_tx_getnobuf++;
    } else {
        if (bf->bf_flags & ATH_BUF_BUSY) {
            sc->sc_stats.ast_tx_getbusybuf++;
            bf = NULL;
        }
    }

    if (bf != NULL && (bf->bf_flags & ATH_BUF_BUSY) == 0) {
        if (btype == ATH_BUFTYPE_MGMT)
            TAILQ_REMOVE(&sc->sc_txbuf_mgmt, bf, bf_list);
        else {
            TAILQ_REMOVE(&sc->sc_txbuf, bf, bf_list);
            sc->sc_txbuf_cnt--;

            /*
             * This shouldn't happen; however just to be
             * safe print a warning and fudge the txbuf
             * count.
             */
            if (sc->sc_txbuf_cnt < 0) {
                device_printf(sc->sc_dev,
                    "%s: sc_txbuf_cnt < 0?\n",
                    __func__);
                sc->sc_txbuf_cnt = 0;
            }
        }
    } else
        bf = NULL;

    if (bf == NULL) {
        /* XXX should check which list, mgmt or otherwise */
        DPRINTF(sc, ATH_DEBUG_XMIT, "%s: %s\n", __func__,
            TAILQ_FIRST(&sc->sc_txbuf) == NULL ?
            "out of xmit buffers" : "xmit buffer busy");
        return NULL;
    }

    /* XXX TODO: should do this at buffer list initialisation */
    /* XXX (then, ensure the buffer has the right flag set) */
    bf->bf_flags = 0;
    if (btype == ATH_BUFTYPE_MGMT)
        bf->bf_flags |= ATH_BUF_MGMT;
    else
        bf->bf_flags &= (~ATH_BUF_MGMT);

    /* Valid bf here; clear some basic fields */
    bf->bf_next = NULL;     /* XXX just to be sure */
    bf->bf_last = NULL;     /* XXX again, just to be sure */
    bf->bf_comp = NULL;     /* XXX again, just to be sure */
    bzero(&bf->bf_state, sizeof(bf->bf_state));

    /*
     * Track the descriptor ID only if doing EDMA
     */
    if (sc->sc_isedma) {
        bf->bf_descid = sc->sc_txbuf_descid;
        sc->sc_txbuf_descid++;
    }

    return bf;
}

/*
 * When retrying a software frame, buffers marked ATH_BUF_BUSY
 * can't be thrown back on the queue as they could still be
 * in use by the hardware.
 *
 * This duplicates the buffer, or returns NULL.
 *
 * The descriptor is also copied but the link pointers and
 * the DMA segments aren't copied; this frame should thus
 * be again passed through the descriptor setup/chain routines
 * so the link is correct.
 *
 * The caller must free the buffer using ath_freebuf().
 */
struct ath_buf *
ath_buf_clone(struct ath_softc *sc, struct ath_buf *bf)
{
    struct ath_buf *tbf;

    tbf = ath_getbuf(sc,
        (bf->bf_flags & ATH_BUF_MGMT) ?
         ATH_BUFTYPE_MGMT : ATH_BUFTYPE_NORMAL);
    if (tbf == NULL)
        return NULL;    /* XXX failure? Why? */

    /* Copy basics */
    tbf->bf_next = NULL;
    tbf->bf_nseg = bf->bf_nseg;
    tbf->bf_flags = bf->bf_flags & ATH_BUF_FLAGS_CLONE;
    tbf->bf_status = bf->bf_status;
    tbf->bf_m = bf->bf_m;
    tbf->bf_node = bf->bf_node;
    KASSERT((bf->bf_node != NULL), ("%s: bf_node=NULL!", __func__));
    /* will be setup by the chain/setup function */
    tbf->bf_lastds = NULL;
    /* for now, last == self */
    tbf->bf_last = tbf;
    tbf->bf_comp = bf->bf_comp;

    /* NOTE: DMA segments will be setup by the setup/chain functions */

    /* The caller has to re-init the descriptor + links */

    /*
     * Free the DMA mapping here, before we NULL the mbuf.
* We must only call bus_dmamap_unload() once per mbuf chain * or behaviour is undefined. */ if (bf->bf_m != NULL) { /* * XXX is this POSTWRITE call required? */ bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); } bf->bf_m = NULL; bf->bf_node = NULL; /* Copy state */ memcpy(&tbf->bf_state, &bf->bf_state, sizeof(bf->bf_state)); return tbf; } struct ath_buf * ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype) { struct ath_buf *bf; ATH_TXBUF_LOCK(sc); bf = _ath_getbuf_locked(sc, btype); /* * If a mgmt buffer was requested but we're out of those, * try requesting a normal one. */ if (bf == NULL && btype == ATH_BUFTYPE_MGMT) bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL); ATH_TXBUF_UNLOCK(sc); if (bf == NULL) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: stop queue\n", __func__); sc->sc_stats.ast_tx_qstop++; } return bf; } /* * Transmit a single frame. * * net80211 will free the node reference if the transmit * fails, so don't free the node reference here. */ static int ath_transmit(struct ieee80211com *ic, struct mbuf *m) { struct ath_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct mbuf *next; struct ath_buf *bf; ath_bufhead frags; int retval = 0; /* * Tell the reset path that we're currently transmitting. */ ATH_PCU_LOCK(sc); if (sc->sc_inreset_cnt > 0) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: sc_inreset_cnt > 0; bailing\n", __func__); ATH_PCU_UNLOCK(sc); sc->sc_stats.ast_tx_qstop++; ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: OACTIVE, finish"); return (ENOBUFS); /* XXX should be EINVAL or? */ } sc->sc_txstart_cnt++; ATH_PCU_UNLOCK(sc); /* Wake the hardware up already */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: start"); /* * Grab the TX lock - it's ok to do this here; we haven't * yet started transmitting. */ ATH_TX_LOCK(sc); /* * Node reference, if there's one. */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; /* * Enforce how deep a node queue can get. * * XXX it would be nicer if we kept an mbuf queue per * node and only whacked them into ath_bufs when we * are ready to schedule some traffic from them. * .. that may come later. * * XXX we should also track the per-node hardware queue * depth so it is easy to limit the _SUM_ of the swq and * hwq frames. Since we only schedule two HWQ frames * at a time, this should be OK for now. */ if ((!(m->m_flags & M_EAPOL)) && (ATH_NODE(ni)->an_swq_depth > sc->sc_txq_node_maxdepth)) { sc->sc_stats.ast_tx_nodeq_overflow++; retval = ENOBUFS; goto finish; } /* * Check how many TX buffers are available. * * If this is for non-EAPOL traffic, just leave some * space free in order for buffer cloning and raw * frame transmission to occur. * * If it's for EAPOL traffic, ignore this for now. * Management traffic will be sent via the raw transmit * method which bypasses this check. * * This is needed to ensure that EAPOL frames during * (re) keying have a chance to go out. * * See kern/138379 for more information. */ if ((!(m->m_flags & M_EAPOL)) && (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree)) { sc->sc_stats.ast_tx_nobuf++; retval = ENOBUFS; goto finish; } /* * Grab a TX buffer and associated resources. * * If it's an EAPOL frame, allocate a MGMT ath_buf. * That way even with temporary buffer exhaustion due to * the data path doesn't leave us without the ability * to transmit management frames. * * Otherwise allocate a normal buffer. 
 */
    if (m->m_flags & M_EAPOL)
        bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT);
    else
        bf = ath_getbuf(sc, ATH_BUFTYPE_NORMAL);

    if (bf == NULL) {
        /*
         * If we failed to allocate a buffer, fail.
         *
         * We shouldn't fail normally, due to the check
         * above.
         */
        sc->sc_stats.ast_tx_nobuf++;
        retval = ENOBUFS;
        goto finish;
    }

    /*
     * At this point we have a buffer; so we need to free it
     * if we hit any error conditions.
     */

    /*
     * Check for fragmentation.  If this frame
     * has been broken up verify we have enough
     * buffers to send all the fragments so all
     * go out or none...
     */
    TAILQ_INIT(&frags);
    if ((m->m_flags & M_FRAG) &&
        !ath_txfrag_setup(sc, &frags, m, ni)) {
        DPRINTF(sc, ATH_DEBUG_XMIT,
            "%s: out of txfrag buffers\n", __func__);
        sc->sc_stats.ast_tx_nofrag++;
        if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1);
        /*
         * XXXGL: is mbuf valid after ath_txfrag_setup? If yes,
         * we shouldn't free it but return back.
         */
        ieee80211_free_mbuf(m);
        m = NULL;
        goto bad;
    }

    /*
     * At this point if we have any TX fragments, then we will
     * have bumped the node reference once for each of those.
     */

    /*
     * XXX Is there anything actually _enforcing_ that the
     * fragments are being transmitted in one hit, rather than
     * being interleaved with other transmissions on that
     * hardware queue?
     *
     * The ATH TX output lock is the only thing serialising this
     * right now.
     */

    /*
     * Calculate the "next fragment" length field in ath_buf
     * in order to let the transmit path know enough about
     * what to next write to the hardware.
     */
    if (m->m_flags & M_FRAG) {
        struct ath_buf *fbf = bf;
        struct ath_buf *n_fbf = NULL;
        struct mbuf *fm = m->m_nextpkt;

        /*
         * We need to walk the list of fragments and set
         * the next size to the following buffer.
         * However, the first buffer isn't in the frag
         * list, so we have to do some gymnastics here.
         */
        TAILQ_FOREACH(n_fbf, &frags, bf_list) {
            fbf->bf_nextfraglen = fm->m_pkthdr.len;
            fbf = n_fbf;
            fm = fm->m_nextpkt;
        }
    }

nextfrag:
    /*
     * Pass the frame to the h/w for transmission.
     * Fragmented frames have each frag chained together
     * with m_nextpkt.  We know there are sufficient ath_buf's
     * to send all the frags because of work done by
     * ath_txfrag_setup.  We leave m_nextpkt set while
     * calling ath_tx_start so it can use it to extend the
     * tx duration to cover the subsequent frag and
     * so it can reclaim all the mbufs in case of an error;
     * ath_tx_start clears m_nextpkt once it commits to
     * handing the frame to the hardware.
     *
     * Note: if this fails, then the mbufs are freed but
     * not the node reference.
     *
     * So, we now have to free the node reference ourselves here
     * and return OK up to the stack.
     */
    next = m->m_nextpkt;
    if (ath_tx_start(sc, ni, bf, m)) {
bad:
        if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1);
reclaim:
        bf->bf_m = NULL;
        bf->bf_node = NULL;
        ATH_TXBUF_LOCK(sc);
        ath_returnbuf_head(sc, bf);
        /*
         * Free the rest of the node references and
         * buffers for the fragment list.
         */
        ath_txfrag_cleanup(sc, &frags, ni);
        ATH_TXBUF_UNLOCK(sc);

        /*
         * XXX: And free the node/return OK; ath_tx_start() may have
         * modified the buffer.  We currently have no way to
         * signify that the mbuf was freed but there was an error.
         */
        ieee80211_free_node(ni);
        retval = 0;
        goto finish;
    }

    /*
     * Check here if the node is in power save state.
     */
    ath_tx_update_tim(sc, ni, 1);

    if (next != NULL) {
        /*
         * Beware of state changing between frags.
         * XXX check sta power-save state?
*/ if (ni->ni_vap->iv_state != IEEE80211_S_RUN) { DPRINTF(sc, ATH_DEBUG_XMIT, "%s: flush fragmented packet, state %s\n", __func__, ieee80211_state_name[ni->ni_vap->iv_state]); /* XXX dmamap */ ieee80211_free_mbuf(next); goto reclaim; } m = next; bf = TAILQ_FIRST(&frags); KASSERT(bf != NULL, ("no buf for txfrag")); TAILQ_REMOVE(&frags, bf, bf_list); goto nextfrag; } /* * Bump watchdog timer. */ sc->sc_wd_timer = 5; finish: ATH_TX_UNLOCK(sc); /* * Finished transmitting! */ ATH_PCU_LOCK(sc); sc->sc_txstart_cnt--; ATH_PCU_UNLOCK(sc); /* Sleep the hardware if required */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: finished"); return (retval); } static int ath_media_change(struct ifnet *ifp) { int error = ieee80211_media_change(ifp); /* NB: only the fixed rate can change and that doesn't need a reset */ return (error == ENETRESET ? 0 : error); } /* * Block/unblock tx+rx processing while a key change is done. * We assume the caller serializes key management operations * so we only need to worry about synchronization with other * uses that originate in the driver. */ static void ath_key_update_begin(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__); taskqueue_block(sc->sc_tq); } static void ath_key_update_end(struct ieee80211vap *vap) { struct ath_softc *sc = vap->iv_ic->ic_softc; DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__); taskqueue_unblock(sc->sc_tq); } static void ath_update_promisc(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; u_int32_t rfilt; /* configure rx filter */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); rfilt = ath_calcrxfilter(sc); ath_hal_setrxfilter(sc->sc_ah, rfilt); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x\n", __func__, rfilt); } /* * Driver-internal mcast update call. * * Assumes the hardware is already awake. */ static void ath_update_mcast_hw(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; u_int32_t mfilt[2]; /* calculate and install multicast filter */ if (ic->ic_allmulti == 0) { struct ieee80211vap *vap; struct ifnet *ifp; struct ifmultiaddr *ifma; /* * Merge multicast addresses to form the hardware filter. */ mfilt[0] = mfilt[1] = 0; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { caddr_t dl; uint32_t val; uint8_t pos; /* calculate XOR of eight 6bit values */ dl = LLADDR((struct sockaddr_dl *) ifma->ifma_addr); val = le32dec(dl + 0); pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; val = le32dec(dl + 3); pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; pos &= 0x3f; mfilt[pos / 32] |= (1 << (pos % 32)); } if_maddr_runlock(ifp); } } else mfilt[0] = mfilt[1] = ~0; ath_hal_setmcastfilter(sc->sc_ah, mfilt[0], mfilt[1]); DPRINTF(sc, ATH_DEBUG_MODE, "%s: MC filter %08x:%08x\n", __func__, mfilt[0], mfilt[1]); } /* * Called from the net80211 layer - force the hardware * awake before operating. 
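 */

/*
 * Self-contained sketch (not driver code) of the multicast filter hash
 * computed in ath_update_mcast_hw() above: the 48-bit address is folded
 * into a 6-bit bucket by XORing 6-bit slices of two little-endian words,
 * and the bucket indexes a 64-bit filter kept as two 32-bit registers.
 * (Bits above position 23 of each word never reach the low 6 bits of the
 * XOR, so only the address octets shown below matter.)
 */
static void
example_mcast_hash(const uint8_t addr[6], uint32_t mfilt[2])
{
    uint32_t val;
    uint8_t pos;

    val = addr[0] | (addr[1] << 8) | ((uint32_t)addr[2] << 16);
    pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
    val = addr[3] | (addr[4] << 8) | ((uint32_t)addr[5] << 16);
    pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
    pos &= 0x3f;
    mfilt[pos / 32] |= 1U << (pos % 32);
}

/*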
*/ static void ath_update_mcast(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_update_mcast_hw(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } void ath_mode_init(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; /* configure rx filter */ rfilt = ath_calcrxfilter(sc); ath_hal_setrxfilter(ah, rfilt); /* configure operational mode */ ath_hal_setopmode(ah); /* handle any link-level address change */ ath_hal_setmac(ah, ic->ic_macaddr); /* calculate and install multicast filter */ ath_update_mcast_hw(sc); } /* * Set the slot time based on the current setting. */ void ath_setslottime(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; u_int usec; if (IEEE80211_IS_CHAN_HALF(ic->ic_curchan)) usec = 13; else if (IEEE80211_IS_CHAN_QUARTER(ic->ic_curchan)) usec = 21; else if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) { /* honor short/long slot time only in 11g */ /* XXX shouldn't honor on pure g or turbo g channel */ if (ic->ic_flags & IEEE80211_F_SHSLOT) usec = HAL_SLOT_TIME_9; else usec = HAL_SLOT_TIME_20; } else usec = HAL_SLOT_TIME_9; DPRINTF(sc, ATH_DEBUG_RESET, "%s: chan %u MHz flags 0x%x %s slot, %u usec\n", __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags, ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", usec); /* Wake up the hardware first before updating the slot time */ ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_hal_setslottime(ah, usec); ath_power_restore_power_state(sc); sc->sc_updateslot = OK; ATH_UNLOCK(sc); } /* * Callback from the 802.11 layer to update the * slot time based on the current setting. */ static void ath_updateslot(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; /* * When not coordinating the BSS, change the hardware * immediately. For other operation we defer the change * until beacon updates have propagated to the stations. * * XXX sc_updateslot isn't changed behind a lock? */ if (ic->ic_opmode == IEEE80211_M_HOSTAP || ic->ic_opmode == IEEE80211_M_MBSS) sc->sc_updateslot = UPDATE; else ath_setslottime(sc); } /* * Append the contents of src to dst; both queues * are assumed to be locked. */ void ath_txqmove(struct ath_txq *dst, struct ath_txq *src) { ATH_TXQ_LOCK_ASSERT(src); ATH_TXQ_LOCK_ASSERT(dst); TAILQ_CONCAT(&dst->axq_q, &src->axq_q, bf_list); dst->axq_link = src->axq_link; src->axq_link = NULL; dst->axq_depth += src->axq_depth; dst->axq_aggr_depth += src->axq_aggr_depth; src->axq_depth = 0; src->axq_aggr_depth = 0; } /* * Reset the hardware, with no loss. * * This can't be used for a general case reset. */ static void ath_reset_proc(void *arg, int pending) { struct ath_softc *sc = arg; #if 0 device_printf(sc->sc_dev, "%s: resetting\n", __func__); #endif ath_reset(sc, ATH_RESET_NOLOSS); } /* * Reset the hardware after detecting beacons have stopped. 
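 */

/*
 * Sketch (not driver code) of the slot-time selection logic in
 * ath_setslottime() above: half- and quarter-rate channels use
 * proportionally stretched slots, 11g honors the short/long slot
 * flag, and everything else gets the 9us slot.  Values are in
 * microseconds; the helper name is hypothetical.
 */
static u_int
example_slottime_us(int is_half, int is_quarter, int is_11g, int shslot)
{
    if (is_half)
        return (13);
    if (is_quarter)
        return (21);
    if (is_11g)
        return (shslot ? 9 : 20);   /* HAL_SLOT_TIME_9 : HAL_SLOT_TIME_20 */
    return (9);
}

/*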
*/ static void ath_bstuck_proc(void *arg, int pending) { struct ath_softc *sc = arg; uint32_t hangs = 0; if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) device_printf(sc->sc_dev, "bb hang detected (0x%x)\n", hangs); #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_STUCK_BEACON)) if_ath_alq_post(&sc->sc_alq, ATH_ALQ_STUCK_BEACON, 0, NULL); #endif device_printf(sc->sc_dev, "stuck beacon; resetting (bmiss count %u)\n", sc->sc_bmisscount); sc->sc_stats.ast_bstuck++; /* * This assumes that there's no simultaneous channel mode change * occurring. */ ath_reset(sc, ATH_RESET_NOLOSS); } static int ath_desc_alloc(struct ath_softc *sc) { int error; error = ath_descdma_setup(sc, &sc->sc_txdma, &sc->sc_txbuf, "tx", sc->sc_tx_desclen, ath_txbuf, ATH_MAX_SCATTER); if (error != 0) { return error; } sc->sc_txbuf_cnt = ath_txbuf; error = ath_descdma_setup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt, "tx_mgmt", sc->sc_tx_desclen, ath_txbuf_mgmt, ATH_TXDESC); if (error != 0) { ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); return error; } /* * XXX mark txbuf_mgmt frames with ATH_BUF_MGMT, so the * flag doesn't have to be set in ath_getbuf_locked(). */ error = ath_descdma_setup(sc, &sc->sc_bdma, &sc->sc_bbuf, "beacon", sc->sc_tx_desclen, ATH_BCBUF, 1); if (error != 0) { ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt); return error; } return 0; } static void ath_desc_free(struct ath_softc *sc) { if (sc->sc_bdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_bdma, &sc->sc_bbuf); if (sc->sc_txdma.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf); if (sc->sc_txdma_mgmt.dd_desc_len != 0) ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt); } static struct ieee80211_node * ath_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; const size_t space = sizeof(struct ath_node) + sc->sc_rc->arc_space; struct ath_node *an; an = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (an == NULL) { /* XXX stat+msg */ return NULL; } ath_rate_node_init(sc, an); /* Setup the mutex - there's no associd yet so set the name to NULL */ snprintf(an->an_name, sizeof(an->an_name), "%s: node %p", device_get_nameunit(sc->sc_dev), an); mtx_init(&an->an_mtx, an->an_name, NULL, MTX_DEF); /* XXX setup ath_tid */ ath_tx_tid_init(sc, an); DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, mac, ":", an); return &an->an_node; } static void ath_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, ni->ni_macaddr, ":", ATH_NODE(ni)); /* Cleanup ath_tid, free unused bufs, unlink bufs in TXQ */ ath_tx_node_flush(sc, ATH_NODE(ni)); ath_rate_node_cleanup(sc, ATH_NODE(ni)); sc->sc_node_cleanup(ni); } static void ath_node_free(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, ni->ni_macaddr, ":", ATH_NODE(ni)); mtx_destroy(&ATH_NODE(ni)->an_mtx); sc->sc_node_free(ni); } static void ath_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; *rssi = ic->ic_node_getrssi(ni); if (ni->ni_chan != IEEE80211_CHAN_ANYC) *noise = ath_hal_getchannoise(ah, 
            ni->ni_chan);
    else
        *noise = -95;       /* nominally correct */
}

/*
 * Set the default antenna.
 */
void
ath_setdefantenna(struct ath_softc *sc, u_int antenna)
{
    struct ath_hal *ah = sc->sc_ah;

    /* XXX block beacon interrupts */
    ath_hal_setdefantenna(ah, antenna);
    if (sc->sc_defant != antenna)
        sc->sc_stats.ast_ant_defswitch++;
    sc->sc_defant = antenna;
    sc->sc_rxotherant = 0;
}

static void
ath_txq_init(struct ath_softc *sc, struct ath_txq *txq, int qnum)
{
    txq->axq_qnum = qnum;
    txq->axq_ac = 0;
    txq->axq_depth = 0;
    txq->axq_aggr_depth = 0;
    txq->axq_intrcnt = 0;
    txq->axq_link = NULL;
    txq->axq_softc = sc;
    TAILQ_INIT(&txq->axq_q);
    TAILQ_INIT(&txq->axq_tidq);
    TAILQ_INIT(&txq->fifo.axq_q);
    ATH_TXQ_LOCK_INIT(sc, txq);
}

/*
 * Setup a h/w transmit queue.
 */
static struct ath_txq *
ath_txq_setup(struct ath_softc *sc, int qtype, int subtype)
{
    struct ath_hal *ah = sc->sc_ah;
    HAL_TXQ_INFO qi;
    int qnum;

    memset(&qi, 0, sizeof(qi));
    qi.tqi_subtype = subtype;
    qi.tqi_aifs = HAL_TXQ_USEDEFAULT;
    qi.tqi_cwmin = HAL_TXQ_USEDEFAULT;
    qi.tqi_cwmax = HAL_TXQ_USEDEFAULT;
    /*
     * Enable interrupts only for EOL and DESC conditions.
     * We mark tx descriptors to receive a DESC interrupt
     * when a tx queue gets deep; otherwise waiting for the
     * EOL to reap descriptors.  Note that this is done to
     * reduce interrupt load and this only defers reaping
     * descriptors, never transmitting frames.  Aside from
     * reducing interrupts this also permits more concurrency.
     * The only potential downside is if the tx queue backs
     * up in which case the top half of the kernel may back up
     * due to a lack of tx descriptors.
     */
    if (sc->sc_isedma)
        qi.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE |
            HAL_TXQ_TXOKINT_ENABLE;
    else
        qi.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE |
            HAL_TXQ_TXDESCINT_ENABLE;

    qnum = ath_hal_setuptxqueue(ah, qtype, &qi);
    if (qnum == -1) {
        /*
         * NB: don't print a message, this happens
         * normally on parts with too few tx queues
         */
        return NULL;
    }
    if (qnum >= nitems(sc->sc_txq)) {
        device_printf(sc->sc_dev,
            "hal qnum %u out of range, max %zu!\n",
            qnum, nitems(sc->sc_txq));
        ath_hal_releasetxqueue(ah, qnum);
        return NULL;
    }
    if (!ATH_TXQ_SETUP(sc, qnum)) {
        ath_txq_init(sc, &sc->sc_txq[qnum], qnum);
        sc->sc_txqsetup |= 1<<qnum;
    }
    return &sc->sc_txq[qnum];
}

/*
 * Setup a hardware data transmit queue for the specified
 * access control.  The hal may not support all requested
 * queues in which case it will return a reference to a
 * previously setup queue.  We record the mapping from ac's
 * to h/w queues for use by ath_tx_start and also track
 * the set of h/w queues being used to optimize work in the
 * transmit interrupt handler and related routines.
 */
static int
ath_tx_setup(struct ath_softc *sc, int ac, int haltype)
{
    struct ath_txq *txq;

    if (ac >= nitems(sc->sc_ac2q)) {
        device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n",
            ac, nitems(sc->sc_ac2q));
        return 0;
    }
    txq = ath_txq_setup(sc, HAL_TX_QUEUE_DATA, haltype);
    if (txq != NULL) {
        txq->axq_ac = ac;
        sc->sc_ac2q[ac] = txq;
        return 1;
    } else
        return 0;
}
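/*
 * Sketch of the contention-window conversion used by ath_txq_update()
 * below: WME carries log2 values, while the hardware wants the actual
 * window size, i.e. cw = 2^logcw - 1 (the ATH_EXPONENT_TO_VALUE()
 * macro).  Hypothetical helper, for illustration only.
 */
static u_int
example_exponent_to_cw(u_int logcw)
{
    return ((1U << logcw) - 1);     /* e.g. log2 value 4 -> cw of 15 */
}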
The QCU is set up to not do post-xmit * back off, to lock out all lower-priority QCU's, and to fire * off the DMA beacon alert timer, which is set up based * on the slot configuration. */ qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE | HAL_TXQ_TXURNINT_ENABLE | HAL_TXQ_TXEOLINT_ENABLE | HAL_TXQ_DBA_GATED | HAL_TXQ_BACKOFF_DISABLE | HAL_TXQ_ARB_LOCKOUT_GLOBAL ; qi.tqi_aifs = 0; /* XXX +dbaprep? */ qi.tqi_readyTime = sc->sc_tdmaslotlen; qi.tqi_burstTime = qi.tqi_readyTime; } else { #endif /* * XXX shouldn't this just use the default flags * used in the previous queue setup? */ qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE | HAL_TXQ_TXERRINT_ENABLE | HAL_TXQ_TXDESCINT_ENABLE | HAL_TXQ_TXURNINT_ENABLE | HAL_TXQ_TXEOLINT_ENABLE ; qi.tqi_aifs = wmep->wmep_aifsn; qi.tqi_cwmin = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); qi.tqi_cwmax = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); qi.tqi_readyTime = 0; qi.tqi_burstTime = IEEE80211_TXOP_TO_US(wmep->wmep_txopLimit); #ifdef IEEE80211_SUPPORT_TDMA } #endif DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%u qflags 0x%x aifs %u cwmin %u cwmax %u burstTime %u\n", __func__, txq->axq_qnum, qi.tqi_qflags, qi.tqi_aifs, qi.tqi_cwmin, qi.tqi_cwmax, qi.tqi_burstTime); if (!ath_hal_settxqueueprops(ah, txq->axq_qnum, &qi)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } else { ath_hal_resettxqueue(ah, txq->axq_qnum); /* push to h/w */ return 1; } #undef ATH_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ int ath_wme_update(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; return !ath_txq_update(sc, WME_AC_BE) || !ath_txq_update(sc, WME_AC_BK) || !ath_txq_update(sc, WME_AC_VI) || !ath_txq_update(sc, WME_AC_VO) ? EIO : 0; } /* * Reclaim resources for a setup queue. */ static void ath_tx_cleanupq(struct ath_softc *sc, struct ath_txq *txq) { ath_hal_releasetxqueue(sc->sc_ah, txq->axq_qnum); sc->sc_txqsetup &= ~(1<<txq->axq_qnum); ATH_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. */ static void ath_tx_cleanup(struct ath_softc *sc) { int i; ATH_TXBUF_LOCK_DESTROY(sc); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) if (ATH_TXQ_SETUP(sc, i)) ath_tx_cleanupq(sc, &sc->sc_txq[i]); } /* * Return h/w rate index for an IEEE rate (w/o basic rate bit) * using the current rates in sc_rixmap. */ int ath_tx_findrix(const struct ath_softc *sc, uint8_t rate) { int rix = sc->sc_rixmap[rate]; /* NB: return lowest rix for invalid rate */ return (rix == 0xff ? 0 : rix); } static void ath_tx_update_stats(struct ath_softc *sc, struct ath_tx_status *ts, struct ath_buf *bf) { struct ieee80211_node *ni = bf->bf_node; struct ieee80211com *ic = &sc->sc_ic; int sr, lr, pri; if (ts->ts_status == 0) { u_int8_t txant = ts->ts_antenna; sc->sc_stats.ast_ant_tx[txant]++; sc->sc_ant_tx[txant]++; if (ts->ts_finaltsi != 0) sc->sc_stats.ast_tx_altrate++; pri = M_WME_GETAC(bf->bf_m); if (pri >= WME_AC_VO) ic->ic_wme.wme_hipri_traffic++; if ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ni->ni_inact = ni->ni_inact_reload; } else { if (ts->ts_status & HAL_TXERR_XRETRY) sc->sc_stats.ast_tx_xretries++; if (ts->ts_status & HAL_TXERR_FIFO) sc->sc_stats.ast_tx_fifoerr++; if (ts->ts_status & HAL_TXERR_FILT) sc->sc_stats.ast_tx_filtered++; if (ts->ts_status & HAL_TXERR_XTXOP) sc->sc_stats.ast_tx_xtxop++; if (ts->ts_status & HAL_TXERR_TIMER_EXPIRED) sc->sc_stats.ast_tx_timerexpired++; if (bf->bf_m->m_flags & M_FF) sc->sc_stats.ast_ff_txerr++; } /* XXX when is this valid?
*/ if (ts->ts_flags & HAL_TX_DESC_CFG_ERR) sc->sc_stats.ast_tx_desccfgerr++; /* * This can be valid for successful frame transmission! * If there's a TX FIFO underrun during aggregate transmission, * the MAC will pad the rest of the aggregate with delimiters. * If a BA is returned, the frame is marked as "OK" and it's up * to the TX completion code to notice which frames weren't * successfully transmitted. */ if (ts->ts_flags & HAL_TX_DATA_UNDERRUN) sc->sc_stats.ast_tx_data_underrun++; if (ts->ts_flags & HAL_TX_DELIM_UNDERRUN) sc->sc_stats.ast_tx_delim_underrun++; sr = ts->ts_shortretry; lr = ts->ts_longretry; sc->sc_stats.ast_tx_shortretry += sr; sc->sc_stats.ast_tx_longretry += lr; } /* * The default completion. If fail is 1, this means * "please don't retry the frame, and just return -1 status * to the net80211 stack". */ void ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail) { struct ath_tx_status *ts = &bf->bf_status.ds_txstat; int st; if (fail == 1) st = -1; else st = ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ? ts->ts_status : HAL_TXERR_XRETRY; #if 0 if (bf->bf_state.bfs_dobaw) device_printf(sc->sc_dev, "%s: bf %p: seqno %d: dobaw should've been cleared!\n", __func__, bf, SEQNO(bf->bf_state.bfs_seqno)); #endif if (bf->bf_next != NULL) device_printf(sc->sc_dev, "%s: bf %p: seqno %d: bf_next not NULL!\n", __func__, bf, SEQNO(bf->bf_state.bfs_seqno)); /* * Check if the node software queue is empty; if so * then clear the TIM. * * This needs to be done before the buffer is freed as * otherwise the node reference will have been released * and the node may not actually exist any longer. * * XXX I don't like this belonging here, but it's cleaner * to do it here right now than all the other places * where ath_tx_default_comp() is called. * * XXX TODO: during drain, ensure that the callback is * being called so we get a chance to update the TIM. */ if (bf->bf_node) { ATH_TX_LOCK(sc); ath_tx_update_tim(sc, bf->bf_node, 0); ATH_TX_UNLOCK(sc); } /* * Do any tx complete callback. Note this must * be done before releasing the node reference. * This will free the mbuf, release the net80211 * node and recycle the ath_buf. */ ath_tx_freebuf(sc, bf, st); } /* * Update rate control with the given completion status. */ void ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen, int nframes, int nbad) { struct ath_node *an; /* Only for unicast frames */ if (ni == NULL) return; an = ATH_NODE(ni); ATH_NODE_UNLOCK_ASSERT(an); if ((ts->ts_status & HAL_TXERR_FILT) == 0) { ATH_NODE_LOCK(an); ath_rate_tx_complete(sc, an, rc, ts, frmlen, nframes, nbad); ATH_NODE_UNLOCK(an); } } /* * Process the completion of the given buffer. * * This calls the rate control update and then the buffer completion. * This will either free the buffer or requeue it. In any case, the * bf pointer should be treated as invalid after this function is called. */ void ath_tx_process_buf_completion(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf) { struct ieee80211_node *ni = bf->bf_node; ATH_TX_UNLOCK_ASSERT(sc); ATH_TXQ_UNLOCK_ASSERT(txq); /* If unicast frame, update general statistics */ if (ni != NULL) { /* update statistics */ ath_tx_update_stats(sc, ts, bf); } /* * Call the completion handler. * The completion handler is responsible for * calling the rate control code. * * Frames with no completion handler get the * rate control code called here.
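* (For reference: a completion handler has the same shape as ath_tx_default_comp() above and is invoked as bf->bf_comp(sc, bf, fail); this normal path passes fail=0, while the drain path in ath_tx_draintxq() passes fail=1 to mean "don't retry, just unwind".)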
*/ if (bf->bf_comp == NULL) { if ((ts->ts_status & HAL_TXERR_FILT) == 0 && (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) { /* * XXX assume this isn't an aggregate * frame. */ ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc, ts, bf->bf_state.bfs_pktlen, 1, (ts->ts_status == 0 ? 0 : 1)); } ath_tx_default_comp(sc, bf, 0); } else bf->bf_comp(sc, bf, 0); } /* * Process completed xmit descriptors from the specified queue. * Kick the packet scheduler if needed. This can occur from this * particular task. */ static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; struct ath_desc *ds; struct ath_tx_status *ts; struct ieee80211_node *ni; #ifdef IEEE80211_SUPPORT_SUPERG struct ieee80211com *ic = &sc->sc_ic; #endif /* IEEE80211_SUPPORT_SUPERG */ int nacked; HAL_STATUS status; DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: tx queue %u head %p link %p\n", __func__, txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum), txq->axq_link); ATH_KTR(sc, ATH_KTR_TXCOMP, 4, "ath_tx_processq: txq=%u head %p link %p depth %p", txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum), txq->axq_link, txq->axq_depth); nacked = 0; for (;;) { ATH_TXQ_LOCK(txq); txq->axq_intrcnt = 0; /* reset periodic desc intr count */ bf = TAILQ_FIRST(&txq->axq_q); if (bf == NULL) { ATH_TXQ_UNLOCK(txq); break; } ds = bf->bf_lastds; /* XXX must be setup correctly! */ ts = &bf->bf_status.ds_txstat; status = ath_hal_txprocdesc(ah, ds, ts); #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_XMIT_DESC) ath_printtxbuf(sc, bf, txq->axq_qnum, 0, status == HAL_OK); else if ((sc->sc_debug & ATH_DEBUG_RESET) && (dosched == 0)) ath_printtxbuf(sc, bf, txq->axq_qnum, 0, status == HAL_OK); #endif #ifdef ATH_DEBUG_ALQ if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) { if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS, sc->sc_tx_statuslen, (char *) ds); } #endif if (status == HAL_EINPROGRESS) { ATH_KTR(sc, ATH_KTR_TXCOMP, 3, "ath_tx_processq: txq=%u, bf=%p ds=%p, HAL_EINPROGRESS", txq->axq_qnum, bf, ds); ATH_TXQ_UNLOCK(txq); break; } ATH_TXQ_REMOVE(txq, bf, bf_list); /* * Sanity check. */ if (txq->axq_qnum != bf->bf_state.bfs_tx_queue) { device_printf(sc->sc_dev, "%s: TXQ=%d: bf=%p, bfs_tx_queue=%d\n", __func__, txq->axq_qnum, bf, bf->bf_state.bfs_tx_queue); } if (txq->axq_qnum != bf->bf_last->bf_state.bfs_tx_queue) { device_printf(sc->sc_dev, "%s: TXQ=%d: bf_last=%p, bfs_tx_queue=%d\n", __func__, txq->axq_qnum, bf->bf_last, bf->bf_last->bf_state.bfs_tx_queue); } #if 0 if (txq->axq_depth > 0) { /* * More frames follow. Mark the buffer busy * so it's not re-used while the hardware may * still re-read the link field in the descriptor. * * Use the last buffer in an aggregate as that * is where the hardware may be - intermediate * descriptors won't be "busy". */ bf->bf_last->bf_flags |= ATH_BUF_BUSY; } else txq->axq_link = NULL; #else bf->bf_last->bf_flags |= ATH_BUF_BUSY; #endif if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; ni = bf->bf_node; ATH_KTR(sc, ATH_KTR_TXCOMP, 5, "ath_tx_processq: txq=%u, bf=%p, ds=%p, ni=%p, ts_status=0x%08x", txq->axq_qnum, bf, ds, ni, ts->ts_status); /* * If unicast frame was ack'd update RSSI, * including the last rx time used to * workaround phantom bmiss interrupts. 
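* (ATH_RSSI_LPF below keeps a smoothed running average of the reported RSSI; conceptually, and not necessarily the exact macro, it computes avg += (sample - avg) / N for some fixed smoothing constant N.)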
*/ if (ni != NULL && ts->ts_status == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) { nacked++; sc->sc_stats.ast_tx_rssi = ts->ts_rssi; ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi, ts->ts_rssi); } ATH_TXQ_UNLOCK(txq); /* * Update statistics and call completion */ ath_tx_process_buf_completion(sc, txq, ts, bf); /* XXX at this point, bf and ni may be totally invalid */ } #ifdef IEEE80211_SUPPORT_SUPERG /* * Flush fast-frame staging queue when traffic slows. */ if (txq->axq_depth <= 1) ieee80211_ff_flush(ic, txq->axq_ac); #endif /* Kick the software TXQ scheduler */ if (dosched) { ATH_TX_LOCK(sc); ath_txq_sched(sc, txq); ATH_TX_UNLOCK(sc); } ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_processq: txq=%u: done", txq->axq_qnum); return nacked; } #define TXQACTIVE(t, q) ( (t) & (1 << (q))) /* * Deferred processing of transmit interrupt; special-cased * for a single hardware transmit queue (e.g. 5210 and 5211). */ static void ath_tx_proc_q0(void *arg, int npending) { struct ath_softc *sc = arg; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc_q0: txqs=0x%08x", txqs); if (TXQACTIVE(txqs, 0) && ath_tx_processq(sc, &sc->sc_txq[0], 1)) /* XXX why is lastrx updated in tx code? */ sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum)) ath_tx_processq(sc, sc->sc_cabq, 1); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } /* * Deferred processing of transmit interrupt; special-cased * for four hardware queues, 0-3 (e.g. 5212 w/ WME support). */ static void ath_tx_proc_q0123(void *arg, int npending) { struct ath_softc *sc = arg; int nacked; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc_q0123: txqs=0x%08x", txqs); /* * Process each active queue. */ nacked = 0; if (TXQACTIVE(txqs, 0)) nacked += ath_tx_processq(sc, &sc->sc_txq[0], 1); if (TXQACTIVE(txqs, 1)) nacked += ath_tx_processq(sc, &sc->sc_txq[1], 1); if (TXQACTIVE(txqs, 2)) nacked += ath_tx_processq(sc, &sc->sc_txq[2], 1); if (TXQACTIVE(txqs, 3)) nacked += ath_tx_processq(sc, &sc->sc_txq[3], 1); if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum)) ath_tx_processq(sc, sc->sc_cabq, 1); if (nacked) sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } /* * Deferred processing of transmit interrupt. */ static void ath_tx_proc(void *arg, int npending) { struct ath_softc *sc = arg; int i, nacked; uint32_t txqs; ATH_PCU_LOCK(sc); sc->sc_txproc_cnt++; txqs = sc->sc_txq_active; sc->sc_txq_active &= ~txqs; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc: txqs=0x%08x", txqs); /* * Process each active queue. 
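* For example (illustrative values): if txqs == 0x05 then TXQACTIVE(txqs, 0) and TXQACTIVE(txqs, 2) are non-zero, so only hardware queues 0 and 2 are reaped on this pass.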
*/ nacked = 0; for (i = 0; i < HAL_NUM_TX_QUEUES; i++) if (ATH_TXQ_SETUP(sc, i) && TXQACTIVE(txqs, i)) nacked += ath_tx_processq(sc, &sc->sc_txq[i], 1); if (nacked) sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah); sc->sc_wd_timer = 0; if (sc->sc_softled) ath_led_event(sc, sc->sc_txrix); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ath_tx_kick(sc); } #undef TXQACTIVE /* * Deferred processing of TXQ rescheduling. */ static void ath_txq_sched_tasklet(void *arg, int npending) { struct ath_softc *sc = arg; int i; /* XXX is skipping ok? */ ATH_PCU_LOCK(sc); #if 0 if (sc->sc_inreset_cnt > 0) { device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; skipping\n", __func__); ATH_PCU_UNLOCK(sc); return; } #endif sc->sc_txproc_cnt++; ATH_PCU_UNLOCK(sc); ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ATH_TX_LOCK(sc); for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ath_txq_sched(sc, &sc->sc_txq[i]); } } ATH_TX_UNLOCK(sc); ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); sc->sc_txproc_cnt--; ATH_PCU_UNLOCK(sc); } void ath_returnbuf_tail(struct ath_softc *sc, struct ath_buf *bf) { ATH_TXBUF_LOCK_ASSERT(sc); if (bf->bf_flags & ATH_BUF_MGMT) TAILQ_INSERT_TAIL(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_INSERT_TAIL(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt++; if (sc->sc_txbuf_cnt > ath_txbuf) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt > %d?\n", __func__, ath_txbuf); sc->sc_txbuf_cnt = ath_txbuf; } } } void ath_returnbuf_head(struct ath_softc *sc, struct ath_buf *bf) { ATH_TXBUF_LOCK_ASSERT(sc); if (bf->bf_flags & ATH_BUF_MGMT) TAILQ_INSERT_HEAD(&sc->sc_txbuf_mgmt, bf, bf_list); else { TAILQ_INSERT_HEAD(&sc->sc_txbuf, bf, bf_list); sc->sc_txbuf_cnt++; if (sc->sc_txbuf_cnt > ATH_TXBUF) { device_printf(sc->sc_dev, "%s: sc_txbuf_cnt > %d?\n", __func__, ATH_TXBUF); sc->sc_txbuf_cnt = ATH_TXBUF; } } } /* * Free the holding buffer if it exists */ void ath_txq_freeholdingbuf(struct ath_softc *sc, struct ath_txq *txq) { ATH_TXBUF_UNLOCK_ASSERT(sc); ATH_TXQ_LOCK_ASSERT(txq); if (txq->axq_holdingbf == NULL) return; txq->axq_holdingbf->bf_flags &= ~ATH_BUF_BUSY; ATH_TXBUF_LOCK(sc); ath_returnbuf_tail(sc, txq->axq_holdingbf); ATH_TXBUF_UNLOCK(sc); txq->axq_holdingbf = NULL; } /* * Add this buffer to the holding queue, freeing the previous * one if it exists. */ static void ath_txq_addholdingbuf(struct ath_softc *sc, struct ath_buf *bf) { struct ath_txq *txq; txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue]; ATH_TXBUF_UNLOCK_ASSERT(sc); ATH_TXQ_LOCK_ASSERT(txq); /* XXX assert ATH_BUF_BUSY is set */ /* XXX assert the tx queue is under the max number */ if (bf->bf_state.bfs_tx_queue > HAL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "%s: bf=%p: invalid tx queue (%d)\n", __func__, bf, bf->bf_state.bfs_tx_queue); bf->bf_flags &= ~ATH_BUF_BUSY; ath_returnbuf_tail(sc, bf); return; } ath_txq_freeholdingbuf(sc, txq); txq->axq_holdingbf = bf; } /* * Return a buffer to the pool and update the 'busy' flag on the * previous 'tail' entry. * * This _must_ only be called when the buffer is involved in a completed * TX. The logic is that if it was part of an active TX, the previous * buffer on the list is now not involved in a halted TX DMA queue, waiting * for restart (eg for TDMA.) * * The caller must free the mbuf and recycle the node reference. * * XXX This method of handling busy / holding buffers is insanely stupid. * It requires bf_state.bfs_tx_queue to be correctly assigned. 
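* (Note that ath_txq_addholdingbuf() above already distrusts it: an out-of-range bfs_tx_queue sends the buffer straight back to the free list instead of onto a holding queue.)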
It would * be much nicer if buffers in the processq() methods would instead be * always completed there (pushed onto a txq or ath_bufhead) so we knew * exactly what hardware queue they came from in the first place. */ void ath_freebuf(struct ath_softc *sc, struct ath_buf *bf) { struct ath_txq *txq; txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue]; KASSERT((bf->bf_node == NULL), ("%s: bf->bf_node != NULL\n", __func__)); KASSERT((bf->bf_m == NULL), ("%s: bf->bf_m != NULL\n", __func__)); /* * If this buffer is busy, push it onto the holding queue. */ if (bf->bf_flags & ATH_BUF_BUSY) { ATH_TXQ_LOCK(txq); ath_txq_addholdingbuf(sc, bf); ATH_TXQ_UNLOCK(txq); return; } /* * Not a busy buffer, so free normally */ ATH_TXBUF_LOCK(sc); ath_returnbuf_tail(sc, bf); ATH_TXBUF_UNLOCK(sc); } /* * This is currently used by ath_tx_draintxq() and * ath_tx_tid_free_pkts(). * * It recycles a single ath_buf. */ void ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status) { struct ieee80211_node *ni = bf->bf_node; struct mbuf *m0 = bf->bf_m; /* * Make sure that we only sync/unload if there's an mbuf. * If not (eg we cloned a buffer), the unload will have already * occurred. */ if (bf->bf_m != NULL) { bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); } bf->bf_node = NULL; bf->bf_m = NULL; /* Free the buffer, it's not needed any longer */ ath_freebuf(sc, bf); /* Pass the buffer back to net80211 - completing it */ ieee80211_tx_complete(ni, m0, status); } static struct ath_buf * ath_tx_draintxq_get_one(struct ath_softc *sc, struct ath_txq *txq) { struct ath_buf *bf; ATH_TXQ_LOCK_ASSERT(txq); /* * Drain the FIFO queue first, then if it's * empty, move to the normal frame queue. */ bf = TAILQ_FIRST(&txq->fifo.axq_q); if (bf != NULL) { /* * Is it the last buffer in this set? * Decrement the FIFO counter. */ if (bf->bf_flags & ATH_BUF_FIFOEND) { if (txq->axq_fifo_depth == 0) { device_printf(sc->sc_dev, "%s: Q%d: fifo_depth=0, fifo.axq_depth=%d?\n", __func__, txq->axq_qnum, txq->fifo.axq_depth); } else txq->axq_fifo_depth--; } ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list); return (bf); } /* * Debugging! */ if (txq->axq_fifo_depth != 0 || txq->fifo.axq_depth != 0) { device_printf(sc->sc_dev, "%s: Q%d: fifo_depth=%d, fifo.axq_depth=%d\n", __func__, txq->axq_qnum, txq->axq_fifo_depth, txq->fifo.axq_depth); } /* * Now drain the pending queue. */ bf = TAILQ_FIRST(&txq->axq_q); if (bf == NULL) { txq->axq_link = NULL; return (NULL); } ATH_TXQ_REMOVE(txq, bf, bf_list); return (bf); } void ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq) { #ifdef ATH_DEBUG struct ath_hal *ah = sc->sc_ah; #endif struct ath_buf *bf; u_int ix; /* * NB: this assumes output has been stopped and * we do not need to block ath_tx_proc */ for (ix = 0;; ix++) { ATH_TXQ_LOCK(txq); bf = ath_tx_draintxq_get_one(sc, txq); if (bf == NULL) { ATH_TXQ_UNLOCK(txq); break; } if (bf->bf_state.bfs_aggr) txq->axq_aggr_depth--; #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) { struct ieee80211com *ic = &sc->sc_ic; int status = 0; /* * EDMA operation has a TX completion FIFO * separate from the TX descriptor, so this * method of checking the "completion" status * is wrong. */ if (! 
sc->sc_isedma) { status = (ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); } ath_printtxbuf(sc, bf, txq->axq_qnum, ix, status); ieee80211_dump_pkt(ic, mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len, 0, -1); } #endif /* ATH_DEBUG */ /* * Since we're now doing magic in the completion * functions, we -must- call it for aggregation * destinations or BAW tracking will get upset. */ /* * Clear ATH_BUF_BUSY; the completion handler * will free the buffer. */ ATH_TXQ_UNLOCK(txq); bf->bf_flags &= ~ATH_BUF_BUSY; if (bf->bf_comp) bf->bf_comp(sc, bf, 1); else ath_tx_default_comp(sc, bf, 1); } /* * Free the holding buffer if it exists */ ATH_TXQ_LOCK(txq); ath_txq_freeholdingbuf(sc, txq); ATH_TXQ_UNLOCK(txq); /* * Drain software queued frames which are on * active TIDs. */ ath_tx_txq_drain(sc, txq); } static void ath_tx_stopdma(struct ath_softc *sc, struct ath_txq *txq) { struct ath_hal *ah = sc->sc_ah; ATH_TXQ_LOCK_ASSERT(txq); DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, active=%d, hwpending=%d, flags 0x%08x, " "link %p, holdingbf=%p\n", __func__, txq->axq_qnum, (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, txq->axq_qnum), (int) (!! ath_hal_txqenabled(ah, txq->axq_qnum)), (int) ath_hal_numtxpending(ah, txq->axq_qnum), txq->axq_flags, txq->axq_link, txq->axq_holdingbf); (void) ath_hal_stoptxdma(ah, txq->axq_qnum); /* We've stopped TX DMA, so mark this as stopped. */ txq->axq_flags &= ~ATH_TXQ_PUTRUNNING; #ifdef ATH_DEBUG if ((sc->sc_debug & ATH_DEBUG_RESET) && (txq->axq_holdingbf != NULL)) { ath_printtxbuf(sc, txq->axq_holdingbf, txq->axq_qnum, 0, 0); } #endif } int ath_stoptxdma(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; int i; /* XXX return value */ if (sc->sc_invalid) return 0; if (!sc->sc_invalid) { /* don't touch the hardware if marked invalid */ DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, link %p\n", __func__, sc->sc_bhalq, (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, sc->sc_bhalq), NULL); /* stop the beacon queue */ (void) ath_hal_stoptxdma(ah, sc->sc_bhalq); /* Stop the data queues */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i)) { ATH_TXQ_LOCK(&sc->sc_txq[i]); ath_tx_stopdma(sc, &sc->sc_txq[i]); ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } } } return 1; } #ifdef ATH_DEBUG void ath_tx_dump(struct ath_softc *sc, struct ath_txq *txq) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf; int i = 0; if (! (sc->sc_debug & ATH_DEBUG_RESET)) return; device_printf(sc->sc_dev, "%s: Q%d: begin\n", __func__, txq->axq_qnum); TAILQ_FOREACH(bf, &txq->axq_q, bf_list) { ath_printtxbuf(sc, bf, txq->axq_qnum, i, ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); i++; } device_printf(sc->sc_dev, "%s: Q%d: end\n", __func__, txq->axq_qnum); } #endif /* ATH_DEBUG */ /* * Drain the transmit queues and reclaim resources. */ void ath_legacy_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type) { struct ath_hal *ah = sc->sc_ah; struct ath_buf *bf_last; int i; (void) ath_stoptxdma(sc); /* * Dump the queue contents */ for (i = 0; i < HAL_NUM_TX_QUEUES; i++) { /* * XXX TODO: should we just handle the completed TX frames * here, whether or not the reset is a full one or not? */ if (ATH_TXQ_SETUP(sc, i)) { #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) ath_tx_dump(sc, &sc->sc_txq[i]); #endif /* ATH_DEBUG */ if (reset_type == ATH_RESET_NOLOSS) { ath_tx_processq(sc, &sc->sc_txq[i], 0); ATH_TXQ_LOCK(&sc->sc_txq[i]); /* * Free the holding buffer; DMA is now * stopped. 
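* (This is safe now because a stopped queue can no longer re-read the descriptor link field, which was the only reason the holding buffer was kept marked ATH_BUF_BUSY.)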
*/ ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]); /* * Setup the link pointer to be the * _last_ buffer/descriptor in the list. * If there's nothing in the list, set it * to NULL. */ bf_last = ATH_TXQ_LAST(&sc->sc_txq[i], axq_q_s); if (bf_last != NULL) { ath_hal_gettxdesclinkptr(ah, bf_last->bf_lastds, &sc->sc_txq[i].axq_link); } else { sc->sc_txq[i].axq_link = NULL; } ATH_TXQ_UNLOCK(&sc->sc_txq[i]); } else ath_tx_draintxq(sc, &sc->sc_txq[i]); } } #ifdef ATH_DEBUG if (sc->sc_debug & ATH_DEBUG_RESET) { struct ath_buf *bf = TAILQ_FIRST(&sc->sc_bbuf); if (bf != NULL && bf->bf_m != NULL) { ath_printtxbuf(sc, bf, sc->sc_bhalq, 0, ath_hal_txprocdesc(ah, bf->bf_lastds, &bf->bf_status.ds_txstat) == HAL_OK); ieee80211_dump_pkt(&sc->sc_ic, mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len, 0, -1); } } #endif /* ATH_DEBUG */ sc->sc_wd_timer = 0; } /* * Update internal state after a channel change. */ static void ath_chan_change(struct ath_softc *sc, struct ieee80211_channel *chan) { enum ieee80211_phymode mode; /* * Change channels and update the h/w rate map * if we're switching; e.g. 11a to 11b/g. */ mode = ieee80211_chan2mode(chan); if (mode != sc->sc_curmode) ath_setcurmode(sc, mode); sc->sc_curchan = chan; } /* * Set/change channels. If the channel is really being changed, * it's done by resetting the chip. To accomplish this we must * first cleanup any pending DMA, then restart stuff after a la * ath_init. */ static int ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; int ret = 0; /* Treat this as an interface reset */ ATH_PCU_UNLOCK_ASSERT(sc); ATH_UNLOCK_ASSERT(sc); /* (Try to) stop TX/RX from occurring */ taskqueue_block(sc->sc_tq); ATH_PCU_LOCK(sc); /* Disable interrupts */ ath_hal_intrset(ah, 0); /* Stop new RX/TX/interrupt completion */ if (ath_reset_grablock(sc, 1) == 0) { device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n", __func__); } /* Stop pending RX/TX completion */ ath_txrx_stop_locked(sc); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_RESET, "%s: %u (%u MHz, flags 0x%x)\n", __func__, ieee80211_chan2ieee(ic, chan), chan->ic_freq, chan->ic_flags); if (chan != sc->sc_curchan) { HAL_STATUS status; /* * To switch channels clear any pending DMA operations; * wait long enough for the RX fifo to drain, reset the * hardware at the new frequency, and then re-enable * the relevant bits of the h/w. */ #if 0 ath_hal_intrset(ah, 0); /* disable interrupts */ #endif ath_stoprecv(sc, 1); /* turn off frame recv */ /* * First, handle completed TX/RX frames. */ ath_rx_flush(sc); ath_draintxq(sc, ATH_RESET_NOLOSS); /* * Next, flush the non-scheduled frames. 
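* (So the drain is two-phase: the ATH_RESET_NOLOSS pass above reaps frames the hardware has already completed without dropping them, and the ATH_RESET_FULL pass below discards whatever is still pending.)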
*/ ath_draintxq(sc, ATH_RESET_FULL); /* clear pending tx frames */ ath_update_chainmasks(sc, chan); ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask); if (!ath_hal_reset(ah, sc->sc_opmode, chan, AH_TRUE, HAL_RESET_NORMAL, &status)) { device_printf(sc->sc_dev, "%s: unable to reset " "channel %u (%u MHz, flags 0x%x), hal status %u\n", __func__, ieee80211_chan2ieee(ic, chan), chan->ic_freq, chan->ic_flags, status); ret = EIO; goto finish; } sc->sc_diversity = ath_hal_getdiversity(ah); ATH_RX_LOCK(sc); sc->sc_rx_stopped = 1; sc->sc_rx_resetted = 1; ATH_RX_UNLOCK(sc); /* Let DFS at it in case it's a DFS channel */ ath_dfs_radar_enable(sc, chan); /* Let spectral at it in case spectral is enabled */ ath_spectral_enable(sc, chan); /* * Let bluetooth coexistence at it in case it's needed for this * channel */ ath_btcoex_enable(sc, ic->ic_curchan); /* * If we're doing TDMA, enforce the TXOP limitation for chips * that support it. */ if (sc->sc_hasenforcetxop && sc->sc_tdma) ath_hal_setenforcetxop(sc->sc_ah, 1); else ath_hal_setenforcetxop(sc->sc_ah, 0); /* * Re-enable rx framework. */ if (ath_startrecv(sc) != 0) { device_printf(sc->sc_dev, "%s: unable to restart recv logic\n", __func__); ret = EIO; goto finish; } /* * Change channels and update the h/w rate map * if we're switching; e.g. 11a to 11b/g. */ ath_chan_change(sc, chan); /* * Reset clears the beacon timers; reset them * here if needed. */ if (sc->sc_beacons) { /* restart beacons */ #ifdef IEEE80211_SUPPORT_TDMA if (sc->sc_tdma) ath_tdma_config(sc, NULL); else #endif ath_beacon_config(sc, NULL); } /* * Re-enable interrupts. */ #if 0 ath_hal_intrset(ah, sc->sc_imask); #endif } finish: ATH_PCU_LOCK(sc); sc->sc_inreset_cnt--; /* XXX only do this if sc_inreset_cnt == 0? */ ath_hal_intrset(ah, sc->sc_imask); ATH_PCU_UNLOCK(sc); ath_txrx_start(sc); /* XXX ath_start? */ return ret; } /* * Periodically recalibrate the PHY to account * for temperature/environment changes. */ static void ath_calibrate(void *arg) { struct ath_softc *sc = arg; struct ath_hal *ah = sc->sc_ah; struct ieee80211com *ic = &sc->sc_ic; HAL_BOOL longCal, isCalDone = AH_TRUE; HAL_BOOL aniCal, shortCal = AH_FALSE; int nextcal; ATH_LOCK_ASSERT(sc); /* * Force the hardware awake for ANI work. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); /* Skip trying to do this if we're in reset */ if (sc->sc_inreset_cnt) goto restart; if (ic->ic_flags & IEEE80211_F_SCAN) /* defer, off channel */ goto restart; longCal = (ticks - sc->sc_lastlongcal >= ath_longcalinterval*hz); aniCal = (ticks - sc->sc_lastani >= ath_anicalinterval*hz/1000); if (sc->sc_doresetcal) shortCal = (ticks - sc->sc_lastshortcal >= ath_shortcalinterval*hz/1000); DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: shortCal=%d; longCal=%d; aniCal=%d\n", __func__, shortCal, longCal, aniCal); if (aniCal) { sc->sc_stats.ast_ani_cal++; sc->sc_lastani = ticks; ath_hal_ani_poll(ah, sc->sc_curchan); } if (longCal) { sc->sc_stats.ast_per_cal++; sc->sc_lastlongcal = ticks; if (ath_hal_getrfgain(ah) == HAL_RFGAIN_NEED_CHANGE) { /* * Rfgain is out of bounds, reset the chip * to load new gain values. */ DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: rfgain change\n", __func__); sc->sc_stats.ast_per_rfgain++; sc->sc_resetcal = 0; sc->sc_doresetcal = AH_TRUE; taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask); callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc); ath_power_restore_power_state(sc); return; } /* * If this long cal is after an idle period, then * reset the data collection state so we start fresh.
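* (Timer arithmetic for reference, assuming hz=1000: ath_shortcalinterval and ath_anicalinterval are in milliseconds, so ath_shortcalinterval*hz/1000 is 100 ticks for a 100ms setting, while ath_longcalinterval is in seconds, so ath_longcalinterval*hz is 30000 ticks for a 30s setting; the exact defaults are tunables, these numbers are only an example.)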
*/ if (sc->sc_resetcal) { (void) ath_hal_calreset(ah, sc->sc_curchan); sc->sc_lastcalreset = ticks; sc->sc_lastshortcal = ticks; sc->sc_resetcal = 0; sc->sc_doresetcal = AH_TRUE; } } /* Only call if we're doing a short/long cal, not for ANI calibration */ if (shortCal || longCal) { isCalDone = AH_FALSE; if (ath_hal_calibrateN(ah, sc->sc_curchan, longCal, &isCalDone)) { if (longCal) { /* * Calibrate noise floor data again in case of change. */ ath_hal_process_noisefloor(ah); } } else { DPRINTF(sc, ATH_DEBUG_ANY, "%s: calibration of channel %u failed\n", __func__, sc->sc_curchan->ic_freq); sc->sc_stats.ast_per_calfail++; } if (shortCal) sc->sc_lastshortcal = ticks; } if (!isCalDone) { restart: /* * Use a shorter interval to potentially collect multiple * data samples required to complete calibration. Once * we're told the work is done we drop back to a longer * interval between requests. We're more aggressive doing * work when operating as an AP to improve operation right * after startup. */ sc->sc_lastshortcal = ticks; nextcal = ath_shortcalinterval*hz/1000; if (sc->sc_opmode != HAL_M_HOSTAP) nextcal *= 10; sc->sc_doresetcal = AH_TRUE; } else { /* nextcal should be the shortest time for next event */ nextcal = ath_longcalinterval*hz; if (sc->sc_lastcalreset == 0) sc->sc_lastcalreset = sc->sc_lastlongcal; else if (ticks - sc->sc_lastcalreset >= ath_resetcalinterval*hz) sc->sc_resetcal = 1; /* setup reset next trip */ sc->sc_doresetcal = AH_FALSE; } /* ANI calibration may occur more often than short/long/resetcal */ if (ath_anicalinterval > 0) nextcal = MIN(nextcal, ath_anicalinterval*hz/1000); if (nextcal != 0) { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: next +%u (%sisCalDone)\n", __func__, nextcal, isCalDone ? "" : "!"); callout_reset(&sc->sc_cal_ch, nextcal, ath_calibrate, sc); } else { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n", __func__); /* NB: don't rearm timer */ } /* * Restore power state now that we're done. */ ath_power_restore_power_state(sc); } static void ath_scan_start(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; /* XXX calibration timer? */ /* XXXGL: is constant ieee80211broadcastaddr a correct choice? */ ATH_LOCK(sc); sc->sc_scanning = 1; sc->sc_syncbeacon = 0; rfilt = ath_calcrxfilter(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); ath_hal_setrxfilter(ah, rfilt); ath_hal_setassocid(ah, ieee80211broadcastaddr, 0); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0\n", __func__, rfilt, ether_sprintf(ieee80211broadcastaddr)); } static void ath_scan_end(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; u_int32_t rfilt; ATH_LOCK(sc); sc->sc_scanning = 0; rfilt = ath_calcrxfilter(sc); ATH_UNLOCK(sc); ATH_PCU_LOCK(sc); ath_hal_setrxfilter(ah, rfilt); ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid); ath_hal_process_noisefloor(ah); ATH_PCU_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n", __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid); } #ifdef ATH_ENABLE_11N /* * For now, just do a channel change. * * Later, we'll go through the hard slog of suspending tx/rx, changing rate * control state and resetting the hardware without dropping frames out * of the queue. * * The unfortunate trouble here is making absolutely sure that the * channel width change has propagated enough so the hardware * absolutely isn't handed bogus frames for its current operating * mode.
(Eg, 40MHz frames in 20MHz mode.) Since TX and RX can and * do occur in parallel, we need to make certain we've blocked * any further ongoing TX (and RX, that can cause raw TX) * before we do this. */ static void ath_update_chw(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; DPRINTF(sc, ATH_DEBUG_STATE, "%s: called\n", __func__); ath_set_channel(ic); } #endif /* ATH_ENABLE_11N */ static void ath_set_channel(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; ATH_LOCK(sc); ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); (void) ath_chan_set(sc, ic->ic_curchan); /* * If we are returning to our bss channel then mark state * so the next recv'd beacon's tsf will be used to sync the * beacon timers. Note that since we only hear beacons in * sta/ibss mode this has no effect in other operating modes. */ ATH_LOCK(sc); if (!sc->sc_scanning && ic->ic_curchan == ic->ic_bsschan) sc->sc_syncbeacon = 1; ath_power_restore_power_state(sc); ATH_UNLOCK(sc); } /* * Walk the vap list and check if there are any vaps in RUN state. */ static int ath_isanyrunningvaps(struct ieee80211vap *this) { struct ieee80211com *ic = this->iv_ic; struct ieee80211vap *vap; IEEE80211_LOCK_ASSERT(ic); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { if (vap != this && vap->iv_state >= IEEE80211_S_RUN) return 1; } return 0; } static int ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ieee80211com *ic = vap->iv_ic; struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp = ATH_VAP(vap); struct ath_hal *ah = sc->sc_ah; struct ieee80211_node *ni = NULL; int i, error, stamode; u_int32_t rfilt; int csa_run_transition = 0; enum ieee80211_state ostate = vap->iv_state; static const HAL_LED_STATE leds[] = { HAL_LED_INIT, /* IEEE80211_S_INIT */ HAL_LED_SCAN, /* IEEE80211_S_SCAN */ HAL_LED_AUTH, /* IEEE80211_S_AUTH */ HAL_LED_ASSOC, /* IEEE80211_S_ASSOC */ HAL_LED_RUN, /* IEEE80211_S_CAC */ HAL_LED_RUN, /* IEEE80211_S_RUN */ HAL_LED_RUN, /* IEEE80211_S_CSA */ HAL_LED_RUN, /* IEEE80211_S_SLEEP */ }; DPRINTF(sc, ATH_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); /* * net80211 _should_ have the comlock asserted at this point. * There are some comments around the calls to vap->iv_newstate * which indicate that it (newstate) may end up dropping the * lock. This and the subsequent lock assert check after newstate * are an attempt to catch these and figure out how/why. */ IEEE80211_LOCK_ASSERT(ic); /* Before we touch the hardware - wake it up */ ATH_LOCK(sc); /* * If the NIC is in anything other than SLEEP state, * we need to ensure that self-generated frames are * set for PWRMGT=0. Otherwise we may end up with * strange situations. * * XXX TODO: is this actually the case? :-) */ if (nstate != IEEE80211_S_SLEEP) ath_power_setselfgen(sc, HAL_PM_AWAKE); /* * Now, wake the thing up. */ ath_power_set_power_state(sc, HAL_PM_AWAKE); /* * And stop the calibration callout whilst we have * ATH_LOCK held. */ callout_stop(&sc->sc_cal_ch); ATH_UNLOCK(sc); if (ostate == IEEE80211_S_CSA && nstate == IEEE80211_S_RUN) csa_run_transition = 1; ath_hal_setledstate(ah, leds[nstate]); /* set LED */ if (nstate == IEEE80211_S_SCAN) { /* * Scanning: turn off beacon miss and don't beacon. * Mark beacon state so when we reach RUN state we'll * [re]setup beacons. Unblock the task q thread so * deferred interrupt processing is done.
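* (Concretely, the code below masks HAL_INT_SWBA and HAL_INT_BMISS both in the hardware, via ath_hal_intrset(), and in the cached sc_imask, so the bits stay off until a later RUN transition reprograms beaconing.)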
*/ /* Ensure we stay awake during scan */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_setpower(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); ath_hal_intrset(ah, sc->sc_imask &~ (HAL_INT_SWBA | HAL_INT_BMISS)); sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS); sc->sc_beacons = 0; taskqueue_unblock(sc->sc_tq); } ni = ieee80211_ref_node(vap->iv_bss); rfilt = ath_calcrxfilter(sc); stamode = (vap->iv_opmode == IEEE80211_M_STA || vap->iv_opmode == IEEE80211_M_AHDEMO || vap->iv_opmode == IEEE80211_M_IBSS); /* * XXX Don't need to do this (and others) if we've transitioned * from SLEEP->RUN. */ if (stamode && nstate == IEEE80211_S_RUN) { sc->sc_curaid = ni->ni_associd; IEEE80211_ADDR_COPY(sc->sc_curbssid, ni->ni_bssid); ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid); } DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n", __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid); ath_hal_setrxfilter(ah, rfilt); /* XXX is this to restore keycache on resume? */ if (vap->iv_opmode != IEEE80211_M_STA && (vap->iv_flags & IEEE80211_F_PRIVACY)) { for (i = 0; i < IEEE80211_WEP_NKID; i++) if (ath_hal_keyisvalid(ah, i)) ath_hal_keysetmac(ah, i, ni->ni_bssid); } /* * Invoke the parent method to do net80211 work. */ error = avp->av_newstate(vap, nstate, arg); if (error != 0) goto bad; /* * See above: ensure av_newstate() doesn't drop the lock * on us. */ IEEE80211_LOCK_ASSERT(ic); if (nstate == IEEE80211_S_RUN) { /* NB: collect bss node again, it may have changed */ ieee80211_free_node(ni); ni = ieee80211_ref_node(vap->iv_bss); DPRINTF(sc, ATH_DEBUG_STATE, "%s(RUN): iv_flags 0x%08x bintvl %d bssid %s " "capinfo 0x%04x chan %d\n", __func__, vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan)); switch (vap->iv_opmode) { #ifdef IEEE80211_SUPPORT_TDMA case IEEE80211_M_AHDEMO: if ((vap->iv_caps & IEEE80211_C_TDMA) == 0) break; /* fall thru... */ #endif case IEEE80211_M_HOSTAP: case IEEE80211_M_IBSS: case IEEE80211_M_MBSS: /* * Allocate and setup the beacon frame. * * Stop any previous beacon DMA. This may be * necessary, for example, when an ibss merge * causes reconfiguration; there will be a state * transition from RUN->RUN that means we may * be called with beacon transmission active. */ ath_hal_stoptxdma(ah, sc->sc_bhalq); error = ath_beacon_alloc(sc, ni); if (error != 0) goto bad; /* * If joining an adhoc network defer beacon timer * configuration to the next beacon frame so we * have a current TSF to use. Otherwise we're * starting an ibss/bss so there's no need to delay; * if this is the first vap moving to RUN state, then * beacon state needs to be [re]configured. */ if (vap->iv_opmode == IEEE80211_M_IBSS && ni->ni_tstamp.tsf != 0) { sc->sc_syncbeacon = 1; } else if (!sc->sc_beacons) { #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) ath_tdma_config(sc, vap); else #endif ath_beacon_config(sc, vap); sc->sc_beacons = 1; } break; case IEEE80211_M_STA: /* * Defer beacon timer configuration to the next * beacon frame so we have a current TSF to use * (any TSF collected when scanning is likely old). * However if it's due to a CSA -> RUN transition, * force a beacon update so we pick up a lack of * beacons from an AP in CAC and thus force a * scan. * * And, there are also corner cases here where * after a scan, the AP may have disappeared. * In that case, we may not receive an actual * beacon to update the beacon timer and thus we * won't get notified of the missing beacons.
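* (Hence the unconditional sc_beacons = 1 in the STA case below, per the PR kern/175227 note there.)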
*/ if (ostate != IEEE80211_S_RUN && ostate != IEEE80211_S_SLEEP) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: STA; syncbeacon=1\n", __func__); sc->sc_syncbeacon = 1; if (csa_run_transition) ath_beacon_config(sc, vap); /* * PR: kern/175227 * * Reconfigure beacons during reset; as otherwise * we won't get the beacon timers reprogrammed * after a reset and thus we won't pick up a * beacon miss interrupt. * * Hopefully we'll see a beacon before the BMISS * timer fires (too often), leading to a STA * disassociation. */ sc->sc_beacons = 1; } break; case IEEE80211_M_MONITOR: /* * Monitor mode vaps have only INIT->RUN and RUN->RUN * transitions so we must re-enable interrupts here to * handle the case of a single monitor mode vap. */ ath_hal_intrset(ah, sc->sc_imask); break; case IEEE80211_M_WDS: break; default: break; } /* * Let the hal process statistics collected during a * scan so it can provide calibrated noise floor data. */ ath_hal_process_noisefloor(ah); /* * Reset rssi stats; maybe not the best place... */ sc->sc_halstats.ns_avgbrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgtxrssi = ATH_RSSI_DUMMY_MARKER; /* * Force awake for RUN mode. */ ATH_LOCK(sc); ath_power_setselfgen(sc, HAL_PM_AWAKE); ath_power_setpower(sc, HAL_PM_AWAKE); /* * Finally, start any timers and the task q thread * (in case we didn't go through SCAN state). */ if (ath_longcalinterval != 0) { /* start periodic recalibration timer */ callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc); } else { DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n", __func__); } ATH_UNLOCK(sc); taskqueue_unblock(sc->sc_tq); } else if (nstate == IEEE80211_S_INIT) { /* * If there are no vaps left in RUN state then * shutdown host/driver operation: * o disable interrupts * o disable the task queue thread * o mark beacon processing as stopped */ if (!ath_isanyrunningvaps(vap)) { sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS); /* disable interrupts */ ath_hal_intrset(ah, sc->sc_imask &~ HAL_INT_GLOBAL); taskqueue_block(sc->sc_tq); sc->sc_beacons = 0; } #ifdef IEEE80211_SUPPORT_TDMA ath_hal_setcca(ah, AH_TRUE); #endif } else if (nstate == IEEE80211_S_SLEEP) { /* We're going to sleep, so transition appropriately */ /* For now, only do this if we're a single STA vap */ if (sc->sc_nvaps == 1 && vap->iv_opmode == IEEE80211_M_STA) { DPRINTF(sc, ATH_DEBUG_BEACON, "%s: syncbeacon=%d\n", __func__, sc->sc_syncbeacon); ATH_LOCK(sc); /* * Always at least set the self-generated * frame config to set PWRMGT=1. */ ath_power_setselfgen(sc, HAL_PM_NETWORK_SLEEP); /* * If we're not syncing beacons, transition * to NETWORK_SLEEP. * * We stay awake if syncbeacon > 0 in case * we need to listen for some beacons otherwise * our beacon timer config may be wrong. */ if (sc->sc_syncbeacon == 0) { ath_power_setpower(sc, HAL_PM_NETWORK_SLEEP); } ATH_UNLOCK(sc); } } bad: ieee80211_free_node(ni); /* * Restore the power state - either to what it was, or * to network_sleep if it's alright. */ ATH_LOCK(sc); ath_power_restore_power_state(sc); ATH_UNLOCK(sc); return error; } /* * Allocate a key cache slot to the station so we can * setup a mapping from key index to node. The key cache * slot is needed for managing antenna state and for * compression when stations do not use crypto. We do * it unilaterally here; if crypto is employed this slot * will be reassigned.
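* (Flow sketch: ath_setup_stationkey() below asks ath_key_alloc() for a (keyix, rxkeyix) pair, marks the key IEEE80211_KEY_DEVKEY and installs a pass-thru entry via ath_keyset(); if allocation fails it simply falls back to the more expensive software lookup path.)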
*/ static void ath_setup_stationkey(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; ieee80211_keyix keyix, rxkeyix; /* XXX should take a locked ref to vap->iv_bss */ if (!ath_key_alloc(vap, &ni->ni_ucastkey, &keyix, &rxkeyix)) { /* * Key cache is full; we'll fall back to doing * the more expensive lookup in software. Note * this also means no h/w compression. */ /* XXX msg+statistic */ } else { /* XXX locking? */ ni->ni_ucastkey.wk_keyix = keyix; ni->ni_ucastkey.wk_rxkeyix = rxkeyix; /* NB: must mark device key to get called back on delete */ ni->ni_ucastkey.wk_flags |= IEEE80211_KEY_DEVKEY; IEEE80211_ADDR_COPY(ni->ni_ucastkey.wk_macaddr, ni->ni_macaddr); /* NB: this will create a pass-thru key entry */ ath_keyset(sc, vap, &ni->ni_ucastkey, vap->iv_bss); } } /* * Setup driver-specific state for a newly associated node. * Note that we're called also on a re-associate, the isnew * param tells us if this is the first time or not. */ static void ath_newassoc(struct ieee80211_node *ni, int isnew) { struct ath_node *an = ATH_NODE(ni); struct ieee80211vap *vap = ni->ni_vap; struct ath_softc *sc = vap->iv_ic->ic_softc; const struct ieee80211_txparam *tp = ni->ni_txparms; an->an_mcastrix = ath_tx_findrix(sc, tp->mcastrate); an->an_mgmtrix = ath_tx_findrix(sc, tp->mgmtrate); DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; isnew=%d, is_powersave=%d\n", __func__, ni->ni_macaddr, ":", isnew, an->an_is_powersave); ATH_NODE_LOCK(an); ath_rate_newassoc(sc, an, isnew); ATH_NODE_UNLOCK(an); if (isnew && (vap->iv_flags & IEEE80211_F_PRIVACY) == 0 && sc->sc_hasclrkey && ni->ni_ucastkey.wk_keyix == IEEE80211_KEYIX_NONE) ath_setup_stationkey(ni); /* * If we're reassociating, make sure that any paused queues * get unpaused. * * Now, we may have frames in the hardware queue for this node. * So if we are reassociating and there are frames in the queue, * we need to go through the cleanup path to ensure that they're * marked as non-aggregate. */ if (! isnew) { DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; is_powersave=%d\n", __func__, ni->ni_macaddr, ":", an->an_is_powersave); /* XXX for now, we can't hold the lock across assoc */ ath_tx_node_reassoc(sc, an); /* XXX for now, we can't hold the lock across wakeup */ if (an->an_is_powersave) ath_tx_node_wakeup(sc, an); } } static int ath_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *reg, int nchans, struct ieee80211_channel chans[]) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: rd %u cc %u location %c%s\n", __func__, reg->regdomain, reg->country, reg->location, reg->ecm ? " ecm" : ""); status = ath_hal_set_channels(ah, chans, nchans, reg->country, reg->regdomain); if (status != HAL_OK) { DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: failed, status %u\n", __func__, status); return EINVAL; /* XXX */ } return 0; } static void ath_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct ath_softc *sc = ic->ic_softc; struct ath_hal *ah = sc->sc_ah; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: use rd %u cc %d\n", __func__, SKU_DEBUG, CTRY_DEFAULT); /* XXX check return */ (void) ath_hal_getchannels(ah, chans, maxchans, nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_DEBUG, AH_TRUE); } static int ath_getchannels(struct ath_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ath_hal *ah = sc->sc_ah; HAL_STATUS status; /* * Collect channel set based on EEPROM contents. 
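* (On success, the EEPROM regdomain and country code (sc_eerd, sc_eecc) are copied into ic_regdomain below; the two-letter ISO country code is not known at this point, hence the blank isocc fields.)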
*/ status = ath_hal_init_channels(ah, ic->ic_channels, IEEE80211_CHAN_MAX, &ic->ic_nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_NONE, AH_TRUE); if (status != HAL_OK) { device_printf(sc->sc_dev, "%s: unable to collect channel list from hal, status %d\n", __func__, status); return EINVAL; } (void) ath_hal_getregdomain(ah, &sc->sc_eerd); ath_hal_getcountrycode(ah, &sc->sc_eecc); /* NB: cannot fail */ /* XXX map Atheros sku's to net80211 SKU's */ /* XXX net80211 types too small */ ic->ic_regdomain.regdomain = (uint16_t) sc->sc_eerd; ic->ic_regdomain.country = (uint16_t) sc->sc_eecc; ic->ic_regdomain.isocc[0] = ' '; /* XXX don't know */ ic->ic_regdomain.isocc[1] = ' '; ic->ic_regdomain.ecm = 1; ic->ic_regdomain.location = 'I'; DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: eeprom rd %u cc %u (mapped rd %u cc %u) location %c%s\n", __func__, sc->sc_eerd, sc->sc_eecc, ic->ic_regdomain.regdomain, ic->ic_regdomain.country, ic->ic_regdomain.location, ic->ic_regdomain.ecm ? " ecm" : ""); return 0; } static int ath_rate_setup(struct ath_softc *sc, u_int mode) { struct ath_hal *ah = sc->sc_ah; const HAL_RATE_TABLE *rt; switch (mode) { case IEEE80211_MODE_11A: rt = ath_hal_getratetable(ah, HAL_MODE_11A); break; case IEEE80211_MODE_HALF: rt = ath_hal_getratetable(ah, HAL_MODE_11A_HALF_RATE); break; case IEEE80211_MODE_QUARTER: rt = ath_hal_getratetable(ah, HAL_MODE_11A_QUARTER_RATE); break; case IEEE80211_MODE_11B: rt = ath_hal_getratetable(ah, HAL_MODE_11B); break; case IEEE80211_MODE_11G: rt = ath_hal_getratetable(ah, HAL_MODE_11G); break; case IEEE80211_MODE_TURBO_A: rt = ath_hal_getratetable(ah, HAL_MODE_108A); break; case IEEE80211_MODE_TURBO_G: rt = ath_hal_getratetable(ah, HAL_MODE_108G); break; case IEEE80211_MODE_STURBO_A: rt = ath_hal_getratetable(ah, HAL_MODE_TURBO); break; case IEEE80211_MODE_11NA: rt = ath_hal_getratetable(ah, HAL_MODE_11NA_HT20); break; case IEEE80211_MODE_11NG: rt = ath_hal_getratetable(ah, HAL_MODE_11NG_HT20); break; default: DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid mode %u\n", __func__, mode); return 0; } sc->sc_rates[mode] = rt; return (rt != NULL); } static void ath_setcurmode(struct ath_softc *sc, enum ieee80211_phymode mode) { /* NB: on/off times from the Atheros NDIS driver, w/ permission */ static const struct { u_int rate; /* tx/rx 802.11 rate */ u_int16_t timeOn; /* LED on time (ms) */ u_int16_t timeOff; /* LED off time (ms) */ } blinkrates[] = { { 108, 40, 10 }, { 96, 44, 11 }, { 72, 50, 13 }, { 48, 57, 14 }, { 36, 67, 16 }, { 24, 80, 20 }, { 22, 100, 25 }, { 18, 133, 34 }, { 12, 160, 40 }, { 10, 200, 50 }, { 6, 240, 58 }, { 4, 267, 66 }, { 2, 400, 100 }, { 0, 500, 130 }, /* XXX half/quarter rates */ }; const HAL_RATE_TABLE *rt; int i, j; memset(sc->sc_rixmap, 0xff, sizeof(sc->sc_rixmap)); rt = sc->sc_rates[mode]; KASSERT(rt != NULL, ("no h/w rate set for phy mode %u", mode)); for (i = 0; i < rt->rateCount; i++) { uint8_t ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL; if (rt->info[i].phy != IEEE80211_T_HT) sc->sc_rixmap[ieeerate] = i; else sc->sc_rixmap[ieeerate | IEEE80211_RATE_MCS] = i; } memset(sc->sc_hwmap, 0, sizeof(sc->sc_hwmap)); for (i = 0; i < nitems(sc->sc_hwmap); i++) { if (i >= rt->rateCount) { sc->sc_hwmap[i].ledon = (500 * hz) / 1000; sc->sc_hwmap[i].ledoff = (130 * hz) / 1000; continue; } sc->sc_hwmap[i].ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL; if (rt->info[i].phy == IEEE80211_T_HT) sc->sc_hwmap[i].ieeerate |= IEEE80211_RATE_MCS; sc->sc_hwmap[i].txflags = IEEE80211_RADIOTAP_F_DATAPAD; if (rt->info[i].shortPreamble || rt->info[i].phy == 
IEEE80211_T_OFDM) sc->sc_hwmap[i].txflags |= IEEE80211_RADIOTAP_F_SHORTPRE; sc->sc_hwmap[i].rxflags = sc->sc_hwmap[i].txflags; for (j = 0; j < nitems(blinkrates)-1; j++) if (blinkrates[j].rate == sc->sc_hwmap[i].ieeerate) break; /* NB: this uses the last entry if the rate isn't found */ /* XXX beware of overflow */ sc->sc_hwmap[i].ledon = (blinkrates[j].timeOn * hz) / 1000; sc->sc_hwmap[i].ledoff = (blinkrates[j].timeOff * hz) / 1000; } sc->sc_currates = rt; sc->sc_curmode = mode; /* * All protection frames are transmitted at 2Mb/s for * 11g, otherwise at 1Mb/s. */ if (mode == IEEE80211_MODE_11G) sc->sc_protrix = ath_tx_findrix(sc, 2*2); else sc->sc_protrix = ath_tx_findrix(sc, 2*1); /* NB: caller is responsible for resetting rate control state */ } static void ath_watchdog(void *arg) { struct ath_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; int do_reset = 0; ATH_LOCK_ASSERT(sc); if (sc->sc_wd_timer != 0 && --sc->sc_wd_timer == 0) { uint32_t hangs; ath_power_set_power_state(sc, HAL_PM_AWAKE); if (ath_hal_gethangstate(sc->sc_ah, 0xffff, &hangs) && hangs != 0) { device_printf(sc->sc_dev, "%s hang detected (0x%x)\n", hangs & 0xff ? "bb" : "mac", hangs); } else device_printf(sc->sc_dev, "device timeout\n"); do_reset = 1; counter_u64_add(ic->ic_oerrors, 1); sc->sc_stats.ast_watchdog++; ath_power_restore_power_state(sc); } /* * We can't hold the lock across the ath_reset() call. * * And since this routine can't hold a lock and sleep, * do the reset deferred. */ if (do_reset) { taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask); } callout_schedule(&sc->sc_wd_ch, hz); } static void ath_parent(struct ieee80211com *ic) { struct ath_softc *sc = ic->ic_softc; int error = EDOOFUS; ATH_LOCK(sc); if (ic->ic_nrunning > 0) { /* * To avoid rescanning another access point, * do not call ath_init() here. Instead, * only reflect promisc mode settings. */ if (sc->sc_running) { ath_power_set_power_state(sc, HAL_PM_AWAKE); ath_mode_init(sc); ath_power_restore_power_state(sc); } else if (!sc->sc_invalid) { /* * Beware of being called during attach/detach * to reset promiscuous mode. In that case we * will still be marked UP but not RUNNING. * However trying to re-init the interface * is the wrong thing to do as we've already * torn down much of our state. There's * probably a better way to deal with this. */ error = ath_init(sc); } } else { ath_stop(sc); if (!sc->sc_invalid) ath_power_setpower(sc, HAL_PM_FULL_SLEEP); } ATH_UNLOCK(sc); if (error == 0) { #ifdef ATH_TX99_DIAG if (sc->sc_tx99 != NULL) sc->sc_tx99->start(sc->sc_tx99); else #endif ieee80211_start_all(ic); } } /* * Announce various information on device/driver attach.
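* (ath_announce() below prints the MAC/PHY/RF revisions and, under bootverbose, the WME AC to hardware queue mapping plus the CAB and beacon queue numbers.)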
*/ static void ath_announce(struct ath_softc *sc) { struct ath_hal *ah = sc->sc_ah; device_printf(sc->sc_dev, "%s mac %d.%d RF%s phy %d.%d\n", ath_hal_mac_name(ah), ah->ah_macVersion, ah->ah_macRev, ath_hal_rf_name(ah), ah->ah_phyRev >> 4, ah->ah_phyRev & 0xf); device_printf(sc->sc_dev, "2GHz radio: 0x%.4x; 5GHz radio: 0x%.4x\n", ah->ah_analog2GhzRev, ah->ah_analog5GhzRev); if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct ath_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->axq_qnum, ieee80211_wme_acnames[i]); } device_printf(sc->sc_dev, "Use hw queue %u for CAB traffic\n", sc->sc_cabq->axq_qnum); device_printf(sc->sc_dev, "Use hw queue %u for beacons\n", sc->sc_bhalq); } if (ath_rxbuf != ATH_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", ath_rxbuf); if (ath_txbuf != ATH_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", ath_txbuf); if (sc->sc_mcastkey && bootverbose) device_printf(sc->sc_dev, "using multicast key search\n"); } static void ath_dfs_tasklet(void *p, int npending) { struct ath_softc *sc = (struct ath_softc *) p; struct ieee80211com *ic = &sc->sc_ic; /* * If previous processing has found a radar event, * signal this to the net80211 layer to begin DFS * processing. */ if (ath_dfs_process_radar_event(sc, sc->sc_curchan)) { /* DFS event found, initiate channel change */ /* * XXX doesn't currently tell us whether the event * XXX was found in the primary or extension * XXX channel! */ IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, sc->sc_curchan); IEEE80211_UNLOCK(ic); } } /* * Enable/disable power save. This must be called with * no TX driver locks currently held, so it should only * be called from the RX path (which doesn't hold any * TX driver locks.) */ static void ath_node_powersave(struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ath_node *an = ATH_NODE(ni); struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* XXX and no TXQ locks should be held here */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d\n", __func__, ni->ni_macaddr, ":", !! enable); /* Suspend or resume software queue handling */ if (enable) ath_tx_node_sleep(sc, an); else ath_tx_node_wakeup(sc, an); /* Update net80211 state */ avp->av_node_ps(ni, enable); #else struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* Update net80211 state */ avp->av_node_ps(ni, enable); #endif/* ATH_SW_PSQ */ } /* * Notification from net80211 that the powersave queue state has * changed. * * Since the software queue also may have some frames: * * + if the node software queue has frames and the TID state * is 0, we set the TIM; * + if the node and the stack are both empty, we clear the TIM bit. * + If the stack tries to set the bit, always set it. * + If the stack tries to clear the bit, only clear it if the * software queue in question is also cleared. * * TODO: this is called during node teardown; so let's ensure this * is all correctly handled and that the TIM bit is cleared. * It may be that the node flush is called _AFTER_ the net80211 * stack clears the TIM. * * Here is the racy part. Since it's possible >1 concurrent, * overlapping TXes will appear complete with a TX completion in * another thread, it's possible that the concurrent TIM calls will * clash. We can't hold the node lock here because setting the * TIM grabs the net80211 comlock and this may cause a LOR. 
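* (LOR: lock order reversal - acquiring the comlock after the node lock here, while other code paths take the two in the opposite order.)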
* The solution is either to totally serialise _everything_ at * this point (ie, all TX, completion and any reset/flush go into * one taskqueue) or a new "ath TIM lock" needs to be created that * just wraps the driver state change and this call to avp->av_set_tim(). * * The same race exists in the net80211 power save queue handling * as well. Since multiple transmitting threads may queue frames * into the driver, as well as ps-poll and the driver transmitting * frames (and thus clearing the psq), it's quite possible that * a packet entering the PSQ and a ps-poll being handled will * race, causing the TIM to be cleared and not re-set. */ static int ath_node_set_tim(struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; struct ath_node *an = ATH_NODE(ni); struct ath_vap *avp = ATH_VAP(ni->ni_vap); int changed = 0; ATH_TX_LOCK(sc); an->an_stack_psq = enable; /* * This will get called for all operating modes, * even if avp->av_set_tim is unset. * It's currently set for hostap/ibss modes; but * the same infrastructure is used for both STA * and AP/IBSS node power save. */ if (avp->av_set_tim == NULL) { ATH_TX_UNLOCK(sc); return (0); } /* * If setting the bit, always set it here. * If clearing the bit, only clear it if the * software queue is also empty. * * If the node has left power save, just clear the TIM * bit regardless of the state of the power save queue. * * XXX TODO: although atomics are used, it's quite possible * that a race will occur between this and setting/clearing * in another thread. TX completion will occur always in * one thread, however setting/clearing the TIM bit can come * from a variety of different process contexts! */ if (enable && an->an_tim_set == 1) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, tim_set=1, ignoring\n", __func__, ni->ni_macaddr, ":", enable); ATH_TX_UNLOCK(sc); } else if (enable) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, enabling TIM\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 1; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else if (an->an_swq_depth == 0) { /* disable */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_swq_depth == 0, disabling\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 0; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else if (! an->an_is_powersave) { /* * disable regardless; the node isn't in powersave now */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_pwrsave=0, disabling\n", __func__, ni->ni_macaddr, ":", enable); an->an_tim_set = 0; ATH_TX_UNLOCK(sc); changed = avp->av_set_tim(ni, enable); } else { /* * psq disable, node is currently in powersave, node * software queue isn't empty, so don't clear the TIM bit * for now. */ ATH_TX_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d, an_swq_depth > 0, ignoring\n", __func__, ni->ni_macaddr, ":", enable); changed = 0; } return (changed); #else struct ath_vap *avp = ATH_VAP(ni->ni_vap); /* * Some operating modes don't set av_set_tim(), so don't * update it here. */ if (avp->av_set_tim == NULL) return (0); return (avp->av_set_tim(ni, enable)); #endif /* ATH_SW_PSQ */ } /* * Set or update the TIM from the software queue. * * Check the software queue depth before attempting to lock * anything; that avoids trying to obtain the lock. Then, * re-check afterwards to ensure nothing has changed in the * meantime.
* * set: This is designed to be called from the TX path, after * a frame has been queued; to see if the swq > 0. * * clear: This is designed to be called from the buffer completion point * (right now it's ath_tx_default_comp()) where the state of * a software queue has changed. * * It makes sense to place it at buffer free / completion rather * than after each software queue operation, as there's no real * point in churning the TIM bit as the last frames in the software * queue are transmitted. If they fail and we retry them, we'd * just be setting the TIM bit again anyway. */ void ath_tx_update_tim(struct ath_softc *sc, struct ieee80211_node *ni, int enable) { #ifdef ATH_SW_PSQ struct ath_node *an; struct ath_vap *avp; /* Don't do this for broadcast/etc frames */ if (ni == NULL) return; an = ATH_NODE(ni); avp = ATH_VAP(ni->ni_vap); /* * And for operating modes without the TIM handler set, let's * just skip those. */ if (avp->av_set_tim == NULL) return; ATH_TX_LOCK_ASSERT(sc); if (enable) { if (an->an_is_powersave && an->an_tim_set == 0 && an->an_swq_depth != 0) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: swq_depth>0, tim_set=0, set!\n", __func__, ni->ni_macaddr, ":"); an->an_tim_set = 1; (void) avp->av_set_tim(ni, 1); } } else { /* * Don't bother grabbing the lock unless the queue is empty. */ if (an->an_swq_depth != 0) return; if (an->an_is_powersave && an->an_stack_psq == 0 && an->an_tim_set == 1 && an->an_swq_depth == 0) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: swq_depth=0, tim_set=1, psq_set=0," " clear!\n", __func__, ni->ni_macaddr, ":"); an->an_tim_set = 0; (void) avp->av_set_tim(ni, 0); } } #else return; #endif /* ATH_SW_PSQ */ } /* * Received a ps-poll frame from net80211. * * Here we get a chance to serve out a software-queued frame ourselves * before we punt it to net80211 to transmit us one itself - either * because there's traffic in the net80211 psq, or a NULL frame to * indicate there's nothing else. */ static void ath_node_recv_pspoll(struct ieee80211_node *ni, struct mbuf *m) { #ifdef ATH_SW_PSQ struct ath_node *an; struct ath_vap *avp; struct ieee80211com *ic = ni->ni_ic; struct ath_softc *sc = ic->ic_softc; int tid; /* Just paranoia */ if (ni == NULL) return; /* * Unassociated (temporary node) station. */ if (ni->ni_associd == 0) return; /* * We do have an active node, so let's begin looking into it. */ an = ATH_NODE(ni); avp = ATH_VAP(ni->ni_vap); /* * For now, we just call the original ps-poll method. * Once we're ready to flip this on: * * + Set leak to 1, as no matter what we're going to have * to send a frame; * + Check the software queue and if there's something in it, * schedule the highest TID that has traffic from this node. * Then make sure we schedule the software scheduler to * run so it picks up said frame. * * That way whatever happens, we'll at least send _a_ frame * to the given node. * * Again, yes, it's crappy QoS if the node has multiple * TIDs worth of traffic - but let's get it working first * before we optimise it. * * Also yes, there's definitely latency here - we're not * direct dispatching to the hardware in this path (and * we're likely being called from the packet receive path, * so going back into TX may be a little hairy!) but again * I'd like to get this working first before optimising * turn-around time. */ ATH_TX_LOCK(sc); /* * Legacy - we're called and the node isn't asleep. * Immediately punt. */ if (!
an->an_is_powersave) { DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: not in powersave?\n", __func__, ni->ni_macaddr, ":"); ATH_TX_UNLOCK(sc); avp->av_recv_pspoll(ni, m); return; } /* * We're in powersave. * * Leak a frame. */ an->an_leak_count = 1; /* * Now, if there's no frames in the node, just punt to * recv_pspoll. * * Don't bother checking if the TIM bit is set, we really * only care if there are any frames here! */ if (an->an_swq_depth == 0) { ATH_TX_UNLOCK(sc); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: SWQ empty; punting to net80211\n", __func__, ni->ni_macaddr, ":"); avp->av_recv_pspoll(ni, m); return; } /* * Ok, let's schedule the highest TID that has traffic * and then schedule something. */ for (tid = IEEE80211_TID_SIZE - 1; tid >= 0; tid--) { struct ath_tid *atid = &an->an_tid[tid]; /* * No frames? Skip. */ if (atid->axq_depth == 0) continue; ath_tx_tid_sched(sc, atid); /* * XXX we could do a direct call to the TXQ * scheduler code here to optimise latency * at the expense of a REALLY deep callstack. */ ATH_TX_UNLOCK(sc); taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask); DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: leaking frame to TID %d\n", __func__, ni->ni_macaddr, ":", tid); return; } ATH_TX_UNLOCK(sc); /* * XXX nothing in the TIDs at this point? Eek. */ DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: TIDs empty, but ath_node showed traffic?!\n", __func__, ni->ni_macaddr, ":"); avp->av_recv_pspoll(ni, m); #else avp->av_recv_pspoll(ni, m); #endif /* ATH_SW_PSQ */ } MODULE_VERSION(if_ath, 1); MODULE_DEPEND(if_ath, wlan, 1, 1, 1); /* 802.11 media layer */ #if defined(IEEE80211_ALQ) || defined(AH_DEBUG_ALQ) || defined(ATH_DEBUG_ALQ) MODULE_DEPEND(if_ath, alq, 1, 1, 1); #endif Index: stable/11/sys/dev/cadence/if_cgem.c =================================================================== --- stable/11/sys/dev/cadence/if_cgem.c (revision 330445) +++ stable/11/sys/dev/cadence/if_cgem.c (revision 330446) @@ -1,1866 +1,1866 @@ /*- * Copyright (c) 2012-2014 Thomas Skibo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * A network interface driver for Cadence GEM Gigabit Ethernet * interface such as the one used in Xilinx Zynq-7000 SoC. * * Reference: Zynq-7000 All Programmable SoC Technical Reference Manual. * (v1.4) November 16, 2012. Xilinx doc UG585. GEM is covered in Ch. 
16 * and register definitions are in appendix B.18. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #include #include #include #include #include #include #include #include #include "miibus_if.h" #define IF_CGEM_NAME "cgem" #define CGEM_NUM_RX_DESCS 512 /* size of receive descriptor ring */ #define CGEM_NUM_TX_DESCS 512 /* size of transmit descriptor ring */ #define MAX_DESC_RING_SIZE (MAX(CGEM_NUM_RX_DESCS*sizeof(struct cgem_rx_desc),\ CGEM_NUM_TX_DESCS*sizeof(struct cgem_tx_desc))) /* Default for sysctl rxbufs. Must be < CGEM_NUM_RX_DESCS of course. */ #define DEFAULT_NUM_RX_BUFS 256 /* number of receive bufs to queue. */ #define TX_MAX_DMA_SEGS 8 /* maximum segs in a tx mbuf dma */ #define CGEM_CKSUM_ASSIST (CSUM_IP | CSUM_TCP | CSUM_UDP | \ CSUM_TCP_IPV6 | CSUM_UDP_IPV6) struct cgem_softc { if_t ifp; struct mtx sc_mtx; device_t dev; device_t miibus; u_int mii_media_active; /* last active media */ int if_old_flags; struct resource *mem_res; struct resource *irq_res; void *intrhand; struct callout tick_ch; uint32_t net_ctl_shadow; int ref_clk_num; u_char eaddr[6]; bus_dma_tag_t desc_dma_tag; bus_dma_tag_t mbuf_dma_tag; /* receive descriptor ring */ struct cgem_rx_desc *rxring; bus_addr_t rxring_physaddr; struct mbuf *rxring_m[CGEM_NUM_RX_DESCS]; bus_dmamap_t rxring_m_dmamap[CGEM_NUM_RX_DESCS]; int rxring_hd_ptr; /* where to put rcv bufs */ int rxring_tl_ptr; /* where to get receives */ int rxring_queued; /* how many rcv bufs queued */ bus_dmamap_t rxring_dma_map; int rxbufs; /* tunable number rcv bufs */ int rxhangwar; /* rx hang work-around */ u_int rxoverruns; /* rx overruns */ u_int rxnobufs; /* rx buf ring empty events */ u_int rxdmamapfails; /* rx dmamap failures */ uint32_t rx_frames_prev; /* transmit descriptor ring */ struct cgem_tx_desc *txring; bus_addr_t txring_physaddr; struct mbuf *txring_m[CGEM_NUM_TX_DESCS]; bus_dmamap_t txring_m_dmamap[CGEM_NUM_TX_DESCS]; int txring_hd_ptr; /* where to put next xmits */ int txring_tl_ptr; /* next xmit mbuf to free */ int txring_queued; /* num xmits segs queued */ bus_dmamap_t txring_dma_map; u_int txfull; /* tx ring full events */ u_int txdefrags; /* tx calls to m_defrag() */ u_int txdefragfails; /* tx m_defrag() failures */ u_int txdmamapfails; /* tx dmamap failures */ /* hardware provided statistics */ struct cgem_hw_stats { uint64_t tx_bytes; uint32_t tx_frames; uint32_t tx_frames_bcast; uint32_t tx_frames_multi; uint32_t tx_frames_pause; uint32_t tx_frames_64b; uint32_t tx_frames_65to127b; uint32_t tx_frames_128to255b; uint32_t tx_frames_256to511b; uint32_t tx_frames_512to1023b; uint32_t tx_frames_1024to1536b; uint32_t tx_under_runs; uint32_t tx_single_collisn; uint32_t tx_multi_collisn; uint32_t tx_excsv_collisn; uint32_t tx_late_collisn; uint32_t tx_deferred_frames; uint32_t tx_carrier_sense_errs; uint64_t rx_bytes; uint32_t rx_frames; uint32_t rx_frames_bcast; uint32_t rx_frames_multi; uint32_t rx_frames_pause; uint32_t rx_frames_64b; uint32_t rx_frames_65to127b; uint32_t rx_frames_128to255b; uint32_t rx_frames_256to511b; uint32_t rx_frames_512to1023b; uint32_t rx_frames_1024to1536b; uint32_t rx_frames_undersize; uint32_t rx_frames_oversize; uint32_t rx_frames_jabber; uint32_t rx_frames_fcs_errs; uint32_t rx_frames_length_errs; uint32_t rx_symbol_errs; uint32_t rx_align_errs; uint32_t rx_resource_errs; uint32_t 
rx_overrun_errs; uint32_t rx_ip_hdr_csum_errs; uint32_t rx_tcp_csum_errs; uint32_t rx_udp_csum_errs; } stats; }; #define RD4(sc, off) (bus_read_4((sc)->mem_res, (off))) #define WR4(sc, off, val) (bus_write_4((sc)->mem_res, (off), (val))) #define BARRIER(sc, off, len, flags) \ (bus_barrier((sc)->mem_res, (off), (len), (flags)) #define CGEM_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CGEM_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define CGEM_LOCK_INIT(sc) \ mtx_init(&(sc)->sc_mtx, device_get_nameunit((sc)->dev), \ MTX_NETWORK_LOCK, MTX_DEF) #define CGEM_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) #define CGEM_ASSERT_LOCKED(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) /* Allow platforms to optionally provide a way to set the reference clock. */ int cgem_set_ref_clk(int unit, int frequency); static devclass_t cgem_devclass; static int cgem_probe(device_t dev); static int cgem_attach(device_t dev); static int cgem_detach(device_t dev); static void cgem_tick(void *); static void cgem_intr(void *); static void cgem_mediachange(struct cgem_softc *, struct mii_data *); static void cgem_get_mac(struct cgem_softc *sc, u_char eaddr[]) { int i; uint32_t rnd; /* See if boot loader gave us a MAC address already. */ for (i = 0; i < 4; i++) { uint32_t low = RD4(sc, CGEM_SPEC_ADDR_LOW(i)); uint32_t high = RD4(sc, CGEM_SPEC_ADDR_HI(i)) & 0xffff; if (low != 0 || high != 0) { eaddr[0] = low & 0xff; eaddr[1] = (low >> 8) & 0xff; eaddr[2] = (low >> 16) & 0xff; eaddr[3] = (low >> 24) & 0xff; eaddr[4] = high & 0xff; eaddr[5] = (high >> 8) & 0xff; break; } } /* No MAC from boot loader? Assign a random one. */ if (i == 4) { rnd = arc4random(); eaddr[0] = 'b'; eaddr[1] = 's'; eaddr[2] = 'd'; eaddr[3] = (rnd >> 16) & 0xff; eaddr[4] = (rnd >> 8) & 0xff; eaddr[5] = rnd & 0xff; device_printf(sc->dev, "no mac address found, assigning " "random: %02x:%02x:%02x:%02x:%02x:%02x\n", eaddr[0], eaddr[1], eaddr[2], eaddr[3], eaddr[4], eaddr[5]); } /* Move address to first slot and zero out the rest. */ WR4(sc, CGEM_SPEC_ADDR_LOW(0), (eaddr[3] << 24) | (eaddr[2] << 16) | (eaddr[1] << 8) | eaddr[0]); WR4(sc, CGEM_SPEC_ADDR_HI(0), (eaddr[5] << 8) | eaddr[4]); for (i = 1; i < 4; i++) { WR4(sc, CGEM_SPEC_ADDR_LOW(i), 0); WR4(sc, CGEM_SPEC_ADDR_HI(i), 0); } } /* cgem_mac_hash(): map 48-bit address to a 6-bit hash. * The 6-bit hash corresponds to a bit in a 64-bit hash * register. Setting that bit in the hash register enables * reception of all frames with a destination address that hashes * to that 6-bit value. * * The hash function is described in sec. 16.2.3 in the Zynq-7000 Tech * Reference Manual. Bits 0-5 in the hash are the exclusive-or of * every sixth bit in the destination address. */ static int cgem_mac_hash(u_char eaddr[]) { int hash; int i, j; hash = 0; for (i = 0; i < 6; i++) for (j = i; j < 48; j += 6) if ((eaddr[j >> 3] & (1 << (j & 7))) != 0) hash ^= (1 << i); return hash; } /* After any change in rx flags or multi-cast addresses, set up * hash registers and net config register bits. 
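* * For example (hypothetical address): a multicast address whose * cgem_mac_hash() value is 35 enables bit 3 of CGEM_HASH_TOP * (35 - 32), while a hash of 12 enables bit 12 of CGEM_HASH_BOT.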
*/ static void cgem_rx_filter(struct cgem_softc *sc) { if_t ifp = sc->ifp; u_char *mta; int index, i, mcnt; uint32_t hash_hi, hash_lo; uint32_t net_cfg; hash_hi = 0; hash_lo = 0; net_cfg = RD4(sc, CGEM_NET_CFG); net_cfg &= ~(CGEM_NET_CFG_MULTI_HASH_EN | CGEM_NET_CFG_NO_BCAST | CGEM_NET_CFG_COPY_ALL); if ((if_getflags(ifp) & IFF_PROMISC) != 0) net_cfg |= CGEM_NET_CFG_COPY_ALL; else { if ((if_getflags(ifp) & IFF_BROADCAST) == 0) net_cfg |= CGEM_NET_CFG_NO_BCAST; if ((if_getflags(ifp) & IFF_ALLMULTI) != 0) { hash_hi = 0xffffffff; hash_lo = 0xffffffff; } else { mcnt = if_multiaddr_count(ifp, -1); mta = malloc(ETHER_ADDR_LEN * mcnt, M_DEVBUF, M_NOWAIT); if (mta == NULL) { device_printf(sc->dev, "failed to allocate temp mcast list\n"); return; } if_multiaddr_array(ifp, mta, &mcnt, mcnt); for (i = 0; i < mcnt; i++) { index = cgem_mac_hash( LLADDR((struct sockaddr_dl *) (mta + (i * ETHER_ADDR_LEN)))); if (index > 31) hash_hi |= (1 << (index - 32)); else hash_lo |= (1 << index); } free(mta, M_DEVBUF); } if (hash_hi != 0 || hash_lo != 0) net_cfg |= CGEM_NET_CFG_MULTI_HASH_EN; } WR4(sc, CGEM_HASH_TOP, hash_hi); WR4(sc, CGEM_HASH_BOT, hash_lo); WR4(sc, CGEM_NET_CFG, net_cfg); } /* For bus_dmamap_load() callback. */ static void cgem_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (nsegs != 1 || error != 0) return; *(bus_addr_t *)arg = segs[0].ds_addr; } /* Create DMA'able descriptor rings. */ static int cgem_setup_descs(struct cgem_softc *sc) { int i, err; sc->txring = NULL; sc->rxring = NULL; /* Allocate non-cached DMA space for RX and TX descriptors. */ err = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MAX_DESC_RING_SIZE, 1, MAX_DESC_RING_SIZE, 0, busdma_lock_mutex, &sc->sc_mtx, &sc->desc_dma_tag); if (err) return (err); /* Set up a bus_dma_tag for mbufs. */ err = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, TX_MAX_DMA_SEGS, MCLBYTES, 0, busdma_lock_mutex, &sc->sc_mtx, &sc->mbuf_dma_tag); if (err) return (err); /* Allocate DMA memory in non-cacheable space. */ err = bus_dmamem_alloc(sc->desc_dma_tag, (void **)&sc->rxring, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->rxring_dma_map); if (err) return (err); /* Load descriptor DMA memory. */ err = bus_dmamap_load(sc->desc_dma_tag, sc->rxring_dma_map, (void *)sc->rxring, CGEM_NUM_RX_DESCS*sizeof(struct cgem_rx_desc), cgem_getaddr, &sc->rxring_physaddr, BUS_DMA_NOWAIT); if (err) return (err); /* Initialize RX descriptors. */ for (i = 0; i < CGEM_NUM_RX_DESCS; i++) { sc->rxring[i].addr = CGEM_RXDESC_OWN; sc->rxring[i].ctl = 0; sc->rxring_m[i] = NULL; sc->rxring_m_dmamap[i] = NULL; } sc->rxring[CGEM_NUM_RX_DESCS - 1].addr |= CGEM_RXDESC_WRAP; sc->rxring_hd_ptr = 0; sc->rxring_tl_ptr = 0; sc->rxring_queued = 0; /* Allocate DMA memory for TX descriptors in non-cacheable space. */ err = bus_dmamem_alloc(sc->desc_dma_tag, (void **)&sc->txring, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->txring_dma_map); if (err) return (err); /* Load TX descriptor DMA memory. */ err = bus_dmamap_load(sc->desc_dma_tag, sc->txring_dma_map, (void *)sc->txring, CGEM_NUM_TX_DESCS*sizeof(struct cgem_tx_desc), cgem_getaddr, &sc->txring_physaddr, BUS_DMA_NOWAIT); if (err) return (err); /* Initialize TX descriptor ring. 
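* Every descriptor starts with CGEM_TXDESC_USED set, marking it * software-owned so the controller transmits nothing until the * driver hands a descriptor over; the last one also gets * CGEM_TXDESC_WRAP to close the ring.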
*/ for (i = 0; i < CGEM_NUM_TX_DESCS; i++) { sc->txring[i].addr = 0; sc->txring[i].ctl = CGEM_TXDESC_USED; sc->txring_m[i] = NULL; sc->txring_m_dmamap[i] = NULL; } sc->txring[CGEM_NUM_TX_DESCS - 1].ctl |= CGEM_TXDESC_WRAP; sc->txring_hd_ptr = 0; sc->txring_tl_ptr = 0; sc->txring_queued = 0; return (0); } /* Fill receive descriptor ring with mbufs. */ static void cgem_fill_rqueue(struct cgem_softc *sc) { struct mbuf *m = NULL; bus_dma_segment_t segs[TX_MAX_DMA_SEGS]; int nsegs; CGEM_ASSERT_LOCKED(sc); while (sc->rxring_queued < sc->rxbufs) { /* Get a cluster mbuf. */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) break; m->m_len = MCLBYTES; m->m_pkthdr.len = MCLBYTES; m->m_pkthdr.rcvif = sc->ifp; /* Load map and plug in physical address. */ if (bus_dmamap_create(sc->mbuf_dma_tag, 0, &sc->rxring_m_dmamap[sc->rxring_hd_ptr])) { sc->rxdmamapfails++; m_free(m); break; } if (bus_dmamap_load_mbuf_sg(sc->mbuf_dma_tag, sc->rxring_m_dmamap[sc->rxring_hd_ptr], m, segs, &nsegs, BUS_DMA_NOWAIT)) { sc->rxdmamapfails++; bus_dmamap_destroy(sc->mbuf_dma_tag, sc->rxring_m_dmamap[sc->rxring_hd_ptr]); sc->rxring_m_dmamap[sc->rxring_hd_ptr] = NULL; m_free(m); break; } sc->rxring_m[sc->rxring_hd_ptr] = m; /* Sync cache with receive buffer. */ bus_dmamap_sync(sc->mbuf_dma_tag, sc->rxring_m_dmamap[sc->rxring_hd_ptr], BUS_DMASYNC_PREREAD); /* Write rx descriptor and increment head pointer. */ sc->rxring[sc->rxring_hd_ptr].ctl = 0; if (sc->rxring_hd_ptr == CGEM_NUM_RX_DESCS - 1) { sc->rxring[sc->rxring_hd_ptr].addr = segs[0].ds_addr | CGEM_RXDESC_WRAP; sc->rxring_hd_ptr = 0; } else sc->rxring[sc->rxring_hd_ptr++].addr = segs[0].ds_addr; sc->rxring_queued++; } } /* Pull received packets off of receive descriptor ring. */ static void cgem_recv(struct cgem_softc *sc) { if_t ifp = sc->ifp; struct mbuf *m, *m_hd, **m_tl; uint32_t ctl; CGEM_ASSERT_LOCKED(sc); /* Pick up all packets in which the OWN bit is set. */ m_hd = NULL; m_tl = &m_hd; while (sc->rxring_queued > 0 && (sc->rxring[sc->rxring_tl_ptr].addr & CGEM_RXDESC_OWN) != 0) { ctl = sc->rxring[sc->rxring_tl_ptr].ctl; /* Grab filled mbuf. */ m = sc->rxring_m[sc->rxring_tl_ptr]; sc->rxring_m[sc->rxring_tl_ptr] = NULL; /* Sync cache with receive buffer. */ bus_dmamap_sync(sc->mbuf_dma_tag, sc->rxring_m_dmamap[sc->rxring_tl_ptr], BUS_DMASYNC_POSTREAD); /* Unload and destroy dmamap. */ bus_dmamap_unload(sc->mbuf_dma_tag, sc->rxring_m_dmamap[sc->rxring_tl_ptr]); bus_dmamap_destroy(sc->mbuf_dma_tag, sc->rxring_m_dmamap[sc->rxring_tl_ptr]); sc->rxring_m_dmamap[sc->rxring_tl_ptr] = NULL; /* Increment tail pointer. */ if (++sc->rxring_tl_ptr == CGEM_NUM_RX_DESCS) sc->rxring_tl_ptr = 0; sc->rxring_queued--; /* Check FCS and make sure entire packet landed in one mbuf * cluster (which is much bigger than the largest ethernet * packet). */ if ((ctl & CGEM_RXDESC_BAD_FCS) != 0 || (ctl & (CGEM_RXDESC_SOF | CGEM_RXDESC_EOF)) != (CGEM_RXDESC_SOF | CGEM_RXDESC_EOF)) { /* discard. */ m_free(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } /* Ready it to hand off to upper layers. */ m->m_data += ETHER_ALIGN; m->m_len = (ctl & CGEM_RXDESC_LENGTH_MASK); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len; /* Are we using hardware checksumming? Check the * status in the receive descriptor. */ if ((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) { /* TCP or UDP checks out, IP checks out too. 
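* Setting csum_data to 0xffff together with CSUM_PSEUDO_HDR below is * the stack's convention for "checksum already verified", so no * software check is repeated.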
*/ if ((ctl & CGEM_RXDESC_CKSUM_STAT_MASK) == CGEM_RXDESC_CKSUM_STAT_TCP_GOOD || (ctl & CGEM_RXDESC_CKSUM_STAT_MASK) == CGEM_RXDESC_CKSUM_STAT_UDP_GOOD) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } else if ((ctl & CGEM_RXDESC_CKSUM_STAT_MASK) == CGEM_RXDESC_CKSUM_STAT_IP_GOOD) { /* Only IP checks out. */ m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; m->m_pkthdr.csum_data = 0xffff; } } /* Queue it up for delivery below. */ *m_tl = m; m_tl = &m->m_next; } /* Replenish receive buffers. */ cgem_fill_rqueue(sc); /* Unlock and send up packets. */ CGEM_UNLOCK(sc); while (m_hd != NULL) { m = m_hd; m_hd = m_hd->m_next; m->m_next = NULL; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_input(ifp, m); } CGEM_LOCK(sc); } /* Find completed transmits and free their mbufs. */ static void cgem_clean_tx(struct cgem_softc *sc) { struct mbuf *m; uint32_t ctl; CGEM_ASSERT_LOCKED(sc); /* free up finished transmits. */ while (sc->txring_queued > 0 && ((ctl = sc->txring[sc->txring_tl_ptr].ctl) & CGEM_TXDESC_USED) != 0) { /* Sync cache. */ bus_dmamap_sync(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_tl_ptr], BUS_DMASYNC_POSTWRITE); /* Unload and destroy DMA map. */ bus_dmamap_unload(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_tl_ptr]); bus_dmamap_destroy(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_tl_ptr]); sc->txring_m_dmamap[sc->txring_tl_ptr] = NULL; /* Free up the mbuf. */ m = sc->txring_m[sc->txring_tl_ptr]; sc->txring_m[sc->txring_tl_ptr] = NULL; m_freem(m); /* Check the status. */ if ((ctl & CGEM_TXDESC_AHB_ERR) != 0) { /* Serious bus error. log to console. */ device_printf(sc->dev, "cgem_clean_tx: Whoa! " "AHB error, addr=0x%x\n", sc->txring[sc->txring_tl_ptr].addr); } else if ((ctl & (CGEM_TXDESC_RETRY_ERR | CGEM_TXDESC_LATE_COLL)) != 0) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } else if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); /* If the packet spanned more than one tx descriptor, * skip descriptors until we find the end so that only * start-of-frame descriptors are processed. */ while ((ctl & CGEM_TXDESC_LAST_BUF) == 0) { if ((ctl & CGEM_TXDESC_WRAP) != 0) sc->txring_tl_ptr = 0; else sc->txring_tl_ptr++; sc->txring_queued--; ctl = sc->txring[sc->txring_tl_ptr].ctl; sc->txring[sc->txring_tl_ptr].ctl = ctl | CGEM_TXDESC_USED; } /* Next descriptor. */ if ((ctl & CGEM_TXDESC_WRAP) != 0) sc->txring_tl_ptr = 0; else sc->txring_tl_ptr++; sc->txring_queued--; if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE); } } /* Start transmits. */ static void cgem_start_locked(if_t ifp) { struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp); struct mbuf *m; bus_dma_segment_t segs[TX_MAX_DMA_SEGS]; uint32_t ctl; int i, nsegs, wrap, err; CGEM_ASSERT_LOCKED(sc); if ((if_getdrvflags(ifp) & IFF_DRV_OACTIVE) != 0) return; for (;;) { /* Check that there is room in the descriptor ring. */ if (sc->txring_queued >= CGEM_NUM_TX_DESCS - TX_MAX_DMA_SEGS * 2) { /* Try to make room. */ cgem_clean_tx(sc); /* Still no room? */ if (sc->txring_queued >= CGEM_NUM_TX_DESCS - TX_MAX_DMA_SEGS * 2) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); sc->txfull++; break; } } /* Grab next transmit packet. */ m = if_dequeue(ifp); if (m == NULL) break; /* Create and load DMA map. 
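* bus_dmamap_load_mbuf_sg() returns EFBIG if the chain needs more * than TX_MAX_DMA_SEGS segments; the EFBIG path below uses * m_defrag() to compact the chain into fewer clusters and retries * the load once.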
*/ if (bus_dmamap_create(sc->mbuf_dma_tag, 0, &sc->txring_m_dmamap[sc->txring_hd_ptr])) { m_freem(m); sc->txdmamapfails++; continue; } err = bus_dmamap_load_mbuf_sg(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_hd_ptr], m, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* Too many segments! defrag and try again. */ struct mbuf *m2 = m_defrag(m, M_NOWAIT); if (m2 == NULL) { sc->txdefragfails++; m_freem(m); bus_dmamap_destroy(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_hd_ptr]); sc->txring_m_dmamap[sc->txring_hd_ptr] = NULL; continue; } m = m2; err = bus_dmamap_load_mbuf_sg(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_hd_ptr], m, segs, &nsegs, BUS_DMA_NOWAIT); sc->txdefrags++; } if (err) { /* Give up. */ m_freem(m); bus_dmamap_destroy(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_hd_ptr]); sc->txring_m_dmamap[sc->txring_hd_ptr] = NULL; sc->txdmamapfails++; continue; } sc->txring_m[sc->txring_hd_ptr] = m; /* Sync tx buffer with cache. */ bus_dmamap_sync(sc->mbuf_dma_tag, sc->txring_m_dmamap[sc->txring_hd_ptr], BUS_DMASYNC_PREWRITE); /* Set wrap flag if next packet might run off end of ring. */ wrap = sc->txring_hd_ptr + nsegs + TX_MAX_DMA_SEGS >= CGEM_NUM_TX_DESCS; /* Fill in the TX descriptors back to front so that USED * bit in first descriptor is cleared last. */ for (i = nsegs - 1; i >= 0; i--) { /* Descriptor address. */ sc->txring[sc->txring_hd_ptr + i].addr = segs[i].ds_addr; /* Descriptor control word. */ ctl = segs[i].ds_len; if (i == nsegs - 1) { ctl |= CGEM_TXDESC_LAST_BUF; if (wrap) ctl |= CGEM_TXDESC_WRAP; } sc->txring[sc->txring_hd_ptr + i].ctl = ctl; if (i != 0) sc->txring_m[sc->txring_hd_ptr + i] = NULL; } if (wrap) sc->txring_hd_ptr = 0; else sc->txring_hd_ptr += nsegs; sc->txring_queued += nsegs; /* Kick the transmitter. */ WR4(sc, CGEM_NET_CTRL, sc->net_ctl_shadow | CGEM_NET_CTRL_START_TX); - /* If there is a BPF listener, bounce a copy to to him. */ + /* If there is a BPF listener, bounce a copy to him. 
*/ ETHER_BPF_MTAP(ifp, m); } } static void cgem_start(if_t ifp) { struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp); CGEM_LOCK(sc); cgem_start_locked(ifp); CGEM_UNLOCK(sc); } static void cgem_poll_hw_stats(struct cgem_softc *sc) { uint32_t n; CGEM_ASSERT_LOCKED(sc); sc->stats.tx_bytes += RD4(sc, CGEM_OCTETS_TX_BOT); sc->stats.tx_bytes += (uint64_t)RD4(sc, CGEM_OCTETS_TX_TOP) << 32; sc->stats.tx_frames += RD4(sc, CGEM_FRAMES_TX); sc->stats.tx_frames_bcast += RD4(sc, CGEM_BCAST_FRAMES_TX); sc->stats.tx_frames_multi += RD4(sc, CGEM_MULTI_FRAMES_TX); sc->stats.tx_frames_pause += RD4(sc, CGEM_PAUSE_FRAMES_TX); sc->stats.tx_frames_64b += RD4(sc, CGEM_FRAMES_64B_TX); sc->stats.tx_frames_65to127b += RD4(sc, CGEM_FRAMES_65_127B_TX); sc->stats.tx_frames_128to255b += RD4(sc, CGEM_FRAMES_128_255B_TX); sc->stats.tx_frames_256to511b += RD4(sc, CGEM_FRAMES_256_511B_TX); sc->stats.tx_frames_512to1023b += RD4(sc, CGEM_FRAMES_512_1023B_TX); sc->stats.tx_frames_1024to1536b += RD4(sc, CGEM_FRAMES_1024_1518B_TX); sc->stats.tx_under_runs += RD4(sc, CGEM_TX_UNDERRUNS); n = RD4(sc, CGEM_SINGLE_COLL_FRAMES); sc->stats.tx_single_collisn += n; if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n); n = RD4(sc, CGEM_MULTI_COLL_FRAMES); sc->stats.tx_multi_collisn += n; if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n); n = RD4(sc, CGEM_EXCESSIVE_COLL_FRAMES); sc->stats.tx_excsv_collisn += n; if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n); n = RD4(sc, CGEM_LATE_COLL); sc->stats.tx_late_collisn += n; if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n); sc->stats.tx_deferred_frames += RD4(sc, CGEM_DEFERRED_TX_FRAMES); sc->stats.tx_carrier_sense_errs += RD4(sc, CGEM_CARRIER_SENSE_ERRS); sc->stats.rx_bytes += RD4(sc, CGEM_OCTETS_RX_BOT); sc->stats.rx_bytes += (uint64_t)RD4(sc, CGEM_OCTETS_RX_TOP) << 32; sc->stats.rx_frames += RD4(sc, CGEM_FRAMES_RX); sc->stats.rx_frames_bcast += RD4(sc, CGEM_BCAST_FRAMES_RX); sc->stats.rx_frames_multi += RD4(sc, CGEM_MULTI_FRAMES_RX); sc->stats.rx_frames_pause += RD4(sc, CGEM_PAUSE_FRAMES_RX); sc->stats.rx_frames_64b += RD4(sc, CGEM_FRAMES_64B_RX); sc->stats.rx_frames_65to127b += RD4(sc, CGEM_FRAMES_65_127B_RX); sc->stats.rx_frames_128to255b += RD4(sc, CGEM_FRAMES_128_255B_RX); sc->stats.rx_frames_256to511b += RD4(sc, CGEM_FRAMES_256_511B_RX); sc->stats.rx_frames_512to1023b += RD4(sc, CGEM_FRAMES_512_1023B_RX); sc->stats.rx_frames_1024to1536b += RD4(sc, CGEM_FRAMES_1024_1518B_RX); sc->stats.rx_frames_undersize += RD4(sc, CGEM_UNDERSZ_RX); sc->stats.rx_frames_oversize += RD4(sc, CGEM_OVERSZ_RX); sc->stats.rx_frames_jabber += RD4(sc, CGEM_JABBERS_RX); sc->stats.rx_frames_fcs_errs += RD4(sc, CGEM_FCS_ERRS); sc->stats.rx_frames_length_errs += RD4(sc, CGEM_LENGTH_FIELD_ERRS); sc->stats.rx_symbol_errs += RD4(sc, CGEM_RX_SYMBOL_ERRS); sc->stats.rx_align_errs += RD4(sc, CGEM_ALIGN_ERRS); sc->stats.rx_resource_errs += RD4(sc, CGEM_RX_RESOURCE_ERRS); sc->stats.rx_overrun_errs += RD4(sc, CGEM_RX_OVERRUN_ERRS); sc->stats.rx_ip_hdr_csum_errs += RD4(sc, CGEM_IP_HDR_CKSUM_ERRS); sc->stats.rx_tcp_csum_errs += RD4(sc, CGEM_TCP_CKSUM_ERRS); sc->stats.rx_udp_csum_errs += RD4(sc, CGEM_UDP_CKSUM_ERRS); } static void cgem_tick(void *arg) { struct cgem_softc *sc = (struct cgem_softc *)arg; struct mii_data *mii; CGEM_ASSERT_LOCKED(sc); /* Poll the phy. */ if (sc->miibus != NULL) { mii = device_get_softc(sc->miibus); mii_tick(mii); } /* Poll statistics registers. */ cgem_poll_hw_stats(sc); /* Check for receiver hang. 
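* A hang is inferred when the hardware rx_frames counter has not * advanced since the previous tick while the work-around is enabled.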
*/ if (sc->rxhangwar && sc->rx_frames_prev == sc->stats.rx_frames) { /* * Reset receiver logic by toggling RX_EN bit. 1usec * delay is necessary especially when operating at 100mbps * and 10mbps speeds. */ WR4(sc, CGEM_NET_CTRL, sc->net_ctl_shadow & ~CGEM_NET_CTRL_RX_EN); DELAY(1); WR4(sc, CGEM_NET_CTRL, sc->net_ctl_shadow); } sc->rx_frames_prev = sc->stats.rx_frames; /* Next callout in one second. */ callout_reset(&sc->tick_ch, hz, cgem_tick, sc); } /* Interrupt handler. */ static void cgem_intr(void *arg) { struct cgem_softc *sc = (struct cgem_softc *)arg; if_t ifp = sc->ifp; uint32_t istatus; CGEM_LOCK(sc); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { CGEM_UNLOCK(sc); return; } /* Read interrupt status and immediately clear the bits. */ istatus = RD4(sc, CGEM_INTR_STAT); WR4(sc, CGEM_INTR_STAT, istatus); /* Packets received. */ if ((istatus & CGEM_INTR_RX_COMPLETE) != 0) cgem_recv(sc); /* Free up any completed transmit buffers. */ cgem_clean_tx(sc); /* Hresp not ok. Something is very bad with DMA. Try to clear. */ if ((istatus & CGEM_INTR_HRESP_NOT_OK) != 0) { device_printf(sc->dev, "cgem_intr: hresp not okay! " "rx_status=0x%x\n", RD4(sc, CGEM_RX_STAT)); WR4(sc, CGEM_RX_STAT, CGEM_RX_STAT_HRESP_NOT_OK); } /* Receiver overrun. */ if ((istatus & CGEM_INTR_RX_OVERRUN) != 0) { /* Clear status bit. */ WR4(sc, CGEM_RX_STAT, CGEM_RX_STAT_OVERRUN); sc->rxoverruns++; } /* Receiver ran out of bufs. */ if ((istatus & CGEM_INTR_RX_USED_READ) != 0) { WR4(sc, CGEM_NET_CTRL, sc->net_ctl_shadow | CGEM_NET_CTRL_FLUSH_DPRAM_PKT); cgem_fill_rqueue(sc); sc->rxnobufs++; } /* Restart transmitter if needed. */ if (!if_sendq_empty(ifp)) cgem_start_locked(ifp); CGEM_UNLOCK(sc); } /* Reset hardware. */ static void cgem_reset(struct cgem_softc *sc) { CGEM_ASSERT_LOCKED(sc); WR4(sc, CGEM_NET_CTRL, 0); WR4(sc, CGEM_NET_CFG, 0); WR4(sc, CGEM_NET_CTRL, CGEM_NET_CTRL_CLR_STAT_REGS); WR4(sc, CGEM_TX_STAT, CGEM_TX_STAT_ALL); WR4(sc, CGEM_RX_STAT, CGEM_RX_STAT_ALL); WR4(sc, CGEM_INTR_DIS, CGEM_INTR_ALL); WR4(sc, CGEM_HASH_BOT, 0); WR4(sc, CGEM_HASH_TOP, 0); WR4(sc, CGEM_TX_QBAR, 0); /* manual says do this. */ WR4(sc, CGEM_RX_QBAR, 0); /* Get management port running even if interface is down. */ WR4(sc, CGEM_NET_CFG, CGEM_NET_CFG_DBUS_WIDTH_32 | CGEM_NET_CFG_MDC_CLK_DIV_64); sc->net_ctl_shadow = CGEM_NET_CTRL_MGMT_PORT_EN; WR4(sc, CGEM_NET_CTRL, sc->net_ctl_shadow); } /* Bring up the hardware. */ static void cgem_config(struct cgem_softc *sc) { if_t ifp = sc->ifp; uint32_t net_cfg; uint32_t dma_cfg; u_char *eaddr = if_getlladdr(ifp); CGEM_ASSERT_LOCKED(sc); /* Program Net Config Register. */ net_cfg = CGEM_NET_CFG_DBUS_WIDTH_32 | CGEM_NET_CFG_MDC_CLK_DIV_64 | CGEM_NET_CFG_FCS_REMOVE | CGEM_NET_CFG_RX_BUF_OFFSET(ETHER_ALIGN) | CGEM_NET_CFG_GIGE_EN | CGEM_NET_CFG_1536RXEN | CGEM_NET_CFG_FULL_DUPLEX | CGEM_NET_CFG_SPEED100; /* Enable receive checksum offloading? */ if ((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) net_cfg |= CGEM_NET_CFG_RX_CHKSUM_OFFLD_EN; WR4(sc, CGEM_NET_CFG, net_cfg); /* Program DMA Config Register. */ dma_cfg = CGEM_DMA_CFG_RX_BUF_SIZE(MCLBYTES) | CGEM_DMA_CFG_RX_PKTBUF_MEMSZ_SEL_8K | CGEM_DMA_CFG_TX_PKTBUF_MEMSZ_SEL | CGEM_DMA_CFG_AHB_FIXED_BURST_LEN_16 | CGEM_DMA_CFG_DISC_WHEN_NO_AHB; /* Enable transmit checksum offloading? */ if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0) dma_cfg |= CGEM_DMA_CFG_CHKSUM_GEN_OFFLOAD_EN; WR4(sc, CGEM_DMA_CFG, dma_cfg); /* Write the rx and tx descriptor ring addresses to the QBAR regs. 
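* These are the physical ring bases saved by cgem_getaddr() when the * descriptor memory was loaded in cgem_setup_descs().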
*/ WR4(sc, CGEM_RX_QBAR, (uint32_t) sc->rxring_physaddr); WR4(sc, CGEM_TX_QBAR, (uint32_t) sc->txring_physaddr); /* Enable rx and tx. */ sc->net_ctl_shadow |= (CGEM_NET_CTRL_TX_EN | CGEM_NET_CTRL_RX_EN); WR4(sc, CGEM_NET_CTRL, sc->net_ctl_shadow); /* Set receive address in case it changed. */ WR4(sc, CGEM_SPEC_ADDR_LOW(0), (eaddr[3] << 24) | (eaddr[2] << 16) | (eaddr[1] << 8) | eaddr[0]); WR4(sc, CGEM_SPEC_ADDR_HI(0), (eaddr[5] << 8) | eaddr[4]); /* Set up interrupts. */ WR4(sc, CGEM_INTR_EN, CGEM_INTR_RX_COMPLETE | CGEM_INTR_RX_OVERRUN | CGEM_INTR_TX_USED_READ | CGEM_INTR_RX_USED_READ | CGEM_INTR_HRESP_NOT_OK); } /* Turn on interface and load up receive ring with buffers. */ static void cgem_init_locked(struct cgem_softc *sc) { struct mii_data *mii; CGEM_ASSERT_LOCKED(sc); if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) != 0) return; cgem_config(sc); cgem_fill_rqueue(sc); if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); mii = device_get_softc(sc->miibus); mii_mediachg(mii); callout_reset(&sc->tick_ch, hz, cgem_tick, sc); } static void cgem_init(void *arg) { struct cgem_softc *sc = (struct cgem_softc *)arg; CGEM_LOCK(sc); cgem_init_locked(sc); CGEM_UNLOCK(sc); } /* Turn off interface. Free up any buffers in transmit or receive queues. */ static void cgem_stop(struct cgem_softc *sc) { int i; CGEM_ASSERT_LOCKED(sc); callout_stop(&sc->tick_ch); /* Shut down hardware. */ cgem_reset(sc); /* Clear out transmit queue. */ for (i = 0; i < CGEM_NUM_TX_DESCS; i++) { sc->txring[i].ctl = CGEM_TXDESC_USED; sc->txring[i].addr = 0; if (sc->txring_m[i]) { /* Unload and destroy dmamap. */ bus_dmamap_unload(sc->mbuf_dma_tag, sc->txring_m_dmamap[i]); bus_dmamap_destroy(sc->mbuf_dma_tag, sc->txring_m_dmamap[i]); sc->txring_m_dmamap[i] = NULL; m_freem(sc->txring_m[i]); sc->txring_m[i] = NULL; } } sc->txring[CGEM_NUM_TX_DESCS - 1].ctl |= CGEM_TXDESC_WRAP; sc->txring_hd_ptr = 0; sc->txring_tl_ptr = 0; sc->txring_queued = 0; /* Clear out receive queue. */ for (i = 0; i < CGEM_NUM_RX_DESCS; i++) { sc->rxring[i].addr = CGEM_RXDESC_OWN; sc->rxring[i].ctl = 0; if (sc->rxring_m[i]) { /* Unload and destroy dmamap. */ bus_dmamap_unload(sc->mbuf_dma_tag, sc->rxring_m_dmamap[i]); bus_dmamap_destroy(sc->mbuf_dma_tag, sc->rxring_m_dmamap[i]); sc->rxring_m_dmamap[i] = NULL; m_freem(sc->rxring_m[i]); sc->rxring_m[i] = NULL; } } sc->rxring[CGEM_NUM_RX_DESCS - 1].addr |= CGEM_RXDESC_WRAP; sc->rxring_hd_ptr = 0; sc->rxring_tl_ptr = 0; sc->rxring_queued = 0; /* Force next statchg or linkchg to program net config register. */ sc->mii_media_active = 0; } static int cgem_ioctl(if_t ifp, u_long cmd, caddr_t data) { struct cgem_softc *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; struct mii_data *mii; int error = 0, mask; switch (cmd) { case SIOCSIFFLAGS: CGEM_LOCK(sc); if ((if_getflags(ifp) & IFF_UP) != 0) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { if (((if_getflags(ifp) ^ sc->if_old_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) { cgem_rx_filter(sc); } } else { cgem_init_locked(sc); } } else if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); cgem_stop(sc); } sc->if_old_flags = if_getflags(ifp); CGEM_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* Set up multi-cast filters. 
*/ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { CGEM_LOCK(sc); cgem_rx_filter(sc); CGEM_UNLOCK(sc); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: mii = device_get_softc(sc->miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); break; case SIOCSIFCAP: CGEM_LOCK(sc); mask = if_getcapenable(ifp) ^ ifr->ifr_reqcap; if ((mask & IFCAP_TXCSUM) != 0) { if ((ifr->ifr_reqcap & IFCAP_TXCSUM) != 0) { /* Turn on TX checksumming. */ if_setcapenablebit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0); if_sethwassistbits(ifp, CGEM_CKSUM_ASSIST, 0); WR4(sc, CGEM_DMA_CFG, RD4(sc, CGEM_DMA_CFG) | CGEM_DMA_CFG_CHKSUM_GEN_OFFLOAD_EN); } else { /* Turn off TX checksumming. */ if_setcapenablebit(ifp, 0, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); if_sethwassistbits(ifp, 0, CGEM_CKSUM_ASSIST); WR4(sc, CGEM_DMA_CFG, RD4(sc, CGEM_DMA_CFG) & ~CGEM_DMA_CFG_CHKSUM_GEN_OFFLOAD_EN); } } if ((mask & IFCAP_RXCSUM) != 0) { if ((ifr->ifr_reqcap & IFCAP_RXCSUM) != 0) { /* Turn on RX checksumming. */ if_setcapenablebit(ifp, IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6, 0); WR4(sc, CGEM_NET_CFG, RD4(sc, CGEM_NET_CFG) | CGEM_NET_CFG_RX_CHKSUM_OFFLD_EN); } else { /* Turn off RX checksumming. */ if_setcapenablebit(ifp, 0, IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); WR4(sc, CGEM_NET_CFG, RD4(sc, CGEM_NET_CFG) & ~CGEM_NET_CFG_RX_CHKSUM_OFFLD_EN); } } if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_TXCSUM)) == (IFCAP_RXCSUM | IFCAP_TXCSUM)) if_setcapenablebit(ifp, IFCAP_VLAN_HWCSUM, 0); else if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWCSUM); CGEM_UNLOCK(sc); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* MII bus support routines. */ static void cgem_child_detached(device_t dev, device_t child) { struct cgem_softc *sc = device_get_softc(dev); if (child == sc->miibus) sc->miibus = NULL; } static int cgem_ifmedia_upd(if_t ifp) { struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp); struct mii_data *mii; struct mii_softc *miisc; int error = 0; mii = device_get_softc(sc->miibus); CGEM_LOCK(sc); if ((if_getflags(ifp) & IFF_UP) != 0) { LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); } CGEM_UNLOCK(sc); return (error); } static void cgem_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) { struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp); struct mii_data *mii; mii = device_get_softc(sc->miibus); CGEM_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; CGEM_UNLOCK(sc); } static int cgem_miibus_readreg(device_t dev, int phy, int reg) { struct cgem_softc *sc = device_get_softc(dev); int tries, val; WR4(sc, CGEM_PHY_MAINT, CGEM_PHY_MAINT_CLAUSE_22 | CGEM_PHY_MAINT_MUST_10 | CGEM_PHY_MAINT_OP_READ | (phy << CGEM_PHY_MAINT_PHY_ADDR_SHIFT) | (reg << CGEM_PHY_MAINT_REG_ADDR_SHIFT)); /* Wait for completion. */ tries=0; while ((RD4(sc, CGEM_NET_STAT) & CGEM_NET_STAT_PHY_MGMT_IDLE) == 0) { DELAY(5); if (++tries > 200) { device_printf(dev, "phy read timeout: %d\n", reg); return (-1); } } val = RD4(sc, CGEM_PHY_MAINT) & CGEM_PHY_MAINT_DATA_MASK; if (reg == MII_EXTSR) /* * MAC does not support half-duplex at gig speeds. * Let mii(4) exclude the capability. 
*/ val &= ~(EXTSR_1000XHDX | EXTSR_1000THDX); return (val); } static int cgem_miibus_writereg(device_t dev, int phy, int reg, int data) { struct cgem_softc *sc = device_get_softc(dev); int tries; WR4(sc, CGEM_PHY_MAINT, CGEM_PHY_MAINT_CLAUSE_22 | CGEM_PHY_MAINT_MUST_10 | CGEM_PHY_MAINT_OP_WRITE | (phy << CGEM_PHY_MAINT_PHY_ADDR_SHIFT) | (reg << CGEM_PHY_MAINT_REG_ADDR_SHIFT) | (data & CGEM_PHY_MAINT_DATA_MASK)); /* Wait for completion. */ tries = 0; while ((RD4(sc, CGEM_NET_STAT) & CGEM_NET_STAT_PHY_MGMT_IDLE) == 0) { DELAY(5); if (++tries > 200) { device_printf(dev, "phy write timeout: %d\n", reg); return (-1); } } return (0); } static void cgem_miibus_statchg(device_t dev) { struct cgem_softc *sc = device_get_softc(dev); struct mii_data *mii = device_get_softc(sc->miibus); CGEM_ASSERT_LOCKED(sc); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID) && sc->mii_media_active != mii->mii_media_active) cgem_mediachange(sc, mii); } static void cgem_miibus_linkchg(device_t dev) { struct cgem_softc *sc = device_get_softc(dev); struct mii_data *mii = device_get_softc(sc->miibus); CGEM_ASSERT_LOCKED(sc); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID) && sc->mii_media_active != mii->mii_media_active) cgem_mediachange(sc, mii); } /* * Overridable weak symbol cgem_set_ref_clk(). This allows platforms to * provide a function to set the cgem's reference clock. */ static int __used cgem_default_set_ref_clk(int unit, int frequency) { return 0; } __weak_reference(cgem_default_set_ref_clk, cgem_set_ref_clk); /* Call to set reference clock and network config bits according to media. */ static void cgem_mediachange(struct cgem_softc *sc, struct mii_data *mii) { uint32_t net_cfg; int ref_clk_freq; CGEM_ASSERT_LOCKED(sc); /* Update hardware to reflect media. */ net_cfg = RD4(sc, CGEM_NET_CFG); net_cfg &= ~(CGEM_NET_CFG_SPEED100 | CGEM_NET_CFG_GIGE_EN | CGEM_NET_CFG_FULL_DUPLEX); switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_T: net_cfg |= (CGEM_NET_CFG_SPEED100 | CGEM_NET_CFG_GIGE_EN); ref_clk_freq = 125000000; break; case IFM_100_TX: net_cfg |= CGEM_NET_CFG_SPEED100; ref_clk_freq = 25000000; break; default: ref_clk_freq = 2500000; } if ((mii->mii_media_active & IFM_FDX) != 0) net_cfg |= CGEM_NET_CFG_FULL_DUPLEX; WR4(sc, CGEM_NET_CFG, net_cfg); /* Set the reference clock if necessary. 
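* A platform providing a strong cgem_set_ref_clk() definition * overrides the weak no-op above and can program the 125/25/2.5 MHz * reference selected for the current media.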
*/ if (cgem_set_ref_clk(sc->ref_clk_num, ref_clk_freq)) device_printf(sc->dev, "cgem_mediachange: " "could not set ref clk%d to %d.\n", sc->ref_clk_num, ref_clk_freq); sc->mii_media_active = mii->mii_media_active; } static void cgem_add_sysctls(device_t dev) { struct cgem_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rxbufs", CTLFLAG_RW, &sc->rxbufs, 0, "Number receive buffers to provide"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rxhangwar", CTLFLAG_RW, &sc->rxhangwar, 0, "Enable receive hang work-around"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_rxoverruns", CTLFLAG_RD, &sc->rxoverruns, 0, "Receive overrun events"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_rxnobufs", CTLFLAG_RD, &sc->rxnobufs, 0, "Receive buf queue empty events"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_rxdmamapfails", CTLFLAG_RD, &sc->rxdmamapfails, 0, "Receive DMA map failures"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_txfull", CTLFLAG_RD, &sc->txfull, 0, "Transmit ring full events"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_txdmamapfails", CTLFLAG_RD, &sc->txdmamapfails, 0, "Transmit DMA map failures"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_txdefrags", CTLFLAG_RD, &sc->txdefrags, 0, "Transmit m_defrag() calls"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "_txdefragfails", CTLFLAG_RD, &sc->txdefragfails, 0, "Transmit m_defrag() failures"); tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "GEM statistics"); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_bytes", CTLFLAG_RD, &sc->stats.tx_bytes, "Total bytes transmitted"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames", CTLFLAG_RD, &sc->stats.tx_frames, 0, "Total frames transmitted"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_bcast", CTLFLAG_RD, &sc->stats.tx_frames_bcast, 0, "Number broadcast frames transmitted"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_multi", CTLFLAG_RD, &sc->stats.tx_frames_multi, 0, "Number multicast frames transmitted"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_pause", CTLFLAG_RD, &sc->stats.tx_frames_pause, 0, "Number pause frames transmitted"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_64b", CTLFLAG_RD, &sc->stats.tx_frames_64b, 0, "Number frames transmitted of size 64 bytes or less"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_65to127b", CTLFLAG_RD, &sc->stats.tx_frames_65to127b, 0, "Number frames transmitted of size 65-127 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_128to255b", CTLFLAG_RD, &sc->stats.tx_frames_128to255b, 0, "Number frames transmitted of size 128-255 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_256to511b", CTLFLAG_RD, &sc->stats.tx_frames_256to511b, 0, "Number frames transmitted of size 256-511 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_512to1023b", CTLFLAG_RD, &sc->stats.tx_frames_512to1023b, 0, "Number frames transmitted of size 512-1023 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_frames_1024to1536b", CTLFLAG_RD, &sc->stats.tx_frames_1024to1536b, 0, "Number frames transmitted of size 1024-1536 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_under_runs", CTLFLAG_RD, &sc->stats.tx_under_runs, 0, "Number transmit under-run events"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_single_collisn", CTLFLAG_RD, &sc->stats.tx_single_collisn, 0, "Number single-collision transmit frames"); SYSCTL_ADD_UINT(ctx, 
child, OID_AUTO, "tx_multi_collisn", CTLFLAG_RD, &sc->stats.tx_multi_collisn, 0, "Number multi-collision transmit frames"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_excsv_collisn", CTLFLAG_RD, &sc->stats.tx_excsv_collisn, 0, "Number excessive collision transmit frames"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_late_collisn", CTLFLAG_RD, &sc->stats.tx_late_collisn, 0, "Number late-collision transmit frames"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_deferred_frames", CTLFLAG_RD, &sc->stats.tx_deferred_frames, 0, "Number deferred transmit frames"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_carrier_sense_errs", CTLFLAG_RD, &sc->stats.tx_carrier_sense_errs, 0, "Number carrier sense errors on transmit"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_bytes", CTLFLAG_RD, &sc->stats.rx_bytes, "Total bytes received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames", CTLFLAG_RD, &sc->stats.rx_frames, 0, "Total frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_bcast", CTLFLAG_RD, &sc->stats.rx_frames_bcast, 0, "Number broadcast frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_multi", CTLFLAG_RD, &sc->stats.rx_frames_multi, 0, "Number multicast frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_pause", CTLFLAG_RD, &sc->stats.rx_frames_pause, 0, "Number pause frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_64b", CTLFLAG_RD, &sc->stats.rx_frames_64b, 0, "Number frames received of size 64 bytes or less"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_65to127b", CTLFLAG_RD, &sc->stats.rx_frames_65to127b, 0, "Number frames received of size 65-127 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_128to255b", CTLFLAG_RD, &sc->stats.rx_frames_128to255b, 0, "Number frames received of size 128-255 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_256to511b", CTLFLAG_RD, &sc->stats.rx_frames_256to511b, 0, "Number frames received of size 256-511 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_512to1023b", CTLFLAG_RD, &sc->stats.rx_frames_512to1023b, 0, "Number frames received of size 512-1023 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_1024to1536b", CTLFLAG_RD, &sc->stats.rx_frames_1024to1536b, 0, "Number frames received of size 1024-1536 bytes"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_undersize", CTLFLAG_RD, &sc->stats.rx_frames_undersize, 0, "Number undersize frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_oversize", CTLFLAG_RD, &sc->stats.rx_frames_oversize, 0, "Number oversize frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_jabber", CTLFLAG_RD, &sc->stats.rx_frames_jabber, 0, "Number jabber frames received"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_fcs_errs", CTLFLAG_RD, &sc->stats.rx_frames_fcs_errs, 0, "Number frames received with FCS errors"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_length_errs", CTLFLAG_RD, &sc->stats.rx_frames_length_errs, 0, "Number frames received with length errors"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_symbol_errs", CTLFLAG_RD, &sc->stats.rx_symbol_errs, 0, "Number receive symbol errors"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_align_errs", CTLFLAG_RD, &sc->stats.rx_align_errs, 0, "Number receive alignment errors"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_resource_errs", CTLFLAG_RD, &sc->stats.rx_resource_errs, 0, "Number frames received when no rx buffer available"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_overrun_errs", CTLFLAG_RD, &sc->stats.rx_overrun_errs, 0, "Number frames 
received but not copied due to " "receive overrun"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_ip_hdr_csum_errs", CTLFLAG_RD, &sc->stats.rx_ip_hdr_csum_errs, 0, "Number frames received with IP header checksum " "errors"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_tcp_csum_errs", CTLFLAG_RD, &sc->stats.rx_tcp_csum_errs, 0, "Number frames received with TCP checksum errors"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_frames_udp_csum_errs", CTLFLAG_RD, &sc->stats.rx_udp_csum_errs, 0, "Number frames received with UDP checksum errors"); } static int cgem_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "cadence,gem")) return (ENXIO); device_set_desc(dev, "Cadence CGEM Gigabit Ethernet Interface"); return (0); } static int cgem_attach(device_t dev) { struct cgem_softc *sc = device_get_softc(dev); if_t ifp = NULL; phandle_t node; pcell_t cell; int rid, err; u_char eaddr[ETHER_ADDR_LEN]; sc->dev = dev; CGEM_LOCK_INIT(sc); /* Get reference clock number and base divider from fdt. */ node = ofw_bus_get_node(dev); sc->ref_clk_num = 0; if (OF_getprop(node, "ref-clock-num", &cell, sizeof(cell)) > 0) sc->ref_clk_num = fdt32_to_cpu(cell); /* Get memory resource. */ rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "could not allocate memory resources.\n"); return (ENOMEM); } /* Get IRQ resource. */ rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "could not allocate interrupt resource.\n"); cgem_detach(dev); return (ENOMEM); } /* Set up ifnet structure. */ ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "could not allocate ifnet structure\n"); cgem_detach(dev); return (ENOMEM); } if_setsoftc(ifp, sc); if_initname(ifp, IF_CGEM_NAME, device_get_unit(dev)); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setinitfn(ifp, cgem_init); if_setioctlfn(ifp, cgem_ioctl); if_setstartfn(ifp, cgem_start); if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM, 0); if_setsendqlen(ifp, CGEM_NUM_TX_DESCS); if_setsendqready(ifp); /* Disable hardware checksumming by default. */ if_sethwassist(ifp, 0); if_setcapenable(ifp, if_getcapabilities(ifp) & ~(IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_VLAN_HWCSUM)); sc->if_old_flags = if_getflags(ifp); sc->rxbufs = DEFAULT_NUM_RX_BUFS; sc->rxhangwar = 1; /* Reset hardware. */ CGEM_LOCK(sc); cgem_reset(sc); CGEM_UNLOCK(sc); /* Attach phy to mii bus. */ err = mii_attach(dev, &sc->miibus, ifp, cgem_ifmedia_upd, cgem_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (err) { device_printf(dev, "attaching PHYs failed\n"); cgem_detach(dev); return (err); } /* Set up TX and RX descriptor area. */ err = cgem_setup_descs(sc); if (err) { device_printf(dev, "could not set up dma mem for descs.\n"); cgem_detach(dev); return (ENOMEM); } /* Get a MAC address. */ cgem_get_mac(sc, eaddr); /* Start ticks. 
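* callout_init_mtx() binds the callout to sc_mtx, so cgem_tick() * always runs with the driver lock held, as its * CGEM_ASSERT_LOCKED() expects.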
*/ callout_init_mtx(&sc->tick_ch, &sc->sc_mtx, 0); ether_ifattach(ifp, eaddr); err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE | INTR_EXCL, NULL, cgem_intr, sc, &sc->intrhand); if (err) { device_printf(dev, "could not set interrupt handler.\n"); ether_ifdetach(ifp); cgem_detach(dev); return (err); } cgem_add_sysctls(dev); return (0); } static int cgem_detach(device_t dev) { struct cgem_softc *sc = device_get_softc(dev); int i; if (sc == NULL) return (ENODEV); if (device_is_attached(dev)) { CGEM_LOCK(sc); cgem_stop(sc); CGEM_UNLOCK(sc); callout_drain(&sc->tick_ch); if_setflagbits(sc->ifp, 0, IFF_UP); ether_ifdetach(sc->ifp); } if (sc->miibus != NULL) { device_delete_child(dev, sc->miibus); sc->miibus = NULL; } /* Release resources. */ if (sc->mem_res != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res), sc->mem_res); sc->mem_res = NULL; } if (sc->irq_res != NULL) { if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, sc->intrhand); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); sc->irq_res = NULL; } /* Release DMA resources. */ if (sc->rxring != NULL) { if (sc->rxring_physaddr != 0) { bus_dmamap_unload(sc->desc_dma_tag, sc->rxring_dma_map); sc->rxring_physaddr = 0; } bus_dmamem_free(sc->desc_dma_tag, sc->rxring, sc->rxring_dma_map); sc->rxring = NULL; for (i = 0; i < CGEM_NUM_RX_DESCS; i++) if (sc->rxring_m_dmamap[i] != NULL) { bus_dmamap_destroy(sc->mbuf_dma_tag, sc->rxring_m_dmamap[i]); sc->rxring_m_dmamap[i] = NULL; } } if (sc->txring != NULL) { if (sc->txring_physaddr != 0) { bus_dmamap_unload(sc->desc_dma_tag, sc->txring_dma_map); sc->txring_physaddr = 0; } bus_dmamem_free(sc->desc_dma_tag, sc->txring, sc->txring_dma_map); sc->txring = NULL; for (i = 0; i < CGEM_NUM_TX_DESCS; i++) if (sc->txring_m_dmamap[i] != NULL) { bus_dmamap_destroy(sc->mbuf_dma_tag, sc->txring_m_dmamap[i]); sc->txring_m_dmamap[i] = NULL; } } if (sc->desc_dma_tag != NULL) { bus_dma_tag_destroy(sc->desc_dma_tag); sc->desc_dma_tag = NULL; } if (sc->mbuf_dma_tag != NULL) { bus_dma_tag_destroy(sc->mbuf_dma_tag); sc->mbuf_dma_tag = NULL; } bus_generic_detach(dev); CGEM_LOCK_DESTROY(sc); return (0); } static device_method_t cgem_methods[] = { /* Device interface */ DEVMETHOD(device_probe, cgem_probe), DEVMETHOD(device_attach, cgem_attach), DEVMETHOD(device_detach, cgem_detach), /* Bus interface */ DEVMETHOD(bus_child_detached, cgem_child_detached), /* MII interface */ DEVMETHOD(miibus_readreg, cgem_miibus_readreg), DEVMETHOD(miibus_writereg, cgem_miibus_writereg), DEVMETHOD(miibus_statchg, cgem_miibus_statchg), DEVMETHOD(miibus_linkchg, cgem_miibus_linkchg), DEVMETHOD_END }; static driver_t cgem_driver = { "cgem", cgem_methods, sizeof(struct cgem_softc), }; DRIVER_MODULE(cgem, simplebus, cgem_driver, cgem_devclass, NULL, NULL); DRIVER_MODULE(miibus, cgem, miibus_driver, miibus_devclass, NULL, NULL); MODULE_DEPEND(cgem, miibus, 1, 1, 1); MODULE_DEPEND(cgem, ether, 1, 1, 1); Index: stable/11/sys/dev/de/if_de.c =================================================================== --- stable/11/sys/dev/de/if_de.c (revision 330445) +++ stable/11/sys/dev/de/if_de.c (revision 330446) @@ -1,5017 +1,5017 @@ /* $NetBSD: if_de.c,v 1.86 1999/06/01 19:17:59 thorpej Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: if_de.c,v 1.94 1997/07/03 16:55:07 thomas Exp */ /* * DEC 21040 PCI Ethernet Controller * * Written by Matt Thomas * BPF support code stolen directly from if_ec.c * * This driver supports the DEC DE435 or any other PCI * board which supports the 21040, 21041, or 21140 (mostly). */ #include __FBSDID("$FreeBSD$"); #define TULIP_HDR_DATA #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #include #include #ifdef DDB #include #endif /* * Intel CPUs should use I/O mapped access. */ #if defined(__i386__) #define TULIP_IOMAPPED #endif #if 0 /* This enables KTR traces at KTR_DEV. */ #define KTR_TULIP KTR_DEV #else #define KTR_TULIP 0 #endif #if 0 /* * This turns on all sorts of debugging stuff and makes the * driver much larger. */ #define TULIP_DEBUG #endif #if 0 #define TULIP_PERFSTATS #endif #define TULIP_HZ 10 #include #define SYNC_NONE 0 #define SYNC_RX 1 #define SYNC_TX 2 /* * This module supports * the DEC 21040 PCI Ethernet Controller. * the DEC 21041 PCI Ethernet Controller. * the DEC 21140 PCI Fast Ethernet Controller. 
*/ static void tulip_addr_filter(tulip_softc_t * const sc); static int tulip_ifmedia_change(struct ifnet * const ifp); static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req); static void tulip_init(void *); static void tulip_init_locked(tulip_softc_t * const sc); static void tulip_intr_shared(void *arg); static void tulip_intr_normal(void *arg); static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr); static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities); static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc); static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno); static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data); static void tulip_reset(tulip_softc_t * const sc); static void tulip_rx_intr(tulip_softc_t * const sc); static int tulip_srom_decode(tulip_softc_t * const sc); static void tulip_start(struct ifnet *ifp); static void tulip_start_locked(tulip_softc_t * const sc); static struct mbuf * tulip_txput(tulip_softc_t * const sc, struct mbuf *m); static void tulip_txput_setup(tulip_softc_t * const sc); static void tulip_watchdog(void *arg); struct mbuf * tulip_dequeue_mbuf(tulip_ringinfo_t *ri, tulip_descinfo_t *di, int sync); static void tulip_dma_map_addr(void *, bus_dma_segment_t *, int, int); static void tulip_dma_map_rxbuf(void *, bus_dma_segment_t *, int, bus_size_t, int); static void tulip_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr; if (error) return; paddr = arg; *paddr = segs->ds_addr; } static void tulip_dma_map_rxbuf(void *arg, bus_dma_segment_t *segs, int nseg, bus_size_t mapsize, int error) { tulip_desc_t *desc; if (error) return; desc = arg; KASSERT(nseg == 1, ("too many DMA segments")); KASSERT(segs[0].ds_len >= TULIP_RX_BUFLEN, ("receive buffer too small")); desc->d_addr1 = segs[0].ds_addr & 0xffffffff; desc->d_length1 = TULIP_RX_BUFLEN; #ifdef not_needed /* These should already always be zero. */ desc->d_addr2 = 0; desc->d_length2 = 0; #endif } struct mbuf * tulip_dequeue_mbuf(tulip_ringinfo_t *ri, tulip_descinfo_t *di, int sync) { struct mbuf *m; m = di->di_mbuf; if (m != NULL) { switch (sync) { case SYNC_NONE: break; case SYNC_RX: TULIP_RXMAP_POSTSYNC(ri, di); break; case SYNC_TX: TULIP_TXMAP_POSTSYNC(ri, di); break; default: panic("bad sync flag: %d", sync); } bus_dmamap_unload(ri->ri_data_tag, *di->di_map); di->di_mbuf = NULL; } return (m); } static void tulip_timeout_callback(void *arg) { tulip_softc_t * const sc = arg; TULIP_PERFSTART(timeout) TULIP_LOCK_ASSERT(sc); sc->tulip_flags &= ~TULIP_TIMEOUTPENDING; sc->tulip_probe_timeout -= 1000 / TULIP_HZ; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_TIMER); TULIP_PERFEND(timeout); } static void tulip_timeout(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); if (sc->tulip_flags & TULIP_TIMEOUTPENDING) return; sc->tulip_flags |= TULIP_TIMEOUTPENDING; callout_reset(&sc->tulip_callout, (hz + TULIP_HZ / 2) / TULIP_HZ, tulip_timeout_callback, sc); } static int tulip_txprobe(tulip_softc_t * const sc) { struct mbuf *m; u_char *enaddr; /* * Before we are sure this is the right media we need * to send a small packet to make sure there's carrier. * Strangely, BNC and AUI will "see" receive data if * either is connected so the transmit is the only way * to verify the connectivity. 
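* * (The frame built below is an IEEE 802.2 LLC TEST addressed to * ourselves: the 802.3 length field is 3, covering a single LLC * header of DSAP 0x00, SSAP 0x00 and control 0xE3, a TEST with the * poll bit clear, so nothing else on the wire should respond to it.) 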
*/ TULIP_LOCK_ASSERT(sc); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return 0; /* * Construct a LLC TEST message which will point to ourselves. */ if (sc->tulip_ifp->if_input != NULL) enaddr = IF_LLADDR(sc->tulip_ifp); else enaddr = sc->tulip_enaddr; bcopy(enaddr, mtod(m, struct ether_header *)->ether_dhost, ETHER_ADDR_LEN); bcopy(enaddr, mtod(m, struct ether_header *)->ether_shost, ETHER_ADDR_LEN); mtod(m, struct ether_header *)->ether_type = htons(3); mtod(m, unsigned char *)[14] = 0; mtod(m, unsigned char *)[15] = 0; mtod(m, unsigned char *)[16] = 0xE3; /* LLC Class1 TEST (no poll) */ m->m_len = m->m_pkthdr.len = sizeof(struct ether_header) + 3; /* * send it! */ sc->tulip_cmdmode |= TULIP_CMD_TXRUN; sc->tulip_intrmask |= TULIP_STS_TXINTR; sc->tulip_flags |= TULIP_TXPROBE_ACTIVE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); if ((m = tulip_txput(sc, m)) != NULL) m_freem(m); sc->tulip_probe.probe_txprobes++; return 1; } static void tulip_media_set(tulip_softc_t * const sc, tulip_media_t media) { const tulip_media_info_t *mi = sc->tulip_mediums[media]; TULIP_LOCK_ASSERT(sc); if (mi == NULL) return; /* * If we are switching media, make sure we don't think there's * any stale RX activity */ sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); TULIP_CSR_WRITE(sc, csr_sia_tx_rx, mi->mi_sia_tx_rx); if (sc->tulip_features & TULIP_HAVE_SIAGP) { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_control|mi->mi_sia_general); DELAY(50); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_data|mi->mi_sia_general); } else { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_general); } TULIP_CSR_WRITE(sc, csr_sia_connectivity, mi->mi_sia_connectivity); } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { #define TULIP_GPR_CMDBITS (TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_SCRAMBLER|TULIP_CMD_TXTHRSHLDCTL) /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. */ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, (u_int8_t) mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. 
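* (TULIP_GPR_CMDBITS collects the port select, PCS, scrambler and TX * threshold bits of the command CSR; XORing mi_cmdmode against the * live register below exposes any mismatch.) 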
*/ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpcontrol); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_MII && sc->tulip_probe_state != TULIP_PROBE_INACTIVE) { int idx; if (sc->tulip_features & TULIP_HAVE_SIAGP) { const u_int8_t *dp; dp = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx = 0; idx < mi->mi_reset_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx = 0; idx < mi->mi_gpr_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } } else { for (idx = 0; idx < mi->mi_reset_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx = 0; idx < mi->mi_gpr_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx]); } } if (sc->tulip_flags & TULIP_TRYNWAY) { tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); } else if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { u_int32_t data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_CONTROL); data &= ~(PHYCTL_SELECT_100MB|PHYCTL_FULL_DUPLEX|PHYCTL_AUTONEG_ENABLE); sc->tulip_flags &= ~TULIP_DIDNWAY; if (TULIP_IS_MEDIA_FD(media)) data |= PHYCTL_FULL_DUPLEX; if (TULIP_IS_MEDIA_100MB(media)) data |= PHYCTL_SELECT_100MB; tulip_mii_writereg(sc, sc->tulip_phyaddr, PHYREG_CONTROL, data); } } } static void tulip_linkup(tulip_softc_t * const sc, tulip_media_t media) { TULIP_LOCK_ASSERT(sc); if ((sc->tulip_flags & TULIP_LINKUP) == 0) sc->tulip_flags |= TULIP_PRINTLINKUP; sc->tulip_flags |= TULIP_LINKUP; sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; #if 0 /* XXX how does this work with ifmedia? */ if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { if (sc->tulip_ifp->if_flags & IFF_FULLDUPLEX) { if (TULIP_CAN_MEDIA_FD(media) && sc->tulip_mediums[TULIP_FD_MEDIA_OF(media)] != NULL) media = TULIP_FD_MEDIA_OF(media); } else { if (TULIP_IS_MEDIA_FD(media) && sc->tulip_mediums[TULIP_HD_MEDIA_OF(media)] != NULL) media = TULIP_HD_MEDIA_OF(media); } } #endif if (sc->tulip_media != media) { #ifdef TULIP_DEBUG sc->tulip_dbg.dbg_last_media = sc->tulip_media; #endif sc->tulip_media = media; sc->tulip_flags |= TULIP_PRINTMEDIA; if (TULIP_IS_MEDIA_FD(sc->tulip_media)) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; } else if (sc->tulip_chipid != TULIP_21041 || (sc->tulip_flags & TULIP_DIDNWAY) == 0) { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } } /* * We could set probe_timeout to 0, but setting it to 3000 puts this * in one central place; all that matters is that tulip_linkup is * followed by a tulip_timeout. Therefore setting it should not * result in aberrant behaviour. */ sc->tulip_probe_timeout = 3000; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_TRYNWAY); if (sc->tulip_flags & TULIP_INRESET) { tulip_media_set(sc, sc->tulip_media); } else if (sc->tulip_probe_media != sc->tulip_media) { /* * No reason to reset unless the media has actually changed. 
*/ tulip_reset(sc); } tulip_init_locked(sc); } static void tulip_media_print(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); if ((sc->tulip_flags & TULIP_LINKUP) == 0) return; if (sc->tulip_flags & TULIP_PRINTMEDIA) { device_printf(sc->tulip_dev, "enabling %s port\n", tulip_mediums[sc->tulip_media]); sc->tulip_flags &= ~(TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); } else if (sc->tulip_flags & TULIP_PRINTLINKUP) { device_printf(sc->tulip_dev, "link up\n"); sc->tulip_flags &= ~TULIP_PRINTLINKUP; } } #if defined(TULIP_DO_GPR_SENSE) static tulip_media_t tulip_21140_gpr_media_sense(tulip_softc_t * const sc) { struct ifnet *ifp = sc->tulip_ifp; tulip_media_t maybe_media = TULIP_MEDIA_UNKNOWN; tulip_media_t last_media = TULIP_MEDIA_UNKNOWN; tulip_media_t media; TULIP_LOCK_ASSERT(sc); /* * If one of the media blocks contained a default media flag, * use that. */ for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { const tulip_media_info_t *mi; /* * Media is not supported (or is full-duplex). */ if ((mi = sc->tulip_mediums[media]) == NULL || TULIP_IS_MEDIA_FD(media)) continue; if (mi->mi_type != TULIP_MEDIAINFO_GPR) continue; /* * Remember the media if this is the "default" media. */ if (mi->mi_default && maybe_media == TULIP_MEDIA_UNKNOWN) maybe_media = media; /* * No activity mask? Can't see if it is active if there's no mask. */ if (mi->mi_actmask == 0) continue; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) != mi->mi_actdata) continue; #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "%s: %s: 0x%02x & 0x%02x == 0x%02x\n", __func__, tulip_mediums[media], TULIP_CSR_READ(sc, csr_gp) & 0xFF, mi->mi_actmask, mi->mi_actdata); #endif /* * It does! If this is the first media we detected, then * remember this media. If it isn't the first, then there were * multiple matches, which we equate to no match (since we don't * know which to select, if any). */ if (last_media == TULIP_MEDIA_UNKNOWN) { last_media = media; } else if (last_media != media) { last_media = TULIP_MEDIA_UNKNOWN; } } return (last_media != TULIP_MEDIA_UNKNOWN) ? last_media : maybe_media; } #endif /* TULIP_DO_GPR_SENSE */ static tulip_link_status_t tulip_media_link_monitor(tulip_softc_t * const sc) { const tulip_media_info_t * const mi = sc->tulip_mediums[sc->tulip_media]; tulip_link_status_t linkup = TULIP_LINK_DOWN; TULIP_LOCK_ASSERT(sc); if (mi == NULL) { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_link_monitor: %s: botch at line %d\n", tulip_mediums[sc->tulip_media],__LINE__); #else return TULIP_LINK_UNKNOWN; #endif } /* * Have we seen some packets? If so, the link must be good. */ if ((sc->tulip_flags & (TULIP_RXACT|TULIP_LINKUP)) == (TULIP_RXACT|TULIP_LINKUP)) { sc->tulip_flags &= ~TULIP_RXACT; sc->tulip_probe_timeout = 3000; return TULIP_LINK_UP; } sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_MII) { u_int32_t status; /* * Read the PHY status register. */ status = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if (status & PHYSTS_AUTONEG_DONE) { /* * If the PHY has completed autonegotiation, see if the * remote system's abilities have changed. If so, upgrade or * downgrade as appropriate. 
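* (The link partner ability bits read below are shifted left by 6 so * they line up with the corresponding capability bits of the status * register before masking; tulip_mii_autonegotiate applies the same * shift in the other direction when advertising.) 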
*/ u_int32_t abilities = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_AUTONEG_ABILITIES); abilities = (abilities << 6) & status; if (abilities != sc->tulip_abilities) { #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation changed: 0x%04x -> 0x%04x\n", ifp->if_xname, sc->tulip_phyaddr, sc->tulip_abilities, abilities); #endif if (tulip_mii_map_abilities(sc, abilities)) { tulip_linkup(sc, sc->tulip_probe_media); return TULIP_LINK_UP; } /* * If we had selected media because of autonegotiation, * we need to probe for the new media. */ sc->tulip_probe_state = TULIP_PROBE_INACTIVE; if (sc->tulip_flags & TULIP_DIDNWAY) return TULIP_LINK_DOWN; } } /* * The link is now up. If it was down, say it's back up. */ if ((status & (PHYSTS_LINK_UP|PHYSTS_REMOTE_FAULT)) == PHYSTS_LINK_UP) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { /* * No activity sensor? Assume all's well. */ if (mi->mi_actmask == 0) return TULIP_LINK_UNKNOWN; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) == mi->mi_actdata) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { /* * Assume non-TP is ok for now. */ if (!TULIP_IS_MEDIA_TP(sc->tulip_media)) return TULIP_LINK_UNKNOWN; if ((TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) == 0) linkup = TULIP_LINK_UP; #if defined(TULIP_DEBUG) if (sc->tulip_probe_timeout <= 0) device_printf(sc->tulip_dev, "sia status = 0x%08x\n", TULIP_CSR_READ(sc, csr_sia_status)); #endif } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { return TULIP_LINK_UNKNOWN; } /* * We will wait for 3 seconds until the link goes into suspect mode. */ if (sc->tulip_flags & TULIP_LINKUP) { if (linkup == TULIP_LINK_UP) sc->tulip_probe_timeout = 3000; if (sc->tulip_probe_timeout > 0) return TULIP_LINK_UP; sc->tulip_flags &= ~TULIP_LINKUP; device_printf(sc->tulip_dev, "link down: cable problem?\n"); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_downed++; #endif return TULIP_LINK_DOWN; } static void tulip_media_poll(tulip_softc_t * const sc, tulip_mediapoll_event_t event) { TULIP_LOCK_ASSERT(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE && event == TULIP_MEDIAPOLL_TIMER) { switch (tulip_media_link_monitor(sc)) { case TULIP_LINK_DOWN: { /* * Link Monitor failed. Probe for new media. */ event = TULIP_MEDIAPOLL_LINKFAIL; break; } case TULIP_LINK_UP: { /* * Check again soon. */ tulip_timeout(sc); return; } case TULIP_LINK_UNKNOWN: { /* * We can't tell so don't bother. */ return; } } } if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) { if (TULIP_DO_AUTOSENSE(sc)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_failures++; #endif sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_ifp->if_flags & IFF_UP) tulip_reset(sc); /* restart probe */ } return; } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_pollintrs++; #endif } if (event == TULIP_MEDIAPOLL_START) { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE) return; sc->tulip_probe_mediamask = 0; sc->tulip_probe_passes = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_media_probes++; #endif /* * If the SROM contained an explicit media to use, use it. */ sc->tulip_cmdmode &= ~(TULIP_CMD_RXRUN|TULIP_CMD_FULLDUPLEX); sc->tulip_flags |= TULIP_TRYNWAY|TULIP_PROBE1STPASS; sc->tulip_flags &= ~(TULIP_DIDNWAY|TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); /* * connidx is defaulted to a media_unknown type. 
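* If the SROM named an explicit connector, the probe below is * short-circuited and the link is simply declared up on that media. 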
*/ sc->tulip_probe_media = tulip_srom_conninfo[sc->tulip_connidx].sc_media; if (sc->tulip_probe_media != TULIP_MEDIA_UNKNOWN) { tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_features & TULIP_HAVE_GPR) { sc->tulip_probe_state = TULIP_PROBE_GPRTEST; sc->tulip_probe_timeout = 2000; } else { sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; } } /* * Ignore txprobe failures or spurious callbacks. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED && sc->tulip_probe_state != TULIP_PROBE_MEDIATEST) { sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * If we really transmitted a packet, then that's the media we'll use. */ if (event == TULIP_MEDIAPOLL_TXPROBE_OK || event == TULIP_MEDIAPOLL_LINKPASS) { if (event == TULIP_MEDIAPOLL_LINKPASS) { /* XXX Check media status just to be sure */ sc->tulip_probe_media = TULIP_MEDIA_10BASET; #if defined(TULIP_DEBUG) } else { sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_GPRTEST) { #if defined(TULIP_DO_GPR_SENSE) /* * Check for media via the general purpose register. * * Try to sense the media via the GPR. If the same value * occurs 3 times in a row then just use that. */ if (sc->tulip_probe_timeout > 0) { tulip_media_t new_probe_media = tulip_21140_gpr_media_sense(sc); #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "%s: gpr sensing = %s\n", __func__, tulip_mediums[new_probe_media]); #endif if (new_probe_media != TULIP_MEDIA_UNKNOWN) { if (new_probe_media == sc->tulip_probe_media) { if (--sc->tulip_probe_count == 0) tulip_linkup(sc, sc->tulip_probe_media); } else { sc->tulip_probe_count = 10; } } sc->tulip_probe_media = new_probe_media; tulip_timeout(sc); return; } #endif /* TULIP_DO_GPR_SENSE */ /* * Brute force. We cycle through each of the media types * and try to transmit a packet. */ sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; tulip_timeout(sc); return; } if (sc->tulip_probe_state != TULIP_PROBE_MEDIATEST && (sc->tulip_features & TULIP_HAVE_MII)) { tulip_media_t old_media = sc->tulip_probe_media; tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); switch (sc->tulip_probe_state) { case TULIP_PROBE_FAILED: case TULIP_PROBE_MEDIATEST: { /* * Try the next media. */ sc->tulip_probe_mediamask |= sc->tulip_mediums[sc->tulip_probe_media]->mi_mediamask; sc->tulip_probe_timeout = 0; #ifdef notyet if (sc->tulip_probe_state == TULIP_PROBE_FAILED) break; if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) break; sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 300; #endif break; } case TULIP_PROBE_PHYAUTONEG: { return; } case TULIP_PROBE_INACTIVE: { /* * Only probe if we autonegotiated a media that hasn't failed. */ sc->tulip_probe_timeout = 0; if (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) { sc->tulip_probe_media = old_media; break; } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } default: { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } } if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_failed[sc->tulip_probe_media]++; #endif sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * switch to another media if we tried this one enough. 
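* Media are scanned from TULIP_MEDIA_MAX downward; full-duplex media * and media already recorded in tulip_probe_mediamask are skipped, * and after three full passes a probable cable problem is reported. 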
*/ if (/* event == TULIP_MEDIAPOLL_TXPROBE_FAILED || */ sc->tulip_probe_timeout <= 0) { #if defined(TULIP_DEBUG) if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { device_printf(sc->tulip_dev, "poll media unknown!\n"); sc->tulip_probe_media = TULIP_MEDIA_MAX; } #endif /* * Find the next media type to check for. Full Duplex * types are not allowed. */ do { sc->tulip_probe_media -= 1; if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { if (++sc->tulip_probe_passes == 3) { device_printf(sc->tulip_dev, "autosense failed: cable problem?\n"); if ((sc->tulip_ifp->if_flags & IFF_UP) == 0) { sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } sc->tulip_flags ^= TULIP_TRYNWAY; /* XXX */ sc->tulip_probe_mediamask = 0; sc->tulip_probe_media = TULIP_MEDIA_MAX - 1; } } while (sc->tulip_mediums[sc->tulip_probe_media] == NULL || (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) || TULIP_IS_MEDIA_FD(sc->tulip_probe_media)); #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "%s: probing %s\n", event == TULIP_MEDIAPOLL_TXPROBE_FAILED ? "txprobe failed" : "timeout", tulip_mediums[sc->tulip_probe_media]); #endif sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 1000; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe.probe_txprobes = 0; tulip_reset(sc); tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; } tulip_timeout(sc); /* * If this is hanging off a phy, we know we are doing NWAY and we * have forced the phy to a specific speed. Wait for link up before * sending a packet. */ switch (sc->tulip_mediums[sc->tulip_probe_media]->mi_type) { case TULIP_MEDIAINFO_MII: { if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) return; break; } case TULIP_MEDIAINFO_SIA: { if (TULIP_IS_MEDIA_TP(sc->tulip_probe_media)) { if (TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) return; tulip_linkup(sc, sc->tulip_probe_media); #ifdef notyet if (sc->tulip_features & TULIP_HAVE_MII) tulip_timeout(sc); #endif return; } break; } case TULIP_MEDIAINFO_RESET: case TULIP_MEDIAINFO_SYM: case TULIP_MEDIAINFO_NONE: case TULIP_MEDIAINFO_GPR: { break; } } /* * Try to send a packet. 
*/ tulip_txprobe(sc); } static void tulip_media_select(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); if (sc->tulip_features & TULIP_HAVE_GPR) { TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpdata); } /* * If this board has no media, just return */ if (sc->tulip_features & TULIP_HAVE_NOMEDIA) return; if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); (*sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_START); } else { tulip_media_set(sc, sc->tulip_media); } } static void tulip_21040_mediainfo_init(tulip_softc_t * const sc, tulip_media_t media) { TULIP_LOCK_ASSERT(sc); sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_THRSHLD160 |TULIP_CMD_BACKOFFCTR; sc->tulip_ifp->if_baudrate = 10000000; if (media == TULIP_MEDIA_10BASET || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[0], 21040, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[1], 21040, 10BASET_FD); sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; } if (media == TULIP_MEDIA_AUIBNC || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[2], 21040, AUIBNC); } if (media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[3], 21040, EXTSIA); } } static void tulip_21040_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_21040_mediainfo_init(sc, TULIP_MEDIA_UNKNOWN); return; } static void tulip_21040_10baset_only_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_21040_mediainfo_init(sc, TULIP_MEDIA_10BASET); tulip_media_set(sc, TULIP_MEDIA_10BASET); sc->tulip_media = TULIP_MEDIA_10BASET; } static void tulip_21040_10baset_only_media_select(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); sc->tulip_flags |= TULIP_LINKUP; if (sc->tulip_media == TULIP_MEDIA_10BASET_FD) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags &= ~TULIP_SQETEST; } else { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_SQETEST; } tulip_media_set(sc, sc->tulip_media); } static void tulip_21040_auibnc_only_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_21040_mediainfo_init(sc, TULIP_MEDIA_AUIBNC); sc->tulip_flags |= TULIP_SQETEST|TULIP_LINKUP; tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_media = TULIP_MEDIA_AUIBNC; } static void tulip_21040_auibnc_only_media_select(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } static const tulip_boardsw_t tulip_21040_boardsw = { TULIP_21040_GENERIC, tulip_21040_media_probe, tulip_media_select, tulip_media_poll, }; static const tulip_boardsw_t tulip_21040_10baset_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_10baset_only_media_probe, tulip_21040_10baset_only_media_select, NULL, }; static const tulip_boardsw_t tulip_21040_auibnc_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_auibnc_only_media_probe, tulip_21040_auibnc_only_media_select, NULL, }; static void tulip_21041_mediainfo_init(tulip_softc_t * const sc) { tulip_media_info_t * const mi = sc->tulip_mediainfo; TULIP_LOCK_ASSERT(sc); #ifdef notyet if (sc->tulip_revinfo >= 0x20) { TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, BNC); return; } #endif TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041, 10BASET); 
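/* Slots 1 through 3 follow: 10BASE-T full-duplex, AUI and BNC. */ 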
TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[2], 21041, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[3], 21041, BNC); } static void tulip_21041_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); sc->tulip_ifp->if_baudrate = 10000000; sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_ENHCAPTEFFCT |TULIP_CMD_THRSHLD160|TULIP_CMD_BACKOFFCTR; sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; tulip_21041_mediainfo_init(sc); } static void tulip_21041_media_poll(tulip_softc_t * const sc, const tulip_mediapoll_event_t event) { u_int32_t sia_status; TULIP_LOCK_ASSERT(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE || !TULIP_DO_AUTOSENSE(sc)) return; sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* start probe */ return; } /* * If we've been asked to start a poll or link change interrupt, * restart the probe (and reset the tulip to a known state). */ if (event == TULIP_MEDIAPOLL_START) { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_RXRUN); #ifdef notyet if (sc->tulip_revinfo >= 0x20) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_DIDNWAY; } #endif TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_10BASET; sc->tulip_probe_timeout = TULIP_21041_PROBE_10BASET_TIMEOUT; tulip_media_set(sc, TULIP_MEDIA_10BASET); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) return; if (event == TULIP_MEDIAPOLL_TXPROBE_OK) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); return; } sia_status = TULIP_CSR_READ(sc, csr_sia_status); TULIP_CSR_WRITE(sc, csr_sia_status, sia_status); if ((sia_status & TULIP_SIASTS_LINKFAIL) == 0) { if (sc->tulip_revinfo >= 0x20) { if (sia_status & (PHYSTS_10BASET_FD << (16 - 6))) sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } /* * If the link has passed LinkPass, 10baseT is the * proper media to use. */ tulip_linkup(sc, sc->tulip_probe_media); return; } /* * Wait for up to 2.4 seconds for the link to reach pass state. * Only then start scanning the other media for activity. * Choose media with receive activity over those without. */ if (sc->tulip_probe_media == TULIP_MEDIA_10BASET) { if (event != TULIP_MEDIAPOLL_TIMER) return; if (sc->tulip_probe_timeout > 0 && (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) == 0) { tulip_timeout(sc); return; } sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; sc->tulip_flags |= TULIP_WANTRXACT; if (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } /* * If we failed, clear the txprobe active flag. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; if (event == TULIP_MEDIAPOLL_TIMER) { /* * If we've received something, then that's our link! 
*/ if (sc->tulip_flags & TULIP_RXACT) { tulip_linkup(sc, sc->tulip_probe_media); return; } /* * if no txprobe active */ if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0 && ((sc->tulip_flags & TULIP_WANTRXACT) == 0 || (sia_status & TULIP_SIASTS_RXACTIVITY))) { sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; tulip_txprobe(sc); tulip_timeout(sc); return; } /* * Take 2 passes through before deciding to not * wait for receive activity. Then take another * two passes before spitting out a warning. */ if (sc->tulip_probe_timeout <= 0) { if (sc->tulip_flags & TULIP_WANTRXACT) { sc->tulip_flags &= ~TULIP_WANTRXACT; sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; } else { device_printf(sc->tulip_dev, "autosense failed: cable problem?\n"); if ((sc->tulip_ifp->if_flags & IFF_UP) == 0) { sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } } } /* * Since this media failed to probe, try the other one. */ sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; if (sc->tulip_probe_media == TULIP_MEDIA_AUI) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; tulip_timeout(sc); } static const tulip_boardsw_t tulip_21041_boardsw = { TULIP_21041_GENERIC, tulip_21041_media_probe, tulip_media_select, tulip_21041_media_poll }; static const tulip_phy_attr_t tulip_mii_phy_attrlist[] = { { 0x20005c00, 0, /* 08-00-17 */ { { 0x19, 0x0040, 0x0040 }, /* 10TX */ { 0x19, 0x0040, 0x0000 }, /* 100TX */ }, #if defined(TULIP_DEBUG) "NS DP83840", #endif }, { 0x0281F400, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Seeq 80C240" #endif }, #if 0 { 0x0015F420, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Broadcom BCM5000" #endif }, #endif { 0x0281F400, 0, /* 00-A0-BE */ { { 0x11, 0x8000, 0x0000 }, /* 10T */ { 0x11, 0x8000, 0x8000 }, /* 100TX */ { }, /* 100T4 */ { 0x11, 0x4000, 0x4000 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "ICS 1890" #endif }, { 0 } }; static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc) { const tulip_phy_attr_t *attr; u_int16_t data; u_int32_t id; unsigned idx = 0; static const tulip_media_t table[] = { TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET, TULIP_MEDIA_100BASETX, TULIP_MEDIA_100BASET4, TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET_FD, TULIP_MEDIA_100BASETX_FD, TULIP_MEDIA_UNKNOWN }; TULIP_LOCK_ASSERT(sc); /* * Don't read phy specific registers if link is not up. 
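* (The code below derives an index from the PHY's vendor-specific * mode bits: 1 for 10T, 2 for 100TX, 3 for 100T4, plus 4 when full * duplex, and maps the result through table[] above.) 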
*/ data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if ((data & (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) != (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) return TULIP_MEDIA_UNKNOWN; id = (tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDHIGH); for (attr = tulip_mii_phy_attrlist;; attr++) { if (attr->attr_id == 0) return TULIP_MEDIA_UNKNOWN; if ((id & ~0x0F) == attr->attr_id) break; } if (attr->attr_modes[PHY_MODE_100TX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100TX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 2; } if (idx == 0 && attr->attr_modes[PHY_MODE_100T4].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100T4]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 3; } if (idx == 0 && attr->attr_modes[PHY_MODE_10T].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_10T]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 1; } if (idx != 0 && attr->attr_modes[PHY_MODE_FULLDUPLEX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_FULLDUPLEX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); idx += ((data & pm->pm_mask) == pm->pm_value ? 4 : 0); } return table[idx]; } static unsigned tulip_mii_get_phyaddr(tulip_softc_t * const sc, unsigned offset) { unsigned phyaddr; TULIP_LOCK_ASSERT(sc); for (phyaddr = 1; phyaddr < 32; phyaddr++) { unsigned status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) continue; if (offset == 0) return phyaddr; offset--; } if (offset == 0) { unsigned status = tulip_mii_readreg(sc, 0, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) return TULIP_MII_NOPHY; return 0; } return TULIP_MII_NOPHY; } static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities) { TULIP_LOCK_ASSERT(sc); sc->tulip_abilities = abilities; if (abilities & PHYSTS_100BASETX_FD) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX_FD; } else if (abilities & PHYSTS_100BASET4) { sc->tulip_probe_media = TULIP_MEDIA_100BASET4; } else if (abilities & PHYSTS_100BASETX) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX; } else if (abilities & PHYSTS_10BASET_FD) { sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } else if (abilities & PHYSTS_10BASET) { sc->tulip_probe_media = TULIP_MEDIA_10BASET; } else { sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return 0; } sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return 1; } static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr) { struct ifnet *ifp = sc->tulip_ifp; TULIP_LOCK_ASSERT(sc); switch (sc->tulip_probe_state) { case TULIP_PROBE_MEDIATEST: case TULIP_PROBE_INACTIVE: { sc->tulip_flags |= TULIP_DIDNWAY; tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, PHYCTL_RESET); sc->tulip_probe_timeout = 3000; sc->tulip_intrmask |= TULIP_STS_ABNRMLINTR|TULIP_STS_NORMALINTR; sc->tulip_probe_state = TULIP_PROBE_PHYRESET; } /* FALLTHROUGH */ case TULIP_PROBE_PHYRESET: { u_int32_t status; u_int32_t data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); if (data & PHYCTL_RESET) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } printf("%s(phy%d): error: reset of PHY never completed!\n", ifp->if_xname, phyaddr); sc->tulip_flags 
&= ~TULIP_TXPROBE_ACTIVE; sc->tulip_probe_state = TULIP_PROBE_FAILED; sc->tulip_ifp->if_flags &= ~IFF_UP; sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return; } status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if ((status & PHYSTS_CAN_AUTONEG) == 0) { #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation disabled\n", ifp->if_xname, phyaddr); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } if (tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT) != ((status >> 6) | 0x01)) tulip_mii_writereg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT, (status >> 6) | 0x01); tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, data|PHYCTL_AUTONEG_RESTART|PHYCTL_AUTONEG_ENABLE); data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); #if defined(TULIP_DEBUG) if ((data & PHYCTL_AUTONEG_ENABLE) == 0) loudprintf("%s(phy%d): oops: enable autonegotiation failed: 0x%04x\n", ifp->if_xname, phyaddr, data); else loudprintf("%s(phy%d): autonegotiation restarted: 0x%04x\n", ifp->if_xname, phyaddr, data); sc->tulip_dbg.dbg_nway_starts++; #endif sc->tulip_probe_state = TULIP_PROBE_PHYAUTONEG; sc->tulip_probe_timeout = 3000; } /* FALLTHROUGH */ case TULIP_PROBE_PHYAUTONEG: { u_int32_t status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); u_int32_t data; if ((status & PHYSTS_AUTONEG_DONE) == 0) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation timeout: sts=0x%04x, ctl=0x%04x\n", ifp->if_xname, phyaddr, status, tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL)); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } data = tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ABILITIES); #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation complete: 0x%04x\n", ifp->if_xname, phyaddr, data); #endif data = (data << 6) & status; if (!tulip_mii_map_abilities(sc, data)) sc->tulip_flags &= ~TULIP_DIDNWAY; return; } default: { #if defined(DIAGNOSTIC) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation failure: state = %d\n", ifp->if_xname, phyaddr, sc->tulip_probe_state); sc->tulip_dbg.dbg_nway_failures++; #endif } static void tulip_2114x_media_preset(tulip_softc_t * const sc) { const tulip_media_info_t *mi = NULL; tulip_media_t media = sc->tulip_media; TULIP_LOCK_ASSERT(sc); if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) media = sc->tulip_media; else media = sc->tulip_probe_media; sc->tulip_cmdmode &= ~TULIP_CMD_PORTSELECT; sc->tulip_flags &= ~TULIP_SQETEST; if (media != TULIP_MEDIA_UNKNOWN && media != TULIP_MEDIA_MAX) { #if defined(TULIP_DEBUG) if (media < TULIP_MEDIA_MAX && sc->tulip_mediums[media] != NULL) { #endif mi = sc->tulip_mediums[media]; if (mi->mi_type == TULIP_MEDIAINFO_MII) { sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR || mi->mi_type == TULIP_MEDIAINFO_SYM) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); } #if defined(TULIP_DEBUG) } else { device_printf(sc->tulip_dev, "preset: bad media %d!\n", media); } #endif } switch (media) { case TULIP_MEDIA_BNC: case TULIP_MEDIA_AUI: case TULIP_MEDIA_10BASET: { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_cmdmode |= TULIP_CMD_TXTHRSHLDCTL; sc->tulip_ifp->if_baudrate = 10000000; sc->tulip_flags |= 
TULIP_SQETEST; break; } case TULIP_MEDIA_10BASET_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL; sc->tulip_ifp->if_baudrate = 10000000; break; } case TULIP_MEDIA_100BASEFX: case TULIP_MEDIA_100BASET4: case TULIP_MEDIA_100BASETX: { sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL); sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; sc->tulip_ifp->if_baudrate = 100000000; break; } case TULIP_MEDIA_100BASEFX_FD: case TULIP_MEDIA_100BASETX_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_PORTSELECT; sc->tulip_cmdmode &= ~TULIP_CMD_TXTHRSHLDCTL; sc->tulip_ifp->if_baudrate = 100000000; break; } default: { break; } } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } /* ******************************************************************** * Start of 21140/21140A support which does not use the MII interface */ static void tulip_null_media_poll(tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif #if defined(DIAGNOSTIC) device_printf(sc->tulip_dev, "botch(media_poll) at line %d\n", __LINE__); #endif } static inline void tulip_21140_mediainit(tulip_softc_t * const sc, tulip_media_info_t * const mip, tulip_media_t const media, unsigned gpdata, unsigned cmdmode) { TULIP_LOCK_ASSERT(sc); sc->tulip_mediums[media] = mip; mip->mi_type = TULIP_MEDIAINFO_GPR; mip->mi_cmdmode = cmdmode; mip->mi_gpdata = gpdata; } static void tulip_21140_evalboard_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); if ((TULIP_CSR_READ(sc, csr_gp) & TULIP_GP_EB_OK100) != 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_eb_boardsw = { TULIP_21140_DEC_EB, tulip_21140_evalboard_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_accton_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; unsigned gpdata; TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); gpdata = TULIP_CSR_READ(sc, csr_gp); if ((gpdata & TULIP_GP_EN1207_UTP_INIT) == 0) { 
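/* (The sense bits appear to be active-low: a clear UTP bit selects * 10BASE-T, a clear BNC bit selects BNC, otherwise 100BASE-TX.) */ 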
sc->tulip_media = TULIP_MEDIA_10BASET; } else { if ((gpdata & TULIP_GP_EN1207_BNC_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_BNC; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_BNC, TULIP_GP_EN1207_BNC_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_accton_boardsw = { TULIP_21140_EN1207, tulip_21140_accton_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_smc9332_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int idx, cnt = 0; TULIP_LOCK_ASSERT(sc); TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT|TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); sc->tulip_gpinit = TULIP_GP_SMC_9332_PINS; sc->tulip_gpdata = TULIP_GP_SMC_9332_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_INIT); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) == (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) { if (++cnt > 100) break; } else if ((csr & TULIP_GP_SMC_9332_OK10) == 0) { break; } else { cnt = 0; } DELAY(1000); } sc->tulip_media = cnt > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_smc9332_boardsw = { TULIP_21140_SMC_9332, tulip_21140_smc9332_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_cogent_em100_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; u_int32_t cmdmode = TULIP_CSR_READ(sc, csr_command); TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_EM100_PINS; sc->tulip_gpdata = TULIP_GP_EM100_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_INIT); cmdmode = TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_MUSTBEONE; cmdmode &= ~(TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_SCRAMBLER); if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { TULIP_CSR_WRITE(sc, csr_command, cmdmode); sc->tulip_media = TULIP_MEDIA_100BASEFX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_FULLDUPLEX); } else { TULIP_CSR_WRITE(sc, csr_command, cmdmode|TULIP_CMD_SCRAMBLER); sc->tulip_media = TULIP_MEDIA_100BASETX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } } static const tulip_boardsw_t tulip_21140_cogent_em100_boardsw = { TULIP_21140_COGENT_EM100, tulip_21140_cogent_em100_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset }; static void tulip_21140_znyx_zx34x_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int cnt10 = 0, cnt100 = 0, idx; TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_ZX34X_PINS; sc->tulip_gpdata = TULIP_GP_ZX34X_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) == (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) { if (++cnt100 > 100) break; } else if ((csr & TULIP_GP_ZX34X_LNKFAIL) == 0) { if (++cnt10 > 100) break; } else { cnt10 = 0; cnt100 = 0; } DELAY(1000); } sc->tulip_media = cnt100 > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_znyx_zx34x_boardsw = { TULIP_21140_ZNYX_ZX34X, tulip_21140_znyx_zx34x_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_2114x_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); sc->tulip_cmdmode |= TULIP_CMD_MUSTBEONE |TULIP_CMD_BACKOFFCTR|TULIP_CMD_THRSHLD72; } static const tulip_boardsw_t tulip_2114x_isv_boardsw = { TULIP_21140_ISV, tulip_2114x_media_probe, tulip_media_select, tulip_media_poll, tulip_2114x_media_preset, }; /* * ******** END of chip-specific handlers. *********** */ /* * Code to read the SROM and MII bit streams (I2C) */ #define EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); DELAY(1); } while (0) static void tulip_srom_idle(tulip_softc_t * const sc) { unsigned bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCS; EMIT; csr ^= SROMCLKON; EMIT; /* * Write 25 cycles of 0 which will force the SROM to be idle. */ for (bit = 3 + SROM_BITWIDTH + 16; bit > 0; bit--) { csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ csr ^= SROMCLKON; EMIT; /* clock high; data valid */ } csr ^= SROMCLKOFF; EMIT; csr ^= SROMCS; EMIT; csr = 0; EMIT; } static void tulip_srom_read(tulip_softc_t * const sc) { unsigned idx; const unsigned bitwidth = SROM_BITWIDTH; const unsigned cmdmask = (SROMCMD_RD << bitwidth); const unsigned msb = 1 << (bitwidth + 3 - 1); unsigned lastidx = (1 << bitwidth) - 1; tulip_srom_idle(sc); for (idx = 0; idx <= lastidx; idx++) { unsigned lastbit, data, bits, bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCSON; EMIT; csr ^= SROMCLKON; EMIT; lastbit = 0; for (bits = idx|cmdmask, bit = bitwidth + 3; bit > 0; bit--, bits <<= 1) { const unsigned thisbit = bits & msb; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ if (thisbit != lastbit) { csr ^= SROMDOUT; EMIT; /* clock low; invert data */ } else { EMIT; } csr ^= SROMCLKON; EMIT; /* clock high; data valid */ lastbit = thisbit; } csr ^= SROMCLKOFF; EMIT; for (data = 0, bits = 0; bits < 16; bits++) { data <<= 1; csr ^= SROMCLKON; EMIT; /* clock high; data valid */ data |= TULIP_CSR_READ(sc, csr_srom_mii) & SROMDIN ? 1 : 0; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ } sc->tulip_rombuf[idx*2] = data & 0xFF; sc->tulip_rombuf[idx*2+1] = data >> 8; csr = SROMSEL | SROMRD; EMIT; csr = 0; EMIT; } tulip_srom_idle(sc); } #define MII_EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); DELAY(1); } while (0) static void tulip_mii_writebits(tulip_softc_t * const sc, unsigned data, unsigned bits) { unsigned msb = 1 << (bits - 1); unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned lastbit = (csr & MII_DOUT) ? 
msb : 0; TULIP_LOCK_ASSERT(sc); csr |= MII_WR; MII_EMIT; /* clock low; assert write */ for (; bits > 0; bits--, data <<= 1) { const unsigned thisbit = data & msb; if (thisbit != lastbit) { csr ^= MII_DOUT; MII_EMIT; /* clock low; invert data */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ lastbit = thisbit; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } } static void tulip_mii_turnaround(tulip_softc_t * const sc, unsigned cmd) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); TULIP_LOCK_ASSERT(sc); if (cmd == MII_WRCMD) { csr |= MII_DOUT; MII_EMIT; /* clock low; change data */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ csr ^= MII_DOUT; MII_EMIT; /* clock low; change data */ } else { csr |= MII_RD; MII_EMIT; /* clock low; switch to read */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } static unsigned tulip_mii_readbits(tulip_softc_t * const sc) { unsigned data; unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); int idx; TULIP_LOCK_ASSERT(sc); for (idx = 0, data = 0; idx < 16; idx++) { data <<= 1; /* this is NOOP on the first pass through */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ if (TULIP_CSR_READ(sc, csr_srom_mii) & MII_DIN) data |= 1; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } csr ^= MII_RD; MII_EMIT; /* clock low; turn off read */ return data; } static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned data; TULIP_LOCK_ASSERT(sc); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_RDCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_RDCMD); data = tulip_mii_readbits(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][0] = data; sc->tulip_dbg.dbg_phyregs[regno][1]++; #endif return data; } static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); TULIP_LOCK_ASSERT(sc); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_WRCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_WRCMD); tulip_mii_writebits(sc, data, 16); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][2] = data; sc->tulip_dbg.dbg_phyregs[regno][3]++; #endif } #define tulip_mchash(mca) (ether_crc32_le(mca, 6) & 0x1FF) #define tulip_srom_crcok(databuf) ( \ ((ether_crc32_le(databuf, 126) & 0xFFFFU) ^ 0xFFFFU) == \ ((databuf)[126] | ((databuf)[127] << 8))) static void tulip_identify_dec_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "DEC "); #define D0 4 if (sc->tulip_chipid <= TULIP_21040) return; if (bcmp(sc->tulip_rombuf + 29, "DE500", 5) == 0 || bcmp(sc->tulip_rombuf + 29, "DE450", 5) == 0) { bcopy(sc->tulip_rombuf + 29, &sc->tulip_boardid[D0], 8); sc->tulip_boardid[D0+8] = ' '; } #undef D0 } static void tulip_identify_znyx_nic(tulip_softc_t * const sc) { unsigned id = 0; TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "ZNYX ZX3XX "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { unsigned znyx_ptr; 
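/* SROM bytes 124/125 hold a little-endian pointer to the ZNYX ID * block; it is range-checked (26..116) below before being used. */ 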
sc->tulip_boardid[8] = '4'; znyx_ptr = sc->tulip_rombuf[124] + 256 * sc->tulip_rombuf[125]; if (znyx_ptr < 26 || znyx_ptr > 116) { sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; return; } /* ZX344 = 0010 .. 0013FF */ if (sc->tulip_rombuf[znyx_ptr] == 0x4A && sc->tulip_rombuf[znyx_ptr + 1] == 0x52 && sc->tulip_rombuf[znyx_ptr + 2] == 0x01) { id = sc->tulip_rombuf[znyx_ptr + 5] + 256 * sc->tulip_rombuf[znyx_ptr + 4]; if ((id >> 8) == (TULIP_ZNYX_ID_ZX342 >> 8)) { sc->tulip_boardid[9] = '2'; if (id == TULIP_ZNYX_ID_ZX342B) { sc->tulip_boardid[10] = 'B'; sc->tulip_boardid[11] = ' '; } sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX344) { sc->tulip_boardid[10] = '4'; sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX345) { sc->tulip_boardid[9] = (sc->tulip_rombuf[19] > 1) ? '8' : '5'; } else if (id == TULIP_ZNYX_ID_ZX346) { sc->tulip_boardid[9] = '6'; } else if (id == TULIP_ZNYX_ID_ZX351) { sc->tulip_boardid[8] = '5'; sc->tulip_boardid[9] = '1'; } } if (id == 0) { /* * Assume it's a ZX342... */ sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } return; } sc->tulip_boardid[8] = '1'; if (sc->tulip_chipid == TULIP_21041) { sc->tulip_boardid[10] = '1'; return; } if (sc->tulip_rombuf[32] == 0x4A && sc->tulip_rombuf[33] == 0x52) { id = sc->tulip_rombuf[37] + 256 * sc->tulip_rombuf[36]; if (id == TULIP_ZNYX_ID_ZX312T) { sc->tulip_boardid[9] = '2'; sc->tulip_boardid[10] = 'T'; sc->tulip_boardid[11] = ' '; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; } else if (id == TULIP_ZNYX_ID_ZX314_INTA) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX314) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315_INTA) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_BASEROM; } else { id = 0; } } if (id == 0) { if ((sc->tulip_enaddr[3] & ~3) == 0xF0 && (sc->tulip_enaddr[5] & 2) == 0) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xF4 && (sc->tulip_enaddr[5] & 1) == 0) { sc->tulip_boardid[9] = '5'; sc->tulip_boardsw = &tulip_21040_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xEC) { sc->tulip_boardid[9] = '2'; sc->tulip_boardsw = &tulip_21040_boardsw; } } } static void tulip_identify_smc_nic(tulip_softc_t * const sc) { u_int32_t id1, id2, ei; int auibnc = 0, utp = 0; char *cp; TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "SMC "); if (sc->tulip_chipid == TULIP_21041) return; if (sc->tulip_chipid != TULIP_21040) { if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { strcpy(&sc->tulip_boardid[4], "9332DST "); sc->tulip_boardsw = &tulip_21140_smc9332_boardsw; } else if (sc->tulip_features & (TULIP_HAVE_BASEROM|TULIP_HAVE_SLAVEDROM)) { strcpy(&sc->tulip_boardid[4], "9334BDT "); } else { strcpy(&sc->tulip_boardid[4], "9332BDT "); } return; } id1 = sc->tulip_rombuf[0x60] | (sc->tulip_rombuf[0x61] << 8); id2 = sc->tulip_rombuf[0x62] | (sc->tulip_rombuf[0x63] << 8); ei = sc->tulip_rombuf[0x66] | (sc->tulip_rombuf[0x67] << 8); 
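/* id1/id2/ei are little-endian SROM words encoding the 8432 board * variant; their bits select the B/T/A suffixes (AUI/BNC vs. UTP) * and, when id2 == 0x15, the 8434-CH1/-CH2 forms below. */ 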
strcpy(&sc->tulip_boardid[4], "8432"); cp = &sc->tulip_boardid[8]; if ((id1 & 1) == 0) *cp++ = 'B', auibnc = 1; if ((id1 & 0xFF) > 0x32) *cp++ = 'T', utp = 1; if ((id1 & 0x4000) == 0) *cp++ = 'A', auibnc = 1; if (id2 == 0x15) { sc->tulip_boardid[7] = '4'; *cp++ = '-'; *cp++ = 'C'; *cp++ = 'H'; *cp++ = (ei ? '2' : '1'); } *cp++ = ' '; *cp = '\0'; if (utp && !auibnc) sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; else if (!utp && auibnc) sc->tulip_boardsw = &tulip_21040_auibnc_only_boardsw; } static void tulip_identify_cogent_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "Cogent "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100TX_ID) { strcat(sc->tulip_boardid, "EM100TX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; #if defined(TULIP_COGENT_EM110TX_ID) } else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM110TX_ID) { strcat(sc->tulip_boardid, "EM110TX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; #endif } else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { strcat(sc->tulip_boardid, "EM100FX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; } /* * Magic number (0x24001109U) is the SubVendor (0x2400) and * SubDevId (0x1109) for the ANA6944TX (EM440TX). */ if (*(u_int32_t *) sc->tulip_rombuf == 0x24001109U && (sc->tulip_features & TULIP_HAVE_BASEROM)) { /* * Cogent (Adaptec) is still mapping all INTs to INTA of * first 21140. Dumb! Dumb! */ strcat(sc->tulip_boardid, "EM440TX "); sc->tulip_features |= TULIP_HAVE_SHAREDINTR; } } else if (sc->tulip_chipid == TULIP_21040) { sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } } static void tulip_identify_accton_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "ACCTON "); switch (sc->tulip_chipid) { case TULIP_21140A: strcat(sc->tulip_boardid, "EN1207 "); if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) sc->tulip_boardsw = &tulip_21140_accton_boardsw; break; case TULIP_21140: strcat(sc->tulip_boardid, "EN1207TX "); if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) sc->tulip_boardsw = &tulip_21140_eb_boardsw; break; case TULIP_21040: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21040_boardsw; break; case TULIP_21041: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21041_boardsw; break; default: sc->tulip_boardsw = &tulip_2114x_isv_boardsw; break; } } static void tulip_identify_asante_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "Asante "); if ((sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) && sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { tulip_media_info_t *mi = sc->tulip_mediainfo; int idx; /* * The Asante Fast Ethernet doesn't always ship with a valid * new format SROM. So if it isn't in the new format, we cheat and * set it up as if we had.
*/ sc->tulip_gpinit = TULIP_GP_ASANTE_PINS; sc->tulip_gpdata = 0; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PHYRESET); DELAY(100); TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = 0; mi->mi_gpr_offset = 0; mi->mi_reset_length = 0; mi->mi_reset_offset = 0; mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx = 20; idx > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, 0); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { device_printf(sc->tulip_dev, "can't find phy 0\n"); return; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_advertisement = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_full_duplex = PHYSTS_10BASET_FD|PHYSTS_100BASETX_FD; mi->mi_tx_threshold = PHYSTS_10BASET|PHYSTS_10BASET_FD; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } } static void tulip_identify_compex_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "COMPEX "); if (sc->tulip_chipid == TULIP_21140A) { int root_unit; tulip_softc_t *root_sc = NULL; strcat(sc->tulip_boardid, "400TX/PCI "); /* * All 4 chips on these boards share an interrupt. This code * copied from tulip_read_macaddr. */ sc->tulip_features |= TULIP_HAVE_SHAREDINTR; for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) { root_sc = tulips[root_unit]; if (root_sc == NULL || !(root_sc->tulip_features & TULIP_HAVE_SLAVEDINTR)) break; root_sc = NULL; } if (root_sc != NULL && root_sc->tulip_chipid == sc->tulip_chipid && root_sc->tulip_pci_busno == sc->tulip_pci_busno) { sc->tulip_features |= TULIP_HAVE_SLAVEDINTR; sc->tulip_slaves = root_sc->tulip_slaves; root_sc->tulip_slaves = sc; } else if(sc->tulip_features & TULIP_HAVE_SLAVEDINTR) { printf("\nCannot find master device for %s interrupts", sc->tulip_ifp->if_xname); } } else { strcat(sc->tulip_boardid, "unknown "); } /* sc->tulip_boardsw = &tulip_21140_eb_boardsw; */ return; } static int tulip_srom_decode(tulip_softc_t * const sc) { unsigned idx1, idx2, idx3; const tulip_srom_header_t *shp = (const tulip_srom_header_t *) &sc->tulip_rombuf[0]; const tulip_srom_adapter_info_t *saip = (const tulip_srom_adapter_info_t *) (shp + 1); tulip_srom_media_t srom_media; tulip_media_info_t *mi = sc->tulip_mediainfo; const u_int8_t *dp; u_int32_t leaf_offset, blocks, data; TULIP_LOCK_ASSERT(sc); for (idx1 = 0; idx1 < shp->sh_adapter_count; idx1++, saip++) { if (shp->sh_adapter_count == 1) break; if (saip->sai_device == sc->tulip_pci_devno) break; } /* * Didn't find the right media block for this card. */ if (idx1 == shp->sh_adapter_count) return 0; /* * Save the hardware address. */ bcopy(shp->sh_ieee802_address, sc->tulip_enaddr, 6); /* * If this is a multiple port card, add the adapter index to the last * byte of the hardware address. (if it isn't multiport, adding 0 * won't hurt. 
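 *
 * As a worked example (addresses hypothetical): if the SROM base
 * address were 00:c0:95:40:00:10, adapter index 0 would keep that
 * address and adapter index 1 would end up with 00:c0:95:40:00:11;
 * only the last octet is adjusted, so the per-port addresses remain
 * consecutive within the vendor's block.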
*/ sc->tulip_enaddr[5] += idx1; leaf_offset = saip->sai_leaf_offset_lowbyte + saip->sai_leaf_offset_highbyte * 256; dp = sc->tulip_rombuf + leaf_offset; sc->tulip_conntype = (tulip_srom_connection_t) (dp[0] + dp[1] * 256); dp += 2; for (idx2 = 0;; idx2++) { if (tulip_srom_conninfo[idx2].sc_type == sc->tulip_conntype || tulip_srom_conninfo[idx2].sc_type == TULIP_SROM_CONNTYPE_NOT_USED) break; } sc->tulip_connidx = idx2; if (sc->tulip_chipid == TULIP_21041) { blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { tulip_media_t media; data = *dp++; srom_media = (tulip_srom_media_t) (data & 0x3F); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media != TULIP_MEDIA_UNKNOWN) { if (data & TULIP_SROM_21041_EXTENDED) { mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; mi++; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; break; } default: { break; } } } } if (data & TULIP_SROM_21041_EXTENDED) dp += 6; } #ifdef notdef if (blocks == 0) { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; } #endif } else { unsigned length, type; tulip_media_t gp_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_features & TULIP_HAVE_GPR) sc->tulip_gpinit = *dp++; blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { const u_int8_t *ep; if ((*dp & 0x80) == 0) { length = 4; type = 0; } else { length = (*dp++ & 0x7f) - 1; type = *dp++ & 0x3f; } ep = dp + length; switch (type & 0x3f) { case 0: { /* 21140[A] GPR block */ tulip_media_t media; srom_media = (tulip_srom_media_t)(dp[0] & 0x3f); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_GPR; sc->tulip_mediums[media] = mi; mi->mi_gpdata = dp[1]; if (media > gp_media && !TULIP_IS_MEDIA_FD(media)) { sc->tulip_gpdata = mi->mi_gpdata; gp_media = media; } data = dp[2] + dp[3] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { #if 0 mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; #endif mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } case 1: { /* 21140[A] MII block */ const unsigned phyno = *dp++; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += mi->mi_reset_length; /* * Before we probe for a PHY, use the GPR information * to select it. If we don't, it may be inaccessible. 
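 *
 * In sketch form, the selection that follows simply clocks the SROM's
 * stored byte sequences out through the general purpose register (seq
 * and len stand in for the reset or GPR sequence and its length):
 *
 *	TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET);
 *	for (i = 0; i < len; i++) {
 *		DELAY(10);
 *		TULIP_CSR_WRITE(sc, csr_gp, seq[i]);
 *	}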
*/ TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET); for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx3]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx3]); } /* * At least write something! */ if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "can't find phy %d\n", phyno); #endif break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 2: { /* 2114[23] SIA block */ tulip_media_t media; srom_media = (tulip_srom_media_t)(dp[0] & 0x3f); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; if (dp[0] & 0x40) { mi->mi_sia_connectivity = dp[1] + dp[2] * 256; mi->mi_sia_tx_rx = dp[3] + dp[4] * 256; mi->mi_sia_general = dp[5] + dp[6] * 256; dp += 6; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, BNC); break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, AUI); break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET); sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET_FD); sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; break; } default: { goto bad_media; } } } mi->mi_sia_gp_control = (dp[1] + dp[2] * 256) << 16; mi->mi_sia_gp_data = (dp[3] + dp[4] * 256) << 16; mi++; bad_media: break; } case 3: { /* 2114[23] MII PHY block */ const unsigned phyno = *dp++; const u_int8_t *dp0; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; dp0 = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp0 = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_sia_general, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for 
(idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "can't find phy %d\n", phyno); #endif break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; mi->mi_mii_interrupt = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 4: { /* 21143 SYM block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SYM; sc->tulip_mediums[media] = mi; mi->mi_gpcontrol = (dp[1] + dp[2] * 256) << 16; mi->mi_gpdata = (dp[3] + dp[4] * 256) << 16; data = dp[5] + dp[6] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } if (TULIP_IS_MEDIA_TP(media)) sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; mi++; break; } #if 0 case 5: { /* 21143 Reset block */ mi->mi_type = TULIP_MEDIAINFO_RESET; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; mi++; break; } #endif default: { } } dp = ep; } } return mi - sc->tulip_mediainfo; } static const struct { void (*vendor_identify_nic)(tulip_softc_t * const sc); unsigned char vendor_oui[3]; } tulip_vendors[] = { { tulip_identify_dec_nic, { 0x08, 0x00, 0x2B } }, { tulip_identify_dec_nic, { 0x00, 0x00, 0xF8 } }, { tulip_identify_smc_nic, { 0x00, 0x00, 0xC0 } }, { tulip_identify_smc_nic, { 0x00, 0xE0, 0x29 } }, { tulip_identify_znyx_nic, { 0x00, 0xC0, 0x95 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0x92 } }, { tulip_identify_asante_nic, { 0x00, 0x00, 0x94 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0xD1 } }, { tulip_identify_accton_nic, { 0x00, 0x00, 0xE8 } }, { tulip_identify_compex_nic, { 0x00, 0x80, 0x48 } }, { NULL } }; /* * This deals with the vagaries of the address roms and the * brain-deadness that various vendors commit in using them. 
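 *
 * The tulip_vendors[] table above is scanned linearly by OUI; the
 * dispatch at the check_oui label below has this shape (sketch only):
 *
 *	for (i = 0; tulip_vendors[i].vendor_identify_nic != NULL; i++)
 *		if (bcmp(enaddr, tulip_vendors[i].vendor_oui, 3) == 0) {
 *			(*tulip_vendors[i].vendor_identify_nic)(sc);
 *			break;
 *		}
 *
 * The { NULL } entry terminates the scan; DEC, SMC and Cogent each
 * appear twice because they shipped boards under more than one OUI.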
*/ static int tulip_read_macaddr(tulip_softc_t * const sc) { unsigned cksum, rom_cksum, idx; u_int32_t csr; unsigned char tmpbuf[8]; static const u_char testpat[] = { 0xFF, 0, 0x55, 0xAA, 0xFF, 0, 0x55, 0xAA }; sc->tulip_connidx = TULIP_SROM_LASTCONNIDX; if (sc->tulip_chipid == TULIP_21040) { TULIP_CSR_WRITE(sc, csr_enetrom, 1); for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { int cnt = 0; while (((csr = TULIP_CSR_READ(sc, csr_enetrom)) & 0x80000000L) && cnt < 10000) cnt++; sc->tulip_rombuf[idx] = csr & 0xFF; } sc->tulip_boardsw = &tulip_21040_boardsw; } else { if (sc->tulip_chipid == TULIP_21041) { /* * Thankfully all 21041's act the same. */ sc->tulip_boardsw = &tulip_21041_boardsw; } else { /* * Assume all 21140 boards are compatible with the * DEC 10/100 evaluation board. Not really valid but * it's the best we can do until everyone switches to * the new SROM format. */ sc->tulip_boardsw = &tulip_21140_eb_boardsw; } tulip_srom_read(sc); if (tulip_srom_crcok(sc->tulip_rombuf)) { /* * SROM CRC is valid therefore it must be in the * new format. */ sc->tulip_features |= TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM; } else if (sc->tulip_rombuf[126] == 0xff && sc->tulip_rombuf[127] == 0xFF) { /* * No checksum is present. See if the SROM id checks out; * the first 18 bytes should be 0 followed by a 1 followed * by the number of adapters (which we don't deal with yet). */ for (idx = 0; idx < 18; idx++) { if (sc->tulip_rombuf[idx] != 0) break; } if (idx == 18 && sc->tulip_rombuf[18] == 1 && sc->tulip_rombuf[19] != 0) sc->tulip_features |= TULIP_HAVE_ISVSROM; } else if (sc->tulip_chipid >= TULIP_21142) { sc->tulip_features |= TULIP_HAVE_ISVSROM; sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } if ((sc->tulip_features & TULIP_HAVE_ISVSROM) && tulip_srom_decode(sc)) { if (sc->tulip_chipid != TULIP_21041) sc->tulip_boardsw = &tulip_2114x_isv_boardsw; /* * If the SROM specifies more than one adapter, tag this as a * BASE rom. */ if (sc->tulip_rombuf[19] > 1) sc->tulip_features |= TULIP_HAVE_BASEROM; if (sc->tulip_boardsw == NULL) return -6; goto check_oui; } } if (bcmp(&sc->tulip_rombuf[0], &sc->tulip_rombuf[16], 8) != 0) { /* * Some folks don't use the standard ethernet rom format * but instead just put the address in the first 6 bytes * of the rom and let the rest be all 0xffs. (Can we say * ZNYX?) (well sometimes they put in a checksum so we'll * start at 8). */ for (idx = 8; idx < 32; idx++) { if (sc->tulip_rombuf[idx] != 0xFF) return -4; } /* * Make sure the address is not multicast or locally assigned and * that the OUI is not 00-00-00. */ if ((sc->tulip_rombuf[0] & 3) != 0) return -4; if (sc->tulip_rombuf[0] == 0 && sc->tulip_rombuf[1] == 0 && sc->tulip_rombuf[2] == 0) return -4; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); sc->tulip_features |= TULIP_HAVE_OKROM; goto check_oui; } else { /* * A number of makers of multiport boards (ZNYX and Cogent) * only put one address ROM on their 21040 boards. So * if the ROM is all zeros (or all 0xFFs), look at the * previously configured boards (as long as they are on the same * PCI bus and the bus number is non-zero) until we find the * master board with address ROM. We then use its address ROM * as the base for this board. (we add our relative board number * to the last byte of its address). */
*/ for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { if (sc->tulip_rombuf[idx] != 0 && sc->tulip_rombuf[idx] != 0xFF) break; } if (idx == sizeof(sc->tulip_rombuf)) { int root_unit; tulip_softc_t *root_sc = NULL; for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) { root_sc = tulips[root_unit]; if (root_sc == NULL || (root_sc->tulip_features & (TULIP_HAVE_OKROM|TULIP_HAVE_SLAVEDROM)) == TULIP_HAVE_OKROM) break; root_sc = NULL; } if (root_sc != NULL && (root_sc->tulip_features & TULIP_HAVE_BASEROM) && root_sc->tulip_chipid == sc->tulip_chipid && root_sc->tulip_pci_busno == sc->tulip_pci_busno) { sc->tulip_features |= TULIP_HAVE_SLAVEDROM; sc->tulip_boardsw = root_sc->tulip_boardsw; strcpy(sc->tulip_boardid, root_sc->tulip_boardid); if (sc->tulip_boardsw->bd_type == TULIP_21140_ISV) { bcopy(root_sc->tulip_rombuf, sc->tulip_rombuf, sizeof(sc->tulip_rombuf)); if (!tulip_srom_decode(sc)) return -5; } else { bcopy(root_sc->tulip_enaddr, sc->tulip_enaddr, 6); sc->tulip_enaddr[5] += sc->tulip_unit - root_sc->tulip_unit; } /* * Now for a truly disgusting kludge: all 4 21040s on * the ZX314 share the same INTA line so the mapping * setup by the BIOS on the PCI bridge is worthless. * Rather than reprogramming the value in the config * register, we will handle this internally. */ if (root_sc->tulip_features & TULIP_HAVE_SHAREDINTR) { sc->tulip_slaves = root_sc->tulip_slaves; root_sc->tulip_slaves = sc; sc->tulip_features |= TULIP_HAVE_SLAVEDINTR; } return 0; } } } /* * This is the standard DEC address ROM test. */ if (bcmp(&sc->tulip_rombuf[24], testpat, 8) != 0) return -3; tmpbuf[0] = sc->tulip_rombuf[15]; tmpbuf[1] = sc->tulip_rombuf[14]; tmpbuf[2] = sc->tulip_rombuf[13]; tmpbuf[3] = sc->tulip_rombuf[12]; tmpbuf[4] = sc->tulip_rombuf[11]; tmpbuf[5] = sc->tulip_rombuf[10]; tmpbuf[6] = sc->tulip_rombuf[9]; tmpbuf[7] = sc->tulip_rombuf[8]; if (bcmp(&sc->tulip_rombuf[0], tmpbuf, 8) != 0) return -2; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); cksum = *(u_int16_t *) &sc->tulip_enaddr[0]; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[2]; if (cksum > 65535) cksum -= 65535; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[4]; if (cksum >= 65535) cksum -= 65535; rom_cksum = *(u_int16_t *) &sc->tulip_rombuf[6]; if (cksum != rom_cksum) return -1; check_oui: /* * Check for various boards based on OUI. Did I say braindead? 
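 *
 * (Editorial note on the checksum computed just above: the DEC address
 * ROM folds the three 16-bit words of the MAC address by alternately
 * doubling and adding, reducing modulo 65535 as it goes.  Roughly,
 * with w0..w2 the address words read by the code above:
 *
 *	cksum = ((2 * w0) % 65535 + w1) % 65535;
 *	cksum = ((2 * cksum) % 65535 + w2) % 65535;
 *
 * except that the open-coded version reduces by compare-and-subtract
 * and only its final step uses >= 65535, so that a sum of exactly
 * 65535 wraps to zero.)
 */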
*/ for (idx = 0; tulip_vendors[idx].vendor_identify_nic != NULL; idx++) { if (bcmp(sc->tulip_enaddr, tulip_vendors[idx].vendor_oui, 3) == 0) { (*tulip_vendors[idx].vendor_identify_nic)(sc); break; } } sc->tulip_features |= TULIP_HAVE_OKROM; return 0; } static void tulip_ifmedia_add(tulip_softc_t * const sc) { tulip_media_t media; int medias = 0; TULIP_LOCK_ASSERT(sc); for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { ifmedia_add(&sc->tulip_ifmedia, tulip_media_to_ifmedia[media], 0, 0); medias++; } } if (medias == 0) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE); } else if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO); } else { ifmedia_set(&sc->tulip_ifmedia, tulip_media_to_ifmedia[sc->tulip_media]); sc->tulip_flags |= TULIP_PRINTMEDIA; tulip_linkup(sc, sc->tulip_media); } } static int tulip_ifmedia_change(struct ifnet * const ifp) { tulip_softc_t * const sc = (tulip_softc_t *)ifp->if_softc; TULIP_LOCK(sc); sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (IFM_SUBTYPE(sc->tulip_ifmedia.ifm_media) != IFM_AUTO) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && sc->tulip_ifmedia.ifm_media == tulip_media_to_ifmedia[media]) { sc->tulip_flags |= TULIP_PRINTMEDIA; sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); TULIP_UNLOCK(sc); return 0; } } } sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_WANTRXACT); tulip_reset(sc); tulip_init_locked(sc); TULIP_UNLOCK(sc); return 0; } /* * Media status callback */ static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req) { tulip_softc_t *sc = (tulip_softc_t *)ifp->if_softc; TULIP_LOCK(sc); if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_UNLOCK(sc); return; } req->ifm_status = IFM_AVALID; if (sc->tulip_flags & TULIP_LINKUP) req->ifm_status |= IFM_ACTIVE; req->ifm_active = tulip_media_to_ifmedia[sc->tulip_media]; TULIP_UNLOCK(sc); } static void tulip_addr_filter(tulip_softc_t * const sc) { struct ifmultiaddr *ifma; struct ifnet *ifp; u_char *addrp; u_int16_t eaddr[ETHER_ADDR_LEN/2]; int multicnt; TULIP_LOCK_ASSERT(sc); sc->tulip_flags &= ~(TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY|TULIP_ALLMULTI); sc->tulip_flags |= TULIP_WANTSETUP|TULIP_WANTTXSTART; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; #if defined(IFF_ALLMULTI) if (sc->tulip_ifp->if_flags & IFF_ALLMULTI) sc->tulip_flags |= TULIP_ALLMULTI ; #endif multicnt = 0; ifp = sc->tulip_ifp; if_maddr_rlock(ifp); /* Copy MAC address on stack to align. */ if (ifp->if_input != NULL) bcopy(IF_LLADDR(ifp), eaddr, ETHER_ADDR_LEN); else bcopy(sc->tulip_enaddr, eaddr, ETHER_ADDR_LEN); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family == AF_LINK) multicnt++; } if (multicnt > 14) { u_int32_t *sp = sc->tulip_setupdata; unsigned hash; /* * Some early passes of the 21140 have broken implementations of * hash-perfect mode. When we get too many multicasts for perfect * filtering with these chips, we need to switch into hash-only * mode (this is better than all-multicast on network with lots * of multicast traffic). 
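 *
 * The hash filter in question is a 512-bit table indexed by the low
 * nine bits of the little-endian CRC-32 of the multicast address (the
 * tulip_mchash() macro defined earlier).  Setting one bit in the setup
 * buffer, as the loops below do, amounts to:
 *
 *	hash = ether_crc32_le(addr, 6) & 0x1FF;
 *	sp[hash >> 4] |= htole32(1 << (hash & 0xF));
 *
 * i.e. the 512 bits are spread across 32 setup-buffer words with only
 * the low 16 bits of each word significant.
 */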
if (sc->tulip_features & TULIP_HAVE_BROKEN_HASH) sc->tulip_flags |= TULIP_WANTHASHONLY; else sc->tulip_flags |= TULIP_WANTHASHPERFECT; /* * If we have more than 14 multicasts, we have to * go into hash perfect mode (512 bit multicast * hash and one perfect hardware address). */ bzero(sc->tulip_setupdata, sizeof(sc->tulip_setupdata)); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; hash = tulip_mchash(LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); sp[hash >> 4] |= htole32(1 << (hash & 0xF)); } /* * No reason to use a hash if we are going to be * receiving every multicast. */ if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { hash = tulip_mchash(ifp->if_broadcastaddr); sp[hash >> 4] |= htole32(1 << (hash & 0xF)); if (sc->tulip_flags & TULIP_WANTHASHONLY) { hash = tulip_mchash((caddr_t)eaddr); sp[hash >> 4] |= htole32(1 << (hash & 0xF)); } else { sp[39] = TULIP_SP_MAC(eaddr[0]); sp[40] = TULIP_SP_MAC(eaddr[1]); sp[41] = TULIP_SP_MAC(eaddr[2]); } } } if ((sc->tulip_flags & (TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY)) == 0) { u_int32_t *sp = sc->tulip_setupdata; int idx = 0; if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { /* * Else can get perfect filtering for 16 addresses. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); *sp++ = TULIP_SP_MAC(((u_int16_t *)addrp)[0]); *sp++ = TULIP_SP_MAC(((u_int16_t *)addrp)[1]); *sp++ = TULIP_SP_MAC(((u_int16_t *)addrp)[2]); idx++; } /* * Add the broadcast address. */ idx++; *sp++ = TULIP_SP_MAC(0xFFFF); *sp++ = TULIP_SP_MAC(0xFFFF); *sp++ = TULIP_SP_MAC(0xFFFF); } /* * Pad the rest with our hardware address */ for (; idx < 16; idx++) { *sp++ = TULIP_SP_MAC(eaddr[0]); *sp++ = TULIP_SP_MAC(eaddr[1]); *sp++ = TULIP_SP_MAC(eaddr[2]); } } if_maddr_runlock(ifp); } static void tulip_reset(tulip_softc_t * const sc) { tulip_ringinfo_t *ri; tulip_descinfo_t *di; struct mbuf *m; u_int32_t inreset = (sc->tulip_flags & TULIP_INRESET); TULIP_LOCK_ASSERT(sc); CTR1(KTR_TULIP, "tulip_reset: inreset %d", inreset); /* * Brilliant. Simply brilliant. When switching modes/speeds * on a 2114*, you need to set the appropriate MII/PCS/SCL/PS * bits in CSR6 and then do a software reset to get the 21140 * to properly reset its internal pathways to the right places. * Grrrr. */ if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0 && sc->tulip_boardsw->bd_media_preset != NULL) (*sc->tulip_boardsw->bd_media_preset)(sc); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ if (!inreset) { sc->tulip_flags |= TULIP_INRESET; sc->tulip_flags &= ~(TULIP_NEEDRESET|TULIP_RXBUFSLOW); sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } TULIP_CSR_WRITE(sc, csr_txlist, sc->tulip_txinfo.ri_dma_addr & 0xffffffff); TULIP_CSR_WRITE(sc, csr_rxlist, sc->tulip_rxinfo.ri_dma_addr & 0xffffffff); TULIP_CSR_WRITE(sc, csr_busmode, (1 << (3 /*pci_max_burst_len*/ + 8)) |TULIP_BUSMODE_CACHE_ALIGN8 |TULIP_BUSMODE_READMULTIPLE |(BYTE_ORDER != LITTLE_ENDIAN ? TULIP_BUSMODE_DESC_BIGENDIAN : 0)); sc->tulip_txtimer = 0; /* * Free all the mbufs that were on the transmit ring.
*/ CTR0(KTR_TULIP, "tulip_reset: drain transmit ring"); ri = &sc->tulip_txinfo; for (di = ri->ri_first; di < ri->ri_last; di++) { m = tulip_dequeue_mbuf(ri, di, SYNC_NONE); if (m != NULL) m_freem(m); di->di_desc->d_status = 0; } ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; TULIP_TXDESC_PRESYNC(ri); /* * We need to collect all the mbufs that were on the * receive ring before we reinit it either to put * them back on or to know if we have to allocate * more. */ CTR0(KTR_TULIP, "tulip_reset: drain receive ring"); ri = &sc->tulip_rxinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) { di->di_desc->d_status = 0; di->di_desc->d_length1 = 0; di->di_desc->d_addr1 = 0; di->di_desc->d_length2 = 0; di->di_desc->d_addr2 = 0; } TULIP_RXDESC_PRESYNC(ri); for (di = ri->ri_first; di < ri->ri_last; di++) { m = tulip_dequeue_mbuf(ri, di, SYNC_NONE); if (m != NULL) m_freem(m); } /* * If tulip_reset is being called recursively, exit quickly knowing * that when the outer tulip_reset returns all the right stuff will * have happened. */ if (inreset) return; sc->tulip_intrmask |= TULIP_STS_NORMALINTR|TULIP_STS_RXINTR|TULIP_STS_TXINTR |TULIP_STS_ABNRMLINTR|TULIP_STS_SYSERROR|TULIP_STS_TXSTOPPED |TULIP_STS_TXUNDERFLOW|TULIP_STS_TXBABBLE |TULIP_STS_RXSTOPPED; if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0) (*sc->tulip_boardsw->bd_media_select)(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_flags & TULIP_NEEDRESET) == TULIP_NEEDRESET) device_printf(sc->tulip_dev, "tulip_reset: additional reset needed?!?\n"); #endif if (bootverbose) tulip_media_print(sc); if (sc->tulip_features & TULIP_HAVE_DUALSENSE) TULIP_CSR_WRITE(sc, csr_sia_status, TULIP_CSR_READ(sc, csr_sia_status)); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_WANTSETUP|TULIP_INRESET |TULIP_RXACT); } static void tulip_init(void *arg) { tulip_softc_t *sc = (tulip_softc_t *)arg; TULIP_LOCK(sc); tulip_init_locked(sc); TULIP_UNLOCK(sc); } static void tulip_init_locked(tulip_softc_t * const sc) { CTR0(KTR_TULIP, "tulip_init_locked"); if (sc->tulip_ifp->if_flags & IFF_UP) { if ((sc->tulip_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* initialize the media */ CTR0(KTR_TULIP, "tulip_init_locked: up but not running, reset chip"); tulip_reset(sc); } tulip_addr_filter(sc); sc->tulip_ifp->if_drv_flags |= IFF_DRV_RUNNING; if (sc->tulip_ifp->if_flags & IFF_PROMISC) { sc->tulip_flags |= TULIP_PROMISC; sc->tulip_cmdmode |= TULIP_CMD_PROMISCUOUS; sc->tulip_intrmask |= TULIP_STS_TXINTR; } else { sc->tulip_flags &= ~TULIP_PROMISC; sc->tulip_cmdmode &= ~TULIP_CMD_PROMISCUOUS; if (sc->tulip_flags & TULIP_ALLMULTI) { sc->tulip_cmdmode |= TULIP_CMD_ALLMULTI; } else { sc->tulip_cmdmode &= ~TULIP_CMD_ALLMULTI; } } sc->tulip_cmdmode |= TULIP_CMD_TXRUN; if ((sc->tulip_flags & (TULIP_TXPROBE_ACTIVE|TULIP_WANTSETUP)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; } else { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; } CTR2(KTR_TULIP, "tulip_init_locked: intr mask %08x cmdmode %08x", sc->tulip_intrmask, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); CTR1(KTR_TULIP, "tulip_init_locked: status %08x\n", TULIP_CSR_READ(sc, csr_status)); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); 
callout_reset(&sc->tulip_stat_timer, hz, tulip_watchdog, sc); } else { CTR0(KTR_TULIP, "tulip_init_locked: not up, reset chip"); sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; tulip_reset(sc); tulip_addr_filter(sc); callout_stop(&sc->tulip_stat_timer); } } #define DESC_STATUS(di) (((volatile tulip_desc_t *)((di)->di_desc))->d_status) #define DESC_FLAG(di) ((di)->di_desc->d_flag) static void tulip_rx_intr(tulip_softc_t * const sc) { TULIP_PERFSTART(rxintr) tulip_ringinfo_t * const ri = &sc->tulip_rxinfo; struct ifnet * const ifp = sc->tulip_ifp; int fillok = 1; #if defined(TULIP_DEBUG) int cnt = 0; #endif TULIP_LOCK_ASSERT(sc); CTR0(KTR_TULIP, "tulip_rx_intr: start"); for (;;) { TULIP_PERFSTART(rxget) tulip_descinfo_t *eop = ri->ri_nextin, *dip; int total_len = 0, last_offset = 0; struct mbuf *ms = NULL, *me = NULL; int accept = 0; int error; if (fillok && (ri->ri_max - ri->ri_free) < TULIP_RXQ_TARGET) goto queue_mbuf; #if defined(TULIP_DEBUG) if (cnt == ri->ri_max) break; #endif /* * If the TULIP has no descriptors, there can't be any receive * descriptors to process. */ if (eop == ri->ri_nextout) break; /* * 90% of the packets will fit in one descriptor. So we optimize * for that case. */ TULIP_RXDESC_POSTSYNC(ri); if ((DESC_STATUS(eop) & (TULIP_DSTS_OWNER|TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) == (TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) { ms = tulip_dequeue_mbuf(ri, eop, SYNC_RX); CTR2(KTR_TULIP, "tulip_rx_intr: single packet mbuf %p from descriptor %td", ms, eop - ri->ri_first); me = ms; ri->ri_free++; } else { /* * If still owned by the TULIP, don't touch it. */ if (DESC_STATUS(eop) & TULIP_DSTS_OWNER) break; /* * It is possible (though improbable unless MCLBYTES < 1518) for * a received packet to cross more than one receive descriptor. * We first loop through the descriptor ring making sure we have * received a complete packet. If not, we bail until the next * interrupt. */ dip = eop; while ((DESC_STATUS(eop) & TULIP_DSTS_RxLASTDESC) == 0) { if (++eop == ri->ri_last) eop = ri->ri_first; TULIP_RXDESC_POSTSYNC(ri); if (eop == ri->ri_nextout || DESC_STATUS(eop) & TULIP_DSTS_OWNER) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxget); TULIP_PERFEND(rxintr); return; } total_len++; } /* * Dequeue the first buffer for the start of the packet. Hopefully * this will be the only one we need to dequeue. However, if the * packet consumed multiple descriptors, then we need to dequeue * those buffers and chain to the starting mbuf. All buffers but * the last buffer have the same length so we can set that now. * (we add to last_offset instead of multiplying since we normally * won't go into the loop and thereby saving ourselves from * doing a multiplication by 0 in the normal case). */ ms = tulip_dequeue_mbuf(ri, dip, SYNC_RX); CTR2(KTR_TULIP, "tulip_rx_intr: start packet mbuf %p from descriptor %td", ms, dip - ri->ri_first); ri->ri_free++; for (me = ms; total_len > 0; total_len--) { me->m_len = TULIP_RX_BUFLEN; last_offset += TULIP_RX_BUFLEN; if (++dip == ri->ri_last) dip = ri->ri_first; me->m_next = tulip_dequeue_mbuf(ri, dip, SYNC_RX); ri->ri_free++; me = me->m_next; CTR2(KTR_TULIP, "tulip_rx_intr: cont packet mbuf %p from descriptor %td", me, dip - ri->ri_first); } KASSERT(dip == eop, ("mismatched descinfo structs")); } /* * Now get the size of received packet (minus the CRC). 
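 *
 * The chip reports the byte count of the whole frame, including the
 * 4-byte FCS, in bits 16..30 of the last descriptor's status word,
 * hence the extraction just below:
 *
 *	total_len = ((status >> 16) & 0x7FFF) - ETHER_CRC_LEN;
 *
 * For a packet that spanned several buffers, every buffer but the last
 * holds exactly TULIP_RX_BUFLEN bytes, which is why last_offset was
 * accumulated above and only the tail mbuf's m_len needs fixing up
 * here.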
*/ total_len = ((DESC_STATUS(eop) >> 16) & 0x7FFF) - ETHER_CRC_LEN; if ((sc->tulip_flags & TULIP_RXIGNORE) == 0 && ((DESC_STATUS(eop) & TULIP_DSTS_ERRSUM) == 0)) { me->m_len = total_len - last_offset; sc->tulip_flags |= TULIP_RXACT; accept = 1; CTR1(KTR_TULIP, "tulip_rx_intr: good packet; length %d", total_len); } else { CTR1(KTR_TULIP, "tulip_rx_intr: bad packet; status %08x", DESC_STATUS(eop)); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if (DESC_STATUS(eop) & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxOVERFLOW|TULIP_DSTS_RxWATCHDOG)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; } else { #if defined(TULIP_VERBOSE) const char *error = NULL; #endif if (DESC_STATUS(eop) & TULIP_DSTS_RxTOOLONG) { sc->tulip_dot3stats.dot3StatsFrameTooLongs++; #if defined(TULIP_VERBOSE) error = "frame too long"; #endif } if (DESC_STATUS(eop) & TULIP_DSTS_RxBADCRC) { if (DESC_STATUS(eop) & TULIP_DSTS_RxDRBBLBIT) { sc->tulip_dot3stats.dot3StatsAlignmentErrors++; #if defined(TULIP_VERBOSE) error = "alignment error"; #endif } else { sc->tulip_dot3stats.dot3StatsFCSErrors++; #if defined(TULIP_VERBOSE) error = "bad crc"; #endif } } #if defined(TULIP_VERBOSE) if (error != NULL && (sc->tulip_flags & TULIP_NOMESSAGES) == 0) { device_printf(sc->tulip_dev, "receive: %6D: %s\n", mtod(ms, u_char *) + 6, ":", error); sc->tulip_flags |= TULIP_NOMESSAGES; } #endif } } #if defined(TULIP_DEBUG) cnt++; #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if (++eop == ri->ri_last) eop = ri->ri_first; ri->ri_nextin = eop; queue_mbuf: /* * We have received a good packet that needs to be passed up the * stack. */ if (accept) { struct mbuf *m0; KASSERT(ms != NULL, ("no packet to accept")); #ifndef __NO_STRICT_ALIGNMENT /* * Copy the data into a new mbuf that is properly aligned. If * we fail to allocate a new mbuf, then drop the packet. We will * reuse the same rx buffer ('ms') below for another packet * regardless. */ m0 = m_devget(mtod(ms, caddr_t), total_len, ETHER_ALIGN, ifp, NULL); if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto skip_input; } #else /* * Update the header for the mbuf referencing this receive * buffer and pass it up the stack. Allocate a new mbuf cluster * to replace the one we just passed up the stack. * * Note that if this packet crossed multiple descriptors * we don't even try to reallocate all the mbufs here. * Instead we rely on the test at the beginning of * the loop to refill for the extra consumed mbufs. */ ms->m_pkthdr.len = total_len; ms->m_pkthdr.rcvif = ifp; m0 = ms; ms = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); #endif TULIP_UNLOCK(sc); CTR1(KTR_TULIP, "tulip_rx_intr: passing %p to upper layer", m0); (*ifp->if_input)(ifp, m0); TULIP_LOCK(sc); } else if (ms == NULL) /* * If we are priming the TULIP with mbufs, then allocate * a new cluster for the next descriptor. */ ms = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT skip_input: #endif if (ms == NULL) { /* * Couldn't allocate a new buffer. Don't bother * trying to replenish the receive queue. */ fillok = 0; sc->tulip_flags |= TULIP_RXBUFSLOW; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxlowbufs++; #endif TULIP_PERFEND(rxget); continue; } /* * Now give the buffer(s) to the TULIP and save in our * receive queue. 
*/ do { tulip_descinfo_t * const nextout = ri->ri_nextout; M_ASSERTPKTHDR(ms); KASSERT(ms->m_data == ms->m_ext.ext_buf, ("rx mbuf data doesn't point to cluster")); ms->m_len = ms->m_pkthdr.len = TULIP_RX_BUFLEN; error = bus_dmamap_load_mbuf(ri->ri_data_tag, *nextout->di_map, ms, tulip_dma_map_rxbuf, nextout->di_desc, BUS_DMA_NOWAIT); if (error) { device_printf(sc->tulip_dev, "unable to load rx map, error = %d\n", error); panic("tulip_rx_intr"); /* XXX */ } nextout->di_desc->d_status = TULIP_DSTS_OWNER; KASSERT(nextout->di_mbuf == NULL, ("clobbering earlier rx mbuf")); nextout->di_mbuf = ms; CTR2(KTR_TULIP, "tulip_rx_intr: enqueued mbuf %p to descriptor %td", ms, nextout - ri->ri_first); TULIP_RXDESC_POSTSYNC(ri); if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; ri->ri_free--; me = ms->m_next; ms->m_next = NULL; } while ((ms = me) != NULL); if ((ri->ri_max - ri->ri_free) >= TULIP_RXQ_TARGET) sc->tulip_flags &= ~TULIP_RXBUFSLOW; TULIP_PERFEND(rxget); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxintr); } static int tulip_tx_intr(tulip_softc_t * const sc) { TULIP_PERFSTART(txintr) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; struct mbuf *m; int xmits = 0; int descs = 0; CTR0(KTR_TULIP, "tulip_tx_intr: start"); TULIP_LOCK_ASSERT(sc); while (ri->ri_free < ri->ri_max) { u_int32_t d_flag; TULIP_TXDESC_POSTSYNC(ri); if (DESC_STATUS(ri->ri_nextin) & TULIP_DSTS_OWNER) break; ri->ri_free++; descs++; d_flag = DESC_FLAG(ri->ri_nextin); if (d_flag & TULIP_DFLAG_TxLASTSEG) { if (d_flag & TULIP_DFLAG_TxSETUPPKT) { CTR2(KTR_TULIP, "tulip_tx_intr: setup packet from descriptor %td: %08x", ri->ri_nextin - ri->ri_first, DESC_STATUS(ri->ri_nextin)); /* * We've just finished processing a setup packet. * Mark that we finished it. If there's not * another pending, start up the TULIP receiver. * Make sure we ack the RXSTOPPED so we won't get * an abnormal interrupt indication. */ bus_dmamap_sync(sc->tulip_setup_tag, sc->tulip_setup_map, BUS_DMASYNC_POSTWRITE); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_HASHONLY); if (DESC_FLAG(ri->ri_nextin) & TULIP_DFLAG_TxINVRSFILT) sc->tulip_flags |= TULIP_HASHONLY; if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; CTR2(KTR_TULIP, "tulip_tx_intr: intr mask %08x cmdmode %08x", sc->tulip_intrmask, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } else { const u_int32_t d_status = DESC_STATUS(ri->ri_nextin); m = tulip_dequeue_mbuf(ri, ri->ri_nextin, SYNC_TX); CTR2(KTR_TULIP, "tulip_tx_intr: data packet %p from descriptor %td", m, ri->ri_nextin - ri->ri_first); if (m != NULL) { m_freem(m); #if defined(TULIP_DEBUG) } else { device_printf(sc->tulip_dev, "tx_intr: failed to dequeue mbuf?!?\n"); #endif } if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { tulip_mediapoll_event_t event = TULIP_MEDIAPOLL_TXPROBE_OK; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxEXCCOLL)) { #if defined(TULIP_DEBUG) if (d_status & TULIP_DSTS_TxNOCARR) sc->tulip_dbg.dbg_txprobe_nocarr++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dbg.dbg_txprobe_exccoll++; #endif event = TULIP_MEDIAPOLL_TXPROBE_FAILED; } (*sc->tulip_boardsw->bd_media_poll)(sc, event); /* * Escape from the loop before media poll has reset the TULIP!
*/ break; } else { xmits++; if (d_status & TULIP_DSTS_ERRSUM) { CTR1(KTR_TULIP, "tulip_tx_intr: output error: %08x", d_status); if_inc_counter(sc->tulip_ifp, IFCOUNTER_OERRORS, 1); if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dot3stats.dot3StatsExcessiveCollisions++; if (d_status & TULIP_DSTS_TxLATECOLL) sc->tulip_dot3stats.dot3StatsLateCollisions++; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxCARRLOSS)) sc->tulip_dot3stats.dot3StatsCarrierSenseErrors++; if (d_status & (TULIP_DSTS_TxUNDERFLOW|TULIP_DSTS_TxBABBLE)) sc->tulip_dot3stats.dot3StatsInternalMacTransmitErrors++; if (d_status & TULIP_DSTS_TxUNDERFLOW) sc->tulip_dot3stats.dot3StatsInternalTransmitUnderflows++; if (d_status & TULIP_DSTS_TxBABBLE) sc->tulip_dot3stats.dot3StatsInternalTransmitBabbles++; } else { u_int32_t collisions = (d_status & TULIP_DSTS_TxCOLLMASK) >> TULIP_DSTS_V_TxCOLLCNT; CTR2(KTR_TULIP, "tulip_tx_intr: output ok, collisions %d, status %08x", collisions, d_status); if_inc_counter(sc->tulip_ifp, IFCOUNTER_COLLISIONS, collisions); if (collisions == 1) sc->tulip_dot3stats.dot3StatsSingleCollisionFrames++; else if (collisions > 1) sc->tulip_dot3stats.dot3StatsMultipleCollisionFrames++; else if (d_status & TULIP_DSTS_TxDEFERRED) sc->tulip_dot3stats.dot3StatsDeferredTransmissions++; /* * SQE is only valid for 10baseT/BNC/AUI when not * running in full-duplex. In order to speed up the * test, the corresponding bit in tulip_flags needs to * be set as well to get us to count SQE Test Errors. */ if (d_status & TULIP_DSTS_TxNOHRTBT & sc->tulip_flags) sc->tulip_dot3stats.dot3StatsSQETestErrors++; } } } if (++ri->ri_nextin == ri->ri_last) ri->ri_nextin = ri->ri_first; if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } /* * If nothing left to transmit, disable the timer. * Else if progress, reset the timer back to 2 ticks.
*/ if (ri->ri_free == ri->ri_max || (sc->tulip_flags & TULIP_TXPROBE_ACTIVE)) sc->tulip_txtimer = 0; else if (xmits > 0) sc->tulip_txtimer = TULIP_TXTIMER; if_inc_counter(sc->tulip_ifp, IFCOUNTER_OPACKETS, xmits); TULIP_PERFEND(txintr); return descs; } static void tulip_print_abnormal_interrupt(tulip_softc_t * const sc, u_int32_t csr) { const char * const *msgp = tulip_status_bits; const char *sep; u_int32_t mask; const char thrsh[] = "72|128\0\0\0" "96|256\0\0\0" "128|512\0\0" "160|1024"; TULIP_LOCK_ASSERT(sc); csr &= (1 << (sizeof(tulip_status_bits)/sizeof(tulip_status_bits[0]))) - 1; device_printf(sc->tulip_dev, "abnormal interrupt:"); for (sep = " ", mask = 1; mask <= csr; mask <<= 1, msgp++) { if ((csr & mask) && *msgp != NULL) { printf("%s%s", sep, *msgp); if (mask == TULIP_STS_TXUNDERFLOW && (sc->tulip_flags & TULIP_NEWTXTHRESH)) { sc->tulip_flags &= ~TULIP_NEWTXTHRESH; if (sc->tulip_cmdmode & TULIP_CMD_STOREFWD) { printf(" (switching to store-and-forward mode)"); } else { printf(" (raising TX threshold to %s)", &thrsh[9 * ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) >> 14)]); } } sep = ", "; } } printf("\n"); } static void tulip_intr_handler(tulip_softc_t * const sc) { TULIP_PERFSTART(intr) u_int32_t csr; CTR0(KTR_TULIP, "tulip_intr_handler invoked"); TULIP_LOCK_ASSERT(sc); while ((csr = TULIP_CSR_READ(sc, csr_status)) & sc->tulip_intrmask) { TULIP_CSR_WRITE(sc, csr_status, csr); if (csr & TULIP_STS_SYSERROR) { sc->tulip_last_system_error = (csr & TULIP_STS_ERRORMASK) >> TULIP_STS_ERR_SHIFT; if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_flags |= TULIP_SYSTEMERROR; } else { device_printf(sc->tulip_dev, "system error: %s\n", tulip_system_errors[sc->tulip_last_system_error]); } sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_system_errors++; break; } if (csr & (TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL) & sc->tulip_intrmask) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_intrs++; #endif if (sc->tulip_boardsw->bd_media_poll != NULL) { (*sc->tulip_boardsw->bd_media_poll)(sc, csr & TULIP_STS_LINKFAIL ? TULIP_MEDIAPOLL_LINKFAIL : TULIP_MEDIAPOLL_LINKPASS); csr &= ~TULIP_STS_ABNRMLINTR; } tulip_media_print(sc); } if (csr & (TULIP_STS_RXINTR|TULIP_STS_RXNOBUF)) { u_int32_t misses = TULIP_CSR_READ(sc, csr_missed_frames); if (csr & TULIP_STS_RXNOBUF) sc->tulip_dot3stats.dot3StatsMissedFrames += misses & 0xFFFF; /* * Pass 2.[012] of the 21140A-A[CDE] may hang and/or corrupt data * on receive overflows. */ if ((misses & 0x0FFE0000) && (sc->tulip_features & TULIP_HAVE_RXBADOVRFLW)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; /* * Stop the receiver process and spin until it's stopped. * Tell rx_intr to drop the packets it dequeues. */ TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN); while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0) ; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); sc->tulip_flags |= TULIP_RXIGNORE; } tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_RXIGNORE) { /* * Restart the receiver. 
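 *
 * (Recap of the stop/drain/restart sequence around this point, as it
 * appears above and below this comment:
 *
 *	TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN);
 *	while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0)
 *		;				spin until RX idles
 *	TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED);
 *	sc->tulip_flags |= TULIP_RXIGNORE;	rx_intr drops packets
 *	tulip_rx_intr(sc);
 *	sc->tulip_flags &= ~TULIP_RXIGNORE;
 *	TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode);
 *
 * keeping the 21140A from wedging on the overflow erratum noted
 * above.)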
*/ sc->tulip_flags &= ~TULIP_RXIGNORE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } if (csr & TULIP_STS_ABNRMLINTR) { u_int32_t tmp = csr & sc->tulip_intrmask & ~(TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR); if (csr & TULIP_STS_TXUNDERFLOW) { if ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) != TULIP_CMD_THRSHLD160) { sc->tulip_cmdmode += TULIP_CMD_THRSHLD96; sc->tulip_flags |= TULIP_NEWTXTHRESH; } else if (sc->tulip_features & TULIP_HAVE_STOREFWD) { sc->tulip_cmdmode |= TULIP_CMD_STOREFWD; sc->tulip_flags |= TULIP_NEWTXTHRESH; } } if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_statusbits |= tmp; } else { tulip_print_abnormal_interrupt(sc, tmp); sc->tulip_flags |= TULIP_NOMESSAGES; } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_TXPROBE_ACTIVE|TULIP_DOINGSETUP|TULIP_PROMISC)) { tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) tulip_start_locked(sc); } } if (sc->tulip_flags & TULIP_NEEDRESET) { tulip_reset(sc); tulip_init_locked(sc); } TULIP_PERFEND(intr); } static void tulip_intr_shared(void *arg) { tulip_softc_t * sc = arg; for (; sc != NULL; sc = sc->tulip_slaves) { TULIP_LOCK(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif tulip_intr_handler(sc); TULIP_UNLOCK(sc); } } static void tulip_intr_normal(void *arg) { tulip_softc_t * sc = (tulip_softc_t *) arg; TULIP_LOCK(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif tulip_intr_handler(sc); TULIP_UNLOCK(sc); } static struct mbuf * tulip_txput(tulip_softc_t * const sc, struct mbuf *m) { TULIP_PERFSTART(txput) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_descinfo_t *eop, *nextout; int segcnt, free; u_int32_t d_status; bus_dma_segment_t segs[TULIP_MAX_TXSEG]; bus_dmamap_t *map; int error, nsegs; struct mbuf *m0; TULIP_LOCK_ASSERT(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { device_printf(sc->tulip_dev, "txput%s: tx not running\n", (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) ? "(probe)" : ""); sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_dbg.dbg_txput_finishes[0]++; goto finish; } #endif /* * Now we try to fill in our transmit descriptors. This is * a bit reminiscent of going on the Ark two by two * since each descriptor for the TULIP can describe * two buffers. So we advance through packet filling - * each of the two entries at a time to to fill each + * each of the two entries at a time to fill each * descriptor. Clear the first and last segment bits * in each descriptor (actually just clear everything * but the end-of-ring or chain bits) to make sure * we don't get messed up by previously sent packets. * * We may fail to put the entire packet on the ring if * there is either not enough ring entries free or if the * packet has more than MAX_TXSEG segments. In the former * case we will just wait for the ring to empty. In the * latter case we have to recopy. */ #if defined(KTR) && KTR_TULIP segcnt = 1; m0 = m; while (m0->m_next != NULL) { segcnt++; m0 = m0->m_next; } CTR2(KTR_TULIP, "tulip_txput: sending packet %p (%d chunks)", m, segcnt); #endif d_status = 0; eop = nextout = ri->ri_nextout; segcnt = 0; free = ri->ri_free; /* * Reclaim some tx descriptors if we are out since we need at least one * free descriptor so that we have a dma_map to load the mbuf. 
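 *
 * (Ring-accounting sketch for what follows: each descriptor carries up
 * to two DMA segments, so a packet mapped into nsegs segments consumes
 * (nsegs + 1) / 2 descriptors, and the space test further down runs:
 *
 *	if ((free -= (nsegs + 1) / 2) <= 0
 *	    && (free += tulip_tx_intr(sc)) <= 0)
 *		set TULIP_WANTTXSTART, unload the map and bail
 *
 * with tulip_tx_intr() called to reclaim just-completed descriptors
 * before giving up.)
 */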
*/ if (free == 0) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_no_txmaps++; #endif free += tulip_tx_intr(sc); } if (free == 0) { sc->tulip_flags |= TULIP_WANTTXSTART; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[1]++; #endif goto finish; } error = bus_dmamap_load_mbuf_sg(ri->ri_data_tag, *eop->di_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { if (error == EFBIG) { /* * The packet exceeds the number of transmit buffer * entries that we can use for one packet, so we have * to recopy it into one mbuf and then try again. If * we can't recopy it, try again later. */ m0 = m_defrag(m, M_NOWAIT); if (m0 == NULL) { sc->tulip_flags |= TULIP_WANTTXSTART; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[2]++; #endif goto finish; } m = m0; error = bus_dmamap_load_mbuf_sg(ri->ri_data_tag, *eop->di_map, m, segs, &nsegs, BUS_DMA_NOWAIT); } if (error != 0) { device_printf(sc->tulip_dev, "unable to load tx map, error = %d\n", error); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[3]++; #endif goto finish; } } CTR1(KTR_TULIP, "tulip_txput: nsegs %d", nsegs); /* * Each descriptor allows for up to 2 fragments since we don't use * the descriptor chaining mode in this driver. */ if ((free -= (nsegs + 1) / 2) <= 0 /* * See if there's any unclaimed space in the transmit ring. */ && (free += tulip_tx_intr(sc)) <= 0) { /* * There's no more room but since nothing * has been committed at this point, just * show output is active, put back the * mbuf and return. */ sc->tulip_flags |= TULIP_WANTTXSTART; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[4]++; #endif bus_dmamap_unload(ri->ri_data_tag, *eop->di_map); goto finish; } for (; nsegs - segcnt > 1; segcnt += 2) { eop = nextout; eop->di_desc->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->di_desc->d_status = d_status; eop->di_desc->d_addr1 = segs[segcnt].ds_addr & 0xffffffff; eop->di_desc->d_length1 = segs[segcnt].ds_len; eop->di_desc->d_addr2 = segs[segcnt+1].ds_addr & 0xffffffff; eop->di_desc->d_length2 = segs[segcnt+1].ds_len; d_status = TULIP_DSTS_OWNER; if (++nextout == ri->ri_last) nextout = ri->ri_first; } if (segcnt < nsegs) { eop = nextout; eop->di_desc->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->di_desc->d_status = d_status; eop->di_desc->d_addr1 = segs[segcnt].ds_addr & 0xffffffff; eop->di_desc->d_length1 = segs[segcnt].ds_len; eop->di_desc->d_addr2 = 0; eop->di_desc->d_length2 = 0; if (++nextout == ri->ri_last) nextout = ri->ri_first; } /* * tulip_tx_intr() harvests the mbuf from the last descriptor in the * frame. We just used the dmamap in the first descriptor for the * load operation however. Thus, to let the tulip_dequeue_mbuf() call * in tulip_tx_intr() unload the correct dmamap, we swap the dmamap * pointers in the two descriptors if this is a multiple-descriptor * packet. */ if (eop != ri->ri_nextout) { map = eop->di_map; eop->di_map = ri->ri_nextout->di_map; ri->ri_nextout->di_map = map; } /* * bounce a copy to the bpf listener, if any. */ if (!(sc->tulip_flags & TULIP_DEVICEPROBE)) BPF_MTAP(sc->tulip_ifp, m); /* * The descriptors have been filled in. Now get ready * to transmit. 
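 *
 * (The commit order below matters.  In outline, with abbreviated flag
 * names:
 *
 *	nextout->di_desc->d_status = 0;		fence off the ring
 *	eop->di_desc->d_flag |= LASTSEG|WANTINTR;
 *	ri->ri_nextout->di_desc->d_flag |= FIRSTSEG;
 *	TULIP_TXDESC_PRESYNC(ri);
 *	ri->ri_nextout->di_desc->d_status = TULIP_DSTS_OWNER;
 *	TULIP_TXDESC_PRESYNC(ri);
 *
 * Until that final OWNER store is flushed, everything can still be
 * backed out.)
 */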
CTR3(KTR_TULIP, "tulip_txput: enqueued mbuf %p to descriptors %td - %td", m, ri->ri_nextout - ri->ri_first, eop - ri->ri_first); KASSERT(eop->di_mbuf == NULL, ("clobbering earlier tx mbuf")); eop->di_mbuf = m; TULIP_TXMAP_PRESYNC(ri, ri->ri_nextout); m = NULL; /* * Make sure the next descriptor after this packet is owned * by us since it may have been set up above if we ran out * of room in the ring. */ nextout->di_desc->d_status = 0; TULIP_TXDESC_PRESYNC(ri); /* * Mark the last and first segments, indicate we want a transmit * complete interrupt, and tell it to transmit! */ eop->di_desc->d_flag |= TULIP_DFLAG_TxLASTSEG|TULIP_DFLAG_TxWANTINTR; /* * Note that ri->ri_nextout is still the start of the packet * and until we set the OWNER bit, we can still back out of * everything we have done. */ ri->ri_nextout->di_desc->d_flag |= TULIP_DFLAG_TxFIRSTSEG; TULIP_TXDESC_PRESYNC(ri); ri->ri_nextout->di_desc->d_status = TULIP_DSTS_OWNER; TULIP_TXDESC_PRESYNC(ri); /* * This advances the ring for us. */ ri->ri_nextout = nextout; ri->ri_free = free; TULIP_PERFEND(txput); if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { TULIP_CSR_WRITE(sc, csr_txpoll, 1); sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; TULIP_PERFEND(txput); return NULL; } /* * switch back to the single queueing ifstart. */ sc->tulip_flags &= ~TULIP_WANTTXSTART; if (sc->tulip_txtimer == 0) sc->tulip_txtimer = TULIP_TXTIMER; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[5]++; #endif /* * If we want a txstart, there must not be enough space in the * transmit ring. So we want to enable transmit done interrupts * so we can immediately reclaim some space. When the transmit * interrupt is posted, the interrupt handler will call tx_intr * to reclaim space and then txstart (since WANTTXSTART is set). * txstart will move the packet into the transmit ring and clear * WANTTXSTART thereby causing TXINTR to be cleared. */ finish: #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[6]++; #endif if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } else if ((sc->tulip_flags & TULIP_PROMISC) == 0) { if (sc->tulip_intrmask & TULIP_STS_TXINTR) { sc->tulip_intrmask &= ~TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } TULIP_CSR_WRITE(sc, csr_txpoll, 1); TULIP_PERFEND(txput); return m; } static void tulip_txput_setup(tulip_softc_t * const sc) { tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *nextout; TULIP_LOCK_ASSERT(sc); /* * We will transmit, at most, one setup packet per call to ifstart. */ #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { device_printf(sc->tulip_dev, "txput_setup: tx not running\n"); sc->tulip_flags |= TULIP_WANTTXSTART; return; } #endif /* * Try to reclaim some free descriptors. */ if (ri->ri_free < 2) tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_DOINGSETUP) || ri->ri_free == 1) { sc->tulip_flags |= TULIP_WANTTXSTART; return; } bcopy(sc->tulip_setupdata, sc->tulip_setupbuf, sizeof(sc->tulip_setupdata)); /* * Clear WANTSETUP and set DOINGSETUP. Since we know that WANTSETUP is * set and DOINGSETUP is clear, doing an XOR of the two will DTRT.
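 *
 * (To spell the XOR out: with WANTSETUP known to be 1 and DOINGSETUP
 * known to be 0 on entry,
 *
 *	sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP;
 *
 * clears the former and sets the latter in a single operation; it is
 * only correct because both preconditions hold at this point.)
 */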
*/
    sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP;
    ri->ri_free--;
    nextout = ri->ri_nextout->di_desc;
    nextout->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN;
    nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG|TULIP_DFLAG_TxLASTSEG
	|TULIP_DFLAG_TxSETUPPKT|TULIP_DFLAG_TxWANTINTR;
    if (sc->tulip_flags & TULIP_WANTHASHPERFECT)
	nextout->d_flag |= TULIP_DFLAG_TxHASHFILT;
    else if (sc->tulip_flags & TULIP_WANTHASHONLY)
	nextout->d_flag |= TULIP_DFLAG_TxHASHFILT|TULIP_DFLAG_TxINVRSFILT;

    nextout->d_length2 = 0;
    nextout->d_addr2 = 0;
    nextout->d_length1 = sizeof(sc->tulip_setupdata);
    nextout->d_addr1 = sc->tulip_setup_dma_addr & 0xffffffff;
    bus_dmamap_sync(sc->tulip_setup_tag, sc->tulip_setup_map,
	BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
    TULIP_TXDESC_PRESYNC(ri);
    CTR1(KTR_TULIP, "tulip_txput_setup: using descriptor %td",
	ri->ri_nextout - ri->ri_first);

    /*
     * Advance the ring for the next transmit packet.
     */
    if (++ri->ri_nextout == ri->ri_last)
	ri->ri_nextout = ri->ri_first;

    /*
     * Make sure the next descriptor is owned by us since it
     * may have been set up above if we ran out of room in the
     * ring.
     */
    ri->ri_nextout->di_desc->d_status = 0;
    TULIP_TXDESC_PRESYNC(ri);
    nextout->d_status = TULIP_DSTS_OWNER;
    /*
     * Flush the ownership of the current descriptor.
     */
    TULIP_TXDESC_PRESYNC(ri);
    TULIP_CSR_WRITE(sc, csr_txpoll, 1);
    if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) {
	sc->tulip_intrmask |= TULIP_STS_TXINTR;
	TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask);
    }
}

static int
tulip_ifioctl(struct ifnet * ifp, u_long cmd, caddr_t data)
{
    TULIP_PERFSTART(ifioctl)
    tulip_softc_t * const sc = (tulip_softc_t *)ifp->if_softc;
    struct ifreq *ifr = (struct ifreq *) data;
    int error = 0;

    switch (cmd) {
	case SIOCSIFFLAGS: {
	    TULIP_LOCK(sc);
	    tulip_init_locked(sc);
	    TULIP_UNLOCK(sc);
	    break;
	}

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA: {
	    error = ifmedia_ioctl(ifp, ifr, &sc->tulip_ifmedia, cmd);
	    break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI: {
	    /*
	     * Update multicast listeners
	     */
	    TULIP_LOCK(sc);
	    tulip_init_locked(sc);
	    TULIP_UNLOCK(sc);
	    error = 0;
	    break;
	}

#ifdef SIOCGADDRROM
	case SIOCGADDRROM: {
	    error = copyout(sc->tulip_rombuf, ifr->ifr_data,
		sizeof(sc->tulip_rombuf));
	    break;
	}
#endif
#ifdef SIOCGCHIPID
	case SIOCGCHIPID: {
	    ifr->ifr_metric = (int) sc->tulip_chipid;
	    break;
	}
#endif
	default: {
	    error = ether_ioctl(ifp, cmd, data);
	    break;
	}
    }

    TULIP_PERFEND(ifioctl);
    return error;
}

static void
tulip_start(struct ifnet * const ifp)
{
    TULIP_PERFSTART(ifstart)
    tulip_softc_t * const sc = (tulip_softc_t *)ifp->if_softc;

    TULIP_LOCK(sc);
    tulip_start_locked(sc);
    TULIP_UNLOCK(sc);
    TULIP_PERFEND(ifstart);
}

static void
tulip_start_locked(tulip_softc_t * const sc)
{
    struct mbuf *m;

    TULIP_LOCK_ASSERT(sc);
    CTR0(KTR_TULIP, "tulip_start_locked invoked");
    if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) ==
	    TULIP_WANTSETUP)
	tulip_txput_setup(sc);
    CTR1(KTR_TULIP, "tulip_start_locked: %d tx packets pending",
	sc->tulip_ifp->if_snd.ifq_len);
    while (!IFQ_DRV_IS_EMPTY(&sc->tulip_ifp->if_snd)) {
	IFQ_DRV_DEQUEUE(&sc->tulip_ifp->if_snd, m);
	if (m == NULL)
	    break;
	if ((m = tulip_txput(sc, m)) != NULL) {
	    IFQ_DRV_PREPEND(&sc->tulip_ifp->if_snd, m);
	    break;
	}
    }
}

static void
tulip_watchdog(void *arg)
{
    TULIP_PERFSTART(stat)
    tulip_softc_t *sc = arg;
#if defined(TULIP_DEBUG)
    u_int32_t rxintrs;
#endif

    TULIP_LOCK_ASSERT(sc);
    callout_reset(&sc->tulip_stat_timer, hz, tulip_watchdog, sc);
#if defined(TULIP_DEBUG)
    rxintrs = sc->tulip_dbg.dbg_rxintrs - sc->tulip_dbg.dbg_last_rxintrs;
    if (rxintrs > sc->tulip_dbg.dbg_high_rxintrs_hz)
	sc->tulip_dbg.dbg_high_rxintrs_hz = rxintrs;
    sc->tulip_dbg.dbg_last_rxintrs = sc->tulip_dbg.dbg_rxintrs;
#endif /* TULIP_DEBUG */

    /*
     * These should be rare so do a bulk test up front so we can just skip
     * them if needed.
     */
    if (sc->tulip_flags & (TULIP_SYSTEMERROR|TULIP_RXBUFSLOW|TULIP_NOMESSAGES)) {
	/*
	 * If the number of receive buffers is low, try to refill.
	 */
	if (sc->tulip_flags & TULIP_RXBUFSLOW)
	    tulip_rx_intr(sc);

	if (sc->tulip_flags & TULIP_SYSTEMERROR) {
	    if_printf(sc->tulip_ifp, "%d system errors: last was %s\n",
		sc->tulip_system_errors,
		tulip_system_errors[sc->tulip_last_system_error]);
	}
	if (sc->tulip_statusbits) {
	    tulip_print_abnormal_interrupt(sc, sc->tulip_statusbits);
	    sc->tulip_statusbits = 0;
	}

	sc->tulip_flags &= ~(TULIP_NOMESSAGES|TULIP_SYSTEMERROR);
    }

    if (sc->tulip_txtimer)
	tulip_tx_intr(sc);
    if (sc->tulip_txtimer && --sc->tulip_txtimer == 0) {
	if_printf(sc->tulip_ifp, "transmission timeout\n");
	if (TULIP_DO_AUTOSENSE(sc)) {
	    sc->tulip_media = TULIP_MEDIA_UNKNOWN;
	    sc->tulip_probe_state = TULIP_PROBE_INACTIVE;
	    sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP);
	}
	tulip_reset(sc);
	tulip_init_locked(sc);
    }

    TULIP_PERFEND(stat);
    TULIP_PERFMERGE(sc, perf_intr_cycles);
    TULIP_PERFMERGE(sc, perf_ifstart_cycles);
    TULIP_PERFMERGE(sc, perf_ifioctl_cycles);
    TULIP_PERFMERGE(sc, perf_stat_cycles);
    TULIP_PERFMERGE(sc, perf_timeout_cycles);
    TULIP_PERFMERGE(sc, perf_ifstart_one_cycles);
    TULIP_PERFMERGE(sc, perf_txput_cycles);
    TULIP_PERFMERGE(sc, perf_txintr_cycles);
    TULIP_PERFMERGE(sc, perf_rxintr_cycles);
    TULIP_PERFMERGE(sc, perf_rxget_cycles);
    TULIP_PERFMERGE(sc, perf_intr);
    TULIP_PERFMERGE(sc, perf_ifstart);
    TULIP_PERFMERGE(sc, perf_ifioctl);
    TULIP_PERFMERGE(sc, perf_stat);
    TULIP_PERFMERGE(sc, perf_timeout);
    TULIP_PERFMERGE(sc, perf_ifstart_one);
    TULIP_PERFMERGE(sc, perf_txput);
    TULIP_PERFMERGE(sc, perf_txintr);
    TULIP_PERFMERGE(sc, perf_rxintr);
    TULIP_PERFMERGE(sc, perf_rxget);
}

static void
tulip_attach(tulip_softc_t * const sc)
{
    struct ifnet *ifp;

    ifp = sc->tulip_ifp = if_alloc(IFT_ETHER);

    /* XXX: driver name/unit should be set some other way */
    if_initname(ifp, "de", sc->tulip_unit);
    ifp->if_softc = sc;
    ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST;
    ifp->if_ioctl = tulip_ifioctl;
    ifp->if_start = tulip_start;
    ifp->if_init = tulip_init;
    IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
    ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
    IFQ_SET_READY(&ifp->if_snd);

    device_printf(sc->tulip_dev, "%s%s pass %d.%d%s\n",
	sc->tulip_boardid,
	tulip_chipdescs[sc->tulip_chipid],
	(sc->tulip_revinfo & 0xF0) >> 4,
	sc->tulip_revinfo & 0x0F,
	(sc->tulip_features & (TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM))
	    == TULIP_HAVE_ISVSROM ? " (invalid EESPROM checksum)" : "");

    TULIP_LOCK(sc);
    (*sc->tulip_boardsw->bd_media_probe)(sc);
    ifmedia_init(&sc->tulip_ifmedia, 0,
	tulip_ifmedia_change, tulip_ifmedia_status);
    tulip_ifmedia_add(sc);
    tulip_reset(sc);
    TULIP_UNLOCK(sc);

    ether_ifattach(sc->tulip_ifp, sc->tulip_enaddr);

    TULIP_LOCK(sc);
    sc->tulip_flags &= ~TULIP_DEVICEPROBE;
    TULIP_UNLOCK(sc);
}

/* Release memory for a single descriptor ring. */
static void
tulip_busdma_freering(tulip_ringinfo_t *ri)
{
    int i;

    /* Release the DMA maps and tag for data buffers. */
    if (ri->ri_data_maps != NULL) {
	for (i = 0; i < ri->ri_max; i++) {
	    if (ri->ri_data_maps[i] != NULL) {
		bus_dmamap_destroy(ri->ri_data_tag, ri->ri_data_maps[i]);
		ri->ri_data_maps[i] = NULL;
	    }
	}
	free(ri->ri_data_maps, M_DEVBUF);
	ri->ri_data_maps = NULL;
    }
    if (ri->ri_data_tag != NULL) {
	bus_dma_tag_destroy(ri->ri_data_tag);
	ri->ri_data_tag = NULL;
    }

    /* Release the DMA memory and tag for the ring descriptors.
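     * Teardown mirrors allocation in reverse order: unload the ring
     * map, free the descriptor memory, then destroy the tag.  Each
     * step is guarded by a NULL (or zero) check so a partially
     * constructed ring can be torn down safely after an allocation
     * failure.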
*/ if (ri->ri_dma_addr != 0) { bus_dmamap_unload(ri->ri_ring_tag, ri->ri_ring_map); ri->ri_dma_addr = 0; } if (ri->ri_descs != NULL) { bus_dmamem_free(ri->ri_ring_tag, ri->ri_descs, ri->ri_ring_map); ri->ri_descs = NULL; } if (ri->ri_ring_tag != NULL) { bus_dma_tag_destroy(ri->ri_ring_tag); ri->ri_ring_tag = NULL; } } /* Allocate memory for a single descriptor ring. */ static int tulip_busdma_allocring(device_t dev, tulip_softc_t * const sc, size_t count, bus_size_t align, int nsegs, tulip_ringinfo_t *ri, const char *name) { size_t size; int error, i; /* First, setup a tag. */ ri->ri_max = count; size = count * sizeof(tulip_desc_t); error = bus_dma_tag_create(bus_get_dma_tag(dev), 32, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &ri->ri_ring_tag); if (error) { device_printf(dev, "failed to allocate %s descriptor ring dma tag\n", name); return (error); } /* Next, allocate memory for the descriptors. */ error = bus_dmamem_alloc(ri->ri_ring_tag, (void **)&ri->ri_descs, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ri->ri_ring_map); if (error) { device_printf(dev, "failed to allocate memory for %s descriptor ring\n", name); return (error); } /* Map the descriptors. */ error = bus_dmamap_load(ri->ri_ring_tag, ri->ri_ring_map, ri->ri_descs, size, tulip_dma_map_addr, &ri->ri_dma_addr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to get dma address for %s descriptor ring\n", name); return (error); } /* Allocate a tag for the data buffers. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), align, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * nsegs, nsegs, MCLBYTES, 0, NULL, NULL, &ri->ri_data_tag); if (error) { device_printf(dev, "failed to allocate %s buffer dma tag\n", name); return (error); } /* Allocate maps for the data buffers. */ ri->ri_data_maps = malloc(sizeof(bus_dmamap_t) * count, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < count; i++) { error = bus_dmamap_create(ri->ri_data_tag, 0, &ri->ri_data_maps[i]); if (error) { device_printf(dev, "failed to create map for %s buffer %d\n", name, i); return (error); } } return (0); } /* Release busdma maps, tags, and memory. */ static void tulip_busdma_cleanup(tulip_softc_t * const sc) { /* Release resources for the setup descriptor. */ if (sc->tulip_setup_dma_addr != 0) { bus_dmamap_unload(sc->tulip_setup_tag, sc->tulip_setup_map); sc->tulip_setup_dma_addr = 0; } if (sc->tulip_setupbuf != NULL) { bus_dmamem_free(sc->tulip_setup_tag, sc->tulip_setupbuf, sc->tulip_setup_map); sc->tulip_setupbuf = NULL; } if (sc->tulip_setup_tag != NULL) { bus_dma_tag_destroy(sc->tulip_setup_tag); sc->tulip_setup_tag = NULL; } /* Release the transmit ring. */ tulip_busdma_freering(&sc->tulip_txinfo); /* Release the receive ring. */ tulip_busdma_freering(&sc->tulip_rxinfo); } static int tulip_busdma_init(device_t dev, tulip_softc_t * const sc) { int error; /* * Allocate space and dmamap for transmit ring. */ error = tulip_busdma_allocring(dev, sc, TULIP_TXDESCS, 1, TULIP_MAX_TXSEG, &sc->tulip_txinfo, "transmit"); if (error) return (error); /* * Allocate space and dmamap for receive ring. We tell bus_dma that * we can map MCLBYTES so that it will accept a full MCLBYTES cluster, * but we will only map the first TULIP_RX_BUFLEN bytes. This is not * a waste in practice though as an ethernet frame can easily fit * in TULIP_RX_BUFLEN bytes. 
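 * (MCLBYTES is 2048 bytes on FreeBSD, so accepting a full cluster
 * while mapping only the first TULIP_RX_BUFLEN bytes of it costs
 * nothing; the tag's maxsize merely has to be large enough for
 * bus_dma to accept the cluster.)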
*/ error = tulip_busdma_allocring(dev, sc, TULIP_RXDESCS, 4, 1, &sc->tulip_rxinfo, "receive"); if (error) return (error); /* * Allocate a DMA tag, memory, and map for setup descriptor */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 32, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(sc->tulip_setupdata), 1, sizeof(sc->tulip_setupdata), 0, NULL, NULL, &sc->tulip_setup_tag); if (error) { device_printf(dev, "failed to allocate setup descriptor dma tag\n"); return (error); } error = bus_dmamem_alloc(sc->tulip_setup_tag, (void **)&sc->tulip_setupbuf, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->tulip_setup_map); if (error) { device_printf(dev, "failed to allocate memory for setup descriptor\n"); return (error); } error = bus_dmamap_load(sc->tulip_setup_tag, sc->tulip_setup_map, sc->tulip_setupbuf, sizeof(sc->tulip_setupdata), tulip_dma_map_addr, &sc->tulip_setup_dma_addr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to get dma address for setup descriptor\n"); return (error); } return error; } static void tulip_initcsrs(tulip_softc_t * const sc, tulip_csrptr_t csr_base, size_t csr_size) { sc->tulip_csrs.csr_busmode = csr_base + 0 * csr_size; sc->tulip_csrs.csr_txpoll = csr_base + 1 * csr_size; sc->tulip_csrs.csr_rxpoll = csr_base + 2 * csr_size; sc->tulip_csrs.csr_rxlist = csr_base + 3 * csr_size; sc->tulip_csrs.csr_txlist = csr_base + 4 * csr_size; sc->tulip_csrs.csr_status = csr_base + 5 * csr_size; sc->tulip_csrs.csr_command = csr_base + 6 * csr_size; sc->tulip_csrs.csr_intr = csr_base + 7 * csr_size; sc->tulip_csrs.csr_missed_frames = csr_base + 8 * csr_size; sc->tulip_csrs.csr_9 = csr_base + 9 * csr_size; sc->tulip_csrs.csr_10 = csr_base + 10 * csr_size; sc->tulip_csrs.csr_11 = csr_base + 11 * csr_size; sc->tulip_csrs.csr_12 = csr_base + 12 * csr_size; sc->tulip_csrs.csr_13 = csr_base + 13 * csr_size; sc->tulip_csrs.csr_14 = csr_base + 14 * csr_size; sc->tulip_csrs.csr_15 = csr_base + 15 * csr_size; } static int tulip_initring( device_t dev, tulip_softc_t * const sc, tulip_ringinfo_t * const ri, int ndescs) { int i; ri->ri_descinfo = malloc(sizeof(tulip_descinfo_t) * ndescs, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < ndescs; i++) { ri->ri_descinfo[i].di_desc = &ri->ri_descs[i]; ri->ri_descinfo[i].di_map = &ri->ri_data_maps[i]; } ri->ri_first = ri->ri_descinfo; ri->ri_max = ndescs; ri->ri_last = ri->ri_first + ri->ri_max; bzero(ri->ri_descs, sizeof(tulip_desc_t) * ri->ri_max); ri->ri_last[-1].di_desc->d_flag = TULIP_DFLAG_ENDRING; return (0); } /* * This is the PCI configuration support. 
*/ #define PCI_CBIO PCIR_BAR(0) /* Configuration Base IO Address */ #define PCI_CBMA PCIR_BAR(1) /* Configuration Base Memory Address */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ static int tulip_pci_probe(device_t dev) { const char *name = NULL; if (pci_get_vendor(dev) != DEC_VENDORID) return ENXIO; /* * Some LanMedia WAN cards use the Tulip chip, but they have * their own driver, and we should not recognize them */ if (pci_get_subvendor(dev) == 0x1376) return ENXIO; switch (pci_get_device(dev)) { case CHIPID_21040: name = "Digital 21040 Ethernet"; break; case CHIPID_21041: name = "Digital 21041 Ethernet"; break; case CHIPID_21140: if (pci_get_revid(dev) >= 0x20) name = "Digital 21140A Fast Ethernet"; else name = "Digital 21140 Fast Ethernet"; break; case CHIPID_21142: if (pci_get_revid(dev) >= 0x20) name = "Digital 21143 Fast Ethernet"; else name = "Digital 21142 Fast Ethernet"; break; } if (name) { device_set_desc(dev, name); return BUS_PROBE_LOW_PRIORITY; } return ENXIO; } static int tulip_shutdown(device_t dev) { tulip_softc_t * const sc = device_get_softc(dev); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ return 0; } static int tulip_pci_attach(device_t dev) { tulip_softc_t *sc; int retval, idx; u_int32_t revinfo, cfdainfo; unsigned csroffset = TULIP_PCI_CSROFFSET; unsigned csrsize = TULIP_PCI_CSRSIZE; tulip_csrptr_t csr_base; tulip_chipid_t chipid = TULIP_CHIPID_UNKNOWN; struct resource *res; int rid, unit; unit = device_get_unit(dev); if (unit >= TULIP_MAX_DEVICES) { device_printf(dev, "not configured; limit of %d reached or exceeded\n", TULIP_MAX_DEVICES); return ENXIO; } revinfo = pci_get_revid(dev); cfdainfo = pci_read_config(dev, PCI_CFDA, 4); /* turn busmaster on in case BIOS doesn't set it */ pci_enable_busmaster(dev); if (pci_get_vendor(dev) == DEC_VENDORID) { if (pci_get_device(dev) == CHIPID_21040) chipid = TULIP_21040; else if (pci_get_device(dev) == CHIPID_21041) chipid = TULIP_21041; else if (pci_get_device(dev) == CHIPID_21140) chipid = (revinfo >= 0x20) ? TULIP_21140A : TULIP_21140; else if (pci_get_device(dev) == CHIPID_21142) chipid = (revinfo >= 0x20) ? 
TULIP_21143 : TULIP_21142; } if (chipid == TULIP_CHIPID_UNKNOWN) return ENXIO; if (chipid == TULIP_21040 && revinfo < 0x20) { device_printf(dev, "not configured; 21040 pass 2.0 required (%d.%d found)\n", revinfo >> 4, revinfo & 0x0f); return ENXIO; } else if (chipid == TULIP_21140 && revinfo < 0x11) { device_printf(dev, "not configured; 21140 pass 1.1 required (%d.%d found)\n", revinfo >> 4, revinfo & 0x0f); return ENXIO; } sc = device_get_softc(dev); sc->tulip_dev = dev; sc->tulip_pci_busno = pci_get_bus(dev); sc->tulip_pci_devno = pci_get_slot(dev); sc->tulip_chipid = chipid; sc->tulip_flags |= TULIP_DEVICEPROBE; if (chipid == TULIP_21140 || chipid == TULIP_21140A) sc->tulip_features |= TULIP_HAVE_GPR|TULIP_HAVE_STOREFWD; if (chipid == TULIP_21140A && revinfo <= 0x22) sc->tulip_features |= TULIP_HAVE_RXBADOVRFLW; if (chipid == TULIP_21140) sc->tulip_features |= TULIP_HAVE_BROKEN_HASH; if (chipid != TULIP_21040 && chipid != TULIP_21140) sc->tulip_features |= TULIP_HAVE_POWERMGMT; if (chipid == TULIP_21041 || chipid == TULIP_21142 || chipid == TULIP_21143) { sc->tulip_features |= TULIP_HAVE_DUALSENSE; if (chipid != TULIP_21041 || revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIANWAY; if (chipid != TULIP_21041) sc->tulip_features |= TULIP_HAVE_SIAGP|TULIP_HAVE_RXBADOVRFLW|TULIP_HAVE_STOREFWD; if (chipid != TULIP_21041 && revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIA100; } if (sc->tulip_features & TULIP_HAVE_POWERMGMT && (cfdainfo & (TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE))) { cfdainfo &= ~(TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE); pci_write_config(dev, PCI_CFDA, cfdainfo, 4); DELAY(11*1000); } sc->tulip_unit = unit; sc->tulip_revinfo = revinfo; #if defined(TULIP_IOMAPPED) rid = PCI_CBIO; res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); #else rid = PCI_CBMA; res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); #endif if (!res) return ENXIO; sc->tulip_csrs_bst = rman_get_bustag(res); sc->tulip_csrs_bsh = rman_get_bushandle(res); csr_base = 0; mtx_init(TULIP_MUTEX(sc), MTX_NETWORK_LOCK, device_get_nameunit(dev), MTX_DEF); callout_init_mtx(&sc->tulip_callout, TULIP_MUTEX(sc), 0); callout_init_mtx(&sc->tulip_stat_timer, TULIP_MUTEX(sc), 0); tulips[unit] = sc; tulip_initcsrs(sc, csr_base + csroffset, csrsize); if ((retval = tulip_busdma_init(dev, sc)) != 0) { device_printf(dev, "error initing bus_dma: %d\n", retval); tulip_busdma_cleanup(sc); mtx_destroy(TULIP_MUTEX(sc)); return ENXIO; } retval = tulip_initring(dev, sc, &sc->tulip_rxinfo, TULIP_RXDESCS); if (retval == 0) retval = tulip_initring(dev, sc, &sc->tulip_txinfo, TULIP_TXDESCS); if (retval) { tulip_busdma_cleanup(sc); mtx_destroy(TULIP_MUTEX(sc)); return retval; } /* * Make sure there won't be any interrupts or such... 
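 * Writing TULIP_BUSMODE_SWRESET to the busmode CSR resets the chip and
 * quiesces its DMA engines; the DELAY that follows gives the reset
 * time to settle before the SROM is read for the MAC address.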
*/ TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(100); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ TULIP_LOCK(sc); retval = tulip_read_macaddr(sc); TULIP_UNLOCK(sc); if (retval < 0) { device_printf(dev, "can't read ENET ROM (why=%d) (", retval); for (idx = 0; idx < 32; idx++) printf("%02x", sc->tulip_rombuf[idx]); printf("\n"); device_printf(dev, "%s%s pass %d.%d\n", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); device_printf(dev, "address unknown\n"); } else { void (*intr_rtn)(void *) = tulip_intr_normal; if (sc->tulip_features & TULIP_HAVE_SHAREDINTR) intr_rtn = tulip_intr_shared; tulip_attach(sc); /* Setup interrupt last. */ if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { void *ih; rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL || bus_setup_intr(dev, res, INTR_TYPE_NET | INTR_MPSAFE, NULL, intr_rtn, sc, &ih)) { device_printf(dev, "couldn't map interrupt\n"); tulip_busdma_cleanup(sc); ether_ifdetach(sc->tulip_ifp); if_free(sc->tulip_ifp); mtx_destroy(TULIP_MUTEX(sc)); return ENXIO; } } } return 0; } static device_method_t tulip_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, tulip_pci_probe), DEVMETHOD(device_attach, tulip_pci_attach), DEVMETHOD(device_shutdown, tulip_shutdown), { 0, 0 } }; static driver_t tulip_pci_driver = { "de", tulip_pci_methods, sizeof(tulip_softc_t), }; static devclass_t tulip_devclass; DRIVER_MODULE(de, pci, tulip_pci_driver, tulip_devclass, 0, 0); #ifdef DDB void tulip_dumpring(int unit, int ring); void tulip_dumpdesc(int unit, int ring, int desc); void tulip_status(int unit); void tulip_dumpring(int unit, int ring) { tulip_softc_t *sc; tulip_ringinfo_t *ri; tulip_descinfo_t *di; if (unit < 0 || unit >= TULIP_MAX_DEVICES) { db_printf("invalid unit %d\n", unit); return; } sc = tulips[unit]; if (sc == NULL) { db_printf("unit %d not present\n", unit); return; } switch (ring) { case 0: db_printf("receive ring:\n"); ri = &sc->tulip_rxinfo; break; case 1: db_printf("transmit ring:\n"); ri = &sc->tulip_txinfo; break; default: db_printf("invalid ring %d\n", ring); return; } db_printf(" nextin: %td, nextout: %td, max: %d, free: %d\n", ri->ri_nextin - ri->ri_first, ri->ri_nextout - ri->ri_first, ri->ri_max, ri->ri_free); for (di = ri->ri_first; di != ri->ri_last; di++) { if (di->di_mbuf != NULL) db_printf(" descriptor %td: mbuf %p\n", di - ri->ri_first, di->di_mbuf); else if (di->di_desc->d_flag & TULIP_DFLAG_TxSETUPPKT) db_printf(" descriptor %td: setup packet\n", di - ri->ri_first); } } void tulip_dumpdesc(int unit, int ring, int desc) { tulip_softc_t *sc; tulip_ringinfo_t *ri; tulip_descinfo_t *di; char *s; if (unit < 0 || unit >= TULIP_MAX_DEVICES) { db_printf("invalid unit %d\n", unit); return; } sc = tulips[unit]; if (sc == NULL) { db_printf("unit %d not present\n", unit); return; } switch (ring) { case 0: s = "receive"; ri = &sc->tulip_rxinfo; break; case 1: s = "transmit"; ri = &sc->tulip_txinfo; break; default: db_printf("invalid ring %d\n", ring); return; } if (desc < 0 || desc >= ri->ri_max) { db_printf("invalid descriptor %d\n", desc); return; } db_printf("%s descriptor %d:\n", s, desc); di = &ri->ri_first[desc]; db_printf(" mbuf: %p\n", di->di_mbuf); db_printf(" status: %08x flag: %03x\n", di->di_desc->d_status, di->di_desc->d_flag); db_printf(" addr1: %08x len1: %03x\n", di->di_desc->d_addr1, 
di->di_desc->d_length1); db_printf(" addr2: %08x len2: %03x\n", di->di_desc->d_addr2, di->di_desc->d_length2); } #endif Index: stable/11/sys/dev/drm/mga_state.c =================================================================== --- stable/11/sys/dev/drm/mga_state.c (revision 330445) +++ stable/11/sys/dev/drm/mga_state.c (revision 330446) @@ -1,1142 +1,1142 @@ /* mga_state.c -- State support for MGA G200/G400 -*- linux-c -*- * Created: Thu Jan 27 02:53:43 2000 by jhartmann@precisioninsight.com */ /*- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Jeff Hartmann * Keith Whitwell * * Rewritten by: * Gareth Hughes */ #include __FBSDID("$FreeBSD$"); #include "dev/drm/drmP.h" #include "dev/drm/drm.h" #include "dev/drm/mga_drm.h" #include "dev/drm/mga_drv.h" /* ================================================================ * DMA hardware state programming functions */ static void mga_emit_clip_rect(drm_mga_private_t * dev_priv, struct drm_clip_rect * box) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_context_regs_t *ctx = &sarea_priv->context_state; unsigned int pitch = dev_priv->front_pitch; DMA_LOCALS; BEGIN_DMA(2); /* Force reset of DWGCTL on G400 (eliminates clip disable bit). 
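 * On G400-class chips the block below rewrites ctx->dwgctl and follows
 * each write with an MGA_LEN + MGA_EXEC so the value is actually
 * executed, which appears to be what flushes the stale clip-disable
 * state out of the drawing engine.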
*/ if (dev_priv->chipset >= MGA_CARD_TYPE_G400) { DMA_BLOCK(MGA_DWGCTL, ctx->dwgctl, MGA_LEN + MGA_EXEC, 0x80000000, MGA_DWGCTL, ctx->dwgctl, MGA_LEN + MGA_EXEC, 0x80000000); } DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_CXBNDRY, ((box->x2 - 1) << 16) | box->x1, MGA_YTOP, box->y1 * pitch, MGA_YBOT, (box->y2 - 1) * pitch); ADVANCE_DMA(); } static __inline__ void mga_g200_emit_context(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_context_regs_t *ctx = &sarea_priv->context_state; DMA_LOCALS; BEGIN_DMA(3); DMA_BLOCK(MGA_DSTORG, ctx->dstorg, MGA_MACCESS, ctx->maccess, MGA_PLNWT, ctx->plnwt, MGA_DWGCTL, ctx->dwgctl); DMA_BLOCK(MGA_ALPHACTRL, ctx->alphactrl, MGA_FOGCOL, ctx->fogcolor, MGA_WFLAG, ctx->wflag, MGA_ZORG, dev_priv->depth_offset); DMA_BLOCK(MGA_FCOL, ctx->fcol, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000); ADVANCE_DMA(); } static __inline__ void mga_g400_emit_context(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_context_regs_t *ctx = &sarea_priv->context_state; DMA_LOCALS; BEGIN_DMA(4); DMA_BLOCK(MGA_DSTORG, ctx->dstorg, MGA_MACCESS, ctx->maccess, MGA_PLNWT, ctx->plnwt, MGA_DWGCTL, ctx->dwgctl); DMA_BLOCK(MGA_ALPHACTRL, ctx->alphactrl, MGA_FOGCOL, ctx->fogcolor, MGA_WFLAG, ctx->wflag, MGA_ZORG, dev_priv->depth_offset); DMA_BLOCK(MGA_WFLAG1, ctx->wflag, MGA_TDUALSTAGE0, ctx->tdualstage0, MGA_TDUALSTAGE1, ctx->tdualstage1, MGA_FCOL, ctx->fcol); DMA_BLOCK(MGA_STENCIL, ctx->stencil, MGA_STENCILCTL, ctx->stencilctl, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000); ADVANCE_DMA(); } static __inline__ void mga_g200_emit_tex0(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_texture_regs_t *tex = &sarea_priv->tex_state[0]; DMA_LOCALS; BEGIN_DMA(4); DMA_BLOCK(MGA_TEXCTL2, tex->texctl2, MGA_TEXCTL, tex->texctl, MGA_TEXFILTER, tex->texfilter, MGA_TEXBORDERCOL, tex->texbordercol); DMA_BLOCK(MGA_TEXORG, tex->texorg, MGA_TEXORG1, tex->texorg1, MGA_TEXORG2, tex->texorg2, MGA_TEXORG3, tex->texorg3); DMA_BLOCK(MGA_TEXORG4, tex->texorg4, MGA_TEXWIDTH, tex->texwidth, MGA_TEXHEIGHT, tex->texheight, MGA_WR24, tex->texwidth); DMA_BLOCK(MGA_WR34, tex->texheight, MGA_TEXTRANS, 0x0000ffff, MGA_TEXTRANSHIGH, 0x0000ffff, MGA_DMAPAD, 0x00000000); ADVANCE_DMA(); } static __inline__ void mga_g400_emit_tex0(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_texture_regs_t *tex = &sarea_priv->tex_state[0]; DMA_LOCALS; /* printk("mga_g400_emit_tex0 %x %x %x\n", tex->texorg, */ /* tex->texctl, tex->texctl2); */ BEGIN_DMA(6); DMA_BLOCK(MGA_TEXCTL2, tex->texctl2 | MGA_G400_TC2_MAGIC, MGA_TEXCTL, tex->texctl, MGA_TEXFILTER, tex->texfilter, MGA_TEXBORDERCOL, tex->texbordercol); DMA_BLOCK(MGA_TEXORG, tex->texorg, MGA_TEXORG1, tex->texorg1, MGA_TEXORG2, tex->texorg2, MGA_TEXORG3, tex->texorg3); DMA_BLOCK(MGA_TEXORG4, tex->texorg4, MGA_TEXWIDTH, tex->texwidth, MGA_TEXHEIGHT, tex->texheight, MGA_WR49, 0x00000000); DMA_BLOCK(MGA_WR57, 0x00000000, MGA_WR53, 0x00000000, MGA_WR61, 0x00000000, MGA_WR52, MGA_G400_WR_MAGIC); DMA_BLOCK(MGA_WR60, MGA_G400_WR_MAGIC, MGA_WR54, tex->texwidth | MGA_G400_WR_MAGIC, MGA_WR62, tex->texheight | MGA_G400_WR_MAGIC, MGA_DMAPAD, 0x00000000); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_TEXTRANS, 0x0000ffff, MGA_TEXTRANSHIGH, 0x0000ffff); ADVANCE_DMA(); } static __inline__ void mga_g400_emit_tex1(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; 
drm_mga_texture_regs_t *tex = &sarea_priv->tex_state[1]; DMA_LOCALS; /* printk("mga_g400_emit_tex1 %x %x %x\n", tex->texorg, */ /* tex->texctl, tex->texctl2); */ BEGIN_DMA(5); DMA_BLOCK(MGA_TEXCTL2, (tex->texctl2 | MGA_MAP1_ENABLE | MGA_G400_TC2_MAGIC), MGA_TEXCTL, tex->texctl, MGA_TEXFILTER, tex->texfilter, MGA_TEXBORDERCOL, tex->texbordercol); DMA_BLOCK(MGA_TEXORG, tex->texorg, MGA_TEXORG1, tex->texorg1, MGA_TEXORG2, tex->texorg2, MGA_TEXORG3, tex->texorg3); DMA_BLOCK(MGA_TEXORG4, tex->texorg4, MGA_TEXWIDTH, tex->texwidth, MGA_TEXHEIGHT, tex->texheight, MGA_WR49, 0x00000000); DMA_BLOCK(MGA_WR57, 0x00000000, MGA_WR53, 0x00000000, MGA_WR61, 0x00000000, MGA_WR52, tex->texwidth | MGA_G400_WR_MAGIC); DMA_BLOCK(MGA_WR60, tex->texheight | MGA_G400_WR_MAGIC, MGA_TEXTRANS, 0x0000ffff, MGA_TEXTRANSHIGH, 0x0000ffff, MGA_TEXCTL2, tex->texctl2 | MGA_G400_TC2_MAGIC); ADVANCE_DMA(); } static __inline__ void mga_g200_emit_pipe(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; unsigned int pipe = sarea_priv->warp_pipe; DMA_LOCALS; BEGIN_DMA(3); DMA_BLOCK(MGA_WIADDR, MGA_WMODE_SUSPEND, MGA_WVRTXSZ, 0x00000007, MGA_WFLAG, 0x00000000, MGA_WR24, 0x00000000); DMA_BLOCK(MGA_WR25, 0x00000100, MGA_WR34, 0x00000000, MGA_WR42, 0x0000ffff, MGA_WR60, 0x0000ffff); - /* Padding required to to hardware bug. + /* Padding required due to hardware bug. */ DMA_BLOCK(MGA_DMAPAD, 0xffffffff, MGA_DMAPAD, 0xffffffff, MGA_DMAPAD, 0xffffffff, MGA_WIADDR, (dev_priv->warp_pipe_phys[pipe] | MGA_WMODE_START | dev_priv->wagp_enable)); ADVANCE_DMA(); } static __inline__ void mga_g400_emit_pipe(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; unsigned int pipe = sarea_priv->warp_pipe; DMA_LOCALS; /* printk("mga_g400_emit_pipe %x\n", pipe); */ BEGIN_DMA(10); DMA_BLOCK(MGA_WIADDR2, MGA_WMODE_SUSPEND, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000); if (pipe & MGA_T2) { DMA_BLOCK(MGA_WVRTXSZ, 0x00001e09, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000); DMA_BLOCK(MGA_WACCEPTSEQ, 0x00000000, MGA_WACCEPTSEQ, 0x00000000, MGA_WACCEPTSEQ, 0x00000000, MGA_WACCEPTSEQ, 0x1e000000); } else { if (dev_priv->warp_pipe & MGA_T2) { /* Flush the WARP pipe */ DMA_BLOCK(MGA_YDST, 0x00000000, MGA_FXLEFT, 0x00000000, MGA_FXRIGHT, 0x00000001, MGA_DWGCTL, MGA_DWGCTL_FLUSH); DMA_BLOCK(MGA_LEN + MGA_EXEC, 0x00000001, MGA_DWGSYNC, 0x00007000, MGA_TEXCTL2, MGA_G400_TC2_MAGIC, MGA_LEN + MGA_EXEC, 0x00000000); DMA_BLOCK(MGA_TEXCTL2, (MGA_DUALTEX | MGA_G400_TC2_MAGIC), MGA_LEN + MGA_EXEC, 0x00000000, MGA_TEXCTL2, MGA_G400_TC2_MAGIC, MGA_DMAPAD, 0x00000000); } DMA_BLOCK(MGA_WVRTXSZ, 0x00001807, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000); DMA_BLOCK(MGA_WACCEPTSEQ, 0x00000000, MGA_WACCEPTSEQ, 0x00000000, MGA_WACCEPTSEQ, 0x00000000, MGA_WACCEPTSEQ, 0x18000000); } DMA_BLOCK(MGA_WFLAG, 0x00000000, MGA_WFLAG1, 0x00000000, MGA_WR56, MGA_G400_WR56_MAGIC, MGA_DMAPAD, 0x00000000); DMA_BLOCK(MGA_WR49, 0x00000000, /* tex0 */ MGA_WR57, 0x00000000, /* tex0 */ MGA_WR53, 0x00000000, /* tex1 */ MGA_WR61, 0x00000000); /* tex1 */ DMA_BLOCK(MGA_WR54, MGA_G400_WR_MAGIC, /* tex0 width */ MGA_WR62, MGA_G400_WR_MAGIC, /* tex0 height */ MGA_WR52, MGA_G400_WR_MAGIC, /* tex1 width */ MGA_WR60, MGA_G400_WR_MAGIC); /* tex1 height */ - /* Padding required to to hardware bug */ + /* Padding required due to hardware bug */ DMA_BLOCK(MGA_DMAPAD, 0xffffffff, MGA_DMAPAD, 0xffffffff, MGA_DMAPAD, 0xffffffff, MGA_WIADDR2, (dev_priv->warp_pipe_phys[pipe] | 
MGA_WMODE_START | dev_priv->wagp_enable)); ADVANCE_DMA(); } static void mga_g200_emit_state(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; unsigned int dirty = sarea_priv->dirty; if (sarea_priv->warp_pipe != dev_priv->warp_pipe) { mga_g200_emit_pipe(dev_priv); dev_priv->warp_pipe = sarea_priv->warp_pipe; } if (dirty & MGA_UPLOAD_CONTEXT) { mga_g200_emit_context(dev_priv); sarea_priv->dirty &= ~MGA_UPLOAD_CONTEXT; } if (dirty & MGA_UPLOAD_TEX0) { mga_g200_emit_tex0(dev_priv); sarea_priv->dirty &= ~MGA_UPLOAD_TEX0; } } static void mga_g400_emit_state(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; unsigned int dirty = sarea_priv->dirty; int multitex = sarea_priv->warp_pipe & MGA_T2; if (sarea_priv->warp_pipe != dev_priv->warp_pipe) { mga_g400_emit_pipe(dev_priv); dev_priv->warp_pipe = sarea_priv->warp_pipe; } if (dirty & MGA_UPLOAD_CONTEXT) { mga_g400_emit_context(dev_priv); sarea_priv->dirty &= ~MGA_UPLOAD_CONTEXT; } if (dirty & MGA_UPLOAD_TEX0) { mga_g400_emit_tex0(dev_priv); sarea_priv->dirty &= ~MGA_UPLOAD_TEX0; } if ((dirty & MGA_UPLOAD_TEX1) && multitex) { mga_g400_emit_tex1(dev_priv); sarea_priv->dirty &= ~MGA_UPLOAD_TEX1; } } /* ================================================================ * SAREA state verification */ /* Disallow all write destinations except the front and backbuffer. */ static int mga_verify_context(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_context_regs_t *ctx = &sarea_priv->context_state; if (ctx->dstorg != dev_priv->front_offset && ctx->dstorg != dev_priv->back_offset) { DRM_ERROR("*** bad DSTORG: %x (front %x, back %x)\n\n", ctx->dstorg, dev_priv->front_offset, dev_priv->back_offset); ctx->dstorg = 0; return -EINVAL; } return 0; } /* Disallow texture reads from PCI space. 
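 * mga_verify_tex() below masks texorg with MGA_TEXORGMAP_MASK and
 * MGA_TEXORGACC_MASK and rejects the MGA_TEXORGMAP_SYSMEM |
 * MGA_TEXORGACC_PCI combination, which could otherwise be used to make
 * the engine read from arbitrary system memory.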
*/ static int mga_verify_tex(drm_mga_private_t * dev_priv, int unit) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_texture_regs_t *tex = &sarea_priv->tex_state[unit]; unsigned int org; org = tex->texorg & (MGA_TEXORGMAP_MASK | MGA_TEXORGACC_MASK); if (org == (MGA_TEXORGMAP_SYSMEM | MGA_TEXORGACC_PCI)) { DRM_ERROR("*** bad TEXORG: 0x%x, unit %d\n", tex->texorg, unit); tex->texorg = 0; return -EINVAL; } return 0; } static int mga_verify_state(drm_mga_private_t * dev_priv) { drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; unsigned int dirty = sarea_priv->dirty; int ret = 0; if (sarea_priv->nbox > MGA_NR_SAREA_CLIPRECTS) sarea_priv->nbox = MGA_NR_SAREA_CLIPRECTS; if (dirty & MGA_UPLOAD_CONTEXT) ret |= mga_verify_context(dev_priv); if (dirty & MGA_UPLOAD_TEX0) ret |= mga_verify_tex(dev_priv, 0); if (dev_priv->chipset >= MGA_CARD_TYPE_G400) { if (dirty & MGA_UPLOAD_TEX1) ret |= mga_verify_tex(dev_priv, 1); if (dirty & MGA_UPLOAD_PIPE) ret |= (sarea_priv->warp_pipe > MGA_MAX_G400_PIPES); } else { if (dirty & MGA_UPLOAD_PIPE) ret |= (sarea_priv->warp_pipe > MGA_MAX_G200_PIPES); } return (ret == 0); } static int mga_verify_iload(drm_mga_private_t * dev_priv, unsigned int dstorg, unsigned int length) { if (dstorg < dev_priv->texture_offset || dstorg + length > (dev_priv->texture_offset + dev_priv->texture_size)) { DRM_ERROR("*** bad iload DSTORG: 0x%x\n", dstorg); return -EINVAL; } if (length & MGA_ILOAD_MASK) { DRM_ERROR("*** bad iload length: 0x%x\n", length & MGA_ILOAD_MASK); return -EINVAL; } return 0; } static int mga_verify_blit(drm_mga_private_t * dev_priv, unsigned int srcorg, unsigned int dstorg) { if ((srcorg & 0x3) == (MGA_SRCACC_PCI | MGA_SRCMAP_SYSMEM) || (dstorg & 0x3) == (MGA_SRCACC_PCI | MGA_SRCMAP_SYSMEM)) { DRM_ERROR("*** bad blit: src=0x%x dst=0x%x\n", srcorg, dstorg); return -EINVAL; } return 0; } /* ================================================================ * */ static void mga_dma_dispatch_clear(struct drm_device * dev, drm_mga_clear_t * clear) { drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_context_regs_t *ctx = &sarea_priv->context_state; struct drm_clip_rect *pbox = sarea_priv->boxes; int nbox = sarea_priv->nbox; int i; DMA_LOCALS; DRM_DEBUG("\n"); BEGIN_DMA(1); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_DWGSYNC, 0x00007100, MGA_DWGSYNC, 0x00007000); ADVANCE_DMA(); for (i = 0; i < nbox; i++) { struct drm_clip_rect *box = &pbox[i]; u32 height = box->y2 - box->y1; DRM_DEBUG(" from=%d,%d to=%d,%d\n", box->x1, box->y1, box->x2, box->y2); if (clear->flags & MGA_FRONT) { BEGIN_DMA(2); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_PLNWT, clear->color_mask, MGA_YDSTLEN, (box->y1 << 16) | height, MGA_FXBNDRY, (box->x2 << 16) | box->x1); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_FCOL, clear->clear_color, MGA_DSTORG, dev_priv->front_offset, MGA_DWGCTL + MGA_EXEC, dev_priv->clear_cmd); ADVANCE_DMA(); } if (clear->flags & MGA_BACK) { BEGIN_DMA(2); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_PLNWT, clear->color_mask, MGA_YDSTLEN, (box->y1 << 16) | height, MGA_FXBNDRY, (box->x2 << 16) | box->x1); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_FCOL, clear->clear_color, MGA_DSTORG, dev_priv->back_offset, MGA_DWGCTL + MGA_EXEC, dev_priv->clear_cmd); ADVANCE_DMA(); } if (clear->flags & MGA_DEPTH) { BEGIN_DMA(2); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_PLNWT, clear->depth_mask, MGA_YDSTLEN, (box->y1 << 16) | height, MGA_FXBNDRY, (box->x2 << 16) | box->x1); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_FCOL, 
clear->clear_depth, MGA_DSTORG, dev_priv->depth_offset, MGA_DWGCTL + MGA_EXEC, dev_priv->clear_cmd); ADVANCE_DMA(); } } BEGIN_DMA(1); /* Force reset of DWGCTL */ DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_PLNWT, ctx->plnwt, MGA_DWGCTL, ctx->dwgctl); ADVANCE_DMA(); FLUSH_DMA(); } static void mga_dma_dispatch_swap(struct drm_device * dev) { drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_context_regs_t *ctx = &sarea_priv->context_state; struct drm_clip_rect *pbox = sarea_priv->boxes; int nbox = sarea_priv->nbox; int i; DMA_LOCALS; DRM_DEBUG("\n"); sarea_priv->last_frame.head = dev_priv->prim.tail; sarea_priv->last_frame.wrap = dev_priv->prim.last_wrap; BEGIN_DMA(4 + nbox); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_DWGSYNC, 0x00007100, MGA_DWGSYNC, 0x00007000); DMA_BLOCK(MGA_DSTORG, dev_priv->front_offset, MGA_MACCESS, dev_priv->maccess, MGA_SRCORG, dev_priv->back_offset, MGA_AR5, dev_priv->front_pitch); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_PLNWT, 0xffffffff, MGA_DWGCTL, MGA_DWGCTL_COPY); for (i = 0; i < nbox; i++) { struct drm_clip_rect *box = &pbox[i]; u32 height = box->y2 - box->y1; u32 start = box->y1 * dev_priv->front_pitch; DRM_DEBUG(" from=%d,%d to=%d,%d\n", box->x1, box->y1, box->x2, box->y2); DMA_BLOCK(MGA_AR0, start + box->x2 - 1, MGA_AR3, start + box->x1, MGA_FXBNDRY, ((box->x2 - 1) << 16) | box->x1, MGA_YDSTLEN + MGA_EXEC, (box->y1 << 16) | height); } DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_PLNWT, ctx->plnwt, MGA_SRCORG, dev_priv->front_offset, MGA_DWGCTL, ctx->dwgctl); ADVANCE_DMA(); FLUSH_DMA(); DRM_DEBUG("... done.\n"); } static void mga_dma_dispatch_vertex(struct drm_device * dev, struct drm_buf * buf) { drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_buf_priv_t *buf_priv = buf->dev_private; drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; u32 address = (u32) buf->bus_address; u32 length = (u32) buf->used; int i = 0; DMA_LOCALS; DRM_DEBUG("buf=%d used=%d\n", buf->idx, buf->used); if (buf->used) { buf_priv->dispatched = 1; MGA_EMIT_STATE(dev_priv, sarea_priv->dirty); do { if (i < sarea_priv->nbox) { mga_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]); } BEGIN_DMA(1); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_SECADDRESS, (address | MGA_DMA_VERTEX), MGA_SECEND, ((address + length) | dev_priv->dma_access)); ADVANCE_DMA(); } while (++i < sarea_priv->nbox); } if (buf_priv->discard) { AGE_BUFFER(buf_priv); buf->pending = 0; buf->used = 0; buf_priv->dispatched = 0; mga_freelist_put(dev, buf); } FLUSH_DMA(); } static void mga_dma_dispatch_indices(struct drm_device * dev, struct drm_buf * buf, unsigned int start, unsigned int end) { drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_buf_priv_t *buf_priv = buf->dev_private; drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; u32 address = (u32) buf->bus_address; int i = 0; DMA_LOCALS; DRM_DEBUG("buf=%d start=%d end=%d\n", buf->idx, start, end); if (start != end) { buf_priv->dispatched = 1; MGA_EMIT_STATE(dev_priv, sarea_priv->dirty); do { if (i < sarea_priv->nbox) { mga_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]); } BEGIN_DMA(1); DMA_BLOCK(MGA_DMAPAD, 0x00000000, MGA_DMAPAD, 0x00000000, MGA_SETUPADDRESS, address + start, MGA_SETUPEND, ((address + end) | dev_priv->dma_access)); ADVANCE_DMA(); } while (++i < sarea_priv->nbox); } if (buf_priv->discard) { AGE_BUFFER(buf_priv); buf->pending = 0; buf->used = 0; buf_priv->dispatched = 0; mga_freelist_put(dev, buf); } FLUSH_DMA(); } /* 
This copies a 64 byte aligned AGP region to the framebuffer with a
 * standard blit; the ioctl needs to do checking.
 */
static void mga_dma_dispatch_iload(struct drm_device * dev,
				   struct drm_buf * buf,
				   unsigned int dstorg, unsigned int length)
{
	drm_mga_private_t *dev_priv = dev->dev_private;
	drm_mga_buf_priv_t *buf_priv = buf->dev_private;
	drm_mga_context_regs_t *ctx = &dev_priv->sarea_priv->context_state;
	u32 srcorg =
	    buf->bus_address | dev_priv->dma_access | MGA_SRCMAP_SYSMEM;
	u32 y2;
	DMA_LOCALS;
	DRM_DEBUG("buf=%d used=%d\n", buf->idx, buf->used);

	y2 = length / 64;

	BEGIN_DMA(5);

	DMA_BLOCK(MGA_DMAPAD, 0x00000000,
		  MGA_DMAPAD, 0x00000000,
		  MGA_DWGSYNC, 0x00007100,
		  MGA_DWGSYNC, 0x00007000);

	DMA_BLOCK(MGA_DSTORG, dstorg,
		  MGA_MACCESS, 0x00000000,
		  MGA_SRCORG, srcorg,
		  MGA_AR5, 64);

	DMA_BLOCK(MGA_PITCH, 64,
		  MGA_PLNWT, 0xffffffff,
		  MGA_DMAPAD, 0x00000000,
		  MGA_DWGCTL, MGA_DWGCTL_COPY);

	DMA_BLOCK(MGA_AR0, 63,
		  MGA_AR3, 0,
		  MGA_FXBNDRY, (63 << 16) | 0,
		  MGA_YDSTLEN + MGA_EXEC, y2);

	DMA_BLOCK(MGA_PLNWT, ctx->plnwt,
		  MGA_SRCORG, dev_priv->front_offset,
		  MGA_PITCH, dev_priv->front_pitch,
		  MGA_DWGSYNC, 0x00007000);

	ADVANCE_DMA();

	AGE_BUFFER(buf_priv);

	buf->pending = 0;
	buf->used = 0;
	buf_priv->dispatched = 0;

	mga_freelist_put(dev, buf);

	FLUSH_DMA();
}

static void mga_dma_dispatch_blit(struct drm_device * dev, drm_mga_blit_t * blit)
{
	drm_mga_private_t *dev_priv = dev->dev_private;
	drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_mga_context_regs_t *ctx = &sarea_priv->context_state;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int nbox = sarea_priv->nbox;
	u32 scandir = 0, i;
	DMA_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_DMA(4 + nbox);

	DMA_BLOCK(MGA_DMAPAD, 0x00000000,
		  MGA_DMAPAD, 0x00000000,
		  MGA_DWGSYNC, 0x00007100,
		  MGA_DWGSYNC, 0x00007000);

	DMA_BLOCK(MGA_DWGCTL, MGA_DWGCTL_COPY,
		  MGA_PLNWT, blit->planemask,
		  MGA_SRCORG, blit->srcorg,
		  MGA_DSTORG, blit->dstorg);

	DMA_BLOCK(MGA_SGN, scandir,
		  MGA_MACCESS, dev_priv->maccess,
		  MGA_AR5, blit->ydir * blit->src_pitch,
		  MGA_PITCH, blit->dst_pitch);

	for (i = 0; i < nbox; i++) {
		int srcx = pbox[i].x1 + blit->delta_sx;
		int srcy = pbox[i].y1 + blit->delta_sy;
		int dstx = pbox[i].x1 + blit->delta_dx;
		int dsty = pbox[i].y1 + blit->delta_dy;
		int h = pbox[i].y2 - pbox[i].y1;
		int w = pbox[i].x2 - pbox[i].x1 - 1;
		int start;

		if (blit->ydir == -1) {
			srcy = blit->height - srcy - 1;
		}

		start = srcy * blit->src_pitch + srcx;

		DMA_BLOCK(MGA_AR0, start + w,
			  MGA_AR3, start,
			  MGA_FXBNDRY, ((dstx + w) << 16) | (dstx & 0xffff),
			  MGA_YDSTLEN + MGA_EXEC, (dsty << 16) | h);
	}

	/* Do something to flush AGP? */

	/* Force reset of DWGCTL */
	DMA_BLOCK(MGA_DMAPAD, 0x00000000,
		  MGA_PLNWT, ctx->plnwt,
		  MGA_PITCH, dev_priv->front_pitch,
		  MGA_DWGCTL, ctx->dwgctl);

	ADVANCE_DMA();
}

/* ================================================================
 *
 */

static int mga_dma_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_mga_private_t *dev_priv = dev->dev_private;
	drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_mga_clear_t *clear = data;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (sarea_priv->nbox > MGA_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = MGA_NR_SAREA_CLIPRECTS;

	WRAP_TEST_WITH_RETURN(dev_priv);

	mga_dma_dispatch_clear(dev, clear);

	/* Make sure we restore the 3D state next time.
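	 * The clear pass above programmed DWGCTL, PLNWT, FCOL and
	 * DSTORG directly, so flag MGA_UPLOAD_CONTEXT to force the
	 * context registers to be re-emitted before the next
	 * rendering command.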
*/ dev_priv->sarea_priv->dirty |= MGA_UPLOAD_CONTEXT; return 0; } static int mga_dma_swap(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; LOCK_TEST_WITH_RETURN(dev, file_priv); if (sarea_priv->nbox > MGA_NR_SAREA_CLIPRECTS) sarea_priv->nbox = MGA_NR_SAREA_CLIPRECTS; WRAP_TEST_WITH_RETURN(dev_priv); mga_dma_dispatch_swap(dev); /* Make sure we restore the 3D state next time. */ dev_priv->sarea_priv->dirty |= MGA_UPLOAD_CONTEXT; return 0; } static int mga_dma_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_mga_private_t *dev_priv = dev->dev_private; struct drm_device_dma *dma = dev->dma; struct drm_buf *buf; drm_mga_buf_priv_t *buf_priv; drm_mga_vertex_t *vertex = data; LOCK_TEST_WITH_RETURN(dev, file_priv); if (vertex->idx < 0 || vertex->idx > dma->buf_count) return -EINVAL; buf = dma->buflist[vertex->idx]; buf_priv = buf->dev_private; buf->used = vertex->used; buf_priv->discard = vertex->discard; if (!mga_verify_state(dev_priv)) { if (vertex->discard) { if (buf_priv->dispatched == 1) AGE_BUFFER(buf_priv); buf_priv->dispatched = 0; mga_freelist_put(dev, buf); } return -EINVAL; } WRAP_TEST_WITH_RETURN(dev_priv); mga_dma_dispatch_vertex(dev, buf); return 0; } static int mga_dma_indices(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_mga_private_t *dev_priv = dev->dev_private; struct drm_device_dma *dma = dev->dma; struct drm_buf *buf; drm_mga_buf_priv_t *buf_priv; drm_mga_indices_t *indices = data; LOCK_TEST_WITH_RETURN(dev, file_priv); if (indices->idx < 0 || indices->idx > dma->buf_count) return -EINVAL; buf = dma->buflist[indices->idx]; buf_priv = buf->dev_private; buf_priv->discard = indices->discard; if (!mga_verify_state(dev_priv)) { if (indices->discard) { if (buf_priv->dispatched == 1) AGE_BUFFER(buf_priv); buf_priv->dispatched = 0; mga_freelist_put(dev, buf); } return -EINVAL; } WRAP_TEST_WITH_RETURN(dev_priv); mga_dma_dispatch_indices(dev, buf, indices->start, indices->end); return 0; } static int mga_dma_iload(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_device_dma *dma = dev->dma; drm_mga_private_t *dev_priv = dev->dev_private; struct drm_buf *buf; drm_mga_buf_priv_t *buf_priv; drm_mga_iload_t *iload = data; DRM_DEBUG("\n"); LOCK_TEST_WITH_RETURN(dev, file_priv); #if 0 if (mga_do_wait_for_idle(dev_priv) < 0) { if (MGA_DMA_DEBUG) DRM_INFO("-EBUSY\n"); return -EBUSY; } #endif if (iload->idx < 0 || iload->idx > dma->buf_count) return -EINVAL; buf = dma->buflist[iload->idx]; buf_priv = buf->dev_private; if (mga_verify_iload(dev_priv, iload->dstorg, iload->length)) { mga_freelist_put(dev, buf); return -EINVAL; } WRAP_TEST_WITH_RETURN(dev_priv); mga_dma_dispatch_iload(dev, buf, iload->dstorg, iload->length); /* Make sure we restore the 3D state next time. */ dev_priv->sarea_priv->dirty |= MGA_UPLOAD_CONTEXT; return 0; } static int mga_dma_blit(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_mga_private_t *dev_priv = dev->dev_private; drm_mga_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mga_blit_t *blit = data; DRM_DEBUG("\n"); LOCK_TEST_WITH_RETURN(dev, file_priv); if (sarea_priv->nbox > MGA_NR_SAREA_CLIPRECTS) sarea_priv->nbox = MGA_NR_SAREA_CLIPRECTS; if (mga_verify_blit(dev_priv, blit->srcorg, blit->dstorg)) return -EINVAL; WRAP_TEST_WITH_RETURN(dev_priv); mga_dma_dispatch_blit(dev, blit); /* Make sure we restore the 3D state next time. 
	 */
	dev_priv->sarea_priv->dirty |= MGA_UPLOAD_CONTEXT;

	return 0;
}

static int mga_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_mga_private_t *dev_priv = dev->dev_private;
	drm_mga_getparam_t *param = data;
	int value;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param->param) {
	case MGA_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case MGA_PARAM_CARD_TYPE:
		value = dev_priv->chipset;
		break;
	default:
		return -EINVAL;
	}

	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return -EFAULT;
	}

	return 0;
}

static int mga_set_fence(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_mga_private_t *dev_priv = dev->dev_private;
	u32 *fence = data;
	DMA_LOCALS;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	/* I would normally do this assignment in the declaration of fence,
	 * but dev_priv may be NULL.
	 */

	*fence = dev_priv->next_fence_to_post;
	dev_priv->next_fence_to_post++;

	BEGIN_DMA(1);
	DMA_BLOCK(MGA_DMAPAD, 0x00000000,
		  MGA_DMAPAD, 0x00000000,
		  MGA_DMAPAD, 0x00000000,
		  MGA_SOFTRAP, 0x00000000);
	ADVANCE_DMA();

	return 0;
}

static int mga_wait_fence(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_mga_private_t *dev_priv = dev->dev_private;
	u32 *fence = data;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	mga_driver_fence_wait(dev, fence);

	return 0;
}

struct drm_ioctl_desc mga_ioctls[] = {
	DRM_IOCTL_DEF(DRM_MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_MGA_FLUSH, mga_dma_flush, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_RESET, mga_dma_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_SWAP, mga_dma_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_CLEAR, mga_dma_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_VERTEX, mga_dma_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_INDICES, mga_dma_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_ILOAD, mga_dma_iload, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_BLIT, mga_dma_blit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_GETPARAM, mga_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_SET_FENCE, mga_set_fence, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
};

int mga_max_ioctl = DRM_ARRAY_SIZE(mga_ioctls);

Index: stable/11/sys/dev/e1000/e1000_82543.c
===================================================================
--- stable/11/sys/dev/e1000/e1000_82543.c	(revision 330445)
+++ stable/11/sys/dev/e1000/e1000_82543.c	(revision 330446)
@@ -1,1596 +1,1596 @@
/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82543GC Gigabit Ethernet Controller (Fiber) * 82543GC Gigabit Ethernet Controller (Copper) * 82544EI Gigabit Ethernet Controller (Copper) * 82544EI Gigabit Ethernet Controller (Fiber) * 82544GC Gigabit Ethernet Controller (Copper) * 82544GC Gigabit Ethernet Controller (LOM) */ #include "e1000_api.h" static s32 e1000_init_phy_params_82543(struct e1000_hw *hw); static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw); static s32 e1000_init_mac_params_82543(struct e1000_hw *hw); static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data); static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw); static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw); static s32 e1000_reset_hw_82543(struct e1000_hw *hw); static s32 e1000_init_hw_82543(struct e1000_hw *hw); static s32 e1000_setup_link_82543(struct e1000_hw *hw); static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw); static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw); static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw); static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw); static s32 e1000_led_on_82543(struct e1000_hw *hw); static s32 e1000_led_off_82543(struct e1000_hw *hw); static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, u32 value); static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw); static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw); static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw); static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw); static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw); static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, u16 count); static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw); static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state); static s32 e1000_read_mac_addr_82543(struct e1000_hw *hw); /** * e1000_init_phy_params_82543 - Init PHY func ptrs. 
* @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82543(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_init_phy_params_82543"); if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; goto out; } else { phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper; } phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 10000; phy->type = e1000_phy_m88; /* Function Pointers */ phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82543; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; phy->ops.read_reg = (hw->mac.type == e1000_82543) ? e1000_read_phy_reg_82543 : e1000_read_phy_reg_m88; phy->ops.reset = (hw->mac.type == e1000_82543) ? e1000_phy_hw_reset_82543 : e1000_phy_hw_reset_generic; phy->ops.write_reg = (hw->mac.type == e1000_82543) ? e1000_write_phy_reg_82543 : e1000_write_phy_reg_m88; phy->ops.get_info = e1000_get_phy_info_m88; /* * The external PHY of the 82543 can be in a funky state. * Resetting helps us read the PHY registers for acquiring * the PHY ID. */ if (!e1000_init_phy_disabled_82543(hw)) { ret_val = phy->ops.reset(hw); if (ret_val) { DEBUGOUT("Resetting PHY during init failed.\n"); goto out; } msec_delay(20); } ret_val = e1000_get_phy_id(hw); if (ret_val) goto out; /* Verify phy id */ switch (hw->mac.type) { case e1000_82543: if (phy->id != M88E1000_E_PHY_ID) { ret_val = -E1000_ERR_PHY; goto out; } break; case e1000_82544: if (phy->id != M88E1000_I_PHY_ID) { ret_val = -E1000_ERR_PHY; goto out; } break; default: ret_val = -E1000_ERR_PHY; goto out; break; } out: return ret_val; } /** * e1000_init_nvm_params_82543 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; DEBUGFUNC("e1000_init_nvm_params_82543"); nvm->type = e1000_nvm_eeprom_microwire; nvm->word_size = 64; nvm->delay_usec = 50; nvm->address_bits = 6; nvm->opcode_bits = 3; /* Function Pointers */ nvm->ops.read = e1000_read_nvm_microwire; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_microwire; return E1000_SUCCESS; } /** * e1000_init_mac_params_82543 - Init MAC func ptrs. 
 * @hw: pointer to the HW structure
 **/
static s32 e1000_init_mac_params_82543(struct e1000_hw *hw)
{
	struct e1000_mac_info *mac = &hw->mac;

	DEBUGFUNC("e1000_init_mac_params_82543");

	/* Set media type */
	switch (hw->device_id) {
	case E1000_DEV_ID_82543GC_FIBER:
	case E1000_DEV_ID_82544EI_FIBER:
		hw->phy.media_type = e1000_media_type_fiber;
		break;
	default:
		hw->phy.media_type = e1000_media_type_copper;
		break;
	}

	/* Set mta register count */
	mac->mta_reg_count = 128;
	/* Set rar entry count */
	mac->rar_entry_count = E1000_RAR_ENTRIES;

	/* Function pointers */

	/* bus type/speed/width */
	mac->ops.get_bus_info = e1000_get_bus_info_pci_generic;
	/* function id */
	mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pci;
	/* reset */
	mac->ops.reset_hw = e1000_reset_hw_82543;
	/* hw initialization */
	mac->ops.init_hw = e1000_init_hw_82543;
	/* link setup */
	mac->ops.setup_link = e1000_setup_link_82543;
	/* physical interface setup */
	mac->ops.setup_physical_interface =
		(hw->phy.media_type == e1000_media_type_copper)
		? e1000_setup_copper_link_82543 : e1000_setup_fiber_link_82543;
	/* check for link */
	mac->ops.check_for_link =
		(hw->phy.media_type == e1000_media_type_copper)
		? e1000_check_for_copper_link_82543
		: e1000_check_for_fiber_link_82543;
	/* link info */
	mac->ops.get_link_up_info =
		(hw->phy.media_type == e1000_media_type_copper)
		? e1000_get_speed_and_duplex_copper_generic
		: e1000_get_speed_and_duplex_fiber_serdes_generic;
	/* multicast address update */
	mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic;
	/* writing VFTA */
	mac->ops.write_vfta = e1000_write_vfta_82543;
	/* clearing VFTA */
	mac->ops.clear_vfta = e1000_clear_vfta_generic;
	/* read mac address */
	mac->ops.read_mac_addr = e1000_read_mac_addr_82543;
	/* turn on/off LED */
	mac->ops.led_on = e1000_led_on_82543;
	mac->ops.led_off = e1000_led_off_82543;
	/* clear hardware counters */
	mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82543;

	/* Set tbi compatibility */
	if ((hw->mac.type != e1000_82543) ||
	    (hw->phy.media_type == e1000_media_type_fiber))
		e1000_set_tbi_compatibility_82543(hw, FALSE);

	return E1000_SUCCESS;
}

/**
 * e1000_init_function_pointers_82543 - Init func ptrs.
 * @hw: pointer to the HW structure
 *
 * Called to initialize all function pointers and parameters.
 **/
void e1000_init_function_pointers_82543(struct e1000_hw *hw)
{
	DEBUGFUNC("e1000_init_function_pointers_82543");

	hw->mac.ops.init_params = e1000_init_mac_params_82543;
	hw->nvm.ops.init_params = e1000_init_nvm_params_82543;
	hw->phy.ops.init_params = e1000_init_phy_params_82543;
}

/**
 * e1000_tbi_compatibility_enabled_82543 - Returns TBI compat status
 * @hw: pointer to the HW structure
 *
 * Returns the current status of 10-bit Interface (TBI) compatibility
 * (enabled/disabled).
 **/
static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw)
{
	struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543;
	bool state = FALSE;

	DEBUGFUNC("e1000_tbi_compatibility_enabled_82543");

	if (hw->mac.type != e1000_82543) {
		DEBUGOUT("TBI compatibility workaround for 82543 only.\n");
		goto out;
	}

	state = !!(dev_spec->tbi_compatibility & TBI_COMPAT_ENABLED);

out:
	return state;
}

/**
 * e1000_set_tbi_compatibility_82543 - Set TBI compatibility
 * @hw: pointer to the HW structure
 * @state: enable/disable TBI compatibility
 *
 * Enables or disables 10-bit Interface (TBI) compatibility.
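 *
 * The workaround applies to the 82543 only; with compatibility
 * enabled, store-bad-packet mode may also be enabled so frames the
 * hardware mis-counts still reach the driver, and the statistics can
 * then be repaired in e1000_tbi_adjust_stats_82543().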
/** * e1000_set_tbi_compatibility_82543 - Set TBI compatibility * @hw: pointer to the HW structure * @state: enable/disable TBI compatibility * * Enables or disables 10-bit Interface (TBI) compatibility. **/ void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; DEBUGFUNC("e1000_set_tbi_compatibility_82543"); if (hw->mac.type != e1000_82543) { DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); goto out; } if (state) dev_spec->tbi_compatibility |= TBI_COMPAT_ENABLED; else dev_spec->tbi_compatibility &= ~TBI_COMPAT_ENABLED; out: return; } /** * e1000_tbi_sbp_enabled_82543 - Returns TBI SBP status * @hw: pointer to the HW structure * * Returns the current status of 10-bit Interface (TBI) store bad packet (SBP) * (enabled/disabled). **/ bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; bool state = FALSE; DEBUGFUNC("e1000_tbi_sbp_enabled_82543"); if (hw->mac.type != e1000_82543) { DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); goto out; } state = !!(dev_spec->tbi_compatibility & TBI_SBP_ENABLED); out: return state; } /** * e1000_set_tbi_sbp_82543 - Set TBI SBP * @hw: pointer to the HW structure * @state: enable/disable TBI store bad packet * * Enables or disables 10-bit Interface (TBI) store bad packet (SBP). **/ static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; DEBUGFUNC("e1000_set_tbi_sbp_82543"); if (state && e1000_tbi_compatibility_enabled_82543(hw)) dev_spec->tbi_compatibility |= TBI_SBP_ENABLED; else dev_spec->tbi_compatibility &= ~TBI_SBP_ENABLED; return; } /** * e1000_init_phy_disabled_82543 - Returns init PHY status * @hw: pointer to the HW structure * * Returns the current status of whether PHY initialization is disabled. * TRUE if PHY initialization is disabled, else FALSE. **/ static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; bool ret_val; DEBUGFUNC("e1000_init_phy_disabled_82543"); if (hw->mac.type != e1000_82543) { ret_val = FALSE; goto out; } ret_val = dev_spec->init_phy_disabled; out: return ret_val; }
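e1000_tbi_adjust_stats_82543 below undoes the hardware's miscounting of a frame accepted under TBI compatibility: the extra byte is stripped, the bogus CRC error is reversed, and the packet is re-credited as good. A standalone sketch of that counter arithmetic, using a reduced hypothetical stats struct rather than the driver's e1000_hw_stats:

#include <stdint.h>
#include <stdio.h>

/* Reduced, hypothetical subset of the driver's statistics block. */
struct demo_stats {
	uint64_t crcerrs;	/* CRC error count */
	uint64_t gprc;		/* good packets received */
	uint64_t gorc;		/* good octets received */
};

/* Undo the overcount: one fewer CRC error, one more good packet. */
static void demo_tbi_adjust(struct demo_stats *stats, uint32_t frame_len)
{
	frame_len--;		/* strip the extra carried byte first */
	stats->crcerrs--;
	stats->gprc++;
	stats->gorc += frame_len;
}

int main(void)
{
	struct demo_stats stats = { .crcerrs = 5, .gprc = 100, .gorc = 64000 };

	demo_tbi_adjust(&stats, 65);	/* a 65-byte frame carrying one extra byte */
	printf("crcerrs=%llu gprc=%llu gorc=%llu\n",
	    (unsigned long long)stats.crcerrs,
	    (unsigned long long)stats.gprc,
	    (unsigned long long)stats.gorc);
	return 0;
}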
/** * e1000_tbi_adjust_stats_82543 - Adjust stats when TBI enabled * @hw: pointer to the HW structure * @stats: Struct containing statistic register values * @frame_len: The length of the frame in question * @mac_addr: The Ethernet destination address of the frame in question * @max_frame_size: The maximum frame size * * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT **/ void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw, struct e1000_hw_stats *stats, u32 frame_len, u8 *mac_addr, u32 max_frame_size) { if (!(e1000_tbi_sbp_enabled_82543(hw))) goto out; /* First adjust the frame length. */ frame_len--; /* * We need to adjust the statistics counters, since the hardware * counters overcount this packet as a CRC error and undercount * the packet as a good packet */ /* This packet should not be counted as a CRC error. */ stats->crcerrs--; /* This packet does count as a Good Packet Received. */ stats->gprc++; /* Adjust the Good Octets received counters */ stats->gorc += frame_len; /* * Is this a broadcast or multicast? Check broadcast first, * since the test for a multicast frame will test positive on * a broadcast frame. */ if ((mac_addr[0] == 0xff) && (mac_addr[1] == 0xff)) /* Broadcast packet */ stats->bprc++; else if (*mac_addr & 0x01) /* Multicast packet */ stats->mprc++; /* * In this case, the hardware has overcounted the number of * oversize frames. */ if ((frame_len == max_frame_size) && (stats->roc > 0)) stats->roc--; /* * Adjust the bin counters when the extra byte put the frame in the * wrong bin. Remember that the frame_len was adjusted above. */ if (frame_len == 64) { stats->prc64++; stats->prc127--; } else if (frame_len == 127) { stats->prc127++; stats->prc255--; } else if (frame_len == 255) { stats->prc255++; stats->prc511--; } else if (frame_len == 511) { stats->prc511++; stats->prc1023--; } else if (frame_len == 1023) { stats->prc1023++; stats->prc1522--; } else if (frame_len == 1522) { stats->prc1522++; } out: return; } /** * e1000_read_phy_reg_82543 - Read PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY at offset and stores the information read to data. **/ static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data) { u32 mdic; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_read_phy_reg_82543"); if (offset > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", offset); ret_val = -E1000_ERR_PARAM; goto out; } /* * We must first send a preamble through the MDIO pin to signal the * beginning of an MII instruction. This is done by sending 32 * consecutive "1" bits. */ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); /* * Now combine the next few fields that are required for a read * operation. We use this method instead of calling the * e1000_shift_out_mdi_bits routine five different times. The format * of an MII read instruction consists of a shift out of 14 bits and * is defined as follows: * <Preamble><SOF><Op Code><Phy Addr><Reg Addr> * followed by a shift in of 18 bits. The first two bits shifted in * are TurnAround bits used to avoid contention on the MDIO pin when a * READ operation is performed. These two bits are thrown away * followed by a shift in of 16 bits which contains the desired data. */ mdic = (offset | (hw->phy.addr << 5) | (PHY_OP_READ << 10) | (PHY_SOF << 12)); e1000_shift_out_mdi_bits_82543(hw, mdic, 14); /* * Now that we've shifted out the read command to the MII, we need to * "shift in" the 16-bit value (18 total bits) of the requested PHY * register. */ *data = e1000_shift_in_mdi_bits_82543(hw); out: return ret_val; }
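The 14-bit read command assembled in e1000_read_phy_reg_82543 above can be reproduced in isolation. The shift positions below mirror the mdic expression; the SOF and opcode values are assumptions for illustration, and the PHY address and register offset are arbitrary example inputs.

#include <stdint.h>
#include <stdio.h>

/* Field encodings as used by the mdic expression above; values assumed. */
#define DEMO_PHY_SOF		0x01	/* start of frame */
#define DEMO_PHY_OP_READ	0x02	/* read opcode */

/* Pack <SOF><Op Code><Phy Addr><Reg Addr> into a 14-bit command word. */
static uint32_t demo_mii_read_cmd(uint32_t phy_addr, uint32_t reg_offset)
{
	return (reg_offset | (phy_addr << 5) | (DEMO_PHY_OP_READ << 10) |
	    (DEMO_PHY_SOF << 12));
}

int main(void)
{
	/* PHY address 1, register 2 as example inputs; prints 0x1822. */
	printf("command word: 0x%04x\n", demo_mii_read_cmd(1, 2));
	return 0;
}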
/** * e1000_write_phy_reg_82543 - Write PHY register * @hw: pointer to the HW structure * @offset: register offset to be written * @data: pointer to the data to be written at offset * * Writes data to the PHY at offset. **/ static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data) { u32 mdic; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_write_phy_reg_82543"); if (offset > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", offset); ret_val = -E1000_ERR_PARAM; goto out; } /* * We'll need to use the SW defined pins to shift the write command * out to the PHY. We first send a preamble to the PHY to signal the * beginning of the MII instruction. This is done by sending 32 * consecutive "1" bits. */ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); /* * Now combine the remaining required fields that will indicate a * write operation. We use this method instead of calling the * e1000_shift_out_mdi_bits routine for each field in the command. The * format of an MII write instruction is as follows: * <Preamble><SOF><Op Code><Phy Addr><Reg Addr><Turnaround><Data>. */ mdic = ((PHY_TURNAROUND) | (offset << 2) | (hw->phy.addr << 7) | (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); mdic <<= 16; mdic |= (u32)data; e1000_shift_out_mdi_bits_82543(hw, mdic, 32); out: return ret_val; } /** * e1000_raise_mdi_clk_82543 - Raise Management Data Input clock * @hw: pointer to the HW structure * @ctrl: pointer to the control register * * Raise the management data input clock by setting the MDC bit in the control * register. **/ static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) { /* * Raise the clock input to the Management Data Clock (by setting the * MDC bit), and then delay a sufficient amount of time. */ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl | E1000_CTRL_MDC)); E1000_WRITE_FLUSH(hw); usec_delay(10); } /** * e1000_lower_mdi_clk_82543 - Lower Management Data Input clock * @hw: pointer to the HW structure * @ctrl: pointer to the control register * * Lower the management data input clock by clearing the MDC bit in the * control register. **/ static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) { /* * Lower the clock input to the Management Data Clock (by clearing the * MDC bit), and then delay a sufficient amount of time. */ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl & ~E1000_CTRL_MDC)); E1000_WRITE_FLUSH(hw); usec_delay(10); } /** * e1000_shift_out_mdi_bits_82543 - Shift data bits out to the PHY * @hw: pointer to the HW structure * @data: data to send to the PHY * @count: number of bits to shift out * * We need to shift 'count' bits out to the PHY. So, the value in the * "data" parameter will be shifted out to the PHY one bit at a time. * In order to do this, "data" must be broken down into bits. **/ static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, u16 count) { u32 ctrl, mask; /* * We need to shift "count" number of bits out to the PHY. So, the * value in the "data" parameter will be shifted out to the PHY one * bit at a time. In order to do this, "data" must be broken down * into bits. */ mask = 0x01; mask <<= (count - 1); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); while (mask) { /* * A "1" is shifted out to the PHY by setting the MDIO bit to * "1" and then raising and lowering the Management Data Clock. * A "0" is shifted out to the PHY by setting the MDIO bit to * "0" and then raising and lowering the clock. */ if (data & mask) ctrl |= E1000_CTRL_MDIO; else ctrl &= ~E1000_CTRL_MDIO; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); usec_delay(10); e1000_raise_mdi_clk_82543(hw, &ctrl); e1000_lower_mdi_clk_82543(hw, &ctrl); mask >>= 1; } }
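The while-loop in e1000_shift_out_mdi_bits_82543 above is an MSB-first bit-bang. A host-side sketch of the same loop, writing bits into a string instead of toggling the MDIO pin through E1000_CTRL:

#include <stdint.h>
#include <stdio.h>

/* Shift "count" bits of "data" out MSB-first, recording each bit. */
static void demo_shift_out_bits(uint32_t data, uint16_t count, char *out)
{
	uint32_t mask = 1u << (count - 1);

	while (mask) {
		*out++ = (data & mask) ? '1' : '0';	/* set "MDIO" level */
		/* a real driver would raise and lower the clock here */
		mask >>= 1;
	}
	*out = '\0';
}

int main(void)
{
	char bits[15];	/* 14 bits plus terminator */

	demo_shift_out_bits(0x1822, 14, bits);	/* the read command example */
	printf("shifted out: %s\n", bits);
	return 0;
}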
/** * e1000_shift_in_mdi_bits_82543 - Shift data bits in from the PHY * @hw: pointer to the HW structure * * In order to read a register from the PHY, we need to shift 18 bits * in from the PHY. Bits are "shifted in" by raising the clock input to * the PHY (setting the MDC bit), and then reading the value of the data out * MDIO bit. **/ static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw) { u32 ctrl; u16 data = 0; u8 i; /* * In order to read a register from the PHY, we need to shift in a * total of 18 bits from the PHY. The first two bit (turnaround) * times are used to avoid contention on the MDIO pin when a read * operation is performed. These two bits are ignored by us and * thrown away. Bits are "shifted in" by raising the input to the * Management Data Clock (setting the MDC bit) and then reading the * value of the MDIO bit. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); /* * Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as * input. */ ctrl &= ~E1000_CTRL_MDIO_DIR; ctrl &= ~E1000_CTRL_MDIO; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); /* * Raise and lower the clock before reading in the data. This accounts * for the turnaround bits. The first clock occurred when we clocked * out the last bit of the Register Address. */ e1000_raise_mdi_clk_82543(hw, &ctrl); e1000_lower_mdi_clk_82543(hw, &ctrl); for (data = 0, i = 0; i < 16; i++) { data <<= 1; e1000_raise_mdi_clk_82543(hw, &ctrl); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Check to see if we shifted in a "1". */ if (ctrl & E1000_CTRL_MDIO) data |= 1; e1000_lower_mdi_clk_82543(hw, &ctrl); } e1000_raise_mdi_clk_82543(hw, &ctrl); e1000_lower_mdi_clk_82543(hw, &ctrl); return data; } /** * e1000_phy_force_speed_duplex_82543 - Force speed/duplex for PHY * @hw: pointer to the HW structure * * Calls the function to force speed and duplex for the m88 PHY, and * if the PHY is not auto-negotiating and the speed is forced to 10Mbit, * then calls the function for polarity reversal workaround. **/ static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_phy_force_speed_duplex_82543"); ret_val = e1000_phy_force_speed_duplex_m88(hw); if (ret_val) goto out; if (!hw->mac.autoneg && (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED)) ret_val = e1000_polarity_reversal_workaround_82543(hw); out: return ret_val; } /** * e1000_polarity_reversal_workaround_82543 - Workaround polarity reversal * @hw: pointer to the HW structure * * When forcing link to 10 Full or 10 Half, the PHY can reverse the polarity * inadvertently. To work around the issue, we disable the transmitter on * the PHY until we have established the link partner's link parameters. **/ static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 mii_status_reg; u16 i; bool link; if (!(hw->phy.ops.write_reg)) goto out; /* Polarity reversal workaround for forced 10F/10H links. */ /* Disable the transmitter on the PHY */ ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); if (ret_val) goto out; /* * This loop will early-out if the NO link condition has been met. * In other words, DO NOT use e1000_phy_has_link_generic() here. */ for (i = PHY_FORCE_TIME; i > 0; i--) { /* * Read the MII Status Register and wait for Link Status bit * to be clear.
*/ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) goto out; ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) goto out; if (!(mii_status_reg & ~MII_SR_LINK_STATUS)) break; msec_delay_irq(100); } /* Recommended delay time after link has been lost */ msec_delay_irq(1000); /* Now we will re-enable the transmitter on the PHY */ ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) goto out; msec_delay_irq(50); ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); if (ret_val) goto out; msec_delay_irq(50); ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); if (ret_val) goto out; msec_delay_irq(50); ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); if (ret_val) goto out; /* * Read the MII Status Register and wait for Link Status bit * to be set. */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_TIME, 100000, &link); if (ret_val) goto out; out: return ret_val; } /** * e1000_phy_hw_reset_82543 - PHY hardware reset * @hw: pointer to the HW structure * * Sets the PHY_RESET_DIR bit in the extended device control register * to put the PHY into a reset and waits for completion. Once the reset * has been accomplished, clear the PHY_RESET_DIR bit to take the PHY out * of reset. **/ static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw) { u32 ctrl_ext; s32 ret_val; DEBUGFUNC("e1000_phy_hw_reset_82543"); /* * Read the Extended Device Control Register, assert the PHY_RESET_DIR * bit to put the PHY into reset... */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); msec_delay(10); /* ...then take it out of reset. */ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); usec_delay(150); if (!(hw->phy.ops.get_cfg_done)) return E1000_SUCCESS; ret_val = hw->phy.ops.get_cfg_done(hw); return ret_val; } /** * e1000_reset_hw_82543 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_reset_hw_82543"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); e1000_set_tbi_sbp_82543(hw, FALSE); /* * Delay to allow any outstanding PCI transactions to complete before * resetting the device */ msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to 82543/82544 MAC\n"); if (hw->mac.type == e1000_82543) { E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); } else { /* * The 82544 can't ACK the 64-bit write when issuing the * reset, so use IO-mapping as a workaround. */ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); } /* * After MAC reset, force reload of NVM to restore power-on * settings to device. */ hw->nvm.ops.reload(hw); msec_delay(2); /* Masking off and clearing any pending interrupts */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); return ret_val; } /** * e1000_init_hw_82543 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. 
**/ static s32 e1000_init_hw_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; u32 ctrl; s32 ret_val; u16 i; DEBUGFUNC("e1000_init_hw_82543"); /* Disabling VLAN filtering */ E1000_WRITE_REG(hw, E1000_VET, 0); mac->ops.clear_vfta(hw); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) { E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); E1000_WRITE_FLUSH(hw); } /* * Set the PCI priority bit correctly in the CTRL register. This * determines if the adapter gives priority to receives, or if it * gives equal priority to transmits and receives. */ if (hw->mac.type == e1000_82543 && dev_spec->dma_fairness) { ctrl = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); } e1000_pcix_mmrbc_workaround_generic(hw); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82543(hw); return ret_val; } /** * e1000_setup_link_82543 - Setup flow control and link settings * @hw: pointer to the HW structure * * Read the EEPROM to determine the initial polarity value and write the * extended device control register with the information before calling * the generic setup link function, which does the following: * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ static s32 e1000_setup_link_82543(struct e1000_hw *hw) { u32 ctrl_ext; s32 ret_val; u16 data; DEBUGFUNC("e1000_setup_link_82543"); /* * Take the 4 bits from NVM word 0xF that determine the initial * polarity value for the SW controlled pins, and setup the * Extended Device Control reg with that info. * This is needed because one of the SW controlled pins is used for * signal detection. So this should be done before phy setup. */ if (hw->mac.type == e1000_82543) { ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); ret_val = -E1000_ERR_NVM; goto out; } ctrl_ext = ((data & NVM_WORD0F_SWPDIO_EXT_MASK) << NVM_SWDPIO_EXT_SHIFT); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); } ret_val = e1000_setup_link_generic(hw); out: return ret_val; } /** * e1000_setup_copper_link_82543 - Configure copper link settings * @hw: pointer to the HW structure * * Configures the link for auto-neg or forced speed and duplex. Then we check * for link; once link is established, the functions to configure collision * distance and flow control are called. **/ static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; bool link; DEBUGFUNC("e1000_setup_copper_link_82543"); ctrl = E1000_READ_REG(hw, E1000_CTRL) | E1000_CTRL_SLU; /* * With 82543, we need to force speed and duplex on the MAC * equal to what the PHY speed and duplex configuration is. * In addition, we need to perform a hardware reset on the * PHY to take it out of reset.
*/ if (hw->mac.type == e1000_82543) { ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ret_val = hw->phy.ops.reset(hw); if (ret_val) goto out; } else { ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); } /* Set MDI/MDI-X, Polarity Reversal, and downshift settings */ ret_val = e1000_copper_link_setup_m88(hw); if (ret_val) goto out; if (hw->mac.autoneg) { /* * Setup autoneg and flow control advertisement and perform * autonegotiation. */ ret_val = e1000_copper_link_autoneg(hw); if (ret_val) goto out; } else { /* * PHY will be set to 10H, 10F, 100H or 100F * depending on user settings. */ DEBUGOUT("Forcing Speed and Duplex\n"); ret_val = e1000_phy_force_speed_duplex_82543(hw); if (ret_val) { DEBUGOUT("Error Forcing Speed and Duplex\n"); goto out; } } /* * Check link status. Wait up to 100 microseconds for link to become * valid. */ ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10, &link); if (ret_val) goto out; if (link) { DEBUGOUT("Valid link established!!!\n"); /* Config the MAC and PHY after link is up */ if (hw->mac.type == e1000_82544) { hw->mac.ops.config_collision_dist(hw); } else { ret_val = e1000_config_mac_to_phy_82543(hw); if (ret_val) goto out; } ret_val = e1000_config_fc_after_link_up_generic(hw); } else { DEBUGOUT("Unable to establish link!!!\n"); } out: return ret_val; } /** * e1000_setup_fiber_link_82543 - Setup link for fiber * @hw: pointer to the HW structure * * Configures collision distance and flow control for fiber links. Upon * successful setup, poll for link. **/ static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_fiber_link_82543"); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Take the link out of reset */ ctrl &= ~E1000_CTRL_LRST; hw->mac.ops.config_collision_dist(hw); ret_val = e1000_commit_fc_settings_generic(hw); if (ret_val) goto out; DEBUGOUT("Auto-negotiation enabled\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); msec_delay(1); /* * For these adapters, the SW definable pin 1 is cleared when the * optics detect a signal. If we have a signal, then poll for a * "Link-Up" indication. */ if (!(E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) ret_val = e1000_poll_fiber_serdes_link_generic(hw); else DEBUGOUT("No signal detected\n"); out: return ret_val; } /** * e1000_check_for_copper_link_82543 - Check for link (Copper) * @hw: pointer to the HW structure * * Checks the phy for link, if link exists, do the following: * - check for downshift * - do polarity workaround (if necessary) * - configure collision distance * - configure flow control after link up * - configure tbi compatibility **/ static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 icr, rctl; s32 ret_val; u16 speed, duplex; bool link; DEBUGFUNC("e1000_check_for_copper_link_82543"); if (!mac->get_link_status) { ret_val = E1000_SUCCESS; goto out; } ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) goto out; if (!link) goto out; /* No link detected */ mac->get_link_status = FALSE; e1000_check_downshift_generic(hw); /* * If we are forcing speed/duplex, then we can return since * we have already determined whether we have link or not. */ if (!mac->autoneg) { /* * If speed and duplex are forced to 10H or 10F, then we will * implement the polarity reversal workaround. 
We disable * interrupts first, and upon returning, restore the device's * interrupt state to its previous value, except for the link * status change interrupt, which will happen due to the * execution of this workaround. */ if (mac->forced_speed_duplex & E1000_ALL_10_SPEED) { E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ret_val = e1000_polarity_reversal_workaround_82543(hw); icr = E1000_READ_REG(hw, E1000_ICR); E1000_WRITE_REG(hw, E1000_ICS, (icr & ~E1000_ICS_LSC)); E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); } ret_val = -E1000_ERR_CONFIG; goto out; } /* * We have a M88E1000 PHY and Auto-Neg is enabled. If we * have Si on board that is 82544 or newer, Auto * Speed Detection takes care of MAC speed/duplex * configuration. So we only need to configure Collision * Distance in the MAC. Otherwise, we need to force * speed/duplex on the MAC to the current PHY speed/duplex * settings. */ if (mac->type == e1000_82544) hw->mac.ops.config_collision_dist(hw); else { ret_val = e1000_config_mac_to_phy_82543(hw); if (ret_val) { DEBUGOUT("Error configuring MAC to PHY settings\n"); goto out; } } /* * Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); /* * At this point we know that we are on copper and we have * auto-negotiated link. These are conditions for checking the link * partner capability register. We use the link speed to determine if * TBI compatibility needs to be turned on or off. If the link is not * at gigabit speed, then TBI compatibility is not needed. If we are * at gigabit speed, we turn on TBI compatibility. */ if (e1000_tbi_compatibility_enabled_82543(hw)) { ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); if (ret_val) { DEBUGOUT("Error getting link speed and duplex\n"); return ret_val; } if (speed != SPEED_1000) { /* * If link speed is not set to gigabit speed, * we do not need to enable TBI compatibility. */ if (e1000_tbi_sbp_enabled_82543(hw)) { /* * If we were previously in that mode, * turn it off. */ e1000_set_tbi_sbp_82543(hw, FALSE); rctl = E1000_READ_REG(hw, E1000_RCTL); rctl &= ~E1000_RCTL_SBP; E1000_WRITE_REG(hw, E1000_RCTL, rctl); } } else { /* * If TBI compatibility was previously off, * turn it on. For compatibility with a TBI link * partner, we will store bad packets. Some * frames have an additional byte on the end and - * will look like CRC errors to to the hardware. + * will look like CRC errors to the hardware. */ if (!e1000_tbi_sbp_enabled_82543(hw)) { e1000_set_tbi_sbp_82543(hw, TRUE); rctl = E1000_READ_REG(hw, E1000_RCTL); rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(hw, E1000_RCTL, rctl); } } } out: return ret_val; }
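The gigabit check at the end of e1000_check_for_copper_link_82543 above reduces to a two-way store-bad-packets decision. A standalone sketch of just that decision; the RCTL bit value is an assumption, not the driver's definition:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_SPEED_1000	1000
#define DEMO_RCTL_SBP	0x00000004	/* assumed bit position */

/* Enable store-bad-packets only when the copper link is at 1000 Mb/s. */
static uint32_t demo_update_sbp(uint32_t rctl, uint16_t speed, bool *sbp_on)
{
	if (speed == DEMO_SPEED_1000) {
		*sbp_on = true;
		rctl |= DEMO_RCTL_SBP;
	} else {
		*sbp_on = false;
		rctl &= ~DEMO_RCTL_SBP;
	}
	return rctl;
}

int main(void)
{
	bool sbp_on;
	uint32_t rctl = 0;

	rctl = demo_update_sbp(rctl, 1000, &sbp_on);
	printf("1000 Mb/s: sbp=%d rctl=0x%08x\n", sbp_on, rctl);
	rctl = demo_update_sbp(rctl, 100, &sbp_on);
	printf("100 Mb/s:  sbp=%d rctl=0x%08x\n", sbp_on, rctl);
	return 0;
}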
/** * e1000_check_for_fiber_link_82543 - Check for link (Fiber) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. **/ static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw, ctrl, status; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_check_for_fiber_link_82543"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); rxcw = E1000_READ_REG(hw, E1000_RXCW); /* * If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), the cable is plugged in (we have signal), * and our link partner is not trying to auto-negotiate with us (we * are receiving idles or data), we need to force link up. We also * need to give auto-negotiation time to complete, in case the cable * was just plugged in. The autoneg_failed flag does this. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 0 == have signal */ if ((!(ctrl & E1000_CTRL_SWDPIN1)) && (!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { if (!mac->autoneg_failed) { mac->autoneg_failed = TRUE; ret_val = 0; goto out; } DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error configuring flow control\n"); goto out; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* * If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. */ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_has_link = TRUE; } out: return ret_val; } /** * e1000_config_mac_to_phy_82543 - Configure MAC to PHY settings * @hw: pointer to the HW structure * * For the 82543 silicon, we need to set the MAC to match the settings * of the PHY, even if the PHY is auto-negotiating. **/ static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_config_mac_to_phy_82543"); if (!(hw->phy.ops.read_reg)) goto out; /* Set the bits to force speed and duplex */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); /* * Set up duplex in the Device Control and Transmit Control * registers depending on negotiated values. */ ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) goto out; ctrl &= ~E1000_CTRL_FD; if (phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD; hw->mac.ops.config_collision_dist(hw); /* * Set up speed in the Device Control register depending on * negotiated values. */ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ctrl |= E1000_CTRL_SPD_1000; else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ctrl |= E1000_CTRL_SPD_100; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); out: return ret_val; }
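e1000_config_mac_to_phy_82543 above copies the PHY's negotiated speed and duplex into the MAC's force bits. A reduced sketch of that mapping; the mask and bit values below are stand-ins for illustration, not the driver's definitions:

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins for the M88 PHY status and MAC control encodings. */
#define DEMO_PSSR_DPLX		0x2000	/* full duplex resolved */
#define DEMO_PSSR_SPEED		0xC000	/* speed field */
#define DEMO_PSSR_1000MBS	0x8000
#define DEMO_PSSR_100MBS	0x4000
#define DEMO_CTRL_FD		0x00000001
#define DEMO_CTRL_SPD_100	0x00000100
#define DEMO_CTRL_SPD_1000	0x00000200

/* Translate the PHY's resolved speed/duplex into MAC force bits. */
static uint32_t demo_mac_to_phy(uint32_t ctrl, uint16_t pssr)
{
	ctrl &= ~(DEMO_CTRL_FD | DEMO_CTRL_SPD_100 | DEMO_CTRL_SPD_1000);
	if (pssr & DEMO_PSSR_DPLX)
		ctrl |= DEMO_CTRL_FD;
	if ((pssr & DEMO_PSSR_SPEED) == DEMO_PSSR_1000MBS)
		ctrl |= DEMO_CTRL_SPD_1000;
	else if ((pssr & DEMO_PSSR_SPEED) == DEMO_PSSR_100MBS)
		ctrl |= DEMO_CTRL_SPD_100;
	return ctrl;
}

int main(void)
{
	/* PHY reports 1000 Mb/s full duplex. */
	printf("ctrl = 0x%08x\n", demo_mac_to_phy(0, 0x8000 | 0x2000));
	return 0;
}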
/** * e1000_write_vfta_82543 - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: the 32-bit offset at which to write the value. * @value: the 32-bit value to write at location offset. * * This writes a 32-bit value to a 32-bit offset in the VLAN filter * table. **/ static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, u32 value) { u32 temp; DEBUGFUNC("e1000_write_vfta_82543"); if ((hw->mac.type == e1000_82544) && (offset & 1)) { temp = E1000_READ_REG_ARRAY(hw, E1000_VFTA, offset - 1); E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset - 1, temp); E1000_WRITE_FLUSH(hw); } else { e1000_write_vfta_generic(hw, offset, value); } } /** * e1000_led_on_82543 - Turn on SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED on. **/ static s32 e1000_led_on_82543(struct e1000_hw *hw) { u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGFUNC("e1000_led_on_82543"); if (hw->mac.type == e1000_82544 && hw->phy.media_type == e1000_media_type_copper) { /* Clear SW-definable Pin 0 to turn on the LED */ ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else { /* Fiber 82544 and all 82543 use this method */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_led_off_82543 - Turn off SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED off. **/ static s32 e1000_led_off_82543(struct e1000_hw *hw) { u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGFUNC("e1000_led_off_82543"); if (hw->mac.type == e1000_82544 && hw->phy.media_type == e1000_media_type_copper) { /* Set SW-definable Pin 0 to turn off the LED */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else { ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_clear_hw_cntrs_82543 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82543"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); } /** * e1000_read_mac_addr_82543 - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. * Since devices with two ports use the same EEPROM, we flip the * last bit in the MAC address for the second port.
* **/ s32 e1000_read_mac_addr_82543(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 offset, nvm_data, i; DEBUGFUNC("e1000_read_mac_addr"); for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = i >> 1; ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); } /* Flip last bit of mac address if we're on second port */ if (hw->bus.func == E1000_FUNC_1) hw->mac.perm_addr[5] ^= 1; for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; out: return ret_val; } Index: stable/11/sys/dev/isci/scil/scic_sds_remote_node_context.c =================================================================== --- stable/11/sys/dev/isci/scil/scic_sds_remote_node_context.c (revision 330445) +++ stable/11/sys/dev/isci/scil/scic_sds_remote_node_context.c (revision 330446) @@ -1,1500 +1,1500 @@ /*- * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2011 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. * The full GNU General Public License is included in this distribution * in the file called LICENSE.GPL. * * BSD LICENSE * * Copyright(c) 2008 - 2011 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /** * @file * * @brief This file contains the structures, constants, and prototypes * associated with the remote node context in the silicon. It * exists to model and manage the remote node context in the silicon. 
*/ #include #include #include #include #include #include #include #include #include #include /** * @brief */ void scic_sds_remote_node_context_construct( SCIC_SDS_REMOTE_DEVICE_T * device, SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc, U16 remote_node_index ) { memset (rnc, 0, sizeof(SCIC_SDS_REMOTE_NODE_CONTEXT_T) ); rnc->remote_node_index = remote_node_index; rnc->device = device; rnc->destination_state = SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_UNSPECIFIED; rnc->parent.logger = device->parent.parent.logger; sci_base_state_machine_construct( &rnc->state_machine, &rnc->parent, scic_sds_remote_node_context_state_table, SCIC_SDS_REMOTE_NODE_CONTEXT_INITIAL_STATE ); sci_base_state_machine_start(&rnc->state_machine); // State logging initialization takes place late for the remote node context // see the resume state handler for the initial state. } /** * This method will return FALSE if the RNC is in the initial state. In * all other states the RNC is considered active and this will return TRUE. * * @note The destroy request of the state machine drives the RNC back to the * initial state. If the state machine changes then this routine will * also have to be changed. * * @param[in] this_rnc The RNC for which the initialization query is being made. * * @return BOOL * @retval TRUE if the state machine is not in the initial state * @retval FALSE if the state machine is in the initial state */ BOOL scic_sds_remote_node_context_is_initialized( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { U32 current_state = sci_base_state_machine_get_state(&this_rnc->state_machine); if (current_state == SCIC_SDS_REMOTE_NODE_CONTEXT_INITIAL_STATE) { return FALSE; } return TRUE; } /** * This method will return TRUE if the remote node context is in the READY * state; otherwise it will return FALSE. * * @param[in] this_rnc The remote node context object whose state is to be * checked. * * @return BOOL * @retval TRUE if the remote node context is in the ready state. * @retval FALSE if the remote node context is not in the ready state. */ BOOL scic_sds_remote_node_context_is_ready( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { U32 current_state = sci_base_state_machine_get_state(&this_rnc->state_machine); if (current_state == SCIC_SDS_REMOTE_NODE_CONTEXT_READY_STATE) { return TRUE; } return FALSE; } /** * This method will construct the RNC buffer for this remote device object. * * @param[in] this_rnc The remote node context for which the hardware RNC * buffer is constructed and filled with the remote device data.
* * @return none */ void scic_sds_remote_node_context_construct_buffer( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { SCU_REMOTE_NODE_CONTEXT_T * rnc; SCIC_SDS_CONTROLLER_T * the_controller; the_controller = scic_sds_remote_device_get_controller(this_rnc->device); rnc = scic_sds_controller_get_remote_node_context_buffer( the_controller, this_rnc->remote_node_index); memset( rnc, 0x00, sizeof(SCU_REMOTE_NODE_CONTEXT_T) * scic_sds_remote_device_node_count(this_rnc->device) ); rnc->ssp.remote_node_index = this_rnc->remote_node_index; rnc->ssp.remote_node_port_width = this_rnc->device->device_port_width; rnc->ssp.logical_port_index = scic_sds_remote_device_get_port_index(this_rnc->device); rnc->ssp.remote_sas_address_hi = SCIC_SWAP_DWORD(this_rnc->device->device_address.high); rnc->ssp.remote_sas_address_lo = SCIC_SWAP_DWORD(this_rnc->device->device_address.low); rnc->ssp.nexus_loss_timer_enable = TRUE; rnc->ssp.check_bit = FALSE; rnc->ssp.is_valid = FALSE; rnc->ssp.is_remote_node_context = TRUE; rnc->ssp.function_number = 0; rnc->ssp.arbitration_wait_time = 0; if ( this_rnc->device->target_protocols.u.bits.attached_sata_device || this_rnc->device->target_protocols.u.bits.attached_stp_target ) { rnc->ssp.connection_occupancy_timeout = the_controller->user_parameters.sds1.stp_max_occupancy_timeout; rnc->ssp.connection_inactivity_timeout = the_controller->user_parameters.sds1.stp_inactivity_timeout; } else { rnc->ssp.connection_occupancy_timeout = the_controller->user_parameters.sds1.ssp_max_occupancy_timeout; rnc->ssp.connection_inactivity_timeout = the_controller->user_parameters.sds1.ssp_inactivity_timeout; } rnc->ssp.initial_arbitration_wait_time = 0; // Open Address Frame Parameters rnc->ssp.oaf_connection_rate = this_rnc->device->connection_rate; rnc->ssp.oaf_features = 0; rnc->ssp.oaf_source_zone_group = 0; rnc->ssp.oaf_more_compatibility_features = 0; } // --------------------------------------------------------------------------- #ifdef SCI_LOGGING /** * This method will enable and turn on state transition logging for the remote * node context object. * * @param[in] this_rnc The remote node context for which state transition * logging is to be enabled. * * @return none */ void scic_sds_remote_node_context_initialize_state_logging( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { sci_base_state_machine_logger_initialize( &this_rnc->state_machine_logger, &this_rnc->state_machine, &this_rnc->parent, scic_cb_logger_log_states, "SCIC_SDS_REMOTE_NODE_CONTEXT_T", "state machine", SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET ); } /** * This method will stop the state machine logging for this object and should * be called before the object is destroyed. * * @param[in] this_rnc The remote node context on which to stop logging state * transitions. * * @return none */ void scic_sds_remote_node_context_deinitialize_state_logging( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { sci_base_state_machine_logger_deinitialize( &this_rnc->state_machine_logger, &this_rnc->state_machine ); } #endif
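The setup helpers that follow latch a completion callback and an opaque cookie on the RNC before changing state. A minimal standalone sketch of that callback-and-cookie pattern; all names here are illustrative:

#include <stdio.h>

typedef void (*demo_callback_t)(void *cookie);

struct demo_rnc {
	demo_callback_t	user_callback;
	void		*user_cookie;
};

/* Record what to call, and with what, once the operation completes. */
static void demo_setup(struct demo_rnc *rnc, demo_callback_t cb, void *cookie)
{
	rnc->user_callback = cb;
	rnc->user_cookie = cookie;
}

/* Invoke and clear the saved callback, as a completion path would. */
static void demo_complete(struct demo_rnc *rnc)
{
	if (rnc->user_callback != NULL)
		rnc->user_callback(rnc->user_cookie);
	rnc->user_callback = NULL;
}

static void demo_done(void *cookie)
{
	printf("resumed: %s\n", (const char *)cookie);
}

int main(void)
{
	struct demo_rnc rnc = { 0 };

	demo_setup(&rnc, demo_done, "device0");
	demo_complete(&rnc);
	return 0;
}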
/** * This method will setup the remote node context object so it will transition * to its ready state. If the remote node context is already setup to * transition to its final state then this function does nothing. * * @param[in] this_rnc * @param[in] the_callback * @param[in] callback_parameter * * @return none */ static void scic_sds_remote_node_context_setup_to_resume( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { if (this_rnc->destination_state != SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_FINAL) { this_rnc->destination_state = SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_READY; this_rnc->user_callback = the_callback; this_rnc->user_cookie = callback_parameter; } } /** * This method will setup the remote node context object so it will * transition to its final state. * * @param[in] this_rnc * @param[in] the_callback * @param[in] callback_parameter * * @return none */ static void scic_sds_remote_node_context_setup_to_destory( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { this_rnc->destination_state = SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_FINAL; this_rnc->user_callback = the_callback; this_rnc->user_cookie = callback_parameter; } /** * This method will continue to resume a remote node context. This is used * in the states where a resume is requested while a resume is in progress. * * @param[in] this_rnc * @param[in] the_callback * @param[in] callback_parameter */ static SCI_STATUS scic_sds_remote_node_context_continue_to_resume_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { if (this_rnc->destination_state == SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_READY) { this_rnc->user_callback = the_callback; this_rnc->user_cookie = callback_parameter; return SCI_SUCCESS; } return SCI_FAILURE_INVALID_STATE; } //****************************************************************************** //* REMOTE NODE CONTEXT STATE MACHINE //****************************************************************************** static SCI_STATUS scic_sds_remote_node_context_default_destruct_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to stop while in unexpected state %d\n", this_rnc, sci_base_state_machine_get_state(&this_rnc->state_machine) )); // We have decided that the destruct request on the remote node context cannot fail // since it is either in the initial/destroyed state or it can be destroyed.
return SCI_SUCCESS; } static SCI_STATUS scic_sds_remote_node_context_default_suspend_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, U32 suspend_type, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to suspend while in wrong state %d\n", this_rnc, sci_base_state_machine_get_state(&this_rnc->state_machine) )); return SCI_FAILURE_INVALID_STATE; } static SCI_STATUS scic_sds_remote_node_context_default_resume_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to resume while in wrong state %d\n", this_rnc, sci_base_state_machine_get_state(&this_rnc->state_machine) )); return SCI_FAILURE_INVALID_STATE; } static SCI_STATUS scic_sds_remote_node_context_default_start_io_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to start io 0x%x while in wrong state %d\n", this_rnc, the_request, sci_base_state_machine_get_state(&this_rnc->state_machine) )); return SCI_FAILURE_INVALID_STATE; } static SCI_STATUS scic_sds_remote_node_context_default_start_task_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to start task 0x%x while in wrong state %d\n", this_rnc, the_request, sci_base_state_machine_get_state(&this_rnc->state_machine) )); return SCI_FAILURE; } static SCI_STATUS scic_sds_remote_node_context_default_event_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, U32 event_code ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to process event 0x%x while in wrong state %d\n", this_rnc, event_code, sci_base_state_machine_get_state(&this_rnc->state_machine) )); return SCI_FAILURE_INVALID_STATE; } /** * This method determines if the task request can be started by the SCU * hardware. When the RNC is in the ready state any task can be started. * * @param[in] this_rnc The rnc for which the task request is targeted. * @param[in] the_request The request which is going to be started. * * @return SCI_STATUS * @retval SCI_SUCCESS */ static SCI_STATUS scic_sds_remote_node_context_success_start_task_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { return SCI_SUCCESS; } /** * This method handles destruct calls from the various state handlers. The * remote node context can be requested to be destroyed from any state. If a * user callback was set, it is always replaced with the destruct request's * user callback.
* * @param[in] this_rnc * @param[in] the_callback * @param[in] callback_parameter * * @return SCI_STATUS */ static SCI_STATUS scic_sds_remote_node_context_general_destruct_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { scic_sds_remote_node_context_setup_to_destory( this_rnc, the_callback, callback_parameter ); sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE ); return SCI_SUCCESS; } // --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_reset_required_start_io_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to start io 0x%x while in wrong state %d\n", this_rnc, the_request, sci_base_state_machine_get_state(&this_rnc->state_machine) )); return SCI_FAILURE_REMOTE_DEVICE_RESET_REQUIRED; } // --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_initial_state_resume_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { if (this_rnc->remote_node_index != SCIC_SDS_REMOTE_NODE_CONTEXT_INVALID_INDEX) { scic_sds_remote_node_context_setup_to_resume( this_rnc, the_callback, callback_parameter ); scic_sds_remote_node_context_construct_buffer(this_rnc); #if defined (SCI_LOGGING) // If a remote node context has a logger already, don't work on its state // logging. 
if (this_rnc->state_machine.previous_state_id != SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE) scic_sds_remote_node_context_initialize_state_logging(this_rnc); #endif sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_POSTING_STATE ); return SCI_SUCCESS; } return SCI_FAILURE_INVALID_STATE; } // --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_posting_state_event_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, U32 event_code ) { SCI_STATUS status; switch (scu_get_event_code(event_code)) { case SCU_EVENT_POST_RNC_COMPLETE: status = SCI_SUCCESS; sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_READY_STATE ); break; default: status = SCI_FAILURE; SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to process unexpected event 0x%x while in posting state\n", this_rnc, event_code )); break; } return status; } // --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_invalidating_state_destruct_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { scic_sds_remote_node_context_setup_to_destory( this_rnc, the_callback, callback_parameter ); return SCI_SUCCESS; } static SCI_STATUS scic_sds_remote_node_context_invalidating_state_event_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, U32 event_code ) { SCI_STATUS status; if (scu_get_event_code(event_code) == SCU_EVENT_POST_RNC_INVALIDATE_COMPLETE) { status = SCI_SUCCESS; if (this_rnc->destination_state == SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_FINAL) { sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_INITIAL_STATE ); } else { sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_POSTING_STATE ); } } else { switch (scu_get_event_type(event_code)) { case SCU_EVENT_TYPE_RNC_SUSPEND_TX: case SCU_EVENT_TYPE_RNC_SUSPEND_TX_RX: // We really don't care if the hardware is going to suspend // the device since it's being invalidated anyway SCIC_LOG_INFO(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x was suspended by hardware while being invalidated.\n", this_rnc )); status = SCI_SUCCESS; break; default: SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to process event 0x%x while in state %d.\n", this_rnc, event_code, sci_base_state_machine_get_state(&this_rnc->state_machine) )); status = SCI_FAILURE; break; } } return status; }
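These handlers branch on fields decoded from a raw 32-bit event dword via scu_get_event_code(), scu_get_event_type(), and scu_get_event_specifier(). A standalone sketch of that style of field decoding; the layout below is entirely assumed, not the SCU's real encoding:

#include <stdint.h>
#include <stdio.h>

/* Assumed field layout: [type:8][code:8][specifier:16]. */
static uint32_t demo_event_type(uint32_t ev)      { return ev >> 24; }
static uint32_t demo_event_code(uint32_t ev)      { return (ev >> 16) & 0xff; }
static uint32_t demo_event_specifier(uint32_t ev) { return ev & 0xffff; }

int main(void)
{
	uint32_t ev = 0x05020001;	/* example event dword */

	printf("type=0x%02x code=0x%02x specifier=0x%04x\n",
	    demo_event_type(ev), demo_event_code(ev),
	    demo_event_specifier(ev));
	return 0;
}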
// --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_resuming_state_event_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, U32 event_code ) { SCI_STATUS status; if (scu_get_event_code(event_code) == SCU_EVENT_POST_RCN_RELEASE) { status = SCI_SUCCESS; sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_READY_STATE ); } else { switch (scu_get_event_type(event_code)) { case SCU_EVENT_TYPE_RNC_SUSPEND_TX: case SCU_EVENT_TYPE_RNC_SUSPEND_TX_RX: // We really don't care if the hardware is going to suspend // the device since it's being resumed anyway SCIC_LOG_INFO(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x was suspended by hardware while being resumed.\n", this_rnc )); status = SCI_SUCCESS; break; default: SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to process event 0x%x while in state %d.\n", this_rnc, event_code, sci_base_state_machine_get_state(&this_rnc->state_machine) )); status = SCI_FAILURE; break; } } return status; } // --------------------------------------------------------------------------- /** * This method will handle the suspend requests from the ready state. * * @param[in] this_rnc The remote node context object being suspended. * @param[in] the_callback The callback when the suspension is complete. * @param[in] callback_parameter The parameter that is to be passed into the * callback. * * @return SCI_SUCCESS */ static SCI_STATUS scic_sds_remote_node_context_ready_state_suspend_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, U32 suspend_type, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { this_rnc->user_callback = the_callback; this_rnc->user_cookie = callback_parameter; this_rnc->suspension_code = suspend_type; if (suspend_type == SCI_SOFTWARE_SUSPENSION) { scic_sds_remote_device_post_request( this_rnc->device, SCU_CONTEXT_COMMAND_POST_RNC_SUSPEND_TX ); } sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_AWAIT_SUSPENSION_STATE ); return SCI_SUCCESS; }
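The ready-state suspend handler above posts a hardware suspend only for software-initiated suspensions and then parks the state machine in an await state. A reduced sketch of that control flow; the enum and names are illustrative:

#include <stdbool.h>
#include <stdio.h>

enum demo_state { DEMO_READY, DEMO_AWAIT_SUSPENSION, DEMO_TX_SUSPENDED };

struct demo_rnc {
	enum demo_state	state;
	bool		posted_suspend_tx;
};

/* Software suspensions must be posted to the hardware explicitly;
 * hardware-initiated ones arrive later as events. */
static void demo_suspend(struct demo_rnc *rnc, bool software_suspension)
{
	if (software_suspension)
		rnc->posted_suspend_tx = true;	/* model the SUSPEND_TX post */
	rnc->state = DEMO_AWAIT_SUSPENSION;
}

int main(void)
{
	struct demo_rnc rnc = { DEMO_READY, false };

	demo_suspend(&rnc, true);
	printf("state=%d posted=%d\n", rnc.state, rnc.posted_suspend_tx);
	return 0;
}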
/** * This method determines if the io request can be started by the SCU * hardware. When the RNC is in the ready state any io request can be started. * * @param[in] this_rnc The rnc for which the io request is targeted. * @param[in] the_request The request which is going to be started. * * @return SCI_STATUS * @retval SCI_SUCCESS */ static SCI_STATUS scic_sds_remote_node_context_ready_state_start_io_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { return SCI_SUCCESS; } static SCI_STATUS scic_sds_remote_node_context_ready_state_event_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, U32 event_code ) { SCI_STATUS status; switch (scu_get_event_type(event_code)) { case SCU_EVENT_TL_RNC_SUSPEND_TX: sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_TX_SUSPENDED_STATE ); this_rnc->suspension_code = scu_get_event_specifier(event_code); status = SCI_SUCCESS; break; case SCU_EVENT_TL_RNC_SUSPEND_TX_RX: sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_TX_RX_SUSPENDED_STATE ); this_rnc->suspension_code = scu_get_event_specifier(event_code); status = SCI_SUCCESS; break; default: SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to process event 0x%x while in state %d.\n", this_rnc, event_code, sci_base_state_machine_get_state(&this_rnc->state_machine) )); status = SCI_FAILURE; break; } return status; } // --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_tx_suspended_state_resume_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { SMP_DISCOVER_RESPONSE_PROTOCOLS_T protocols; scic_sds_remote_node_context_setup_to_resume( this_rnc, the_callback, callback_parameter ); // If this is an expander attached SATA device we must invalidate // and repost the RNC since this is the only way to clear the // TCi to NCQ tag mapping table for the RNi // All other device types we can just resume. scic_remote_device_get_protocols(this_rnc->device, &protocols); if ( (protocols.u.bits.attached_stp_target == 1) && !(this_rnc->device->is_direct_attached) ) { sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE ); } else { sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_RESUMING_STATE ); } return SCI_SUCCESS; }
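The resume handler above must invalidate and repost rather than simply resume when the target is an expander-attached STP/SATA device. A standalone sketch of just that routing decision; the types and names are illustrative:

#include <stdbool.h>
#include <stdio.h>

enum demo_next_state { DEMO_INVALIDATING, DEMO_RESUMING };

/* Expander-attached STP targets need an invalidate/repost cycle to clear
 * the TCi-to-NCQ-tag mapping; everything else can resume directly. */
static enum demo_next_state
demo_resume_route(bool attached_stp_target, bool is_direct_attached)
{
	if (attached_stp_target && !is_direct_attached)
		return DEMO_INVALIDATING;
	return DEMO_RESUMING;
}

int main(void)
{
	printf("expander STP -> %d\n", demo_resume_route(true, false));
	printf("direct SATA  -> %d\n", demo_resume_route(true, true));
	printf("SSP target   -> %d\n", demo_resume_route(false, false));
	return 0;
}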
* * @return SCI_STATUS * @retval SCI_SUCCESS */ static SCI_STATUS scic_sds_remote_node_context_suspended_start_task_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { scic_sds_remote_node_context_resume(this_rnc, NULL, NULL); return SCI_SUCCESS; } // --------------------------------------------------------------------------- static SCI_STATUS scic_sds_remote_node_context_tx_rx_suspended_state_resume_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { scic_sds_remote_node_context_setup_to_resume( this_rnc, the_callback, callback_parameter ); sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_RESUMING_STATE ); return SCI_FAILURE_INVALID_STATE; } // --------------------------------------------------------------------------- /** * This method handles resume requests received while the remote node context * is awaiting a suspension; the resume is set up and carried out once the * suspension completes. */ static SCI_STATUS scic_sds_remote_node_context_await_suspension_state_resume_handler( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc, SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK the_callback, void * callback_parameter ) { scic_sds_remote_node_context_setup_to_resume( this_rnc, the_callback, callback_parameter ); return SCI_SUCCESS; } /** * This method will report the success or failure of an attempt to start a new * task request to the hardware. Since all task requests are sent on the high * priority queue they can be sent when the RNC is in a TX suspend state. * * @param[in] this_rnc The remote node context which is to receive the task * request. * @param[in] the_request The task request to be transmitted to the remote * target device. * * @return SCI_STATUS * @retval SCI_SUCCESS */ static SCI_STATUS scic_sds_remote_node_context_await_suspension_state_start_task_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, struct SCIC_SDS_REQUEST * the_request ) { return SCI_SUCCESS; } static SCI_STATUS scic_sds_remote_node_context_await_suspension_state_event_handler( struct SCIC_SDS_REMOTE_NODE_CONTEXT * this_rnc, U32 event_code ) { SCI_STATUS status; switch (scu_get_event_type(event_code)) { case SCU_EVENT_TL_RNC_SUSPEND_TX: sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_TX_SUSPENDED_STATE ); this_rnc->suspension_code = scu_get_event_specifier(event_code); status = SCI_SUCCESS; break; case SCU_EVENT_TL_RNC_SUSPEND_TX_RX: sci_base_state_machine_change_state( &this_rnc->state_machine, SCIC_SDS_REMOTE_NODE_CONTEXT_TX_RX_SUSPENDED_STATE ); this_rnc->suspension_code = scu_get_event_specifier(event_code); status = SCI_SUCCESS; break; default: SCIC_LOG_WARNING(( sci_base_object_get_logger(this_rnc->device), SCIC_LOG_OBJECT_SSP_REMOTE_TARGET | SCIC_LOG_OBJECT_SMP_REMOTE_TARGET | SCIC_LOG_OBJECT_STP_REMOTE_TARGET, "SCIC Remote Node Context 0x%x requested to process event 0x%x while in state %d.\n", this_rnc, event_code, sci_base_state_machine_get_state(&this_rnc->state_machine) )); status = SCI_FAILURE; break; } return status; } // --------------------------------------------------------------------------- SCIC_SDS_REMOTE_NODE_CONTEXT_HANDLERS scic_sds_remote_node_context_state_handler_table[ SCIC_SDS_REMOTE_NODE_CONTEXT_MAX_STATES] = { // SCIC_SDS_REMOTE_NODE_CONTEXT_INITIAL_STATE { scic_sds_remote_node_context_default_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_initial_state_resume_handler, scic_sds_remote_node_context_default_start_io_handler, scic_sds_remote_node_context_default_start_task_handler,
scic_sds_remote_node_context_default_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_POSTING_STATE { scic_sds_remote_node_context_general_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_continue_to_resume_handler, scic_sds_remote_node_context_default_start_io_handler, scic_sds_remote_node_context_default_start_task_handler, scic_sds_remote_node_context_posting_state_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE { scic_sds_remote_node_context_invalidating_state_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_continue_to_resume_handler, scic_sds_remote_node_context_default_start_io_handler, scic_sds_remote_node_context_default_start_task_handler, scic_sds_remote_node_context_invalidating_state_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_RESUMING_STATE { scic_sds_remote_node_context_general_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_continue_to_resume_handler, scic_sds_remote_node_context_default_start_io_handler, scic_sds_remote_node_context_success_start_task_handler, scic_sds_remote_node_context_resuming_state_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_READY_STATE { scic_sds_remote_node_context_general_destruct_handler, scic_sds_remote_node_context_ready_state_suspend_handler, scic_sds_remote_node_context_default_resume_handler, scic_sds_remote_node_context_ready_state_start_io_handler, scic_sds_remote_node_context_success_start_task_handler, scic_sds_remote_node_context_ready_state_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_TX_SUSPENDED_STATE { scic_sds_remote_node_context_general_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_tx_suspended_state_resume_handler, scic_sds_remote_node_context_reset_required_start_io_handler, scic_sds_remote_node_context_suspended_start_task_handler, scic_sds_remote_node_context_default_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_TX_RX_SUSPENDED_STATE { scic_sds_remote_node_context_general_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_tx_rx_suspended_state_resume_handler, scic_sds_remote_node_context_reset_required_start_io_handler, scic_sds_remote_node_context_suspended_start_task_handler, scic_sds_remote_node_context_default_event_handler }, // SCIC_SDS_REMOTE_NODE_CONTEXT_AWAIT_SUSPENSION_STATE { scic_sds_remote_node_context_general_destruct_handler, scic_sds_remote_node_context_default_suspend_handler, scic_sds_remote_node_context_await_suspension_state_resume_handler, scic_sds_remote_node_context_reset_required_start_io_handler, scic_sds_remote_node_context_await_suspension_state_start_task_handler, scic_sds_remote_node_context_await_suspension_state_event_handler } }; //***************************************************************************** //* REMOTE NODE CONTEXT PRIVATE METHODS //***************************************************************************** /** * This method just calls the user callback function and then resets the * callback. 
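 * The stored callback is cleared before it is invoked, so a callback that
 * immediately registers a new callback/cookie pair is not lost. A condensed
 * sketch of the pattern (cb and cookie are illustrative locals; the body
 * below uses local_user_callback and local_user_cookie):
 *
 *   cb = rnc->user_callback;   rnc->user_callback = NULL;
 *   cookie = rnc->user_cookie; rnc->user_cookie = NULL;
 *   if (cb != NULL) (*cb)(cookie);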
* * @param[in,out] rnc */ static void scic_sds_remote_node_context_notify_user( SCIC_SDS_REMOTE_NODE_CONTEXT_T *rnc ) { SCIC_SDS_REMOTE_NODE_CONTEXT_CALLBACK local_user_callback = rnc->user_callback; void * local_user_cookie = rnc->user_cookie; //we need to set the user_callback to NULL before it is called, because //the user callback may itself set up a new user callback. If we nullified //the user_callback after it was called, we would risk losing the freshly //set user callback. rnc->user_callback = NULL; rnc->user_cookie = NULL; if (local_user_callback != NULL) { (*local_user_callback)(local_user_cookie); } } /** * This method will continue the remote node context state machine by * requesting a resume from its current state. * * @param[in] rnc */ static void scic_sds_remote_node_context_continue_state_transitions( SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc ) { if (rnc->destination_state == SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_READY) { rnc->state_handlers->resume_handler( rnc, rnc->user_callback, rnc->user_cookie ); } } /** * This method will mark the rnc buffer as being valid and post the request to * the hardware. * * @param[in] this_rnc The remote node context object that is to be * validated. * * @return none */ static void scic_sds_remote_node_context_validate_context_buffer( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { SCU_REMOTE_NODE_CONTEXT_T *rnc_buffer; rnc_buffer = scic_sds_controller_get_remote_node_context_buffer( scic_sds_remote_device_get_controller(this_rnc->device), this_rnc->remote_node_index ); rnc_buffer->ssp.is_valid = TRUE; if ( !this_rnc->device->is_direct_attached && this_rnc->device->target_protocols.u.bits.attached_stp_target ) { scic_sds_remote_device_post_request( this_rnc->device, SCU_CONTEXT_COMMAND_POST_RNC_96 ); } else { scic_sds_remote_device_post_request( this_rnc->device, SCU_CONTEXT_COMMAND_POST_RNC_32 ); if (this_rnc->device->is_direct_attached) { scic_sds_port_setup_transports( this_rnc->device->owning_port, this_rnc->remote_node_index ); } } } /** * This method will update the RNC buffer and post the invalidate request. * * @param[in] this_rnc The remote node context object that is to be * invalidated. * * @return none */ static void scic_sds_remote_node_context_invalidate_context_buffer( SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc ) { SCU_REMOTE_NODE_CONTEXT_T *rnc_buffer; rnc_buffer = scic_sds_controller_get_remote_node_context_buffer( scic_sds_remote_device_get_controller(this_rnc->device), this_rnc->remote_node_index ); rnc_buffer->ssp.is_valid = FALSE; scic_sds_remote_device_post_request( this_rnc->device, SCU_CONTEXT_COMMAND_POST_RNC_INVALIDATE ); } //***************************************************************************** //* REMOTE NODE CONTEXT STATE ENTER AND EXIT METHODS //***************************************************************************** /** * * * @param[in] object */ static void scic_sds_remote_node_context_initial_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_INITIAL_STATE ); // Check to see if we have gotten back to the initial state because someone // requested to destroy the remote node context object.
if ( rnc->state_machine.previous_state_id == SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE ) { rnc->destination_state = SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_UNSPECIFIED; scic_sds_remote_node_context_notify_user(rnc); // Since we are destroying the remote node context, deinitialize the state logging; // should we resume the remote node context, the state logging will be reinitialized // in the resume handler. scic_sds_remote_node_context_deinitialize_state_logging(rnc); } } /** * * * @param[in] object */ static void scic_sds_remote_node_context_posting_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * this_rnc; this_rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( this_rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_POSTING_STATE ); scic_sds_remote_node_context_validate_context_buffer(this_rnc); } /** * * * @param[in] object */ static void scic_sds_remote_node_context_invalidating_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE ); scic_sds_remote_node_context_invalidate_context_buffer(rnc); } /** * * * @param[in] object */ static void scic_sds_remote_node_context_resuming_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; SMP_DISCOVER_RESPONSE_PROTOCOLS_T protocols; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_RESUMING_STATE ); // For direct attached SATA devices we need to clear the TLCR // NCQ to TCi tag mapping on the phy. In cases where we // resume because of a target reset we also need to update // the STPTLDARNI register with the RNi of the device. scic_remote_device_get_protocols(rnc->device, &protocols); if ( (protocols.u.bits.attached_stp_target == 1) && (rnc->device->is_direct_attached) ) { scic_sds_port_setup_transports( rnc->device->owning_port, rnc->remote_node_index ); } scic_sds_remote_device_post_request( rnc->device, SCU_CONTEXT_COMMAND_POST_RNC_RESUME ); } /** * * * @param[in] object */ static void scic_sds_remote_node_context_ready_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_READY_STATE ); rnc->destination_state = SCIC_SDS_REMOTE_NODE_DESTINATION_STATE_UNSPECIFIED; if (rnc->user_callback != NULL) { scic_sds_remote_node_context_notify_user(rnc); } } /** * * * @param[in] object */ static void scic_sds_remote_node_context_tx_suspended_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_TX_SUSPENDED_STATE ); scic_sds_remote_node_context_continue_state_transitions(rnc); } /** * * * @param[in] object */ static void scic_sds_remote_node_context_tx_rx_suspended_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_TX_RX_SUSPENDED_STATE ); scic_sds_remote_node_context_continue_state_transitions(rnc); } /** * * * @param[in]
object */ static void scic_sds_remote_node_context_await_suspension_state_enter( SCI_BASE_OBJECT_T * object ) { SCIC_SDS_REMOTE_NODE_CONTEXT_T * rnc; rnc = (SCIC_SDS_REMOTE_NODE_CONTEXT_T *)object; SET_STATE_HANDLER( rnc, scic_sds_remote_node_context_state_handler_table, SCIC_SDS_REMOTE_NODE_CONTEXT_AWAIT_SUSPENSION_STATE ); } // --------------------------------------------------------------------------- SCI_BASE_STATE_T scic_sds_remote_node_context_state_table[ SCIC_SDS_REMOTE_NODE_CONTEXT_MAX_STATES] = { { SCIC_SDS_REMOTE_NODE_CONTEXT_INITIAL_STATE, scic_sds_remote_node_context_initial_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_POSTING_STATE, scic_sds_remote_node_context_posting_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_INVALIDATING_STATE, scic_sds_remote_node_context_invalidating_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_RESUMING_STATE, scic_sds_remote_node_context_resuming_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_READY_STATE, scic_sds_remote_node_context_ready_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_TX_SUSPENDED_STATE, scic_sds_remote_node_context_tx_suspended_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_TX_RX_SUSPENDED_STATE, scic_sds_remote_node_context_tx_rx_suspended_state_enter, NULL }, { SCIC_SDS_REMOTE_NODE_CONTEXT_AWAIT_SUSPENSION_STATE, scic_sds_remote_node_context_await_suspension_state_enter, NULL } }; Index: stable/11/sys/dev/isci/scil/scif_sas_controller.c =================================================================== --- stable/11/sys/dev/isci/scil/scif_sas_controller.c (revision 330445) +++ stable/11/sys/dev/isci/scil/scif_sas_controller.c (revision 330446) @@ -1,1255 +1,1255 @@ /*- * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2008 - 2011 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. * The full GNU General Public License is included in this distribution * in the file called LICENSE.GPL. * * BSD LICENSE * * Copyright(c) 2008 - 2011 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /** * @file * * @brief This file contains the implementation of the SCIF_SAS_CONTROLLER * object. */ #include #include #include #include #include #include #include #include #include //****************************************************************************** //* P U B L I C M E T H O D S //****************************************************************************** SCI_STATUS scif_controller_construct( SCI_LIBRARY_HANDLE_T library, SCI_CONTROLLER_HANDLE_T controller, void * user_object ) { SCI_STATUS status = SCI_SUCCESS; SCIF_SAS_LIBRARY_T * fw_library = (SCIF_SAS_LIBRARY_T*) library; SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. if ((library == SCI_INVALID_HANDLE) || (controller == SCI_INVALID_HANDLE)) return SCI_FAILURE_INVALID_PARAMETER_VALUE; SCIF_LOG_TRACE(( sci_base_object_get_logger(library), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_INITIALIZATION, "scif_controller_construct(0x%x, 0x%x) enter\n", library, controller )); // Construct the base controller. As part of constructing the base // controller we ask it to also manage the MDL iteration for the Core. sci_base_controller_construct( &fw_controller->parent, sci_base_object_get_logger(fw_library), scif_sas_controller_state_table, fw_controller->mdes, SCIF_SAS_MAX_MEMORY_DESCRIPTORS, sci_controller_get_memory_descriptor_list_handle(fw_controller->core_object) ); scif_sas_controller_initialize_state_logging(fw_controller); sci_object_set_association(fw_controller, user_object); status = scic_controller_construct( fw_library->core_object, fw_controller->core_object, fw_controller ); // If the core controller was successfully constructed, then // finish construction of the framework controller. if (status == SCI_SUCCESS) { // Set the association in the core controller to this framework // controller. sci_object_set_association( (SCI_OBJECT_HANDLE_T) fw_controller->core_object, fw_controller ); sci_base_state_machine_change_state( &fw_controller->parent.state_machine, SCI_BASE_CONTROLLER_STATE_RESET ); } return status; } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_initialize( SCI_CONTROLLER_HANDLE_T controller ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. 
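// (Like the other scif_controller_* entry points that follow, the shape here
// is: validate the user-supplied handle, emit a trace message, then dispatch
// to the handler for the controller's current state.)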
if (controller == SCI_INVALID_HANDLE) return SCI_FAILURE_INVALID_PARAMETER_VALUE; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_INITIALIZATION, "scif_controller_initialize(0x%x) enter\n", controller )); return fw_controller->state_handlers->initialize_handler( &fw_controller->parent ); } // --------------------------------------------------------------------------- U32 scif_controller_get_suggested_start_timeout( SCI_CONTROLLER_HANDLE_T controller ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. if (controller == SCI_INVALID_HANDLE) return 0; // Currently we aren't adding any additional time into the suggested // timeout value for the start operation. Simply utilize the core // value. return scic_controller_get_suggested_start_timeout(fw_controller->core_object); } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_start( SCI_CONTROLLER_HANDLE_T controller, U32 timeout ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. if (controller == SCI_INVALID_HANDLE) return SCI_FAILURE_INVALID_PARAMETER_VALUE; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_INITIALIZATION, "scif_controller_start(0x%x, 0x%x) enter\n", controller, timeout )); return fw_controller->state_handlers-> start_handler(&fw_controller->parent, timeout); } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_stop( SCI_CONTROLLER_HANDLE_T controller, U32 timeout ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. if (controller == SCI_INVALID_HANDLE) return SCI_FAILURE_INVALID_PARAMETER_VALUE; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_SHUTDOWN, "scif_controller_stop(0x%x, 0x%x) enter\n", controller, timeout )); return fw_controller->state_handlers-> stop_handler(&fw_controller->parent, timeout); } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_reset( SCI_CONTROLLER_HANDLE_T controller ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. 
if (controller == SCI_INVALID_HANDLE) return SCI_FAILURE_INVALID_PARAMETER_VALUE; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_CONTROLLER_RESET, "scif_controller_reset(0x%x) enter\n", controller )); return fw_controller->state_handlers-> reset_handler(&fw_controller->parent); } // --------------------------------------------------------------------------- SCI_CONTROLLER_HANDLE_T scif_controller_get_scic_handle( SCI_CONTROLLER_HANDLE_T controller ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; return fw_controller->core_object; } // --------------------------------------------------------------------------- SCI_IO_STATUS scif_controller_start_io( SCI_CONTROLLER_HANDLE_T controller, SCI_REMOTE_DEVICE_HANDLE_T remote_device, SCI_IO_REQUEST_HANDLE_T io_request, U16 io_tag ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; SCI_STATUS status; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_IO_REQUEST, "scif_controller_start_io(0x%x, 0x%x, 0x%x, 0x%x) enter\n", controller, remote_device, io_request, io_tag )); if ( sci_pool_empty(fw_controller->hprq.pool) || scif_sas_controller_sufficient_resource(controller) ) { status = fw_controller->state_handlers->start_io_handler( (SCI_BASE_CONTROLLER_T*) controller, (SCI_BASE_REMOTE_DEVICE_T*) remote_device, (SCI_BASE_REQUEST_T*) io_request, io_tag ); } else status = SCI_FAILURE_INSUFFICIENT_RESOURCES; return (SCI_IO_STATUS)status; } // --------------------------------------------------------------------------- SCI_TASK_STATUS scif_controller_start_task( SCI_CONTROLLER_HANDLE_T controller, SCI_REMOTE_DEVICE_HANDLE_T remote_device, SCI_TASK_REQUEST_HANDLE_T task_request, U16 io_tag ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; SCI_STATUS status; // Validate the user supplied parameters. 
if ( (controller == SCI_INVALID_HANDLE) || (remote_device == SCI_INVALID_HANDLE) || (task_request == SCI_INVALID_HANDLE) ) { return SCI_TASK_FAILURE_INVALID_PARAMETER_VALUE; } SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_TASK_MANAGEMENT, "scif_controller_start_task(0x%x, 0x%x, 0x%x, 0x%x) enter\n", controller, remote_device, task_request, io_tag )); if (scif_sas_controller_sufficient_resource(controller)) { status = fw_controller->state_handlers->start_task_handler( (SCI_BASE_CONTROLLER_T*) controller, (SCI_BASE_REMOTE_DEVICE_T*) remote_device, (SCI_BASE_REQUEST_T*) task_request, io_tag ); } else status = SCI_FAILURE_INSUFFICIENT_RESOURCES; return (SCI_TASK_STATUS)status; } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_complete_io( SCI_CONTROLLER_HANDLE_T controller, SCI_REMOTE_DEVICE_HANDLE_T remote_device, SCI_IO_REQUEST_HANDLE_T io_request ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_IO_REQUEST, "scif_controller_complete_io(0x%x, 0x%x, 0x%x) enter\n", controller, remote_device, io_request )); return fw_controller->state_handlers->complete_io_handler( (SCI_BASE_CONTROLLER_T*) controller, (SCI_BASE_REMOTE_DEVICE_T*) remote_device, (SCI_BASE_REQUEST_T*) io_request ); } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_complete_task( SCI_CONTROLLER_HANDLE_T controller, SCI_REMOTE_DEVICE_HANDLE_T remote_device, SCI_TASK_REQUEST_HANDLE_T task_request ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. if ( (controller == SCI_INVALID_HANDLE) || (remote_device == SCI_INVALID_HANDLE) || (task_request == SCI_INVALID_HANDLE) ) { return SCI_FAILURE_INVALID_PARAMETER_VALUE; } SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_TASK_MANAGEMENT, "scif_controller_complete_task(0x%x, 0x%x, 0x%x) enter\n", controller, remote_device, task_request )); return fw_controller->state_handlers->complete_task_handler( (SCI_BASE_CONTROLLER_T*) controller, (SCI_BASE_REMOTE_DEVICE_T*) remote_device, (SCI_BASE_REQUEST_T*) task_request ); } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_get_domain_handle( SCI_CONTROLLER_HANDLE_T controller, U8 port_index, SCI_DOMAIN_HANDLE_T * domain_handle ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; // Validate the user supplied parameters. if (controller == SCI_INVALID_HANDLE) return SCI_FAILURE_INVALID_PARAMETER_VALUE; // Retrieve the domain handle if the supplied index is legitimate. if (port_index < SCI_MAX_PORTS) { *domain_handle = &fw_controller->domains[port_index]; return SCI_SUCCESS; } return SCI_FAILURE_INVALID_PORT; } /** * @brief This method builds the memory descriptor list for this * controller. * * @param[in] fw_controller This parameter specifies the framework * controller object for which to build the MDL. * * @return none */ void scif_sas_controller_build_mdl( SCIF_SAS_CONTROLLER_T * fw_controller ) { // one internal request for each domain. 
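// Sizing sketch: this single MDE must cover every preallocated internal
// request object, i.e. internal_request_entries *
// scif_sas_internal_request_get_object_size() bytes; the remaining arguments
// appear to request 4-byte alignment and physically contiguous memory.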
sci_base_mde_construct( &fw_controller->mdes[SCIF_SAS_MDE_INTERNAL_IO], 4, fw_controller->internal_request_entries * scif_sas_internal_request_get_object_size(), SCI_MDE_ATTRIBUTE_PHYSICALLY_CONTIGUOUS ); } // --------------------------------------------------------------------------- SCI_STATUS scif_controller_set_mode( SCI_CONTROLLER_HANDLE_T controller, SCI_CONTROLLER_MODE mode ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; SCI_STATUS status = SCI_SUCCESS; if ( (fw_controller->parent.state_machine.current_state_id == SCI_BASE_CONTROLLER_STATE_INITIALIZING) || (fw_controller->parent.state_machine.current_state_id == SCI_BASE_CONTROLLER_STATE_INITIALIZED) ) { switch (mode) { case SCI_MODE_SPEED: fw_controller->internal_request_entries = MIN(fw_controller->internal_request_entries, SCIF_SAS_MAX_INTERNAL_REQUEST_COUNT); scif_sas_controller_build_mdl(fw_controller); break; case SCI_MODE_SIZE: fw_controller->internal_request_entries = MIN(fw_controller->internal_request_entries, SCIF_SAS_MIN_INTERNAL_REQUEST_COUNT); scif_sas_controller_build_mdl(fw_controller); break; default: status = SCI_FAILURE_INVALID_PARAMETER_VALUE; break; } } else status = SCI_FAILURE_INVALID_STATE; if (status != SCI_SUCCESS) { return status; } else { // Currently, the framework doesn't change any configuration for // speed or size modes; it effectively defaults to speed mode. return scic_controller_set_mode(fw_controller->core_object, mode); } } // --------------------------------------------------------------------------- U32 scif_controller_get_sat_compliance_version( void ) { /// @todo Fix return of SAT compliance version. return 0; } // --------------------------------------------------------------------------- U32 scif_controller_get_sat_compliance_version_revision( void ) { /// @todo Fix return of SAT compliance revision. return 0; } // --------------------------------------------------------------------------- SCI_STATUS scif_user_parameters_set( SCI_CONTROLLER_HANDLE_T controller, SCIF_USER_PARAMETERS_T * scif_parms ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; //validate all the registry entries before overwriting the default parameter //values.
if (scif_parms->sas.is_sata_ncq_enabled != 1 && scif_parms->sas.is_sata_ncq_enabled != 0) return SCI_FAILURE_INVALID_PARAMETER_VALUE; if (scif_parms->sas.max_ncq_depth < 1 || scif_parms->sas.max_ncq_depth > 32) return SCI_FAILURE_INVALID_PARAMETER_VALUE; if (scif_parms->sas.is_sata_standby_timer_enabled != 1 && scif_parms->sas.is_sata_standby_timer_enabled != 0) return SCI_FAILURE_INVALID_PARAMETER_VALUE; if (scif_parms->sas.is_non_zero_buffer_offsets_enabled != 1 && scif_parms->sas.is_non_zero_buffer_offsets_enabled != 0) return SCI_FAILURE_INVALID_PARAMETER_VALUE; if (scif_parms->sas.reset_type != SCI_SAS_ABORT_TASK && scif_parms->sas.reset_type != SCI_SAS_ABORT_TASK_SET && scif_parms->sas.reset_type != SCI_SAS_CLEAR_TASK_SET && scif_parms->sas.reset_type != SCI_SAS_LOGICAL_UNIT_RESET && scif_parms->sas.reset_type != SCI_SAS_I_T_NEXUS_RESET && scif_parms->sas.reset_type != SCI_SAS_CLEAR_ACA && scif_parms->sas.reset_type != SCI_SAS_QUERY_TASK && scif_parms->sas.reset_type != SCI_SAS_QUERY_TASK_SET && scif_parms->sas.reset_type != SCI_SAS_QUERY_ASYNCHRONOUS_EVENT && scif_parms->sas.reset_type != SCI_SAS_HARD_RESET) return SCI_FAILURE_INVALID_PARAMETER_VALUE; if (scif_parms->sas.clear_affiliation_during_controller_stop != 1 && scif_parms->sas.clear_affiliation_during_controller_stop != 0) return SCI_FAILURE_INVALID_PARAMETER_VALUE; memcpy((&fw_controller->user_parameters), scif_parms, sizeof(*scif_parms)); // In the future more could be done to prevent setting parameters at the // wrong time, but for now we'll simply set the values even if it is too // late for them to take effect. return SCI_SUCCESS; } // --------------------------------------------------------------------------- #if !defined(DISABLE_INTERRUPTS) /** * @brief This routine checks each domain of the controller to see if * any domain is overriding interrupt coalescence. * * @param[in] fw_controller The framework controller. * * @return BOOL TRUE if any domain is currently discovering (and therefore * overriding interrupt coalescence). */ static BOOL scif_sas_controller_is_overriding_interrupt_coalescence( SCIF_SAS_CONTROLLER_T * fw_controller ) { U8 index; for(index = 0; index < SCI_MAX_DOMAINS; index++) { if(fw_controller->domains[index].parent.state_machine.current_state_id == SCI_BASE_DOMAIN_STATE_DISCOVERING) return TRUE; } return FALSE; } SCI_STATUS scif_controller_set_interrupt_coalescence( SCI_CONTROLLER_HANDLE_T controller, U32 coalesce_number, U32 coalesce_timeout ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T * )controller; ///When the framework is in the middle of temporarily overriding the interrupt ///coalescence values, the user's request to set interrupt coalescence ///will be saved. As soon as the framework is done with the temporary override, ///it will serve the user's request to set the new interrupt coalescence. if (scif_sas_controller_is_overriding_interrupt_coalescence(fw_controller)) { U32 curr_coalesce_number; U32 curr_coalesce_timeout; SCI_STATUS core_status; // save current interrupt coalescence info. scic_controller_get_interrupt_coalescence ( fw_controller->core_object, &curr_coalesce_number, &curr_coalesce_timeout); //try the user's request out in the core, but immediately restore the core's //current setting. core_status = scic_controller_set_interrupt_coalescence( fw_controller->core_object, coalesce_number, coalesce_timeout); if ( core_status == SCI_SUCCESS ) { fw_controller->saved_interrupt_coalesce_number = (U16)coalesce_number; fw_controller->saved_interrupt_coalesce_timeout = coalesce_timeout; } //restore current interrupt coalescence.
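// (The user's values were applied only long enough for the core to validate
// them; on success they are kept in the saved_interrupt_coalesce_* fields and
// are replayed by scif_sas_controller_restore_interrupt_coalescence() once
// the override window closes.)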
scic_controller_set_interrupt_coalescence( fw_controller->core_object, curr_coalesce_number, curr_coalesce_timeout); return core_status; } else { ///If the framework is not internally overriding the interrupt coalescence, ///serve the user's request immediately by passing the request to the core. return scic_controller_set_interrupt_coalescence( fw_controller->core_object, coalesce_number, coalesce_timeout); } } // --------------------------------------------------------------------------- void scif_controller_get_interrupt_coalescence( SCI_CONTROLLER_HANDLE_T controller, U32 * coalesce_number, U32 * coalesce_timeout ) { SCIF_SAS_CONTROLLER_T * scif_controller = (SCIF_SAS_CONTROLLER_T * )controller; scic_controller_get_interrupt_coalescence( scif_controller->core_object, coalesce_number, coalesce_timeout); } /** * @brief This method will save the interrupt coalescence values. If * the interrupt coalescence values have already been saved, * then this method performs no operations. * * @param[in,out] fw_controller This parameter specifies the controller * for which to save the interrupt coalescence values. * * @return none */ void scif_sas_controller_save_interrupt_coalescence( SCIF_SAS_CONTROLLER_T * fw_controller ) { if ( !scif_sas_controller_is_overriding_interrupt_coalescence(fw_controller)) { // Override the core's interrupt coalescing settings during the SMP // DISCOVER process, because only one outstanding SMP // request per domain is allowed. scic_controller_get_interrupt_coalescence( fw_controller->core_object, (U32*)&(fw_controller->saved_interrupt_coalesce_number), &(fw_controller->saved_interrupt_coalesce_timeout) ); // Temporarily disable the interrupt coalescing. scic_controller_set_interrupt_coalescence(fw_controller->core_object,0,0); } } /** * @brief This method will restore the interrupt coalescence values. If * the interrupt coalescence values have not already been saved, * then this method performs no operations. * * @param[in,out] fw_controller This parameter specifies the controller * for which to restore the interrupt coalescence values. * * @return none */ void scif_sas_controller_restore_interrupt_coalescence( SCIF_SAS_CONTROLLER_T * fw_controller ) { if ( !scif_sas_controller_is_overriding_interrupt_coalescence(fw_controller)) scic_controller_set_interrupt_coalescence( fw_controller->core_object, fw_controller->saved_interrupt_coalesce_number, fw_controller->saved_interrupt_coalesce_timeout ); } #endif // !defined(DISABLE_INTERRUPTS) // --------------------------------------------------------------------------- void scic_cb_controller_start_complete( SCI_CONTROLLER_HANDLE_T controller, SCI_STATUS completion_status ) { SCIF_SAS_CONTROLLER_T *fw_controller = (SCIF_SAS_CONTROLLER_T*) sci_object_get_association(controller); SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_INITIALIZATION, "scic_cb_controller_start_complete(0x%x, 0x%x) enter\n", controller, completion_status )); if (completion_status == SCI_SUCCESS || completion_status == SCI_FAILURE_TIMEOUT) { // Even if the initialization of the core controller timed out, the framework // controller should still transition to the READY state.
sci_base_state_machine_change_state( &fw_controller->parent.state_machine, SCI_BASE_CONTROLLER_STATE_READY ); } scif_cb_controller_start_complete(fw_controller, completion_status); } // --------------------------------------------------------------------------- void scic_cb_controller_stop_complete( SCI_CONTROLLER_HANDLE_T controller, SCI_STATUS completion_status ) { SCIF_SAS_CONTROLLER_T *fw_controller = (SCIF_SAS_CONTROLLER_T*) sci_object_get_association(controller); SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_SHUTDOWN, "scic_cb_controller_stop_complete(0x%x, 0x%x) enter\n", controller, completion_status )); if (completion_status == SCI_SUCCESS) { sci_base_state_machine_change_state( &fw_controller->parent.state_machine, SCI_BASE_CONTROLLER_STATE_STOPPED ); } else { sci_base_state_machine_change_state( &fw_controller->parent.state_machine, SCI_BASE_CONTROLLER_STATE_FAILED ); } scif_cb_controller_stop_complete(fw_controller, completion_status); } // --------------------------------------------------------------------------- void scic_cb_controller_error( SCI_CONTROLLER_HANDLE_T controller, SCI_CONTROLLER_ERROR error ) { SCIF_SAS_CONTROLLER_T *fw_controller = (SCIF_SAS_CONTROLLER_T*) sci_object_get_association(controller); fw_controller->parent.error = error; SCIF_LOG_TRACE(( sci_base_object_get_logger(controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_SHUTDOWN, "scic_cb_controller_error(0x%x) enter\n", controller )); sci_base_state_machine_change_state( &fw_controller->parent.state_machine, SCI_BASE_CONTROLLER_STATE_FAILED ); } //****************************************************************************** //* P R O T E C T E D M E T H O D S //****************************************************************************** /** * @brief This method is utilized to continue an internal IO operation * on the controller. This method is utilized for SAT translated * requests that generate multiple ATA commands in order to fulfill * the original SCSI request. * * @param[in] controller This parameter specifies the controller on which * to continue an internal IO request. * @param[in] remote_device This parameter specifies the remote device * on which to continue an internal IO request. * @param[in] io_request This parameter specifies the IO request to be * continued. * * @return Indicate if the continue operation was successful. * @retval SCI_SUCCESS This value is returned if the operation succeeded. */ SCI_STATUS scif_sas_controller_continue_io( SCI_CONTROLLER_HANDLE_T controller, SCI_REMOTE_DEVICE_HANDLE_T remote_device, SCI_IO_REQUEST_HANDLE_T io_request ) { SCIF_SAS_CONTROLLER_T * fw_controller = (SCIF_SAS_CONTROLLER_T*) controller; return fw_controller->state_handlers->continue_io_handler( (SCI_BASE_CONTROLLER_T*) controller, (SCI_BASE_REMOTE_DEVICE_T*) remote_device, (SCI_BASE_REQUEST_T*) io_request ); } /** * @brief This method will attempt to destruct a framework controller. * This includes freeing any resources retrieved from the user (e.g. * timers). * * @param[in] fw_controller This parameter specifies the framework * controller to be destructed.
* * @return none */ void scif_sas_controller_destruct( SCIF_SAS_CONTROLLER_T * fw_controller ) { SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_SHUTDOWN, "scif_sas_controller_destruct(0x%x) enter\n", fw_controller )); } //----------------------------------------------------------------------------- // INTERNAL REQUEST RELATED METHODS //----------------------------------------------------------------------------- /** * @brief This routine is to allocate the memory for creating a new internal * request. * * @param[in] fw_controller Handle to the framework controller. * * @return void* address to internal request memory */ void * scif_sas_controller_allocate_internal_request( SCIF_SAS_CONTROLLER_T * fw_controller ) { POINTER_UINT internal_io_address; if( !sci_pool_empty(fw_controller->internal_request_memory_pool) ) { sci_pool_get( fw_controller->internal_request_memory_pool, internal_io_address ); //clean the memory. memset((char*)internal_io_address, 0, scif_sas_internal_request_get_object_size()); return (void *) internal_io_address; } else return NULL; } /** * @brief This routine is to free the memory for a completed internal request. * * @param[in] fw_controller Handle to the framework controller. * @param[in] fw_internal_request_buffer The internal IO to be freed. * * @return none */ void scif_sas_controller_free_internal_request( SCIF_SAS_CONTROLLER_T * fw_controller, void * fw_internal_request_buffer ) { SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_IO_REQUEST, "scif_controller_free_internal_request(0x%x, 0x%x) enter\n", fw_controller, fw_internal_request_buffer )); - //return the memory to to pool. + //return the memory to the pool. if( !sci_pool_full(fw_controller->internal_request_memory_pool) ) { sci_pool_put( fw_controller->internal_request_memory_pool, (POINTER_UINT) fw_internal_request_buffer ); } } /** * @brief This routine is called by the OS' DPC to start io requests from the internal * high priority request queue. * @param[in] fw_controller The framework controller. * * @return none */ void scif_sas_controller_start_high_priority_io( SCIF_SAS_CONTROLLER_T * fw_controller ) { POINTER_UINT io_address; SCIF_SAS_IO_REQUEST_T * fw_io; SCI_STATUS status; SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_IO_REQUEST, "scif_controller_start_high_priority_io(0x%x) enter\n", fw_controller )); while ( !sci_pool_empty(fw_controller->hprq.pool) ) { sci_pool_get(fw_controller->hprq.pool, io_address); fw_io = (SCIF_SAS_IO_REQUEST_T *)io_address; status = fw_controller->state_handlers->start_high_priority_io_handler( (SCI_BASE_CONTROLLER_T*) fw_controller, (SCI_BASE_REMOTE_DEVICE_T*) fw_io->parent.device, (SCI_BASE_REQUEST_T*) fw_io, SCI_CONTROLLER_INVALID_IO_TAG ); } } /** * @brief This method will check how many IOs are currently outstanding and the * number of IOs in the high priority queue; if the overall number exceeds * max_tc, it returns FALSE. * * @param[in] fw_controller The framework controller. * * @return BOOL Indicate whether there is sufficient resource to start an IO. * @retval TRUE The controller has sufficient resource. * @retval FALSE There is not sufficient resource available.
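 *
 * @note Both scif_controller_start_io() and scif_controller_start_task()
 * (above) use this check to gate new requests before dispatching them to
 * the state handlers.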
*/ BOOL scif_sas_controller_sufficient_resource( SCIF_SAS_CONTROLLER_T *fw_controller ) { SCIF_SAS_DOMAIN_T * fw_domain; U32 domain_index; U32 outstanding_io_count = 0; U32 high_priority_io_count = 0; for(domain_index = 0; domain_index < SCI_MAX_DOMAINS; domain_index++) { fw_domain = &fw_controller->domains[domain_index]; outstanding_io_count += fw_domain->request_list.element_count; } high_priority_io_count = sci_pool_count(fw_controller->hprq.pool); if ( (outstanding_io_count + high_priority_io_count) > SCI_MAX_IO_REQUESTS ) return FALSE; return TRUE; } /** * @brief This method is the starting point for completing high priority io for a * controller, then down to the domain and device. * * @param[in] fw_controller The framework controller. * @param[in] remote_device The framework remote device. * @param[in] io_request The high priority io request to be completed. * * @return SCI_STATUS indicating the completion status from the framework down to the * core. */ SCI_STATUS scif_sas_controller_complete_high_priority_io( SCIF_SAS_CONTROLLER_T *fw_controller, SCIF_SAS_REMOTE_DEVICE_T *remote_device, SCIF_SAS_REQUEST_T *io_request ) { SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER | SCIF_LOG_OBJECT_IO_REQUEST, "scif_sas_controller_complete_high_priority_io(0x%x, 0x%x, 0x%x) enter\n", fw_controller, remote_device, io_request )); //call the controller's newly added complete_high_priority_io_handler return fw_controller->state_handlers->complete_high_priority_io_handler( (SCI_BASE_CONTROLLER_T*) fw_controller, (SCI_BASE_REMOTE_DEVICE_T*) remote_device, (SCI_BASE_REQUEST_T*) io_request ); } /** * @brief This routine is to allocate the memory for creating a smp phy object. * * @param[in] fw_controller Handle to the framework controller. * * @return SCIF_SAS_SMP_PHY_T * The allocated smp phy, or NULL if the * allocation failed. */ SCIF_SAS_SMP_PHY_T * scif_sas_controller_allocate_smp_phy( SCIF_SAS_CONTROLLER_T * fw_controller ) { SCIF_SAS_SMP_PHY_T * smp_phy; SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER, "scif_controller_allocate_smp_phy(0x%x) enter\n", fw_controller )); if( !sci_fast_list_is_empty(&fw_controller->smp_phy_memory_list) ) { smp_phy = (SCIF_SAS_SMP_PHY_T *) sci_fast_list_remove_head(&fw_controller->smp_phy_memory_list); //clean the memory. memset((char*)smp_phy, 0, sizeof(SCIF_SAS_SMP_PHY_T) ); return smp_phy; } else return NULL; } /** * @brief This routine is to free the memory for a released smp phy. * * @param[in] fw_controller The framework controller to whose memory list the * smp phy is released. * @param[in] smp_phy The smp phy to be freed. * * @return none */ void scif_sas_controller_free_smp_phy( SCIF_SAS_CONTROLLER_T * fw_controller, SCIF_SAS_SMP_PHY_T * smp_phy ) { SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER, "scif_controller_free_smp_phy(0x%x, 0x%x) enter\n", fw_controller, smp_phy )); //return the memory to the list. sci_fast_list_insert_tail( &fw_controller->smp_phy_memory_list, &smp_phy->list_element ); } /** * @brief This method clears affiliation for all the EA SATA devices associated * with this controller. * * @param[in] fw_controller This parameter specifies the framework * controller object whose EA SATA devices' affiliations are to be cleared. * * @return This method returns a value indicating if the operation completed. * @retval SCI_SUCCESS This value indicates that all the EA SATA devices' * affiliation was cleared.
* @retval SCI_WARNING_SEQUENCE_INCOMPLETE This value indicates the clear affiliation * activity is yet to be completed. */ SCI_STATUS scif_sas_controller_clear_affiliation( SCIF_SAS_CONTROLLER_T * fw_controller ) { U8 index; SCI_STATUS status; SCIF_SAS_DOMAIN_T * fw_domain; SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER, "scif_sas_controller_clear_affiliation(0x%x) enter\n", fw_controller )); index = fw_controller->current_domain_to_clear_affiliation; if (index < SCI_MAX_DOMAINS) { fw_domain = &fw_controller->domains[index]; //Need to stop all the on-going smp activities before clearing affiliation. scif_sas_domain_cancel_smp_activities(fw_domain); scif_sas_domain_start_clear_affiliation(fw_domain); status = SCI_WARNING_SEQUENCE_INCOMPLETE; } else { //the controller has done clear affiliation work for all its domains. scif_sas_controller_continue_to_stop(fw_controller); status = SCI_SUCCESS; } return status; } /** * @brief This method sets SCIF user parameters to * default values. Users can override these values utilizing * the scif_user_parameters_set() method. * * @param[in] controller This parameter specifies the controller for * which to set the configuration parameters to their * default values. * * @return none */ void scif_sas_controller_set_default_config_parameters( SCIF_SAS_CONTROLLER_T * this_controller ) { SCIF_USER_PARAMETERS_T * scif_parms = &(this_controller->user_parameters); scif_parms->sas.is_sata_ncq_enabled = TRUE; scif_parms->sas.max_ncq_depth = 32; scif_parms->sas.is_sata_standby_timer_enabled = FALSE; scif_parms->sas.is_non_zero_buffer_offsets_enabled = FALSE; scif_parms->sas.reset_type = SCI_SAS_LOGICAL_UNIT_RESET; scif_parms->sas.clear_affiliation_during_controller_stop = TRUE; scif_parms->sas.ignore_fua = FALSE; } /** * @brief This method releases resources for the framework controller and associated * objects. * * @param[in] fw_controller This parameter specifies the framework * controller and associated objects whose resources are to be released. * * @return This method returns a value indicating if the operation succeeded. * @retval SCI_SUCCESS This value indicates that resource release succeeded. * @retval SCI_FAILURE This value indicates certain failure during the process * of resource release. */ SCI_STATUS scif_sas_controller_release_resource( SCIF_SAS_CONTROLLER_T * fw_controller ) { U8 index; SCIF_SAS_DOMAIN_T * fw_domain; SCIF_LOG_TRACE(( sci_base_object_get_logger(fw_controller), SCIF_LOG_OBJECT_CONTROLLER, "scif_sas_controller_release_resource(0x%x) enter\n", fw_controller )); //currently the only resource to be released is the domain's timer. for (index = 0; index < SCI_MAX_DOMAINS; index++) { fw_domain = &fw_controller->domains[index]; scif_sas_domain_release_resource(fw_controller, fw_domain); } return SCI_SUCCESS; } #ifdef SCI_LOGGING /** * This method will start state transition logging for the framework * controller object. * * @param[in] fw_controller The framework controller object on which to * observe state changes. * * @return none */ void scif_sas_controller_initialize_state_logging( SCIF_SAS_CONTROLLER_T * fw_controller ) { sci_base_state_machine_logger_initialize( &fw_controller->parent.state_machine_logger, &fw_controller->parent.state_machine, &fw_controller->parent.parent, scif_cb_logger_log_states, "SCIF_SAS_CONTROLLER_T", "base state machine", SCIF_LOG_OBJECT_CONTROLLER ); } /** * This method will remove the logging of state transitions from the framework * controller object.
* * @param[in] fw_controller The framework controller to change. * * @return none */ void scif_sas_controller_deinitialize_state_logging( SCIF_SAS_CONTROLLER_T * fw_controller ) { sci_base_state_machine_logger_deinitialize( &fw_controller->parent.state_machine_logger, &fw_controller->parent.state_machine ); } #endif // SCI_LOGGING Index: stable/11/sys/dev/iwm/if_iwmreg.h =================================================================== --- stable/11/sys/dev/iwm/if_iwmreg.h (revision 330445) +++ stable/11/sys/dev/iwm/if_iwmreg.h (revision 330446) @@ -1,6137 +1,6137 @@ /* $OpenBSD: if_iwmreg.h,v 1.4 2015/06/15 08:06:11 stsp Exp $ */ /* $FreeBSD$ */ /****************************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * BSD LICENSE * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* *****************************************************************************/ #ifndef __IF_IWM_REG_H__ #define __IF_IWM_REG_H__ #define le16_to_cpup(_a_) (le16toh(*(const uint16_t *)(_a_))) #define le32_to_cpup(_a_) (le32toh(*(const uint32_t *)(_a_))) /* * BEGIN iwl-csr.h */ /* * CSR (control and status registers) * * CSR registers are mapped directly into PCI bus space, and are accessible * whenever platform supplies power to device, even when device is in * low power states due to driver-invoked device resets * (e.g. IWM_CSR_RESET_REG_FLAG_SW_RESET) or uCode-driven power-saving modes. * * Use iwl_write32() and iwl_read32() family to access these registers; * these provide simple PCI bus access, without waking up the MAC. * Do not use iwl_write_direct32() family for these registers; * no need to "grab nic access" via IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ. * The MAC (uCode processor, etc.) does not need to be powered up for accessing * the CSR registers. * * NOTE: Device does need to be awake in order to read this memory * via IWM_CSR_EEPROM and IWM_CSR_OTP registers */ #define IWM_CSR_HW_IF_CONFIG_REG (0x000) /* hardware interface config */ #define IWM_CSR_INT_COALESCING (0x004) /* accum ints, 32-usec units */ #define IWM_CSR_INT (0x008) /* host interrupt status/ack */ #define IWM_CSR_INT_MASK (0x00c) /* host interrupt enable */ #define IWM_CSR_FH_INT_STATUS (0x010) /* busmaster int status/ack*/ #define IWM_CSR_GPIO_IN (0x018) /* read external chip pins */ #define IWM_CSR_RESET (0x020) /* busmaster enable, NMI, etc*/ #define IWM_CSR_GP_CNTRL (0x024) /* 2nd byte of IWM_CSR_INT_COALESCING, not accessible via iwl_write32()! */ #define IWM_CSR_INT_PERIODIC_REG (0x005) /* * Hardware revision info * Bit fields: * 31-16: Reserved * 15-4: Type of device: see IWM_CSR_HW_REV_TYPE_xxx definitions * 3-2: Revision step: 0 = A, 1 = B, 2 = C, 3 = D * 1-0: "Dash" (-) value, as in A-1, etc. */ #define IWM_CSR_HW_REV (0x028) /* * EEPROM and OTP (one-time-programmable) memory reads * * NOTE: Device must be awake, initialized via apm_ops.init(), * in order to read. */ #define IWM_CSR_EEPROM_REG (0x02c) #define IWM_CSR_EEPROM_GP (0x030) #define IWM_CSR_OTP_GP_REG (0x034) #define IWM_CSR_GIO_REG (0x03C) #define IWM_CSR_GP_UCODE_REG (0x048) #define IWM_CSR_GP_DRIVER_REG (0x050) /* * UCODE-DRIVER GP (general purpose) mailbox registers. * SET/CLR registers set/clear bit(s) if "1" is written. */ #define IWM_CSR_UCODE_DRV_GP1 (0x054) #define IWM_CSR_UCODE_DRV_GP1_SET (0x058) #define IWM_CSR_UCODE_DRV_GP1_CLR (0x05c) #define IWM_CSR_UCODE_DRV_GP2 (0x060) #define IWM_CSR_MBOX_SET_REG (0x088) #define IWM_CSR_MBOX_SET_REG_OS_ALIVE 0x20 #define IWM_CSR_LED_REG (0x094) #define IWM_CSR_DRAM_INT_TBL_REG (0x0A0) #define IWM_CSR_MAC_SHADOW_REG_CTRL (0x0A8) /* 6000 and up */ /* GIO Chicken Bits (PCI Express bus link power management) */ #define IWM_CSR_GIO_CHICKEN_BITS (0x100) /* Analog phase-lock-loop configuration */ #define IWM_CSR_ANA_PLL_CFG (0x20c) /* * CSR Hardware Revision Workaround Register. Indicates hardware rev; * "step" determines CCK backoff for txpower calculation. Used for 4965 only. * See also IWM_CSR_HW_REV register. * Bit fields: * 3-2: 0 = A, 1 = B, 2 = C, 3 = D step * 1-0: "Dash" (-) value, as in C-1, etc. 
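 *
 * A decode sketch for these two fields (plain shifts and masks; these are
 * not defines taken from this header):
 *
 *   step = (rev >> 2) & 0x3;    0 = A, 1 = B, 2 = C, 3 = D
 *   dash = rev & 0x3;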
*/ #define IWM_CSR_HW_REV_WA_REG (0x22C) #define IWM_CSR_DBG_HPET_MEM_REG (0x240) #define IWM_CSR_DBG_LINK_PWR_MGMT_REG (0x250) /* Bits for IWM_CSR_HW_IF_CONFIG_REG */ #define IWM_CSR_HW_IF_CONFIG_REG_MSK_MAC_DASH (0x00000003) #define IWM_CSR_HW_IF_CONFIG_REG_MSK_MAC_STEP (0x0000000C) #define IWM_CSR_HW_IF_CONFIG_REG_MSK_BOARD_VER (0x000000C0) #define IWM_CSR_HW_IF_CONFIG_REG_BIT_MAC_SI (0x00000100) #define IWM_CSR_HW_IF_CONFIG_REG_BIT_RADIO_SI (0x00000200) #define IWM_CSR_HW_IF_CONFIG_REG_MSK_PHY_TYPE (0x00000C00) #define IWM_CSR_HW_IF_CONFIG_REG_MSK_PHY_DASH (0x00003000) #define IWM_CSR_HW_IF_CONFIG_REG_MSK_PHY_STEP (0x0000C000) #define IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_DASH (0) #define IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_STEP (2) #define IWM_CSR_HW_IF_CONFIG_REG_POS_BOARD_VER (6) #define IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_TYPE (10) #define IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_DASH (12) #define IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_STEP (14) #define IWM_CSR_HW_IF_CONFIG_REG_BIT_HAP_WAKE_L1A (0x00080000) #define IWM_CSR_HW_IF_CONFIG_REG_BIT_EEPROM_OWN_SEM (0x00200000) #define IWM_CSR_HW_IF_CONFIG_REG_BIT_NIC_READY (0x00400000) /* PCI_OWN_SEM */ #define IWM_CSR_HW_IF_CONFIG_REG_BIT_NIC_PREPARE_DONE (0x02000000) /* ME_OWN */ #define IWM_CSR_HW_IF_CONFIG_REG_PREPARE (0x08000000) /* WAKE_ME */ #define IWM_CSR_HW_IF_CONFIG_REG_ENABLE_PME (0x10000000) #define IWM_CSR_HW_IF_CONFIG_REG_PERSIST_MODE (0x40000000) /* PERSISTENCE */ #define IWM_CSR_INT_PERIODIC_DIS (0x00) /* disable periodic int*/ #define IWM_CSR_INT_PERIODIC_ENA (0xFF) /* 255*32 usec ~ 8 msec*/ /* interrupt flags in INTA, set by uCode or hardware (e.g. dma), * acknowledged (reset) by host writing "1" to flagged bits. */ #define IWM_CSR_INT_BIT_FH_RX (1 << 31) /* Rx DMA, cmd responses, FH_INT[17:16] */ #define IWM_CSR_INT_BIT_HW_ERR (1 << 29) /* DMA hardware error FH_INT[31] */ #define IWM_CSR_INT_BIT_RX_PERIODIC (1 << 28) /* Rx periodic */ #define IWM_CSR_INT_BIT_FH_TX (1 << 27) /* Tx DMA FH_INT[1:0] */ #define IWM_CSR_INT_BIT_SCD (1 << 26) /* TXQ pointer advanced */ #define IWM_CSR_INT_BIT_SW_ERR (1 << 25) /* uCode error */ #define IWM_CSR_INT_BIT_RF_KILL (1 << 7) /* HW RFKILL switch GP_CNTRL[27] toggled */ #define IWM_CSR_INT_BIT_CT_KILL (1 << 6) /* Critical temp (chip too hot) rfkill */ #define IWM_CSR_INT_BIT_SW_RX (1 << 3) /* Rx, command responses */ #define IWM_CSR_INT_BIT_WAKEUP (1 << 1) /* NIC controller waking up (pwr mgmt) */ #define IWM_CSR_INT_BIT_ALIVE (1 << 0) /* uCode interrupts once it initializes */ #define IWM_CSR_INI_SET_MASK (IWM_CSR_INT_BIT_FH_RX | \ IWM_CSR_INT_BIT_HW_ERR | \ IWM_CSR_INT_BIT_FH_TX | \ IWM_CSR_INT_BIT_SW_ERR | \ IWM_CSR_INT_BIT_RF_KILL | \ IWM_CSR_INT_BIT_SW_RX | \ IWM_CSR_INT_BIT_WAKEUP | \ IWM_CSR_INT_BIT_ALIVE | \ IWM_CSR_INT_BIT_RX_PERIODIC) /* interrupt flags in FH (flow handler) (PCI busmaster DMA) */ #define IWM_CSR_FH_INT_BIT_ERR (1 << 31) /* Error */ #define IWM_CSR_FH_INT_BIT_HI_PRIOR (1 << 30) /* High priority Rx, bypass coalescing */ #define IWM_CSR_FH_INT_BIT_RX_CHNL1 (1 << 17) /* Rx channel 1 */ #define IWM_CSR_FH_INT_BIT_RX_CHNL0 (1 << 16) /* Rx channel 0 */ #define IWM_CSR_FH_INT_BIT_TX_CHNL1 (1 << 1) /* Tx channel 1 */ #define IWM_CSR_FH_INT_BIT_TX_CHNL0 (1 << 0) /* Tx channel 0 */ #define IWM_CSR_FH_INT_RX_MASK (IWM_CSR_FH_INT_BIT_HI_PRIOR | \ IWM_CSR_FH_INT_BIT_RX_CHNL1 | \ IWM_CSR_FH_INT_BIT_RX_CHNL0) #define IWM_CSR_FH_INT_TX_MASK (IWM_CSR_FH_INT_BIT_TX_CHNL1 | \ IWM_CSR_FH_INT_BIT_TX_CHNL0) /* GPIO */ #define IWM_CSR_GPIO_IN_BIT_AUX_POWER (0x00000200) #define 
IWM_CSR_GPIO_IN_VAL_VAUX_PWR_SRC (0x00000000) #define IWM_CSR_GPIO_IN_VAL_VMAIN_PWR_SRC (0x00000200) /* RESET */ #define IWM_CSR_RESET_REG_FLAG_NEVO_RESET (0x00000001) #define IWM_CSR_RESET_REG_FLAG_FORCE_NMI (0x00000002) #define IWM_CSR_RESET_REG_FLAG_SW_RESET (0x00000080) #define IWM_CSR_RESET_REG_FLAG_MASTER_DISABLED (0x00000100) #define IWM_CSR_RESET_REG_FLAG_STOP_MASTER (0x00000200) #define IWM_CSR_RESET_LINK_PWR_MGMT_DISABLED (0x80000000) /* * GP (general purpose) CONTROL REGISTER * Bit fields: * 27: HW_RF_KILL_SW * Indicates state of (platform's) hardware RF-Kill switch * 26-24: POWER_SAVE_TYPE * Indicates current power-saving mode: * 000 -- No power saving * 001 -- MAC power-down * 010 -- PHY (radio) power-down * 011 -- Error * 9-6: SYS_CONFIG * Indicates current system configuration, reflecting pins on chip * as forced high/low by device circuit board. * 4: GOING_TO_SLEEP * Indicates MAC is entering a power-saving sleep power-down. * Not a good time to access device-internal resources. * 3: MAC_ACCESS_REQ * Host sets this to request and maintain MAC wakeup, to allow host * access to device-internal resources. Host must wait for * MAC_CLOCK_READY (and !GOING_TO_SLEEP) before accessing non-CSR * device registers. * 2: INIT_DONE * Host sets this to put device into fully operational D0 power mode. * Host resets this after SW_RESET to put device into low power mode. * 0: MAC_CLOCK_READY * Indicates MAC (ucode processor, etc.) is powered up and can run. * Internal resources are accessible. * NOTE: This does not indicate that the processor is actually running. * NOTE: This does not indicate that device has completed * init or post-power-down restore of internal SRAM memory. * Use IWM_CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP as indication that * SRAM is restored and uCode is in normal operation mode. * Later devices (5xxx/6xxx/1xxx) use non-volatile SRAM, and * do not need to save/restore it. 
* NOTE: After device reset, this bit remains "0" until host sets * INIT_DONE */ #define IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY (0x00000001) #define IWM_CSR_GP_CNTRL_REG_FLAG_INIT_DONE (0x00000004) #define IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ (0x00000008) #define IWM_CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP (0x00000010) #define IWM_CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN (0x00000001) #define IWM_CSR_GP_CNTRL_REG_MSK_POWER_SAVE_TYPE (0x07000000) #define IWM_CSR_GP_CNTRL_REG_FLAG_MAC_POWER_SAVE (0x04000000) #define IWM_CSR_GP_CNTRL_REG_FLAG_HW_RF_KILL_SW (0x08000000) /* HW REV */ #define IWM_CSR_HW_REV_DASH(_val) (((_val) & 0x0000003) >> 0) #define IWM_CSR_HW_REV_STEP(_val) (((_val) & 0x000000C) >> 2) /** * hw_rev values */ enum { IWM_SILICON_A_STEP = 0, IWM_SILICON_B_STEP, IWM_SILICON_C_STEP, }; #define IWM_CSR_HW_REV_TYPE_MSK (0x000FFF0) #define IWM_CSR_HW_REV_TYPE_5300 (0x0000020) #define IWM_CSR_HW_REV_TYPE_5350 (0x0000030) #define IWM_CSR_HW_REV_TYPE_5100 (0x0000050) #define IWM_CSR_HW_REV_TYPE_5150 (0x0000040) #define IWM_CSR_HW_REV_TYPE_1000 (0x0000060) #define IWM_CSR_HW_REV_TYPE_6x00 (0x0000070) #define IWM_CSR_HW_REV_TYPE_6x50 (0x0000080) #define IWM_CSR_HW_REV_TYPE_6150 (0x0000084) #define IWM_CSR_HW_REV_TYPE_6x05 (0x00000B0) #define IWM_CSR_HW_REV_TYPE_6x30 IWM_CSR_HW_REV_TYPE_6x05 #define IWM_CSR_HW_REV_TYPE_6x35 IWM_CSR_HW_REV_TYPE_6x05 #define IWM_CSR_HW_REV_TYPE_2x30 (0x00000C0) #define IWM_CSR_HW_REV_TYPE_2x00 (0x0000100) #define IWM_CSR_HW_REV_TYPE_105 (0x0000110) #define IWM_CSR_HW_REV_TYPE_135 (0x0000120) #define IWM_CSR_HW_REV_TYPE_7265D (0x0000210) #define IWM_CSR_HW_REV_TYPE_NONE (0x00001F0) /* EEPROM REG */ #define IWM_CSR_EEPROM_REG_READ_VALID_MSK (0x00000001) #define IWM_CSR_EEPROM_REG_BIT_CMD (0x00000002) #define IWM_CSR_EEPROM_REG_MSK_ADDR (0x0000FFFC) #define IWM_CSR_EEPROM_REG_MSK_DATA (0xFFFF0000) /* EEPROM GP */ #define IWM_CSR_EEPROM_GP_VALID_MSK (0x00000007) /* signature */ #define IWM_CSR_EEPROM_GP_IF_OWNER_MSK (0x00000180) #define IWM_CSR_EEPROM_GP_BAD_SIGNATURE_BOTH_EEP_AND_OTP (0x00000000) #define IWM_CSR_EEPROM_GP_BAD_SIG_EEP_GOOD_SIG_OTP (0x00000001) #define IWM_CSR_EEPROM_GP_GOOD_SIG_EEP_LESS_THAN_4K (0x00000002) #define IWM_CSR_EEPROM_GP_GOOD_SIG_EEP_MORE_THAN_4K (0x00000004) /* One-time-programmable memory general purpose reg */ #define IWM_CSR_OTP_GP_REG_DEVICE_SELECT (0x00010000) /* 0 - EEPROM, 1 - OTP */ #define IWM_CSR_OTP_GP_REG_OTP_ACCESS_MODE (0x00020000) /* 0 - absolute, 1 - relative */ #define IWM_CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK (0x00100000) /* bit 20 */ #define IWM_CSR_OTP_GP_REG_ECC_UNCORR_STATUS_MSK (0x00200000) /* bit 21 */ /* GP REG */ #define IWM_CSR_GP_REG_POWER_SAVE_STATUS_MSK (0x03000000) /* bit 24/25 */ #define IWM_CSR_GP_REG_NO_POWER_SAVE (0x00000000) #define IWM_CSR_GP_REG_MAC_POWER_SAVE (0x01000000) #define IWM_CSR_GP_REG_PHY_POWER_SAVE (0x02000000) #define IWM_CSR_GP_REG_POWER_SAVE_ERROR (0x03000000) /* CSR GIO */ #define IWM_CSR_GIO_REG_VAL_L0S_ENABLED (0x00000002) /* * UCODE-DRIVER GP (general purpose) mailbox register 1 * Host driver and uCode write and/or read this register to communicate with * each other. * Bit fields: * 4: UCODE_DISABLE * Host sets this to request permanent halt of uCode, same as * sending CARD_STATE command with "halt" bit set. * 3: CT_KILL_EXIT * Host sets this to request exit from CT_KILL state, i.e. host thinks * device temperature is low enough to continue normal operation. 
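The MAC_ACCESS_REQ/MAC_CLOCK_READY handshake described above is what drivers use to wake the MAC before touching non-CSR registers. A minimal sketch of that polling loop follows; struct iwm_softc, the IWM_SETBITS/IWM_READ accessor macros, and DELAY() are assumed here and are not part of this header.

/* Sketch: request MAC wakeup and poll for MAC_CLOCK_READY. */
static int
iwm_grab_nic_access_sketch(struct iwm_softc *sc)
{
	int t;

	/* Ask the device to power up and hold the MAC awake. */
	IWM_SETBITS(sc, IWM_CSR_GP_CNTRL,
	    IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);

	/*
	 * Per the comment above, wait for MAC_CLOCK_READY (real code
	 * also checks !GOING_TO_SLEEP) before touching non-CSR space.
	 */
	for (t = 0; t < 15000; t++) {
		if (IWM_READ(sc, IWM_CSR_GP_CNTRL) &
		    IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY)
			return (0);
		DELAY(10);
	}
	return (ETIMEDOUT);
}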
* 2: CMD_BLOCKED * Host sets this during RF KILL power-down sequence (HW, SW, CT KILL) * to release uCode to clear all Tx and command queues, enter * unassociated mode, and power down. * NOTE: Some devices also use HBUS_TARG_MBX_C register for this bit. * 1: SW_BIT_RFKILL * Host sets this when issuing CARD_STATE command to request * device sleep. * 0: MAC_SLEEP * uCode sets this when preparing a power-saving power-down. * uCode resets this when power-up is complete and SRAM is sane. * NOTE: device saves internal SRAM data to host when powering down, * and must restore this data after powering back up. * MAC_SLEEP is the best indication that restore is complete. * Later devices (5xxx/6xxx/1xxx) use non-volatile SRAM, and * do not need to save/restore it. */ #define IWM_CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP (0x00000001) #define IWM_CSR_UCODE_SW_BIT_RFKILL (0x00000002) #define IWM_CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED (0x00000004) #define IWM_CSR_UCODE_DRV_GP1_REG_BIT_CT_KILL_EXIT (0x00000008) #define IWM_CSR_UCODE_DRV_GP1_BIT_D3_CFG_COMPLETE (0x00000020) /* GP Driver */ #define IWM_CSR_GP_DRIVER_REG_BIT_RADIO_SKU_MSK (0x00000003) #define IWM_CSR_GP_DRIVER_REG_BIT_RADIO_SKU_3x3_HYB (0x00000000) #define IWM_CSR_GP_DRIVER_REG_BIT_RADIO_SKU_2x2_HYB (0x00000001) #define IWM_CSR_GP_DRIVER_REG_BIT_RADIO_SKU_2x2_IPA (0x00000002) #define IWM_CSR_GP_DRIVER_REG_BIT_CALIB_VERSION6 (0x00000004) #define IWM_CSR_GP_DRIVER_REG_BIT_6050_1x2 (0x00000008) #define IWM_CSR_GP_DRIVER_REG_BIT_RADIO_IQ_INVER (0x00000080) /* GIO Chicken Bits (PCI Express bus link power management) */ #define IWM_CSR_GIO_CHICKEN_BITS_REG_BIT_L1A_NO_L0S_RX (0x00800000) #define IWM_CSR_GIO_CHICKEN_BITS_REG_BIT_DIS_L0S_EXIT_TIMER (0x20000000) /* LED */ #define IWM_CSR_LED_BSM_CTRL_MSK (0xFFFFFFDF) #define IWM_CSR_LED_REG_TURN_ON (0x60) #define IWM_CSR_LED_REG_TURN_OFF (0x20) /* ANA_PLL */ #define IWM_CSR50_ANA_PLL_CFG_VAL (0x00880300) /* HPET MEM debug */ #define IWM_CSR_DBG_HPET_MEM_REG_VAL (0xFFFF0000) /* DRAM INT TABLE */ #define IWM_CSR_DRAM_INT_TBL_ENABLE (1 << 31) #define IWM_CSR_DRAM_INIT_TBL_WRITE_POINTER (1 << 28) #define IWM_CSR_DRAM_INIT_TBL_WRAP_CHECK (1 << 27) /* SECURE boot registers */ #define IWM_CSR_SECURE_BOOT_CONFIG_ADDR (0x100) enum iwm_secure_boot_config_reg { IWM_CSR_SECURE_BOOT_CONFIG_INSPECTOR_BURNED_IN_OTP = 0x00000001, IWM_CSR_SECURE_BOOT_CONFIG_INSPECTOR_NOT_REQ = 0x00000002, }; #define IWM_CSR_SECURE_BOOT_CPU1_STATUS_ADDR (0x100) #define IWM_CSR_SECURE_BOOT_CPU2_STATUS_ADDR (0x100) enum iwm_secure_boot_status_reg { IWM_CSR_SECURE_BOOT_CPU_STATUS_VERF_STATUS = 0x00000003, IWM_CSR_SECURE_BOOT_CPU_STATUS_VERF_COMPLETED = 0x00000002, IWM_CSR_SECURE_BOOT_CPU_STATUS_VERF_SUCCESS = 0x00000004, IWM_CSR_SECURE_BOOT_CPU_STATUS_VERF_FAIL = 0x00000008, IWM_CSR_SECURE_BOOT_CPU_STATUS_SIGN_VERF_FAIL = 0x00000010, }; #define IWM_FH_UCODE_LOAD_STATUS 0x1af0 #define IWM_FH_MEM_TB_MAX_LENGTH 0x20000 #define IWM_LMPM_SECURE_UCODE_LOAD_CPU1_HDR_ADDR 0x1e78 #define IWM_LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR 0x1e7c #define IWM_LMPM_SECURE_CPU1_HDR_MEM_SPACE 0x420000 #define IWM_LMPM_SECURE_CPU2_HDR_MEM_SPACE 0x420400 #define IWM_CSR_SECURE_TIME_OUT (100) /* extended range in FW SRAM */ #define IWM_FW_MEM_EXTENDED_START 0x40000 #define IWM_FW_MEM_EXTENDED_END 0x57FFF /* FW chicken bits */ #define IWM_LMPM_CHICK 0xa01ff8 #define IWM_LMPM_CHICK_EXTENDED_ADDR_SPACE 0x01 #define IWM_FH_TCSR_0_REG0 (0x1D00) /* * HBUS (Host-side Bus) * * HBUS registers are mapped directly into PCI bus space, but are used * to indirectly access device's internal 
memory or registers that * may be powered-down. * * Use iwl_write_direct32()/iwl_read_direct32() family for these registers; * host must "grab nic access" via CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ * to make sure the MAC (uCode processor, etc.) is powered up for accessing * internal resources. * * Do not use iwl_write32()/iwl_read32() family to access these registers; * these provide only simple PCI bus access, without waking up the MAC. */ #define IWM_HBUS_BASE (0x400) /* * Registers for accessing device's internal SRAM memory (e.g. SCD SRAM * structures, error log, event log, verifying uCode load). * First write to address register, then read from or write to data register * to complete the job. Once the address register is set up, accesses to * data registers auto-increment the address by one dword. * Bit usage for address registers (read or write): * 0-31: memory address within device */ #define IWM_HBUS_TARG_MEM_RADDR (IWM_HBUS_BASE+0x00c) #define IWM_HBUS_TARG_MEM_WADDR (IWM_HBUS_BASE+0x010) #define IWM_HBUS_TARG_MEM_WDAT (IWM_HBUS_BASE+0x018) #define IWM_HBUS_TARG_MEM_RDAT (IWM_HBUS_BASE+0x01c) /* Mailbox C, used as workaround alternative to CSR_UCODE_DRV_GP1 mailbox */ #define IWM_HBUS_TARG_MBX_C (IWM_HBUS_BASE+0x030) #define IWM_HBUS_TARG_MBX_C_REG_BIT_CMD_BLOCKED (0x00000004) /* * Registers for accessing device's internal peripheral registers * (e.g. SCD, BSM, etc.). First write to address register, * then read from or write to data register to complete the job. * Bit usage for address registers (read or write): * 0-15: register address (offset) within device * 24-25: (# bytes - 1) to read or write (e.g. 3 for dword) */ #define IWM_HBUS_TARG_PRPH_WADDR (IWM_HBUS_BASE+0x044) #define IWM_HBUS_TARG_PRPH_RADDR (IWM_HBUS_BASE+0x048) #define IWM_HBUS_TARG_PRPH_WDAT (IWM_HBUS_BASE+0x04c) #define IWM_HBUS_TARG_PRPH_RDAT (IWM_HBUS_BASE+0x050) /* enable the ID buf for read */ #define IWM_WFPM_PS_CTL_CLR 0xa0300c #define IWM_WFMP_MAC_ADDR_0 0xa03080 #define IWM_WFMP_MAC_ADDR_1 0xa03084 #define IWM_LMPM_PMG_EN 0xa01cec #define IWM_RADIO_REG_SYS_MANUAL_DFT_0 0xad4078 #define IWM_RFIC_REG_RD 0xad0470 #define IWM_WFPM_CTRL_REG 0xa03030 #define IWM_WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK 0x08000000 #define IWM_ENABLE_WFPM 0x80000000 #define IWM_AUX_MISC_REG 0xa200b0 #define IWM_HW_STEP_LOCATION_BITS 24 #define IWM_AUX_MISC_MASTER1_EN 0xa20818 #define IWM_AUX_MISC_MASTER1_EN_SBE_MSK 0x1 #define IWM_AUX_MISC_MASTER1_SMPHR_STATUS 0xa20800 #define IWM_RSA_ENABLE 0xa24b08 #define IWM_PREG_AUX_BUS_WPROT_0 0xa04cc0 #define IWM_SB_CFG_OVERRIDE_ADDR 0xa26c78 #define IWM_SB_CFG_OVERRIDE_ENABLE 0x8000 #define IWM_SB_CFG_BASE_OVERRIDE 0xa20000 #define IWM_SB_MODIFY_CFG_FLAG 0xa03088 #define IWM_SB_CPU_1_STATUS 0xa01e30 #define IWM_SB_CPU_2_STATUS 0Xa01e34 /* Used to enable DBGM */ #define IWM_HBUS_TARG_TEST_REG (IWM_HBUS_BASE+0x05c) /* * Per-Tx-queue write pointer (index, really!) * Indicates index to next TFD that driver will fill (1 past latest filled). 
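The IWM_HBUS_TARG_MEM_* registers above implement an address/data idiom: write the SRAM address once, then keep reading the data register while the address auto-increments by one dword. A minimal sketch, reusing the assumed IWM_READ/IWM_WRITE accessors from the earlier example (the MAC must already have been woken up):

/* Sketch: read `dwords` 32-bit words of device SRAM starting at `addr`. */
static void
iwm_read_mem_sketch(struct iwm_softc *sc, uint32_t addr,
    uint32_t *buf, int dwords)
{
	int i;

	/* Set the start address once; reads auto-increment it. */
	IWM_WRITE(sc, IWM_HBUS_TARG_MEM_RADDR, addr);
	for (i = 0; i < dwords; i++)
		buf[i] = IWM_READ(sc, IWM_HBUS_TARG_MEM_RDAT);
}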
* Bit usage: * 0-7: queue write index * 11-8: queue selector */ #define IWM_HBUS_TARG_WRPTR (IWM_HBUS_BASE+0x060) /********************************************************** * CSR values **********************************************************/ /* * host interrupt timeout value * used with setting interrupt coalescing timer * the CSR_INT_COALESCING is an 8 bit register in 32-usec unit * * default interrupt coalescing timer is 64 x 32 = 2048 usecs */ #define IWM_HOST_INT_TIMEOUT_MAX (0xFF) #define IWM_HOST_INT_TIMEOUT_DEF (0x40) #define IWM_HOST_INT_TIMEOUT_MIN (0x0) #define IWM_HOST_INT_OPER_MODE (1 << 31) /***************************************************************************** * 7000/3000 series SHR DTS addresses * *****************************************************************************/ /* Diode Results Register Structure: */ enum iwm_dtd_diode_reg { IWM_DTS_DIODE_REG_DIG_VAL = 0x000000FF, /* bits [7:0] */ IWM_DTS_DIODE_REG_VREF_LOW = 0x0000FF00, /* bits [15:8] */ IWM_DTS_DIODE_REG_VREF_HIGH = 0x00FF0000, /* bits [23:16] */ IWM_DTS_DIODE_REG_VREF_ID = 0x03000000, /* bits [25:24] */ IWM_DTS_DIODE_REG_PASS_ONCE = 0x80000000, /* bits [31:31] */ IWM_DTS_DIODE_REG_FLAGS_MSK = 0xFF000000, /* bits [31:24] */ /* Those are the masks INSIDE the flags bit-field: */ IWM_DTS_DIODE_REG_FLAGS_VREFS_ID_POS = 0, IWM_DTS_DIODE_REG_FLAGS_VREFS_ID = 0x00000003, /* bits [1:0] */ IWM_DTS_DIODE_REG_FLAGS_PASS_ONCE_POS = 7, IWM_DTS_DIODE_REG_FLAGS_PASS_ONCE = 0x00000080, /* bits [7:7] */ }; /* * END iwl-csr.h */ /* * BEGIN iwl-fw.h */ /** * enum iwm_ucode_tlv_flag - ucode API flags * @IWM_UCODE_TLV_FLAGS_PAN: This is PAN capable microcode; this previously * was a separate TLV but moved here to save space. * @IWM_UCODE_TLV_FLAGS_NEWSCAN: new uCode scan behaviour on hidden SSID, * treats good CRC threshold as a boolean * @IWM_UCODE_TLV_FLAGS_MFP: This uCode image supports MFP (802.11w). * @IWM_UCODE_TLV_FLAGS_UAPSD: This uCode image supports uAPSD * @IWM_UCODE_TLV_FLAGS_SHORT_BL: 16 entries of black list instead of 64 in scan * offload profile config command. * @IWM_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS: D3 image supports up to six * (rather than two) IPv6 addresses * @IWM_UCODE_TLV_FLAGS_NO_BASIC_SSID: not sending a probe with the SSID element * from the probe request template. * @IWM_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL: new NS offload (small version) * @IWM_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version) * @IWM_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD * @IWM_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS. * @IWM_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save * @IWM_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering. 
*/ enum iwm_ucode_tlv_flag { IWM_UCODE_TLV_FLAGS_PAN = (1 << 0), IWM_UCODE_TLV_FLAGS_NEWSCAN = (1 << 1), IWM_UCODE_TLV_FLAGS_MFP = (1 << 2), IWM_UCODE_TLV_FLAGS_SHORT_BL = (1 << 7), IWM_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS = (1 << 10), IWM_UCODE_TLV_FLAGS_NO_BASIC_SSID = (1 << 12), IWM_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL = (1 << 15), IWM_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE = (1 << 16), IWM_UCODE_TLV_FLAGS_UAPSD_SUPPORT = (1 << 24), IWM_UCODE_TLV_FLAGS_EBS_SUPPORT = (1 << 25), IWM_UCODE_TLV_FLAGS_P2P_PS_UAPSD = (1 << 26), IWM_UCODE_TLV_FLAGS_BCAST_FILTERING = (1 << 29), }; #define IWM_UCODE_TLV_FLAG_BITS \ "\020\1PAN\2NEWSCAN\3MFP\4P2P\5DW_BC_TABLE\6NEWBT_COEX\7PM_CMD\10SHORT_BL\11RX_ENERGY\12TIME_EVENT_V2\13D3_6_IPV6\14BF_UPDATED\15NO_BASIC_SSID\17D3_CONTINUITY\20NEW_NSOFFL_S\21NEW_NSOFFL_L\22SCHED_SCAN\24STA_KEY_CMD\25DEVICE_PS_CMD\26P2P_PS\27P2P_PS_DCM\30P2P_PS_SCM\31UAPSD_SUPPORT\32EBS\33P2P_PS_UAPSD\36BCAST_FILTERING\37GO_UAPSD\40LTE_COEX" /** * enum iwm_ucode_tlv_api - ucode api * @IWM_UCODE_TLV_API_FRAGMENTED_SCAN: This ucode supports active dwell time * longer than the passive one, which is essential for fragmented scan. * @IWM_UCODE_TLV_API_WIFI_MCC_UPDATE: ucode supports MCC updates with source. * @IWM_UCODE_TLV_API_LQ_SS_PARAMS: Configure STBC/BFER via LQ CMD ss_params * * @IWM_NUM_UCODE_TLV_API: number of bits used */ enum iwm_ucode_tlv_api { IWM_UCODE_TLV_API_FRAGMENTED_SCAN = 8, IWM_UCODE_TLV_API_WIFI_MCC_UPDATE = 9, IWM_UCODE_TLV_API_LQ_SS_PARAMS = 18, IWM_NUM_UCODE_TLV_API = 32 }; #define IWM_UCODE_TLV_API_BITS \ "\020\10FRAGMENTED_SCAN\11WIFI_MCC_UPDATE\16WIDE_CMD_HDR\22LQ_SS_PARAMS\30EXT_SCAN_PRIO\33TX_POWER_CHAIN" /** * enum iwm_ucode_tlv_capa - ucode capabilities * @IWM_UCODE_TLV_CAPA_D0I3_SUPPORT: supports D0i3 * @IWM_UCODE_TLV_CAPA_LAR_SUPPORT: supports Location Aware Regulatory * @IWM_UCODE_TLV_CAPA_UMAC_SCAN: supports UMAC scan. * @IWM_UCODE_TLV_CAPA_BEAMFORMER: supports Beamformer * @IWM_UCODE_TLV_CAPA_TOF_SUPPORT: supports Time of Flight (802.11mc FTM) * @IWM_UCODE_TLV_CAPA_TDLS_SUPPORT: support basic TDLS functionality * @IWM_UCODE_TLV_CAPA_TXPOWER_INSERTION_SUPPORT: supports insertion of current * tx power value into TPC Report action frame and Link Measurement Report * action frame * @IWM_UCODE_TLV_CAPA_DS_PARAM_SET_IE_SUPPORT: supports updating current * channel in DS parameter set element in probe requests. * @IWM_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT: supports adding TPC Report IE in * probe requests. * @IWM_UCODE_TLV_CAPA_QUIET_PERIOD_SUPPORT: supports Quiet Period requests * @IWM_UCODE_TLV_CAPA_DQA_SUPPORT: supports dynamic queue allocation (DQA), * which also implies support for the scheduler configuration command * @IWM_UCODE_TLV_CAPA_TDLS_CHANNEL_SWITCH: supports TDLS channel switching * @IWM_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG: Consolidated D3-D0 image * @IWM_UCODE_TLV_CAPA_HOTSPOT_SUPPORT: supports Hot Spot Command * @IWM_UCODE_TLV_CAPA_DC2DC_SUPPORT: supports DC2DC Command * @IWM_UCODE_TLV_CAPA_2G_COEX_SUPPORT: supports 2G coex Command * @IWM_UCODE_TLV_CAPA_CSUM_SUPPORT: supports TCP Checksum Offload * @IWM_UCODE_TLV_CAPA_RADIO_BEACON_STATS: support radio and beacon statistics * @IWM_UCODE_TLV_CAPA_P2P_STANDALONE_UAPSD: support p2p standalone U-APSD * @IWM_UCODE_TLV_CAPA_BT_COEX_PLCR: enabled BT Coex packet level co-running * @IWM_UCODE_TLV_CAPA_LAR_MULTI_MCC: ucode supports LAR updates with different * sources for the MCC. This TLV bit is a future replacement to * IWM_UCODE_TLV_API_WIFI_MCC_UPDATE. When either is set, multi-source LAR * is supported.
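The "\020..." strings above are in the format consumed by the FreeBSD kernel printf(9) %b conversion: a leading output base (\020 selects hexadecimal) followed by bit-number/name pairs. A sketch of how a driver might use one in a debug message; struct iwm_softc and its sc_dev member are assumptions carried over from the earlier sketches, not part of this header:

/* Sketch: pretty-print a flags word via the kernel's %b conversion. */
static void
iwm_print_tlv_flags_sketch(struct iwm_softc *sc, uint32_t flags)
{
	/* Prints the value followed by set bit names, e.g. "5<PAN,MFP>". */
	device_printf(sc->sc_dev, "ucode flags %b\n", flags,
	    IWM_UCODE_TLV_FLAG_BITS);
}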
* @IWM_UCODE_TLV_CAPA_BT_COEX_RRC: supports BT Coex RRC * @IWM_UCODE_TLV_CAPA_GSCAN_SUPPORT: supports gscan * @IWM_UCODE_TLV_CAPA_NAN_SUPPORT: supports NAN * @IWM_UCODE_TLV_CAPA_UMAC_UPLOAD: supports upload mode in umac (1=supported, * 0=no support) * @IWM_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE: extended DTS measurement * @IWM_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS: supports short PM timeouts * @IWM_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT * @IWM_UCODE_TLV_CAPA_BEACON_ANT_SELECTION: firmware will decide on what * antenna the beacon should be transmitted * @IWM_UCODE_TLV_CAPA_BEACON_STORING: firmware will store the latest beacon * from AP and will send it upon d0i3 exit. * @IWM_UCODE_TLV_CAPA_LAR_SUPPORT_V2: support LAR API V2 * @IWM_UCODE_TLV_CAPA_CT_KILL_BY_FW: firmware responsible for CT-kill * @IWM_UCODE_TLV_CAPA_TEMP_THS_REPORT_SUPPORT: supports temperature * thresholds reporting * @IWM_UCODE_TLV_CAPA_CTDP_SUPPORT: supports cTDP command * @IWM_UCODE_TLV_CAPA_USNIFFER_UNIFIED: supports usniffer enabled in * regular image. * @IWM_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG: support getting more shared * memory addresses from the firmware. * @IWM_UCODE_TLV_CAPA_LQM_SUPPORT: supports Link Quality Measurement * @IWM_UCODE_TLV_CAPA_LMAC_UPLOAD: supports upload mode in lmac (1=supported, * 0=no support) * * @IWM_NUM_UCODE_TLV_CAPA: number of bits used */ enum iwm_ucode_tlv_capa { IWM_UCODE_TLV_CAPA_D0I3_SUPPORT = 0, IWM_UCODE_TLV_CAPA_LAR_SUPPORT = 1, IWM_UCODE_TLV_CAPA_UMAC_SCAN = 2, IWM_UCODE_TLV_CAPA_BEAMFORMER = 3, IWM_UCODE_TLV_CAPA_TOF_SUPPORT = 5, IWM_UCODE_TLV_CAPA_TDLS_SUPPORT = 6, IWM_UCODE_TLV_CAPA_TXPOWER_INSERTION_SUPPORT = 8, IWM_UCODE_TLV_CAPA_DS_PARAM_SET_IE_SUPPORT = 9, IWM_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT = 10, IWM_UCODE_TLV_CAPA_QUIET_PERIOD_SUPPORT = 11, IWM_UCODE_TLV_CAPA_DQA_SUPPORT = 12, IWM_UCODE_TLV_CAPA_TDLS_CHANNEL_SWITCH = 13, IWM_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG = 17, IWM_UCODE_TLV_CAPA_HOTSPOT_SUPPORT = 18, IWM_UCODE_TLV_CAPA_DC2DC_CONFIG_SUPPORT = 19, IWM_UCODE_TLV_CAPA_2G_COEX_SUPPORT = 20, IWM_UCODE_TLV_CAPA_CSUM_SUPPORT = 21, IWM_UCODE_TLV_CAPA_RADIO_BEACON_STATS = 22, IWM_UCODE_TLV_CAPA_P2P_STANDALONE_UAPSD = 26, IWM_UCODE_TLV_CAPA_BT_COEX_PLCR = 28, IWM_UCODE_TLV_CAPA_LAR_MULTI_MCC = 29, IWM_UCODE_TLV_CAPA_BT_COEX_RRC = 30, IWM_UCODE_TLV_CAPA_GSCAN_SUPPORT = 31, IWM_UCODE_TLV_CAPA_NAN_SUPPORT = 34, IWM_UCODE_TLV_CAPA_UMAC_UPLOAD = 35, IWM_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE = 64, IWM_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS = 65, IWM_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT = 67, IWM_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT = 68, IWM_UCODE_TLV_CAPA_BEACON_ANT_SELECTION = 71, IWM_UCODE_TLV_CAPA_BEACON_STORING = 72, IWM_UCODE_TLV_CAPA_LAR_SUPPORT_V2 = 73, IWM_UCODE_TLV_CAPA_CT_KILL_BY_FW = 74, IWM_UCODE_TLV_CAPA_TEMP_THS_REPORT_SUPPORT = 75, IWM_UCODE_TLV_CAPA_CTDP_SUPPORT = 76, IWM_UCODE_TLV_CAPA_USNIFFER_UNIFIED = 77, IWM_UCODE_TLV_CAPA_LMAC_UPLOAD = 79, IWM_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG = 80, IWM_UCODE_TLV_CAPA_LQM_SUPPORT = 81, IWM_NUM_UCODE_TLV_CAPA = 128 }; /* The default calibrate table size if not specified by firmware file */ #define IWM_DEFAULT_STANDARD_PHY_CALIBRATE_TBL_SIZE 18 #define IWM_MAX_STANDARD_PHY_CALIBRATE_TBL_SIZE 19 #define IWM_MAX_PHY_CALIBRATE_TBL_SIZE 253 /* The default max probe length if not specified by the firmware file */ #define IWM_DEFAULT_MAX_PROBE_LENGTH 200 /* * enumeration of ucode section. * This enumeration is used directly for older firmware (before 16.0). 
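Because capability indices in enum iwm_ucode_tlv_capa run well past 31 (up to IWM_NUM_UCODE_TLV_CAPA = 128), a single 32-bit flags word cannot hold them; drivers keep a multi-word bitmap instead. A minimal sketch of the usual test, where enabled_capa is a hypothetical uint32_t[4] filled while parsing IWM_UCODE_TLV_ENABLED_CAPABILITIES TLVs:

/* Sketch: test one bit out of a 128-bit capability set. */
static inline int
iwm_fw_has_capa_sketch(const uint32_t *enabled_capa, unsigned int capa)
{
	if (capa >= IWM_NUM_UCODE_TLV_CAPA)
		return (0);
	return ((enabled_capa[capa / 32] & (1U << (capa % 32))) != 0);
}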
* For new firmware, there can be up to 4 sections (see below) but the * first one packaged into the firmware file is the DATA section and * some debugging code accesses that. */ enum iwm_ucode_sec { IWM_UCODE_SECTION_DATA, IWM_UCODE_SECTION_INST, }; /* * For 16.0 uCode and above, there is no differentiation between sections, * just an offset to the HW address. */ #define IWM_CPU1_CPU2_SEPARATOR_SECTION 0xFFFFCCCC #define IWM_PAGING_SEPARATOR_SECTION 0xAAAABBBB /* uCode version contains 4 values: Major/Minor/API/Serial */ #define IWM_UCODE_MAJOR(ver) (((ver) & 0xFF000000) >> 24) #define IWM_UCODE_MINOR(ver) (((ver) & 0x00FF0000) >> 16) #define IWM_UCODE_API(ver) (((ver) & 0x0000FF00) >> 8) #define IWM_UCODE_SERIAL(ver) ((ver) & 0x000000FF) /* * Calibration control struct. * Sent as part of the phy configuration command. * @flow_trigger: bitmap for which calibrations to perform according to * flow triggers. * @event_trigger: bitmap for which calibrations to perform according to * event triggers. */ struct iwm_tlv_calib_ctrl { uint32_t flow_trigger; uint32_t event_trigger; } __packed; enum iwm_fw_phy_cfg { IWM_FW_PHY_CFG_RADIO_TYPE_POS = 0, IWM_FW_PHY_CFG_RADIO_TYPE = 0x3 << IWM_FW_PHY_CFG_RADIO_TYPE_POS, IWM_FW_PHY_CFG_RADIO_STEP_POS = 2, IWM_FW_PHY_CFG_RADIO_STEP = 0x3 << IWM_FW_PHY_CFG_RADIO_STEP_POS, IWM_FW_PHY_CFG_RADIO_DASH_POS = 4, IWM_FW_PHY_CFG_RADIO_DASH = 0x3 << IWM_FW_PHY_CFG_RADIO_DASH_POS, IWM_FW_PHY_CFG_TX_CHAIN_POS = 16, IWM_FW_PHY_CFG_TX_CHAIN = 0xf << IWM_FW_PHY_CFG_TX_CHAIN_POS, IWM_FW_PHY_CFG_RX_CHAIN_POS = 20, IWM_FW_PHY_CFG_RX_CHAIN = 0xf << IWM_FW_PHY_CFG_RX_CHAIN_POS, }; #define IWM_UCODE_MAX_CS 1 /** * struct iwm_fw_cipher_scheme - a cipher scheme supported by FW. * @cipher: a cipher suite selector * @flags: cipher scheme flags (currently reserved for a future use) * @hdr_len: a size of MPDU security header * @pn_len: a size of PN * @pn_off: an offset of pn from the beginning of the security header * @key_idx_off: an offset of key index byte in the security header * @key_idx_mask: a bit mask of key_idx bits * @key_idx_shift: bit shift needed to get key_idx * @mic_len: mic length in bytes * @hw_cipher: a HW cipher index used in host commands */ struct iwm_fw_cipher_scheme { uint32_t cipher; uint8_t flags; uint8_t hdr_len; uint8_t pn_len; uint8_t pn_off; uint8_t key_idx_off; uint8_t key_idx_mask; uint8_t key_idx_shift; uint8_t mic_len; uint8_t hw_cipher; } __packed; /** * struct iwm_fw_cscheme_list - a cipher scheme list * @size: a number of entries * @cs: cipher scheme entries */ struct iwm_fw_cscheme_list { uint8_t size; struct iwm_fw_cipher_scheme cs[]; } __packed; /* * END iwl-fw.h */ /* * BEGIN iwl-fw-file.h */ /* v1/v2 uCode file layout */ struct iwm_ucode_header { uint32_t ver; /* major/minor/API/serial */ union { struct { uint32_t inst_size; /* bytes of runtime code */ uint32_t data_size; /* bytes of runtime data */ uint32_t init_size; /* bytes of init code */ uint32_t init_data_size; /* bytes of init data */ uint32_t boot_size; /* bytes of bootstrap code */ uint8_t data[0]; /* in same order as sizes */ } v1; struct { uint32_t build; /* build number */ uint32_t inst_size; /* bytes of runtime code */ uint32_t data_size; /* bytes of runtime data */ uint32_t init_size; /* bytes of init code */ uint32_t init_data_size; /* bytes of init data */ uint32_t boot_size; /* bytes of bootstrap code */ uint8_t data[0]; /* in same order as sizes */ } v2; } u; }; /* * new TLV uCode file layout * * The new TLV file format contains TLVs, that each specify * some piece of data. 
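A minimal sketch of walking that TLV area, using the iwm_tlv_ucode_header and iwm_ucode_tlv types defined just below; le32toh() and FreeBSD's roundup2() are assumed, and error handling is reduced to the bare minimum:

/* Sketch: iterate over the TLVs of a new-style firmware image. */
static int
iwm_walk_tlvs_sketch(const uint8_t *img, size_t len)
{
	const struct iwm_tlv_ucode_header *hdr =
	    (const struct iwm_tlv_ucode_header *)img;
	const uint8_t *p, *end = img + len;

	if (len < sizeof(*hdr) || hdr->zero != 0 ||
	    le32toh(hdr->magic) != IWM_TLV_UCODE_MAGIC)
		return (EINVAL);

	for (p = hdr->data; p + sizeof(struct iwm_ucode_tlv) <= end;) {
		const struct iwm_ucode_tlv *tlv =
		    (const struct iwm_ucode_tlv *)p;

		/* ... dispatch on le32toh(tlv->type) here ... */

		/* Each TLV is padded to a multiple of 4 bytes. */
		p = tlv->data + roundup2(le32toh(tlv->length), 4);
	}
	return (0);
}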
*/ enum iwm_ucode_tlv_type { IWM_UCODE_TLV_INVALID = 0, /* unused */ IWM_UCODE_TLV_INST = 1, IWM_UCODE_TLV_DATA = 2, IWM_UCODE_TLV_INIT = 3, IWM_UCODE_TLV_INIT_DATA = 4, IWM_UCODE_TLV_BOOT = 5, IWM_UCODE_TLV_PROBE_MAX_LEN = 6, /* a uint32_t value */ IWM_UCODE_TLV_PAN = 7, IWM_UCODE_TLV_RUNT_EVTLOG_PTR = 8, IWM_UCODE_TLV_RUNT_EVTLOG_SIZE = 9, IWM_UCODE_TLV_RUNT_ERRLOG_PTR = 10, IWM_UCODE_TLV_INIT_EVTLOG_PTR = 11, IWM_UCODE_TLV_INIT_EVTLOG_SIZE = 12, IWM_UCODE_TLV_INIT_ERRLOG_PTR = 13, IWM_UCODE_TLV_ENHANCE_SENS_TBL = 14, IWM_UCODE_TLV_PHY_CALIBRATION_SIZE = 15, IWM_UCODE_TLV_WOWLAN_INST = 16, IWM_UCODE_TLV_WOWLAN_DATA = 17, IWM_UCODE_TLV_FLAGS = 18, IWM_UCODE_TLV_SEC_RT = 19, IWM_UCODE_TLV_SEC_INIT = 20, IWM_UCODE_TLV_SEC_WOWLAN = 21, IWM_UCODE_TLV_DEF_CALIB = 22, IWM_UCODE_TLV_PHY_SKU = 23, IWM_UCODE_TLV_SECURE_SEC_RT = 24, IWM_UCODE_TLV_SECURE_SEC_INIT = 25, IWM_UCODE_TLV_SECURE_SEC_WOWLAN = 26, IWM_UCODE_TLV_NUM_OF_CPU = 27, IWM_UCODE_TLV_CSCHEME = 28, /* * Following two are not in our base tag, but allow * handling ucode version 9. */ IWM_UCODE_TLV_API_CHANGES_SET = 29, IWM_UCODE_TLV_ENABLED_CAPABILITIES = 30, IWM_UCODE_TLV_N_SCAN_CHANNELS = 31, IWM_UCODE_TLV_PAGING = 32, IWM_UCODE_TLV_SEC_RT_USNIFFER = 34, IWM_UCODE_TLV_SDIO_ADMA_ADDR = 35, IWM_UCODE_TLV_FW_VERSION = 36, IWM_UCODE_TLV_FW_DBG_DEST = 38, IWM_UCODE_TLV_FW_DBG_CONF = 39, IWM_UCODE_TLV_FW_DBG_TRIGGER = 40, IWM_UCODE_TLV_FW_GSCAN_CAPA = 50, IWM_UCODE_TLV_FW_MEM_SEG = 51, }; struct iwm_ucode_tlv { uint32_t type; /* see above */ uint32_t length; /* not including type/length fields */ uint8_t data[0]; }; struct iwm_ucode_api { uint32_t api_index; uint32_t api_flags; } __packed; struct iwm_ucode_capa { uint32_t api_index; uint32_t api_capa; } __packed; #define IWM_TLV_UCODE_MAGIC 0x0a4c5749 struct iwm_tlv_ucode_header { /* * The TLV style ucode header is distinguished from * the v1/v2 style header by first four bytes being * zero, as such is an invalid combination of * major/minor/API/serial versions. */ uint32_t zero; uint32_t magic; uint8_t human_readable[64]; uint32_t ver; /* major/minor/API/serial */ uint32_t build; uint64_t ignore; /* * The data contained herein has a TLV layout, * see above for the TLV header and types. * Note that each TLV is padded to a length * that is a multiple of 4 for alignment. */ uint8_t data[0]; }; /* * END iwl-fw-file.h */ /* * BEGIN iwl-prph.h */ /* * Registers in this file are internal, not PCI bus memory mapped. * Driver accesses these via IWM_HBUS_TARG_PRPH_* registers. 
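A minimal sketch of such an indirect access, pairing IWM_HBUS_TARG_PRPH_RADDR with IWM_HBUS_TARG_PRPH_RDAT and setting the byte-count field (bits 24-25) to 3 for a full dword, as the HBUS comments earlier describe; the IWM_READ/IWM_WRITE accessors are again assumed:

/* Sketch: indirect read of an internal periphery register. */
static uint32_t
iwm_read_prph_sketch(struct iwm_softc *sc, uint32_t addr)
{
	IWM_WRITE(sc, IWM_HBUS_TARG_PRPH_RADDR,
	    ((addr & 0x000fffff) | (3 << 24)));
	return (IWM_READ(sc, IWM_HBUS_TARG_PRPH_RDAT));
}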
*/ #define IWM_PRPH_BASE (0x00000) #define IWM_PRPH_END (0xFFFFF) /* APMG (power management) constants */ #define IWM_APMG_BASE (IWM_PRPH_BASE + 0x3000) #define IWM_APMG_CLK_CTRL_REG (IWM_APMG_BASE + 0x0000) #define IWM_APMG_CLK_EN_REG (IWM_APMG_BASE + 0x0004) #define IWM_APMG_CLK_DIS_REG (IWM_APMG_BASE + 0x0008) #define IWM_APMG_PS_CTRL_REG (IWM_APMG_BASE + 0x000c) #define IWM_APMG_PCIDEV_STT_REG (IWM_APMG_BASE + 0x0010) #define IWM_APMG_RFKILL_REG (IWM_APMG_BASE + 0x0014) #define IWM_APMG_RTC_INT_STT_REG (IWM_APMG_BASE + 0x001c) #define IWM_APMG_RTC_INT_MSK_REG (IWM_APMG_BASE + 0x0020) #define IWM_APMG_DIGITAL_SVR_REG (IWM_APMG_BASE + 0x0058) #define IWM_APMG_ANALOG_SVR_REG (IWM_APMG_BASE + 0x006C) #define IWM_APMS_CLK_VAL_MRB_FUNC_MODE (0x00000001) #define IWM_APMG_CLK_VAL_DMA_CLK_RQT (0x00000200) #define IWM_APMG_CLK_VAL_BSM_CLK_RQT (0x00000800) #define IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS (0x00400000) #define IWM_APMG_PS_CTRL_VAL_RESET_REQ (0x04000000) #define IWM_APMG_PS_CTRL_MSK_PWR_SRC (0x03000000) #define IWM_APMG_PS_CTRL_VAL_PWR_SRC_VMAIN (0x00000000) #define IWM_APMG_PS_CTRL_VAL_PWR_SRC_VAUX (0x02000000) #define IWM_APMG_SVR_VOLTAGE_CONFIG_BIT_MSK (0x000001E0) /* bit 8:5 */ #define IWM_APMG_SVR_DIGITAL_VOLTAGE_1_32 (0x00000060) #define IWM_APMG_PCIDEV_STT_VAL_L1_ACT_DIS (0x00000800) #define IWM_APMG_RTC_INT_STT_RFKILL (0x10000000) /* Device system time */ #define IWM_DEVICE_SYSTEM_TIME_REG 0xA0206C /* Device NMI register */ #define IWM_DEVICE_SET_NMI_REG 0x00a01c30 #define IWM_DEVICE_SET_NMI_VAL_HW 0x01 #define IWM_DEVICE_SET_NMI_VAL_DRV 0x80 #define IWM_DEVICE_SET_NMI_8000_REG 0x00a01c24 #define IWM_DEVICE_SET_NMI_8000_VAL 0x1000000 /* * Device reset for family 8000 * write to bit 24 in order to reset the CPU */ #define IWM_RELEASE_CPU_RESET 0x300c #define IWM_RELEASE_CPU_RESET_BIT 0x1000000 /***************************************************************************** * 7000/3000 series SHR DTS addresses * *****************************************************************************/ #define IWM_SHR_MISC_WFM_DTS_EN (0x00a10024) #define IWM_DTSC_CFG_MODE (0x00a10604) #define IWM_DTSC_VREF_AVG (0x00a10648) #define IWM_DTSC_VREF5_AVG (0x00a1064c) #define IWM_DTSC_CFG_MODE_PERIODIC (0x2) #define IWM_DTSC_PTAT_AVG (0x00a10650) /** * Tx Scheduler * * The Tx Scheduler selects the next frame to be transmitted, choosing TFDs * (Transmit Frame Descriptors) from up to 16 circular Tx queues resident in * host DRAM. It steers each frame's Tx command (which contains the frame * data) into one of up to 7 prioritized Tx DMA FIFO channels within the * device. A queue maps to only one (selectable by driver) Tx DMA channel, * but one DMA channel may take input from several queues. * * Tx DMA FIFOs have dedicated purposes. * * For 5000 series and up, they are used differently * (cf. iwl5000_default_queue_to_tx_fifo in iwl-5000.c): * * 0 -- EDCA BK (background) frames, lowest priority * 1 -- EDCA BE (best effort) frames, normal priority * 2 -- EDCA VI (video) frames, higher priority * 3 -- EDCA VO (voice) and management frames, highest priority * 4 -- unused * 5 -- unused * 6 -- unused * 7 -- Commands * * Driver should normally map queues 0-6 to Tx DMA/FIFO channels 0-6. * In addition, driver can map the remaining queues to Tx DMA/FIFO * channels 0-3 to support 11n aggregation via EDCA DMA channels. * * The driver sets up each queue to work in one of two modes: * * 1) Scheduler-Ack, in which the scheduler automatically supports a * block-ack (BA) window of up to 64 TFDs. 
In this mode, each queue * contains TFDs for a unique combination of Recipient Address (RA) * and Traffic Identifier (TID), that is, traffic of a given * Quality-Of-Service (QOS) priority, destined for a single station. * * In scheduler-ack mode, the scheduler keeps track of the Tx status of * each frame within the BA window, including whether it's been transmitted, * and whether it's been acknowledged by the receiving station. The device * automatically processes block-acks received from the receiving STA, * and reschedules un-acked frames to be retransmitted (successful * Tx completion may end up being out-of-order). * * The driver must maintain the queue's Byte Count table in host DRAM * for this mode. * This mode does not support fragmentation. * * 2) FIFO (a.k.a. non-Scheduler-ACK), in which each TFD is processed in order. * The device may automatically retry Tx, but will retry only one frame * at a time, until receiving ACK from receiving station, or reaching * retry limit and giving up. * * The command queue (#4/#9) must use this mode! * This mode does not require use of the Byte Count table in host DRAM. * * Driver controls scheduler operation via 3 means: * 1) Scheduler registers * 2) Shared scheduler data base in internal SRAM * 3) Shared data in host DRAM * * Initialization: * * When loading, driver should allocate memory for: * 1) 16 TFD circular buffers, each with space for (typically) 256 TFDs. * 2) 16 Byte Count circular buffers in 16 KBytes contiguous memory * (1024 bytes for each queue). * * After receiving "Alive" response from uCode, driver must initialize * the scheduler (especially for queue #4/#9, the command queue, otherwise * the driver can't issue commands!): */ #define IWM_SCD_MEM_LOWER_BOUND (0x0000) /** * Max Tx window size is the max number of contiguous TFDs that the scheduler * can keep track of at one time when creating block-ack chains of frames. * Note that "64" matches the number of ack bits in a block-ack packet. 
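As a concrete illustration of that post-"Alive" initialization, iwlwifi-derived drivers typically point the scheduler at the driver's DRAM region through the periphery registers listed below. A sketch, assuming an iwm_write_prph_sketch() counterpart to the indirect read shown earlier; the >> 10 scaling of the physical address is an assumption carried over from those drivers:

/* Sketch: minimal scheduler bring-up after the "Alive" response. */
static void
iwm_post_alive_sketch(struct iwm_softc *sc, bus_addr_t sched_paddr)
{
	/* Point the scheduler at driver DRAM (>> 10 scaling assumed). */
	iwm_write_prph_sketch(sc, IWM_SCD_DRAM_BASE_ADDR, sched_paddr >> 10);
	/* Leave chaining/aggregation off until queues are configured. */
	iwm_write_prph_sketch(sc, IWM_SCD_CHAINEXT_EN, 0);
	iwm_write_prph_sketch(sc, IWM_SCD_AGGR_SEL, 0);
}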
*/ #define IWM_SCD_WIN_SIZE 64 #define IWM_SCD_FRAME_LIMIT 64 #define IWM_SCD_TXFIFO_POS_TID (0) #define IWM_SCD_TXFIFO_POS_RA (4) #define IWM_SCD_QUEUE_RA_TID_MAP_RATID_MSK (0x01FF) /* agn SCD */ #define IWM_SCD_QUEUE_STTS_REG_POS_TXF (0) #define IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE (3) #define IWM_SCD_QUEUE_STTS_REG_POS_WSL (4) #define IWM_SCD_QUEUE_STTS_REG_POS_SCD_ACT_EN (19) #define IWM_SCD_QUEUE_STTS_REG_MSK (0x017F0000) #define IWM_SCD_QUEUE_CTX_REG1_CREDIT_POS (8) #define IWM_SCD_QUEUE_CTX_REG1_CREDIT_MSK (0x00FFFF00) #define IWM_SCD_QUEUE_CTX_REG1_SUPER_CREDIT_POS (24) #define IWM_SCD_QUEUE_CTX_REG1_SUPER_CREDIT_MSK (0xFF000000) #define IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_POS (0) #define IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_MSK (0x0000007F) #define IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_POS (16) #define IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_MSK (0x007F0000) #define IWM_SCD_GP_CTRL_ENABLE_31_QUEUES (1 << 0) #define IWM_SCD_GP_CTRL_AUTO_ACTIVE_MODE (1 << 18) /* Context Data */ #define IWM_SCD_CONTEXT_MEM_LOWER_BOUND (IWM_SCD_MEM_LOWER_BOUND + 0x600) #define IWM_SCD_CONTEXT_MEM_UPPER_BOUND (IWM_SCD_MEM_LOWER_BOUND + 0x6A0) /* Tx status */ #define IWM_SCD_TX_STTS_MEM_LOWER_BOUND (IWM_SCD_MEM_LOWER_BOUND + 0x6A0) #define IWM_SCD_TX_STTS_MEM_UPPER_BOUND (IWM_SCD_MEM_LOWER_BOUND + 0x7E0) /* Translation Data */ #define IWM_SCD_TRANS_TBL_MEM_LOWER_BOUND (IWM_SCD_MEM_LOWER_BOUND + 0x7E0) #define IWM_SCD_TRANS_TBL_MEM_UPPER_BOUND (IWM_SCD_MEM_LOWER_BOUND + 0x808) #define IWM_SCD_CONTEXT_QUEUE_OFFSET(x)\ (IWM_SCD_CONTEXT_MEM_LOWER_BOUND + ((x) * 8)) #define IWM_SCD_TX_STTS_QUEUE_OFFSET(x)\ (IWM_SCD_TX_STTS_MEM_LOWER_BOUND + ((x) * 16)) #define IWM_SCD_TRANS_TBL_OFFSET_QUEUE(x) \ ((IWM_SCD_TRANS_TBL_MEM_LOWER_BOUND + ((x) * 2)) & 0xfffc) #define IWM_SCD_BASE (IWM_PRPH_BASE + 0xa02c00) #define IWM_SCD_SRAM_BASE_ADDR (IWM_SCD_BASE + 0x0) #define IWM_SCD_DRAM_BASE_ADDR (IWM_SCD_BASE + 0x8) #define IWM_SCD_AIT (IWM_SCD_BASE + 0x0c) #define IWM_SCD_TXFACT (IWM_SCD_BASE + 0x10) #define IWM_SCD_ACTIVE (IWM_SCD_BASE + 0x14) #define IWM_SCD_QUEUECHAIN_SEL (IWM_SCD_BASE + 0xe8) #define IWM_SCD_CHAINEXT_EN (IWM_SCD_BASE + 0x244) #define IWM_SCD_AGGR_SEL (IWM_SCD_BASE + 0x248) #define IWM_SCD_INTERRUPT_MASK (IWM_SCD_BASE + 0x108) #define IWM_SCD_GP_CTRL (IWM_SCD_BASE + 0x1a8) #define IWM_SCD_EN_CTRL (IWM_SCD_BASE + 0x254) static inline unsigned int IWM_SCD_QUEUE_WRPTR(unsigned int chnl) { if (chnl < 20) return IWM_SCD_BASE + 0x18 + chnl * 4; return IWM_SCD_BASE + 0x284 + (chnl - 20) * 4; } static inline unsigned int IWM_SCD_QUEUE_RDPTR(unsigned int chnl) { if (chnl < 20) return IWM_SCD_BASE + 0x68 + chnl * 4; return IWM_SCD_BASE + 0x2B4 + (chnl - 20) * 4; } static inline unsigned int IWM_SCD_QUEUE_STATUS_BITS(unsigned int chnl) { if (chnl < 20) return IWM_SCD_BASE + 0x10c + chnl * 4; return IWM_SCD_BASE + 0x384 + (chnl - 20) * 4; } /*********************** END TX SCHEDULER *************************************/ /* Oscillator clock */ #define IWM_OSC_CLK (0xa04068) #define IWM_OSC_CLK_FORCE_CONTROL (0x8) /* * END iwl-prph.h */ /* * BEGIN iwl-fh.h */ /****************************/ /* Flow Handler Definitions */ /****************************/ /** * This I/O area is directly read/writable by driver (e.g. Linux uses writel()) * Addresses are offsets from device's PCI hardware base address. */ #define IWM_FH_MEM_LOWER_BOUND (0x1000) #define IWM_FH_MEM_UPPER_BOUND (0x2000) /** * Keep-Warm (KW) buffer base address. 
* * Driver must allocate a 4KByte buffer that is for keeping the * host DRAM powered on (via dummy accesses to DRAM) to maintain low-latency * DRAM access when doing Txing or Rxing. The dummy accesses prevent host * from going into a power-savings mode that would cause higher DRAM latency, * and possible data over/under-runs, before all Tx/Rx is complete. * * Driver loads IWM_FH_KW_MEM_ADDR_REG with the physical address (bits 35:4) * of the buffer, which must be 4K aligned. Once this is set up, the device * automatically invokes keep-warm accesses when normal accesses might not * be sufficient to maintain fast DRAM response. * * Bit fields: * 31-0: Keep-warm buffer physical base address [35:4], must be 4K aligned */ #define IWM_FH_KW_MEM_ADDR_REG (IWM_FH_MEM_LOWER_BOUND + 0x97C) /** * TFD Circular Buffers Base (CBBC) addresses * * Device has 16 base pointer registers, one for each of 16 host-DRAM-resident * circular buffers (CBs/queues) containing Transmit Frame Descriptors (TFDs) * (see struct iwm_tfd_frame). These 16 pointer registers are offset by 0x04 * bytes from one another. Each TFD circular buffer in DRAM must be 256-byte * aligned (address bits 0-7 must be 0). * Later devices have 20 (5000 series) or 30 (higher) queues, but the registers * for them are in different places. * * Bit fields in each pointer register: * 27-0: TFD CB physical base address [35:8], must be 256-byte aligned */ #define IWM_FH_MEM_CBBC_0_15_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0x9D0) #define IWM_FH_MEM_CBBC_0_15_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xA10) #define IWM_FH_MEM_CBBC_16_19_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xBF0) #define IWM_FH_MEM_CBBC_16_19_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xC00) #define IWM_FH_MEM_CBBC_20_31_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xB20) #define IWM_FH_MEM_CBBC_20_31_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xB80) /* Find TFD CB base pointer for given queue */ static inline unsigned int IWM_FH_MEM_CBBC_QUEUE(unsigned int chnl) { if (chnl < 16) return IWM_FH_MEM_CBBC_0_15_LOWER_BOUND + 4 * chnl; if (chnl < 20) return IWM_FH_MEM_CBBC_16_19_LOWER_BOUND + 4 * (chnl - 16); return IWM_FH_MEM_CBBC_20_31_LOWER_BOUND + 4 * (chnl - 20); } /** * Rx SRAM Control and Status Registers (RSCSR) * * These registers provide handshake between driver and device for the Rx queue * (this queue handles *all* command responses, notifications, Rx data, etc. * sent from uCode to host driver). Unlike Tx, there is only one Rx * queue, and only one Rx DMA/FIFO channel. Also unlike Tx, which can * concatenate up to 20 DRAM buffers to form a Tx frame, each Receive Buffer * Descriptor (RBD) points to only one Rx Buffer (RB); there is a 1:1 * mapping between RBDs and RBs. * * Driver must allocate host DRAM memory for the following, and set the * physical address of each into device registers: * * 1) Receive Buffer Descriptor (RBD) circular buffer (CB), typically with 256 * entries (although any power of 2, up to 4096, is selectable by driver). * Each entry (1 dword) points to a receive buffer (RB) of consistent size * (typically 4K, although 8K or 16K are also selectable by driver). * Driver sets up RB size and number of RBDs in the CB via Rx config * register IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG. * * Bit fields within one RBD: * 27-0: Receive Buffer physical address bits [35:8], 256-byte aligned * * Driver sets physical address [35:8] of base of RBD circular buffer * into IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG [27:0].
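Putting the two scalings above into practice, the driver programs the keep-warm buffer base with bits [35:4] and each queue's TFD ring base with bits [35:8]. A short sketch; kw_paddr and txq_paddr are hypothetical DMA addresses of suitably aligned driver-owned buffers, and IWM_WRITE is the assumed accessor from the earlier sketches:

/* Sketch: program keep-warm and one TFD circular buffer base. */
static void
iwm_fh_tx_init_sketch(struct iwm_softc *sc, bus_addr_t kw_paddr,
    int qid, bus_addr_t txq_paddr)
{
	/* Keep-warm buffer: physical address bits [35:4], 4K aligned. */
	IWM_WRITE(sc, IWM_FH_KW_MEM_ADDR_REG, kw_paddr >> 4);
	/* TFD circular buffer: bits [35:8], 256-byte aligned. */
	IWM_WRITE(sc, IWM_FH_MEM_CBBC_QUEUE(qid), txq_paddr >> 8);
}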
* * 2) Rx status buffer, 8 bytes, in which uCode indicates which Rx Buffers * (RBs) have been filled, via a "write pointer", actually the index of * the RB's corresponding RBD within the circular buffer. Driver sets * physical address [35:4] into IWM_FH_RSCSR_CHNL0_STTS_WPTR_REG [31:0]. * * Bit fields in lower dword of Rx status buffer (upper dword not used * by driver): * 31-12: Not used by driver * 11- 0: Index of last filled Rx buffer descriptor * (device writes, driver reads this value) * * As the driver prepares Receive Buffers (RBs) for device to fill, driver must * enter pointers to these RBs into contiguous RBD circular buffer entries, * and update the device's "write" index register, * IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG. * * This "write" index corresponds to the *next* RBD that the driver will make * available, i.e. one RBD past the tail of the ready-to-fill RBDs within * the circular buffer. This value should initially be 0 (before preparing any * RBs), should be 8 after preparing the first 8 RBs (for example), and must * wrap back to 0 at the end of the circular buffer (but don't wrap before * "read" index has advanced past 1! See below). * NOTE: DEVICE EXPECTS THE WRITE INDEX TO BE INCREMENTED IN MULTIPLES OF 8. * * As the device fills RBs (referenced from contiguous RBDs within the circular * buffer), it updates the Rx status buffer in host DRAM, 2) described above, * to tell the driver the index of the latest filled RBD. The driver must * read this "read" index from DRAM after receiving an Rx interrupt from device. * * The driver must also internally keep track of a third index, which is the * next RBD to process. When receiving an Rx interrupt, driver should process * all filled but unprocessed RBs up to, but not including, the RB * corresponding to the "read" index. For example, if "read" index becomes "1", * driver may process the RB pointed to by RBD 0. Depending on volume of * traffic, there may be many RBs to process. * * If read index == write index, device thinks there is no room to put new data. * Due to this, the maximum number of filled RBs is 255, instead of 256. To * be safe, make sure that there is a gap of at least 2 RBDs between "write" * and "read" indexes; that is, make sure that there are no more than 254 * buffers waiting to be filled. */ #define IWM_FH_MEM_RSCSR_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xBC0) #define IWM_FH_MEM_RSCSR_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xC00) #define IWM_FH_MEM_RSCSR_CHNL0 (IWM_FH_MEM_RSCSR_LOWER_BOUND) /** * Physical base address of 8-byte Rx Status buffer. * Bit fields: * 31-0: Rx status buffer physical base address [35:4], must be 16-byte aligned. */ #define IWM_FH_RSCSR_CHNL0_STTS_WPTR_REG (IWM_FH_MEM_RSCSR_CHNL0) /** * Physical base address of Rx Buffer Descriptor Circular Buffer. * Bit fields: * 27-0: RBD CB physical base address [35:8], must be 256-byte aligned. */ #define IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG (IWM_FH_MEM_RSCSR_CHNL0 + 0x004) /** * Rx write pointer (index, really!). * Bit fields: * 11-0: Index of driver's most recent prepared-to-be-filled RBD, + 1. * NOTE: For 256-entry circular buffer, use only bits [7:0].
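The multiples-of-8 rule in the NOTE above shows up directly in ring maintenance code. A sketch of publishing the write index through the channel-0 write-pointer register defined just below; `next` is the driver's running count of prepared RBDs, and IWM_RX_QUEUE_SIZE (256) is defined later in this header:

/* Sketch: publish the Rx "write" index, in multiples of 8 only. */
static void
iwm_update_rx_wptr_sketch(struct iwm_softc *sc, uint32_t next)
{
	next %= IWM_RX_QUEUE_SIZE;	/* wrap at the end of the ring */
	if ((next % 8) == 0)
		IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, next);
}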
*/ #define IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG (IWM_FH_MEM_RSCSR_CHNL0 + 0x008) #define IWM_FH_RSCSR_CHNL0_WPTR (IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG) #define IWM_FW_RSCSR_CHNL0_RXDCB_RDPTR_REG (IWM_FH_MEM_RSCSR_CHNL0 + 0x00c) #define IWM_FH_RSCSR_CHNL0_RDPTR IWM_FW_RSCSR_CHNL0_RXDCB_RDPTR_REG /** * Rx Config/Status Registers (RCSR) * Rx Config Reg for channel 0 (only channel used) * * Driver must initialize IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG as follows for * normal operation (see bit fields). * * Clearing IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG to 0 turns off Rx DMA. * Driver should poll IWM_FH_MEM_RSSR_RX_STATUS_REG for * IWM_FH_RSSR_CHNL0_RX_STATUS_CHNL_IDLE (bit 24) before continuing. * * Bit fields: * 31-30: Rx DMA channel enable: '00' off/pause, '01' pause at end of frame, * '10' operate normally * 29-24: reserved * 23-20: # RBDs in circular buffer = 2^value; use "8" for 256 RBDs (normal), * min "5" for 32 RBDs, max "12" for 4096 RBDs. * 19-18: reserved * 17-16: size of each receive buffer; '00' 4K (normal), '01' 8K, * '10' 12K, '11' 16K. * 15-14: reserved * 13-12: IRQ destination; '00' none, '01' host driver (normal operation) * 11- 4: timeout for closing Rx buffer and interrupting host (units 32 usec) * typical value 0x10 (about 1/2 msec) * 3- 0: reserved */ #define IWM_FH_MEM_RCSR_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xC00) #define IWM_FH_MEM_RCSR_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xCC0) #define IWM_FH_MEM_RCSR_CHNL0 (IWM_FH_MEM_RCSR_LOWER_BOUND) #define IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG (IWM_FH_MEM_RCSR_CHNL0) #define IWM_FH_MEM_RCSR_CHNL0_RBDCB_WPTR (IWM_FH_MEM_RCSR_CHNL0 + 0x8) #define IWM_FH_MEM_RCSR_CHNL0_FLUSH_RB_REQ (IWM_FH_MEM_RCSR_CHNL0 + 0x10) #define IWM_FH_RCSR_CHNL0_RX_CONFIG_RB_TIMEOUT_MSK (0x00000FF0) /* bits 4-11 */ #define IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_MSK (0x00001000) /* bits 12 */ #define IWM_FH_RCSR_CHNL0_RX_CONFIG_SINGLE_FRAME_MSK (0x00008000) /* bit 15 */ #define IWM_FH_RCSR_CHNL0_RX_CONFIG_RB_SIZE_MSK (0x00030000) /* bits 16-17 */ #define IWM_FH_RCSR_CHNL0_RX_CONFIG_RBDBC_SIZE_MSK (0x00F00000) /* bits 20-23 */ #define IWM_FH_RCSR_CHNL0_RX_CONFIG_DMA_CHNL_EN_MSK (0xC0000000) /* bits 30-31*/ #define IWM_FH_RCSR_RX_CONFIG_RBDCB_SIZE_POS (20) #define IWM_FH_RCSR_RX_CONFIG_REG_IRQ_RBTH_POS (4) #define IWM_RX_RB_TIMEOUT (0x11) #define IWM_FH_RCSR_RX_CONFIG_CHNL_EN_PAUSE_VAL (0x00000000) #define IWM_FH_RCSR_RX_CONFIG_CHNL_EN_PAUSE_EOF_VAL (0x40000000) #define IWM_FH_RCSR_RX_CONFIG_CHNL_EN_ENABLE_VAL (0x80000000) #define IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_4K (0x00000000) #define IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_8K (0x00010000) #define IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_12K (0x00020000) #define IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_16K (0x00030000) #define IWM_FH_RCSR_CHNL0_RX_IGNORE_RXF_EMPTY (0x00000004) #define IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_NO_INT_VAL (0x00000000) #define IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_INT_HOST_VAL (0x00001000) /** * Rx Shared Status Registers (RSSR) * * After stopping Rx DMA channel (writing 0 to * IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG), driver must poll * IWM_FH_MEM_RSSR_RX_STATUS_REG until Rx channel is idle. * * Bit fields: * 24: 1 = Channel 0 is idle * * IWM_FH_MEM_RSSR_SHARED_CTRL_REG and IWM_FH_MEM_RSSR_RX_ENABLE_ERR_IRQ2DRV * contain default values that should not be altered by the driver. 
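The bit-field description above translates directly into the value a driver writes to bring up Rx DMA: enable the channel, pick 4K buffers, a 256-entry RBD ring (2^8), interrupts to the host, and a close-RB timeout. A sketch composed purely from the constants above (IWM_RX_QUEUE_SIZE_LOG, value 8, is defined later in this header; IWM_WRITE is the assumed accessor):

/* Sketch: one plausible channel-0 Rx DMA configuration. */
static void
iwm_rx_config_sketch(struct iwm_softc *sc)
{
	IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG,
	    IWM_FH_RCSR_RX_CONFIG_CHNL_EN_ENABLE_VAL |
	    IWM_FH_RCSR_CHNL0_RX_IGNORE_RXF_EMPTY |
	    IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_INT_HOST_VAL |
	    IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_4K |
	    (IWM_RX_RB_TIMEOUT << IWM_FH_RCSR_RX_CONFIG_REG_IRQ_RBTH_POS) |
	    (IWM_RX_QUEUE_SIZE_LOG << IWM_FH_RCSR_RX_CONFIG_RBDCB_SIZE_POS));
}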
*/ #define IWM_FH_MEM_RSSR_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xC40) #define IWM_FH_MEM_RSSR_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xD00) #define IWM_FH_MEM_RSSR_SHARED_CTRL_REG (IWM_FH_MEM_RSSR_LOWER_BOUND) #define IWM_FH_MEM_RSSR_RX_STATUS_REG (IWM_FH_MEM_RSSR_LOWER_BOUND + 0x004) #define IWM_FH_MEM_RSSR_RX_ENABLE_ERR_IRQ2DRV\ (IWM_FH_MEM_RSSR_LOWER_BOUND + 0x008) #define IWM_FH_RSSR_CHNL0_RX_STATUS_CHNL_IDLE (0x01000000) #define IWM_FH_MEM_TFDIB_REG1_ADDR_BITSHIFT 28 /* TFDB Area - TFDs buffer table */ #define IWM_FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK (0xFFFFFFFF) #define IWM_FH_TFDIB_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0x900) #define IWM_FH_TFDIB_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0x958) #define IWM_FH_TFDIB_CTRL0_REG(_chnl) (IWM_FH_TFDIB_LOWER_BOUND + 0x8 * (_chnl)) #define IWM_FH_TFDIB_CTRL1_REG(_chnl) (IWM_FH_TFDIB_LOWER_BOUND + 0x8 * (_chnl) + 0x4) /** * Transmit DMA Channel Control/Status Registers (TCSR) * * Device has one configuration register for each of 8 Tx DMA/FIFO channels * supported in hardware (don't confuse these with the 16 Tx queues in DRAM, * which feed the DMA/FIFO channels); config regs are separated by 0x20 bytes. * * To use a Tx DMA channel, driver must initialize its * IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl) with: * * IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | * IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE_VAL * * All other bits should be 0. * * Bit fields: * 31-30: Tx DMA channel enable: '00' off/pause, '01' pause at end of frame, * '10' operate normally * 29- 4: Reserved, set to "0" * 3: Enable internal DMA requests (1, normal operation), disable (0) * 2- 0: Reserved, set to "0" */ #define IWM_FH_TCSR_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xD00) #define IWM_FH_TCSR_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xE60) /* Find Control/Status reg for given Tx DMA/FIFO channel */ #define IWM_FH_TCSR_CHNL_NUM (8) /* TCSR: tx_config register values */ #define IWM_FH_TCSR_CHNL_TX_CONFIG_REG(_chnl) \ (IWM_FH_TCSR_LOWER_BOUND + 0x20 * (_chnl)) #define IWM_FH_TCSR_CHNL_TX_CREDIT_REG(_chnl) \ (IWM_FH_TCSR_LOWER_BOUND + 0x20 * (_chnl) + 0x4) #define IWM_FH_TCSR_CHNL_TX_BUF_STS_REG(_chnl) \ (IWM_FH_TCSR_LOWER_BOUND + 0x20 * (_chnl) + 0x8) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_MSG_MODE_TXF (0x00000000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_MSG_MODE_DRV (0x00000001) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE (0x00000000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE (0x00000008) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_NOINT (0x00000000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD (0x00100000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_IFTFD (0x00200000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_RTC_NOINT (0x00000000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_RTC_ENDTFD (0x00400000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_RTC_IFTFD (0x00800000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE (0x00000000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE_EOF (0x40000000) #define IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE (0x80000000) #define IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_EMPTY (0x00000000) #define IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_WAIT (0x00002000) #define IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID (0x00000003) #define IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM (20) #define IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX (12) /** * Tx Shared Status Registers (TSSR) * * After stopping Tx DMA channel (writing 0 to * IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl)), driver must poll * IWM_FH_TSSR_TX_STATUS_REG 
until selected Tx channel is idle * (channel's buffers empty | no pending requests). * * Bit fields: * 31-24: 1 = Channel buffers empty (channel 7:0) * 23-16: 1 = No pending requests (channel 7:0) */ #define IWM_FH_TSSR_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xEA0) #define IWM_FH_TSSR_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0xEC0) #define IWM_FH_TSSR_TX_STATUS_REG (IWM_FH_TSSR_LOWER_BOUND + 0x010) /** * Bit fields for TSSR (Tx Shared Status & Control) error status register: * 31: Indicates an address error when accessed to internal memory * uCode/driver must write "1" in order to clear this flag * 30: Indicates that Host did not send the expected number of dwords to FH * uCode/driver must write "1" in order to clear this flag * 16-9: Each status bit is for one channel. Indicates that an (Error) ActDMA * command was received from the scheduler while the TRB was already full * with previous command * uCode/driver must write "1" in order to clear this flag * 7-0: Each status bit indicates a channel's TxCredit error. When an error * bit is set, it indicates that the FH has received a full indication * from the RTC TxFIFO and the current value of the TxCredit counter was * not equal to zero. This means that the credit mechanism was not * synchronized to the TxFIFO status * uCode/driver must write "1" in order to clear this flag */ #define IWM_FH_TSSR_TX_ERROR_REG (IWM_FH_TSSR_LOWER_BOUND + 0x018) #define IWM_FH_TSSR_TX_MSG_CONFIG_REG (IWM_FH_TSSR_LOWER_BOUND + 0x008) #define IWM_FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(_chnl) ((1 << (_chnl)) << 16) /* Tx service channels */ #define IWM_FH_SRVC_CHNL (9) #define IWM_FH_SRVC_LOWER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0x9C8) #define IWM_FH_SRVC_UPPER_BOUND (IWM_FH_MEM_LOWER_BOUND + 0x9D0) #define IWM_FH_SRVC_CHNL_SRAM_ADDR_REG(_chnl) \ (IWM_FH_SRVC_LOWER_BOUND + ((_chnl) - 9) * 0x4) #define IWM_FH_TX_CHICKEN_BITS_REG (IWM_FH_MEM_LOWER_BOUND + 0xE98) #define IWM_FH_TX_TRB_REG(_chan) (IWM_FH_MEM_LOWER_BOUND + 0x958 + \ (_chan) * 4) /* Instruct FH to increment the retry count of a packet when * it is brought from the memory to TX-FIFO */ #define IWM_FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN (0x00000002) #define IWM_RX_QUEUE_SIZE 256 #define IWM_RX_QUEUE_MASK 255 #define IWM_RX_QUEUE_SIZE_LOG 8 /* * RX related structures and functions */ #define IWM_RX_FREE_BUFFERS 64 #define IWM_RX_LOW_WATERMARK 8 /** * struct iwm_rb_status - reserve buffer status * host memory mapped FH registers * @closed_rb_num [0:11] - Indicates the index of the RB which was closed * @closed_fr_num [0:11] - Indicates the index of the RX Frame which was closed * @finished_rb_num [0:11] - Indicates the index of the current RB * in which the last frame was written to * @finished_fr_num [0:11] - Indicates the index of the RX Frame * which was transferred */ struct iwm_rb_status { uint16_t closed_rb_num; uint16_t closed_fr_num; uint16_t finished_rb_num; uint16_t finished_fr_nam; uint32_t unused; } __packed; #define IWM_TFD_QUEUE_SIZE_MAX (256) #define IWM_TFD_QUEUE_SIZE_BC_DUP (64) #define IWM_TFD_QUEUE_BC_SIZE (IWM_TFD_QUEUE_SIZE_MAX + \ IWM_TFD_QUEUE_SIZE_BC_DUP) #define IWM_TX_DMA_MASK DMA_BIT_MASK(36) #define IWM_NUM_OF_TBS 20 static inline uint8_t iwm_get_dma_hi_addr(bus_addr_t addr) { return (sizeof(addr) > sizeof(uint32_t) ?
(addr >> 16) >> 16 : 0) & 0xF; } /** * struct iwm_tfd_tb transmit buffer descriptor within transmit frame descriptor * * This structure contains the dma address and length of a transmit buffer * * @lo: low [31:0] portion of the dma address of TX buffer * every even entry is unaligned on a 16 bit boundary * @hi_n_len 0-3 [35:32] portion of the dma address * 4-15 length of the tx buffer */ struct iwm_tfd_tb { uint32_t lo; uint16_t hi_n_len; } __packed; /** * struct iwm_tfd * * Transmit Frame Descriptor (TFD) * * @ __reserved1[3] reserved * @ num_tbs 0-4 number of active tbs * 5 reserved * 6-7 padding (not used) * @ tbs[20] transmit frame buffer descriptors * @ __pad padding * * Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM. * Both driver and device share these circular buffers, each of which must be * contiguous 256 TFDs x 128 bytes-per-TFD = 32 KBytes * * Driver must indicate the physical address of the base of each * circular buffer via the IWM_FH_MEM_CBBC_QUEUE registers. * * Each TFD contains pointer/size information for up to 20 data buffers * in host DRAM. These buffers collectively contain the (one) frame described * by the TFD. Each buffer must be a single contiguous block of memory within * itself, but buffers may be scattered in host DRAM. Each buffer has max size * of (4K - 4). The device concatenates all of a TFD's buffers into a single * Tx frame, up to 8 KBytes in size. * * A maximum of 255 (not 256!) TFDs may be on a queue waiting for Tx. */ struct iwm_tfd { uint8_t __reserved1[3]; uint8_t num_tbs; struct iwm_tfd_tb tbs[IWM_NUM_OF_TBS]; uint32_t __pad; } __packed; /* Keep Warm Size */ #define IWM_KW_SIZE 0x1000 /* 4k */ /* Fixed (non-configurable) rx data from phy */ /** * struct iwm_agn_scd_bc_tbl scheduler byte count table * base physical address provided by IWM_SCD_DRAM_BASE_ADDR * @tfd_offset 0-12 - tx command byte count * 12-16 - station index */ struct iwm_agn_scd_bc_tbl { uint16_t tfd_offset[IWM_TFD_QUEUE_BC_SIZE]; } __packed; /* * END iwl-fh.h */ /* * BEGIN mvm/fw-api.h */ /* Maximum number of Tx queues.
*/ #define IWM_MVM_MAX_QUEUES 31 /* Tx queue numbers */ enum { IWM_MVM_OFFCHANNEL_QUEUE = 8, IWM_MVM_CMD_QUEUE = 9, IWM_MVM_AUX_QUEUE = 15, }; enum iwm_mvm_tx_fifo { IWM_MVM_TX_FIFO_BK = 0, IWM_MVM_TX_FIFO_BE, IWM_MVM_TX_FIFO_VI, IWM_MVM_TX_FIFO_VO, IWM_MVM_TX_FIFO_MCAST = 5, IWM_MVM_TX_FIFO_CMD = 7, }; #define IWM_MVM_STATION_COUNT 16 /* commands */ enum { IWM_MVM_ALIVE = 0x1, IWM_REPLY_ERROR = 0x2, IWM_INIT_COMPLETE_NOTIF = 0x4, /* PHY context commands */ IWM_PHY_CONTEXT_CMD = 0x8, IWM_DBG_CFG = 0x9, /* UMAC scan commands */ IWM_SCAN_ITERATION_COMPLETE_UMAC = 0xb5, IWM_SCAN_CFG_CMD = 0xc, IWM_SCAN_REQ_UMAC = 0xd, IWM_SCAN_ABORT_UMAC = 0xe, IWM_SCAN_COMPLETE_UMAC = 0xf, /* station table */ IWM_ADD_STA_KEY = 0x17, IWM_ADD_STA = 0x18, IWM_REMOVE_STA = 0x19, /* TX */ IWM_TX_CMD = 0x1c, IWM_TXPATH_FLUSH = 0x1e, IWM_MGMT_MCAST_KEY = 0x1f, /* scheduler config */ IWM_SCD_QUEUE_CFG = 0x1d, /* global key */ IWM_WEP_KEY = 0x20, /* MAC and Binding commands */ IWM_MAC_CONTEXT_CMD = 0x28, IWM_TIME_EVENT_CMD = 0x29, /* both CMD and response */ IWM_TIME_EVENT_NOTIFICATION = 0x2a, IWM_BINDING_CONTEXT_CMD = 0x2b, IWM_TIME_QUOTA_CMD = 0x2c, IWM_NON_QOS_TX_COUNTER_CMD = 0x2d, IWM_LQ_CMD = 0x4e, /* paging block to FW cpu2 */ IWM_FW_PAGING_BLOCK_CMD = 0x4f, /* Scan offload */ IWM_SCAN_OFFLOAD_REQUEST_CMD = 0x51, IWM_SCAN_OFFLOAD_ABORT_CMD = 0x52, IWM_HOT_SPOT_CMD = 0x53, IWM_SCAN_OFFLOAD_COMPLETE = 0x6d, IWM_SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6e, IWM_SCAN_OFFLOAD_CONFIG_CMD = 0x6f, IWM_MATCH_FOUND_NOTIFICATION = 0xd9, IWM_SCAN_ITERATION_COMPLETE = 0xe7, /* Phy */ IWM_PHY_CONFIGURATION_CMD = 0x6a, IWM_CALIB_RES_NOTIF_PHY_DB = 0x6b, IWM_PHY_DB_CMD = 0x6c, /* Power - legacy power table command */ IWM_POWER_TABLE_CMD = 0x77, IWM_PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION = 0x78, /* Thermal Throttling*/ IWM_REPLY_THERMAL_MNG_BACKOFF = 0x7e, /* Scanning */ IWM_SCAN_ABORT_CMD = 0x81, IWM_SCAN_START_NOTIFICATION = 0x82, IWM_SCAN_RESULTS_NOTIFICATION = 0x83, /* NVM */ IWM_NVM_ACCESS_CMD = 0x88, IWM_SET_CALIB_DEFAULT_CMD = 0x8e, IWM_BEACON_NOTIFICATION = 0x90, IWM_BEACON_TEMPLATE_CMD = 0x91, IWM_TX_ANT_CONFIGURATION_CMD = 0x98, IWM_BT_CONFIG = 0x9b, IWM_STATISTICS_NOTIFICATION = 0x9d, IWM_REDUCE_TX_POWER_CMD = 0x9f, /* RF-KILL commands and notifications */ IWM_CARD_STATE_CMD = 0xa0, IWM_CARD_STATE_NOTIFICATION = 0xa1, IWM_MISSED_BEACONS_NOTIFICATION = 0xa2, IWM_MFUART_LOAD_NOTIFICATION = 0xb1, /* Power - new power table command */ IWM_MAC_PM_POWER_TABLE = 0xa9, IWM_REPLY_RX_PHY_CMD = 0xc0, IWM_REPLY_RX_MPDU_CMD = 0xc1, IWM_BA_NOTIF = 0xc5, /* Location Aware Regulatory */ IWM_MCC_UPDATE_CMD = 0xc8, IWM_MCC_CHUB_UPDATE_CMD = 0xc9, /* BT Coex */ IWM_BT_COEX_PRIO_TABLE = 0xcc, IWM_BT_COEX_PROT_ENV = 0xcd, IWM_BT_PROFILE_NOTIFICATION = 0xce, IWM_BT_COEX_CI = 0x5d, IWM_REPLY_SF_CFG_CMD = 0xd1, IWM_REPLY_BEACON_FILTERING_CMD = 0xd2, /* DTS measurements */ IWM_CMD_DTS_MEASUREMENT_TRIGGER = 0xdc, IWM_DTS_MEASUREMENT_NOTIFICATION = 0xdd, IWM_REPLY_DEBUG_CMD = 0xf0, IWM_DEBUG_LOG_MSG = 0xf7, IWM_MCAST_FILTER_CMD = 0xd0, /* D3 commands/notifications */ IWM_D3_CONFIG_CMD = 0xd3, IWM_PROT_OFFLOAD_CONFIG_CMD = 0xd4, IWM_OFFLOADS_QUERY_CMD = 0xd5, IWM_REMOTE_WAKE_CONFIG_CMD = 0xd6, /* for WoWLAN in particular */ IWM_WOWLAN_PATTERNS = 0xe0, IWM_WOWLAN_CONFIGURATION = 0xe1, IWM_WOWLAN_TSC_RSC_PARAM = 0xe2, IWM_WOWLAN_TKIP_PARAM = 0xe3, IWM_WOWLAN_KEK_KCK_MATERIAL = 0xe4, IWM_WOWLAN_GET_STATUSES = 0xe5, IWM_WOWLAN_TX_POWER_PER_DB = 0xe6, /* and for NetDetect */ IWM_NET_DETECT_CONFIG_CMD = 0x54, IWM_NET_DETECT_PROFILES_QUERY_CMD = 0x56, 
IWM_NET_DETECT_PROFILES_CMD = 0x57, IWM_NET_DETECT_HOTSPOTS_CMD = 0x58, IWM_NET_DETECT_HOTSPOTS_QUERY_CMD = 0x59, IWM_REPLY_MAX = 0xff, }; enum iwm_phy_ops_subcmd_ids { IWM_CMD_DTS_MEASUREMENT_TRIGGER_WIDE = 0x0, IWM_CTDP_CONFIG_CMD = 0x03, IWM_TEMP_REPORTING_THRESHOLDS_CMD = 0x04, IWM_CT_KILL_NOTIFICATION = 0xFE, IWM_DTS_MEASUREMENT_NOTIF_WIDE = 0xFF, }; /* command groups */ enum { IWM_LEGACY_GROUP = 0x0, IWM_LONG_GROUP = 0x1, IWM_SYSTEM_GROUP = 0x2, IWM_MAC_CONF_GROUP = 0x3, IWM_PHY_OPS_GROUP = 0x4, IWM_DATA_PATH_GROUP = 0x5, IWM_PROT_OFFLOAD_GROUP = 0xb, }; /** * struct iwm_cmd_response - generic response struct for most commands * @status: status of the command asked, changes for each one */ struct iwm_cmd_response { uint32_t status; }; /* * struct iwm_tx_ant_cfg_cmd * @valid: valid antenna configuration */ struct iwm_tx_ant_cfg_cmd { uint32_t valid; } __packed; /** * struct iwm_reduce_tx_power_cmd - TX power reduction command * IWM_REDUCE_TX_POWER_CMD = 0x9f * @flags: (reserved for future implementation) * @mac_context_id: id of the mac ctx for which we are reducing TX power. * @pwr_restriction: TX power restriction in dBm. */ struct iwm_reduce_tx_power_cmd { uint8_t flags; uint8_t mac_context_id; uint16_t pwr_restriction; } __packed; /* IWM_TX_REDUCED_POWER_API_S_VER_1 */ /* * Calibration control struct. * Sent as part of the phy configuration command. * @flow_trigger: bitmap for which calibrations to perform according to * flow triggers. * @event_trigger: bitmap for which calibrations to perform according to * event triggers. */ struct iwm_calib_ctrl { uint32_t flow_trigger; uint32_t event_trigger; } __packed; /* This enum defines the bitmap of various calibrations to enable in both * init ucode and runtime ucode through IWM_CALIBRATION_CFG_CMD. */ enum iwm_calib_cfg { IWM_CALIB_CFG_XTAL_IDX = (1 << 0), IWM_CALIB_CFG_TEMPERATURE_IDX = (1 << 1), IWM_CALIB_CFG_VOLTAGE_READ_IDX = (1 << 2), IWM_CALIB_CFG_PAPD_IDX = (1 << 3), IWM_CALIB_CFG_TX_PWR_IDX = (1 << 4), IWM_CALIB_CFG_DC_IDX = (1 << 5), IWM_CALIB_CFG_BB_FILTER_IDX = (1 << 6), IWM_CALIB_CFG_LO_LEAKAGE_IDX = (1 << 7), IWM_CALIB_CFG_TX_IQ_IDX = (1 << 8), IWM_CALIB_CFG_TX_IQ_SKEW_IDX = (1 << 9), IWM_CALIB_CFG_RX_IQ_IDX = (1 << 10), IWM_CALIB_CFG_RX_IQ_SKEW_IDX = (1 << 11), IWM_CALIB_CFG_SENSITIVITY_IDX = (1 << 12), IWM_CALIB_CFG_CHAIN_NOISE_IDX = (1 << 13), IWM_CALIB_CFG_DISCONNECTED_ANT_IDX = (1 << 14), IWM_CALIB_CFG_ANT_COUPLING_IDX = (1 << 15), IWM_CALIB_CFG_DAC_IDX = (1 << 16), IWM_CALIB_CFG_ABS_IDX = (1 << 17), IWM_CALIB_CFG_AGC_IDX = (1 << 18), }; /* * Phy configuration command.
*/ struct iwm_phy_cfg_cmd { uint32_t phy_cfg; struct iwm_calib_ctrl calib_control; } __packed; #define IWM_PHY_CFG_RADIO_TYPE ((1 << 0) | (1 << 1)) #define IWM_PHY_CFG_RADIO_STEP ((1 << 2) | (1 << 3)) #define IWM_PHY_CFG_RADIO_DASH ((1 << 4) | (1 << 5)) #define IWM_PHY_CFG_PRODUCT_NUMBER ((1 << 6) | (1 << 7)) #define IWM_PHY_CFG_TX_CHAIN_A (1 << 8) #define IWM_PHY_CFG_TX_CHAIN_B (1 << 9) #define IWM_PHY_CFG_TX_CHAIN_C (1 << 10) #define IWM_PHY_CFG_RX_CHAIN_A (1 << 12) #define IWM_PHY_CFG_RX_CHAIN_B (1 << 13) #define IWM_PHY_CFG_RX_CHAIN_C (1 << 14) /* Target of the IWM_NVM_ACCESS_CMD */ enum { IWM_NVM_ACCESS_TARGET_CACHE = 0, IWM_NVM_ACCESS_TARGET_OTP = 1, IWM_NVM_ACCESS_TARGET_EEPROM = 2, }; /* Section types for IWM_NVM_ACCESS_CMD */ enum { IWM_NVM_SECTION_TYPE_SW = 1, IWM_NVM_SECTION_TYPE_REGULATORY = 3, IWM_NVM_SECTION_TYPE_CALIBRATION = 4, IWM_NVM_SECTION_TYPE_PRODUCTION = 5, IWM_NVM_SECTION_TYPE_MAC_OVERRIDE = 11, IWM_NVM_SECTION_TYPE_PHY_SKU = 12, IWM_NVM_MAX_NUM_SECTIONS = 13, }; /** * struct iwm_nvm_access_cmd - Request the device to send an NVM section * @op_code: 0 - read, 1 - write * @target: IWM_NVM_ACCESS_TARGET_* * @type: IWM_NVM_SECTION_TYPE_* * @offset: offset in bytes into the section * @length: in bytes, to read/write * @data: if write operation, the data to write. On read it is empty */ struct iwm_nvm_access_cmd { uint8_t op_code; uint8_t target; uint16_t type; uint16_t offset; uint16_t length; uint8_t data[]; } __packed; /* IWM_NVM_ACCESS_CMD_API_S_VER_2 */ #define IWM_NUM_OF_FW_PAGING_BLOCKS 33 /* 32 for data and 1 block for CSS */ /* * struct iwm_fw_paging_cmd - paging layout * * (IWM_FW_PAGING_BLOCK_CMD = 0x4f) * * Send to FW the paging layout in the driver. * * @flags: various flags for the command * @block_size: the block size in powers of 2 * @block_num: number of blocks specified in the command. * @device_phy_addr: virtual addresses from device side */ struct iwm_fw_paging_cmd { uint32_t flags; uint32_t block_size; uint32_t block_num; uint32_t device_phy_addr[IWM_NUM_OF_FW_PAGING_BLOCKS]; } __packed; /* IWM_FW_PAGING_BLOCK_CMD_API_S_VER_1 */ /* * FW item IDs * * @IWM_FW_ITEM_ID_PAGING: Address of the pages that the FW will upload / * download */ enum iwm_fw_item_id { IWM_FW_ITEM_ID_PAGING = 3, }; /* * struct iwm_fw_get_item_cmd - get an item from the fw */ struct iwm_fw_get_item_cmd { uint32_t item_id; } __packed; /* IWM_FW_GET_ITEM_CMD_API_S_VER_1 */ /** * struct iwm_nvm_access_resp - response to IWM_NVM_ACCESS_CMD * @offset: offset in bytes into the section * @length: in bytes, either how much was written or read * @type: IWM_NVM_SECTION_TYPE_* * @status: 0 for success, fail otherwise * @data: if read operation, the data returned. Empty on write.
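 *
 * As an illustrative sketch (not a quote of the driver), a read of the
 * SW section would fill an iwm_nvm_access_cmd with op_code 0 (read),
 * target IWM_NVM_ACCESS_TARGET_CACHE, type IWM_NVM_SECTION_TYPE_SW and
 * the desired offset/length; the FW then answers with this structure,
 * with @data holding @length bytes (chunk below is a hypothetical size):
 *	struct iwm_nvm_access_cmd cmd = {
 *		.op_code = 0,
 *		.target = IWM_NVM_ACCESS_TARGET_CACHE,
 *		.type = htole16(IWM_NVM_SECTION_TYPE_SW),
 *		.offset = htole16(0),
 *		.length = htole16(chunk),
 *	};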
*/ struct iwm_nvm_access_resp { uint16_t offset; uint16_t length; uint16_t type; uint16_t status; uint8_t data[]; } __packed; /* IWM_NVM_ACCESS_CMD_RESP_API_S_VER_2 */ /* IWM_MVM_ALIVE 0x1 */ /* alive response is_valid values */ #define IWM_ALIVE_RESP_UCODE_OK (1 << 0) #define IWM_ALIVE_RESP_RFKILL (1 << 1) /* alive response ver_type values */ enum { IWM_FW_TYPE_HW = 0, IWM_FW_TYPE_PROT = 1, IWM_FW_TYPE_AP = 2, IWM_FW_TYPE_WOWLAN = 3, IWM_FW_TYPE_TIMING = 4, IWM_FW_TYPE_WIPAN = 5 }; /* alive response ver_subtype values */ enum { IWM_FW_SUBTYPE_FULL_FEATURE = 0, IWM_FW_SUBTYPE_BOOTSRAP = 1, /* Not valid */ IWM_FW_SUBTYPE_REDUCED = 2, IWM_FW_SUBTYPE_ALIVE_ONLY = 3, IWM_FW_SUBTYPE_WOWLAN = 4, IWM_FW_SUBTYPE_AP_SUBTYPE = 5, IWM_FW_SUBTYPE_WIPAN = 6, IWM_FW_SUBTYPE_INITIALIZE = 9 }; #define IWM_ALIVE_STATUS_ERR 0xDEAD #define IWM_ALIVE_STATUS_OK 0xCAFE #define IWM_ALIVE_FLG_RFKILL (1 << 0) struct iwm_mvm_alive_resp_ver1 { uint16_t status; uint16_t flags; uint8_t ucode_minor; uint8_t ucode_major; uint16_t id; uint8_t api_minor; uint8_t api_major; uint8_t ver_subtype; uint8_t ver_type; uint8_t mac; uint8_t opt; uint16_t reserved2; uint32_t timestamp; uint32_t error_event_table_ptr; /* SRAM address for error log */ uint32_t log_event_table_ptr; /* SRAM address for event log */ uint32_t cpu_register_ptr; uint32_t dbgm_config_ptr; uint32_t alive_counter_ptr; uint32_t scd_base_ptr; /* SRAM address for SCD */ } __packed; /* IWM_ALIVE_RES_API_S_VER_1 */ struct iwm_mvm_alive_resp_ver2 { uint16_t status; uint16_t flags; uint8_t ucode_minor; uint8_t ucode_major; uint16_t id; uint8_t api_minor; uint8_t api_major; uint8_t ver_subtype; uint8_t ver_type; uint8_t mac; uint8_t opt; uint16_t reserved2; uint32_t timestamp; uint32_t error_event_table_ptr; /* SRAM address for error log */ uint32_t log_event_table_ptr; /* SRAM address for LMAC event log */ uint32_t cpu_register_ptr; uint32_t dbgm_config_ptr; uint32_t alive_counter_ptr; uint32_t scd_base_ptr; /* SRAM address for SCD */ uint32_t st_fwrd_addr; /* pointer to Store and forward */ uint32_t st_fwrd_size; uint8_t umac_minor; /* UMAC version: minor */ uint8_t umac_major; /* UMAC version: major */ uint16_t umac_id; /* UMAC version: id */ uint32_t error_info_addr; /* SRAM address for UMAC error log */ uint32_t dbg_print_buff_addr; } __packed; /* ALIVE_RES_API_S_VER_2 */ struct iwm_mvm_alive_resp { uint16_t status; uint16_t flags; uint32_t ucode_minor; uint32_t ucode_major; uint8_t ver_subtype; uint8_t ver_type; uint8_t mac; uint8_t opt; uint32_t timestamp; uint32_t error_event_table_ptr; /* SRAM address for error log */ uint32_t log_event_table_ptr; /* SRAM address for LMAC event log */ uint32_t cpu_register_ptr; uint32_t dbgm_config_ptr; uint32_t alive_counter_ptr; uint32_t scd_base_ptr; /* SRAM address for SCD */ uint32_t st_fwrd_addr; /* pointer to Store and forward */ uint32_t st_fwrd_size; uint32_t umac_minor; /* UMAC version: minor */ uint32_t umac_major; /* UMAC version: major */ uint32_t error_info_addr; /* SRAM address for UMAC error log */ uint32_t dbg_print_buff_addr; } __packed; /* ALIVE_RES_API_S_VER_3 */ /* Error response/notification */ enum { IWM_FW_ERR_UNKNOWN_CMD = 0x0, IWM_FW_ERR_INVALID_CMD_PARAM = 0x1, IWM_FW_ERR_SERVICE = 0x2, IWM_FW_ERR_ARC_MEMORY = 0x3, IWM_FW_ERR_ARC_CODE = 0x4, IWM_FW_ERR_WATCH_DOG = 0x5, IWM_FW_ERR_WEP_GRP_KEY_INDX = 0x10, IWM_FW_ERR_WEP_KEY_SIZE = 0x11, IWM_FW_ERR_OBSOLETE_FUNC = 0x12, IWM_FW_ERR_UNEXPECTED = 0xFE, IWM_FW_ERR_FATAL = 0xFF }; /** * struct iwm_error_resp - FW error indication * ( IWM_REPLY_ERROR = 0x2 
) * @error_type: one of IWM_FW_ERR_* * @cmd_id: the command ID for which the error occurred * @bad_cmd_seq_num: sequence number of the erroneous command * @error_service: which service created the error, applicable only if * error_type = 2, otherwise 0 * @timestamp: TSF in usecs. */ struct iwm_error_resp { uint32_t error_type; uint8_t cmd_id; uint8_t reserved1; uint16_t bad_cmd_seq_num; uint32_t error_service; uint64_t timestamp; } __packed; /* Common PHY, MAC and Bindings definitions */ #define IWM_MAX_MACS_IN_BINDING (3) #define IWM_MAX_BINDINGS (4) #define IWM_AUX_BINDING_INDEX (3) #define IWM_MAX_PHYS (4) /* Used to extract ID and color from the context dword */ #define IWM_FW_CTXT_ID_POS (0) #define IWM_FW_CTXT_ID_MSK (0xff << IWM_FW_CTXT_ID_POS) #define IWM_FW_CTXT_COLOR_POS (8) #define IWM_FW_CTXT_COLOR_MSK (0xff << IWM_FW_CTXT_COLOR_POS) #define IWM_FW_CTXT_INVALID (0xffffffff) #define IWM_FW_CMD_ID_AND_COLOR(_id, _color) ((_id << IWM_FW_CTXT_ID_POS) |\ (_color << IWM_FW_CTXT_COLOR_POS)) /* Possible actions on PHYs, MACs and Bindings */ enum { IWM_FW_CTXT_ACTION_STUB = 0, IWM_FW_CTXT_ACTION_ADD, IWM_FW_CTXT_ACTION_MODIFY, IWM_FW_CTXT_ACTION_REMOVE, IWM_FW_CTXT_ACTION_NUM }; /* COMMON_CONTEXT_ACTION_API_E_VER_1 */ /* Time Events */ /* Time Event types, according to MAC type */ enum iwm_time_event_type { /* BSS Station Events */ IWM_TE_BSS_STA_AGGRESSIVE_ASSOC, IWM_TE_BSS_STA_ASSOC, IWM_TE_BSS_EAP_DHCP_PROT, IWM_TE_BSS_QUIET_PERIOD, /* P2P Device Events */ IWM_TE_P2P_DEVICE_DISCOVERABLE, IWM_TE_P2P_DEVICE_LISTEN, IWM_TE_P2P_DEVICE_ACTION_SCAN, IWM_TE_P2P_DEVICE_FULL_SCAN, /* P2P Client Events */ IWM_TE_P2P_CLIENT_AGGRESSIVE_ASSOC, IWM_TE_P2P_CLIENT_ASSOC, IWM_TE_P2P_CLIENT_QUIET_PERIOD, /* P2P GO Events */ IWM_TE_P2P_GO_ASSOC_PROT, IWM_TE_P2P_GO_REPETITIVE_NOA, IWM_TE_P2P_GO_CT_WINDOW, /* WiDi Sync Events */ IWM_TE_WIDI_TX_SYNC, IWM_TE_MAX }; /* IWM_MAC_EVENT_TYPE_API_E_VER_1 */ /* Time event - defines for command API v1 */ /* * @IWM_TE_V1_FRAG_NONE: fragmentation of the time event is NOT allowed. * @IWM_TE_V1_FRAG_SINGLE: fragmentation of the time event is allowed, but only * the first fragment is scheduled. * @IWM_TE_V1_FRAG_DUAL: fragmentation of the time event is allowed, but only * the first 2 fragments are scheduled. * @IWM_TE_V1_FRAG_ENDLESS: fragmentation of the time event is allowed, and any * number of fragments are valid. * * Other than the constant defined above, specifying a fragmentation value 'x' * means that the event can be fragmented but only the first 'x' will be * scheduled. 
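 *
 * For example, a fragmentation value of 3 (an illustrative value, not a
 * define from this header) would mean the event may be fragmented and
 * only its first three fragments get scheduled.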
*/ enum { IWM_TE_V1_FRAG_NONE = 0, IWM_TE_V1_FRAG_SINGLE = 1, IWM_TE_V1_FRAG_DUAL = 2, IWM_TE_V1_FRAG_ENDLESS = 0xffffffff }; /* If a Time Event can be fragmented, this is the max number of fragments */ #define IWM_TE_V1_FRAG_MAX_MSK 0x0fffffff /* Repeat the time event endlessly (until removed) */ #define IWM_TE_V1_REPEAT_ENDLESS 0xffffffff /* If a Time Event has bounded repetitions, this is the maximal value */ #define IWM_TE_V1_REPEAT_MAX_MSK_V1 0x0fffffff /* Time Event dependencies: none, on another TE, or in a specific time */ enum { IWM_TE_V1_INDEPENDENT = 0, IWM_TE_V1_DEP_OTHER = (1 << 0), IWM_TE_V1_DEP_TSF = (1 << 1), IWM_TE_V1_EVENT_SOCIOPATHIC = (1 << 2), }; /* IWM_MAC_EVENT_DEPENDENCY_POLICY_API_E_VER_2 */ /* * @IWM_TE_V1_NOTIF_NONE: no notifications * @IWM_TE_V1_NOTIF_HOST_EVENT_START: request/receive notification on event start * @IWM_TE_V1_NOTIF_HOST_EVENT_END: request/receive notification on event end * @IWM_TE_V1_NOTIF_INTERNAL_EVENT_START: internal FW use * @IWM_TE_V1_NOTIF_INTERNAL_EVENT_END: internal FW use. * @IWM_TE_V1_NOTIF_HOST_FRAG_START: request/receive notification on frag start * @IWM_TE_V1_NOTIF_HOST_FRAG_END: request/receive notification on frag end * @IWM_TE_V1_NOTIF_INTERNAL_FRAG_START: internal FW use. * @IWM_TE_V1_NOTIF_INTERNAL_FRAG_END: internal FW use. * * Supported Time event notifications configuration. * A notification (both event and fragment) includes a status indicating whether * the FW was able to schedule the event or not. For fragment start/end * notification the status is always success. There is no start/end fragment * notification for monolithic events. */ enum { IWM_TE_V1_NOTIF_NONE = 0, IWM_TE_V1_NOTIF_HOST_EVENT_START = (1 << 0), IWM_TE_V1_NOTIF_HOST_EVENT_END = (1 << 1), IWM_TE_V1_NOTIF_INTERNAL_EVENT_START = (1 << 2), IWM_TE_V1_NOTIF_INTERNAL_EVENT_END = (1 << 3), IWM_TE_V1_NOTIF_HOST_FRAG_START = (1 << 4), IWM_TE_V1_NOTIF_HOST_FRAG_END = (1 << 5), IWM_TE_V1_NOTIF_INTERNAL_FRAG_START = (1 << 6), IWM_TE_V1_NOTIF_INTERNAL_FRAG_END = (1 << 7), IWM_T2_V2_START_IMMEDIATELY = (1 << 11), }; /* IWM_MAC_EVENT_ACTION_API_E_VER_2 */ /* Time event - defines for command API */ /* * @IWM_TE_V2_FRAG_NONE: fragmentation of the time event is NOT allowed. * @IWM_TE_V2_FRAG_SINGLE: fragmentation of the time event is allowed, but only * the first fragment is scheduled. * @IWM_TE_V2_FRAG_DUAL: fragmentation of the time event is allowed, but only * the first 2 fragments are scheduled. * @IWM_TE_V2_FRAG_ENDLESS: fragmentation of the time event is allowed, and any * number of fragments are valid. * * Other than the constants defined above, specifying a fragmentation value 'x' * means that the event can be fragmented but only the first 'x' will be * scheduled. */ enum { IWM_TE_V2_FRAG_NONE = 0, IWM_TE_V2_FRAG_SINGLE = 1, IWM_TE_V2_FRAG_DUAL = 2, IWM_TE_V2_FRAG_MAX = 0xfe, IWM_TE_V2_FRAG_ENDLESS = 0xff }; /* Repeat the time event endlessly (until removed) */ #define IWM_TE_V2_REPEAT_ENDLESS 0xff /* If a Time Event has bounded repetitions, this is the maximal value */ #define IWM_TE_V2_REPEAT_MAX 0xfe #define IWM_TE_V2_PLACEMENT_POS 12 #define IWM_TE_V2_ABSENCE_POS 15 /* Time event policy values * A notification (both event and fragment) includes a status indicating whether * the FW was able to schedule the event or not. For fragment start/end * notification the status is always success. There is no start/end fragment * notification for monolithic events.
 * * @IWM_TE_V2_DEFAULT_POLICY: independent, social, present, unnoticeable * @IWM_TE_V2_NOTIF_HOST_EVENT_START: request/receive notification on event start * @IWM_TE_V2_NOTIF_HOST_EVENT_END: request/receive notification on event end * @IWM_TE_V2_NOTIF_INTERNAL_EVENT_START: internal FW use * @IWM_TE_V2_NOTIF_INTERNAL_EVENT_END: internal FW use. * @IWM_TE_V2_NOTIF_HOST_FRAG_START: request/receive notification on frag start * @IWM_TE_V2_NOTIF_HOST_FRAG_END: request/receive notification on frag end * @IWM_TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use. * @IWM_TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use. * @IWM_TE_V2_DEP_OTHER: depends on another time event * @IWM_TE_V2_DEP_TSF: depends on a specific time * @IWM_TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of the same MAC * @IWM_TE_V2_ABSENCE: are we present or absent during the Time Event. */ enum { IWM_TE_V2_DEFAULT_POLICY = 0x0, /* notifications (event start/stop, fragment start/stop) */ IWM_TE_V2_NOTIF_HOST_EVENT_START = (1 << 0), IWM_TE_V2_NOTIF_HOST_EVENT_END = (1 << 1), IWM_TE_V2_NOTIF_INTERNAL_EVENT_START = (1 << 2), IWM_TE_V2_NOTIF_INTERNAL_EVENT_END = (1 << 3), IWM_TE_V2_NOTIF_HOST_FRAG_START = (1 << 4), IWM_TE_V2_NOTIF_HOST_FRAG_END = (1 << 5), IWM_TE_V2_NOTIF_INTERNAL_FRAG_START = (1 << 6), IWM_TE_V2_NOTIF_INTERNAL_FRAG_END = (1 << 7), IWM_TE_V2_NOTIF_MSK = 0xff, /* placement characteristics */ IWM_TE_V2_DEP_OTHER = (1 << IWM_TE_V2_PLACEMENT_POS), IWM_TE_V2_DEP_TSF = (1 << (IWM_TE_V2_PLACEMENT_POS + 1)), IWM_TE_V2_EVENT_SOCIOPATHIC = (1 << (IWM_TE_V2_PLACEMENT_POS + 2)), /* are we present or absent during the Time Event. */ IWM_TE_V2_ABSENCE = (1 << IWM_TE_V2_ABSENCE_POS), }; /** * struct iwm_time_event_cmd - configuring Time Events * with struct IWM_MAC_TIME_EVENT_DATA_API_S_VER_2 (version 1 is also * supported; which one is used is determined by IWM_UCODE_TLV_FLAGS) * ( IWM_TIME_EVENT_CMD = 0x29 ) * @id_and_color: ID and color of the relevant MAC * @action: action to perform, one of IWM_FW_CTXT_ACTION_* * @id: this field has two meanings, depending on the action: * If the action is ADD, then it means the type of event to add. * For all other actions it is the unique event ID assigned when the * event was added by the FW.
 * @apply_time: When to start the Time Event (in GP2) * @max_delay: maximum delay to event's start (apply time), in TU * @depends_on: the unique ID of the event we depend on (if any) * @interval: interval between repetitions, in TU * @duration: duration of event in TU * @repeat: how many repetitions to do, can be IWM_TE_REPEAT_ENDLESS * @max_frags: maximal number of fragments the Time Event can be divided into * @policy: defines whether uCode shall notify the host or other uCode modules * on event and/or fragment start and/or end * using one of IWM_TE_INDEPENDENT, IWM_TE_DEP_OTHER, IWM_TE_DEP_TSF * IWM_TE_EVENT_SOCIOPATHIC * using IWM_TE_ABSENCE and using IWM_TE_NOTIF_* */ struct iwm_time_event_cmd { /* COMMON_INDEX_HDR_API_S_VER_1 */ uint32_t id_and_color; uint32_t action; uint32_t id; /* IWM_MAC_TIME_EVENT_DATA_API_S_VER_2 */ uint32_t apply_time; uint32_t max_delay; uint32_t depends_on; uint32_t interval; uint32_t duration; uint8_t repeat; uint8_t max_frags; uint16_t policy; } __packed; /* IWM_MAC_TIME_EVENT_CMD_API_S_VER_2 */ /** * struct iwm_time_event_resp - response structure to iwm_time_event_cmd * @status: bit 0 indicates success, all others specify errors * @id: the Time Event type * @unique_id: the unique ID assigned (in ADD) or given (others) to the TE * @id_and_color: ID and color of the relevant MAC */ struct iwm_time_event_resp { uint32_t status; uint32_t id; uint32_t unique_id; uint32_t id_and_color; } __packed; /* IWM_MAC_TIME_EVENT_RSP_API_S_VER_1 */ /** * struct iwm_time_event_notif - notifications of time event start/stop * ( IWM_TIME_EVENT_NOTIFICATION = 0x2a ) * @timestamp: action timestamp in GP2 * @session_id: session's unique id * @unique_id: unique id of the Time Event itself * @id_and_color: ID and color of the relevant MAC * @action: one of IWM_TE_NOTIF_START or IWM_TE_NOTIF_END * @status: true if scheduled, false otherwise (not executed) */ struct iwm_time_event_notif { uint32_t timestamp; uint32_t session_id; uint32_t unique_id; uint32_t id_and_color; uint32_t action; uint32_t status; } __packed; /* IWM_MAC_TIME_EVENT_NTFY_API_S_VER_1 */ /* Bindings and Time Quota */ /** * struct iwm_binding_cmd - configuring bindings * ( IWM_BINDING_CONTEXT_CMD = 0x2b ) * @id_and_color: ID and color of the relevant Binding * @action: action to perform, one of IWM_FW_CTXT_ACTION_* * @macs: array of MAC id and colors which belong to the binding * @phy: PHY id and color which belongs to the binding */ struct iwm_binding_cmd { /* COMMON_INDEX_HDR_API_S_VER_1 */ uint32_t id_and_color; uint32_t action; /* IWM_BINDING_DATA_API_S_VER_1 */ uint32_t macs[IWM_MAX_MACS_IN_BINDING]; uint32_t phy; } __packed; /* IWM_BINDING_CMD_API_S_VER_1 */ /* The maximal number of fragments in the FW's schedule session */ #define IWM_MVM_MAX_QUOTA 128 /** * struct iwm_time_quota_data - configuration of time quota per binding * @id_and_color: ID and color of the relevant Binding * @quota: absolute time quota in TU. The scheduler will try to divide the * remaining quota (after Time Events) according to this quota.
 * @max_duration: max uninterrupted context duration in TU */ struct iwm_time_quota_data { uint32_t id_and_color; uint32_t quota; uint32_t max_duration; } __packed; /* IWM_TIME_QUOTA_DATA_API_S_VER_1 */ /** * struct iwm_time_quota_cmd - configuration of time quota between bindings * ( IWM_TIME_QUOTA_CMD = 0x2c ) * @quotas: allocations per binding */ struct iwm_time_quota_cmd { struct iwm_time_quota_data quotas[IWM_MAX_BINDINGS]; } __packed; /* IWM_TIME_QUOTA_ALLOCATION_CMD_API_S_VER_1 */ /* PHY context */ /* Supported bands */ #define IWM_PHY_BAND_5 (0) #define IWM_PHY_BAND_24 (1) /* Supported channel width, varies if there is VHT support */ #define IWM_PHY_VHT_CHANNEL_MODE20 (0x0) #define IWM_PHY_VHT_CHANNEL_MODE40 (0x1) #define IWM_PHY_VHT_CHANNEL_MODE80 (0x2) #define IWM_PHY_VHT_CHANNEL_MODE160 (0x3) /* * Control channel position: * For legacy set bit means upper channel, otherwise lower. * For VHT - bit-2 marks if the control is lower/upper relative to center-freq * bits-1:0 mark the distance from the center freq. for 20Mhz, offset is 0. * center_freq * | * 40Mhz |_______|_______| * 80Mhz |_______|_______|_______|_______| * 160Mhz |_______|_______|_______|_______|_______|_______|_______|_______| * code 011 010 001 000 | 100 101 110 111 */ #define IWM_PHY_VHT_CTRL_POS_1_BELOW (0x0) #define IWM_PHY_VHT_CTRL_POS_2_BELOW (0x1) #define IWM_PHY_VHT_CTRL_POS_3_BELOW (0x2) #define IWM_PHY_VHT_CTRL_POS_4_BELOW (0x3) #define IWM_PHY_VHT_CTRL_POS_1_ABOVE (0x4) #define IWM_PHY_VHT_CTRL_POS_2_ABOVE (0x5) #define IWM_PHY_VHT_CTRL_POS_3_ABOVE (0x6) #define IWM_PHY_VHT_CTRL_POS_4_ABOVE (0x7) /* * @band: IWM_PHY_BAND_* * @channel: channel number * @width: PHY_[VHT|LEGACY]_CHANNEL_* * @ctrl_pos: PHY_[VHT|LEGACY]_CTRL_* */ struct iwm_fw_channel_info { uint8_t band; uint8_t channel; uint8_t width; uint8_t ctrl_pos; } __packed; #define IWM_PHY_RX_CHAIN_DRIVER_FORCE_POS (0) #define IWM_PHY_RX_CHAIN_DRIVER_FORCE_MSK \ (0x1 << IWM_PHY_RX_CHAIN_DRIVER_FORCE_POS) #define IWM_PHY_RX_CHAIN_VALID_POS (1) #define IWM_PHY_RX_CHAIN_VALID_MSK \ (0x7 << IWM_PHY_RX_CHAIN_VALID_POS) #define IWM_PHY_RX_CHAIN_FORCE_SEL_POS (4) #define IWM_PHY_RX_CHAIN_FORCE_SEL_MSK \ (0x7 << IWM_PHY_RX_CHAIN_FORCE_SEL_POS) #define IWM_PHY_RX_CHAIN_FORCE_MIMO_SEL_POS (7) #define IWM_PHY_RX_CHAIN_FORCE_MIMO_SEL_MSK \ (0x7 << IWM_PHY_RX_CHAIN_FORCE_MIMO_SEL_POS) #define IWM_PHY_RX_CHAIN_CNT_POS (10) #define IWM_PHY_RX_CHAIN_CNT_MSK \ (0x3 << IWM_PHY_RX_CHAIN_CNT_POS) #define IWM_PHY_RX_CHAIN_MIMO_CNT_POS (12) #define IWM_PHY_RX_CHAIN_MIMO_CNT_MSK \ (0x3 << IWM_PHY_RX_CHAIN_MIMO_CNT_POS) #define IWM_PHY_RX_CHAIN_MIMO_FORCE_POS (14) #define IWM_PHY_RX_CHAIN_MIMO_FORCE_MSK \ (0x1 << IWM_PHY_RX_CHAIN_MIMO_FORCE_POS) /* TODO: fix the value, make it depend on firmware at runtime? */ #define IWM_NUM_PHY_CTX 3 /* TODO: complete missing documentation */ /** * struct iwm_phy_context_cmd - config of the PHY context * ( IWM_PHY_CONTEXT_CMD = 0x8 ) * @id_and_color: ID and color of the relevant Binding * @action: action to perform, one of IWM_FW_CTXT_ACTION_* * @apply_time: 0 means immediate apply and context switch. * other value means apply new params after X usecs * @tx_param_color: ??? * @channel_info: * @txchain_info: ??? * @rxchain_info: ??? * @acquisition_data: ???
* @dsp_cfg_flags: set to 0 */ struct iwm_phy_context_cmd { /* COMMON_INDEX_HDR_API_S_VER_1 */ uint32_t id_and_color; uint32_t action; /* IWM_PHY_CONTEXT_DATA_API_S_VER_1 */ uint32_t apply_time; uint32_t tx_param_color; struct iwm_fw_channel_info ci; uint32_t txchain_info; uint32_t rxchain_info; uint32_t acquisition_data; uint32_t dsp_cfg_flags; } __packed; /* IWM_PHY_CONTEXT_CMD_API_VER_1 */ #define IWM_RX_INFO_PHY_CNT 8 #define IWM_RX_INFO_ENERGY_ANT_ABC_IDX 1 #define IWM_RX_INFO_ENERGY_ANT_A_MSK 0x000000ff #define IWM_RX_INFO_ENERGY_ANT_B_MSK 0x0000ff00 #define IWM_RX_INFO_ENERGY_ANT_C_MSK 0x00ff0000 #define IWM_RX_INFO_ENERGY_ANT_A_POS 0 #define IWM_RX_INFO_ENERGY_ANT_B_POS 8 #define IWM_RX_INFO_ENERGY_ANT_C_POS 16 #define IWM_RX_INFO_AGC_IDX 1 #define IWM_RX_INFO_RSSI_AB_IDX 2 #define IWM_OFDM_AGC_A_MSK 0x0000007f #define IWM_OFDM_AGC_A_POS 0 #define IWM_OFDM_AGC_B_MSK 0x00003f80 #define IWM_OFDM_AGC_B_POS 7 #define IWM_OFDM_AGC_CODE_MSK 0x3fe00000 #define IWM_OFDM_AGC_CODE_POS 20 #define IWM_OFDM_RSSI_INBAND_A_MSK 0x00ff #define IWM_OFDM_RSSI_A_POS 0 #define IWM_OFDM_RSSI_ALLBAND_A_MSK 0xff00 #define IWM_OFDM_RSSI_ALLBAND_A_POS 8 #define IWM_OFDM_RSSI_INBAND_B_MSK 0xff0000 #define IWM_OFDM_RSSI_B_POS 16 #define IWM_OFDM_RSSI_ALLBAND_B_MSK 0xff000000 #define IWM_OFDM_RSSI_ALLBAND_B_POS 24 /** * struct iwm_rx_phy_info - phy info * (IWM_REPLY_RX_PHY_CMD = 0xc0) * @non_cfg_phy_cnt: non configurable DSP phy data byte count * @cfg_phy_cnt: configurable DSP phy data byte count * @stat_id: configurable DSP phy data set ID * @reserved1: * @system_timestamp: GP2 at on air rise * @timestamp: TSF at on air rise * @beacon_time_stamp: beacon at on-air rise * @phy_flags: general phy flags: band, modulation, ... * @channel: channel number * @non_cfg_phy_buf: for various implementations of non_cfg_phy * @rate_n_flags: IWM_RATE_MCS_* * @byte_count: frame's byte-count * @frame_time: frame's time on the air, based on byte count and frame rate * calculation * @mac_active_msk: what MACs were active when the frame was received * * Before each Rx, the device sends this data. It contains PHY information * about the reception of the packet. 
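 *
 * As an illustrative sketch (not a quote of the driver code), the
 * per-antenna energy can be unpacked from non_cfg_phy with the masks
 * defined above:
 *	uint32_t val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_ENERGY_ANT_ABC_IDX]);
 *	int energy_a = (val & IWM_RX_INFO_ENERGY_ANT_A_MSK) >>
 *	    IWM_RX_INFO_ENERGY_ANT_A_POS;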
*/ struct iwm_rx_phy_info { uint8_t non_cfg_phy_cnt; uint8_t cfg_phy_cnt; uint8_t stat_id; uint8_t reserved1; uint32_t system_timestamp; uint64_t timestamp; uint32_t beacon_time_stamp; uint16_t phy_flags; #define IWM_PHY_INFO_FLAG_SHPREAMBLE (1 << 2) uint16_t channel; uint32_t non_cfg_phy[IWM_RX_INFO_PHY_CNT]; uint8_t rate; uint8_t rflags; uint16_t xrflags; uint32_t byte_count; uint16_t mac_active_msk; uint16_t frame_time; } __packed; struct iwm_rx_mpdu_res_start { uint16_t byte_count; uint16_t reserved; } __packed; /** * enum iwm_rx_phy_flags - to parse %iwm_rx_phy_info phy_flags * @IWM_RX_RES_PHY_FLAGS_BAND_24: true if the packet was received on 2.4 band * @IWM_RX_RES_PHY_FLAGS_MOD_CCK: * @IWM_RX_RES_PHY_FLAGS_SHORT_PREAMBLE: true if packet's preamble was short * @IWM_RX_RES_PHY_FLAGS_NARROW_BAND: * @IWM_RX_RES_PHY_FLAGS_ANTENNA: antenna on which the packet was received * @IWM_RX_RES_PHY_FLAGS_AGG: set if the packet was part of an A-MPDU * @IWM_RX_RES_PHY_FLAGS_OFDM_HT: The frame was an HT frame * @IWM_RX_RES_PHY_FLAGS_OFDM_GF: The frame used GF preamble * @IWM_RX_RES_PHY_FLAGS_OFDM_VHT: The frame was a VHT frame */ enum iwm_rx_phy_flags { IWM_RX_RES_PHY_FLAGS_BAND_24 = (1 << 0), IWM_RX_RES_PHY_FLAGS_MOD_CCK = (1 << 1), IWM_RX_RES_PHY_FLAGS_SHORT_PREAMBLE = (1 << 2), IWM_RX_RES_PHY_FLAGS_NARROW_BAND = (1 << 3), IWM_RX_RES_PHY_FLAGS_ANTENNA = (0x7 << 4), IWM_RX_RES_PHY_FLAGS_ANTENNA_POS = 4, IWM_RX_RES_PHY_FLAGS_AGG = (1 << 7), IWM_RX_RES_PHY_FLAGS_OFDM_HT = (1 << 8), IWM_RX_RES_PHY_FLAGS_OFDM_GF = (1 << 9), IWM_RX_RES_PHY_FLAGS_OFDM_VHT = (1 << 10), }; /** * enum iwm_mvm_rx_status - written by fw for each Rx packet * @IWM_RX_MPDU_RES_STATUS_CRC_OK: CRC is fine * @IWM_RX_MPDU_RES_STATUS_OVERRUN_OK: there was no RXE overflow * @IWM_RX_MPDU_RES_STATUS_SRC_STA_FOUND: * @IWM_RX_MPDU_RES_STATUS_KEY_VALID: * @IWM_RX_MPDU_RES_STATUS_KEY_PARAM_OK: * @IWM_RX_MPDU_RES_STATUS_ICV_OK: ICV is fine, if not, the packet is destroyed * @IWM_RX_MPDU_RES_STATUS_MIC_OK: used for CCM alg only. TKIP MIC is checked * in the driver. * @IWM_RX_MPDU_RES_STATUS_TTAK_OK: TTAK is fine * @IWM_RX_MPDU_RES_STATUS_MNG_FRAME_REPLAY_ERR: valid for alg = CCM_CMAC or * alg = CCM only. Checks replay attack for 11w frames. Relevant only if * %IWM_RX_MPDU_RES_STATUS_ROBUST_MNG_FRAME is set. 
 * @IWM_RX_MPDU_RES_STATUS_SEC_NO_ENC: this frame is not encrypted * @IWM_RX_MPDU_RES_STATUS_SEC_WEP_ENC: this frame is encrypted using WEP * @IWM_RX_MPDU_RES_STATUS_SEC_CCM_ENC: this frame is encrypted using CCM * @IWM_RX_MPDU_RES_STATUS_SEC_TKIP_ENC: this frame is encrypted using TKIP * @IWM_RX_MPDU_RES_STATUS_SEC_CCM_CMAC_ENC: this frame is encrypted using CCM_CMAC * @IWM_RX_MPDU_RES_STATUS_SEC_ENC_ERR: this frame couldn't be decrypted * @IWM_RX_MPDU_RES_STATUS_SEC_ENC_MSK: bitmask of the encryption algorithm * @IWM_RX_MPDU_RES_STATUS_DEC_DONE: this frame has been successfully decrypted * @IWM_RX_MPDU_RES_STATUS_PROTECT_FRAME_BIT_CMP: * @IWM_RX_MPDU_RES_STATUS_EXT_IV_BIT_CMP: * @IWM_RX_MPDU_RES_STATUS_KEY_ID_CMP_BIT: * @IWM_RX_MPDU_RES_STATUS_ROBUST_MNG_FRAME: this frame is an 11w management frame * @IWM_RX_MPDU_RES_STATUS_HASH_INDEX_MSK: * @IWM_RX_MPDU_RES_STATUS_STA_ID_MSK: * @IWM_RX_MPDU_RES_STATUS_RRF_KILL: * @IWM_RX_MPDU_RES_STATUS_FILTERING_MSK: * @IWM_RX_MPDU_RES_STATUS2_FILTERING_MSK: */ enum iwm_mvm_rx_status { IWM_RX_MPDU_RES_STATUS_CRC_OK = (1 << 0), IWM_RX_MPDU_RES_STATUS_OVERRUN_OK = (1 << 1), IWM_RX_MPDU_RES_STATUS_SRC_STA_FOUND = (1 << 2), IWM_RX_MPDU_RES_STATUS_KEY_VALID = (1 << 3), IWM_RX_MPDU_RES_STATUS_KEY_PARAM_OK = (1 << 4), IWM_RX_MPDU_RES_STATUS_ICV_OK = (1 << 5), IWM_RX_MPDU_RES_STATUS_MIC_OK = (1 << 6), IWM_RX_MPDU_RES_STATUS_TTAK_OK = (1 << 7), IWM_RX_MPDU_RES_STATUS_MNG_FRAME_REPLAY_ERR = (1 << 7), IWM_RX_MPDU_RES_STATUS_SEC_NO_ENC = (0 << 8), IWM_RX_MPDU_RES_STATUS_SEC_WEP_ENC = (1 << 8), IWM_RX_MPDU_RES_STATUS_SEC_CCM_ENC = (2 << 8), IWM_RX_MPDU_RES_STATUS_SEC_TKIP_ENC = (3 << 8), IWM_RX_MPDU_RES_STATUS_SEC_EXT_ENC = (4 << 8), IWM_RX_MPDU_RES_STATUS_SEC_CCM_CMAC_ENC = (6 << 8), IWM_RX_MPDU_RES_STATUS_SEC_ENC_ERR = (7 << 8), IWM_RX_MPDU_RES_STATUS_SEC_ENC_MSK = (7 << 8), IWM_RX_MPDU_RES_STATUS_DEC_DONE = (1 << 11), IWM_RX_MPDU_RES_STATUS_PROTECT_FRAME_BIT_CMP = (1 << 12), IWM_RX_MPDU_RES_STATUS_EXT_IV_BIT_CMP = (1 << 13), IWM_RX_MPDU_RES_STATUS_KEY_ID_CMP_BIT = (1 << 14), IWM_RX_MPDU_RES_STATUS_ROBUST_MNG_FRAME = (1 << 15), IWM_RX_MPDU_RES_STATUS_HASH_INDEX_MSK = (0x3F0000), IWM_RX_MPDU_RES_STATUS_STA_ID_MSK = (0x1f000000), IWM_RX_MPDU_RES_STATUS_RRF_KILL = (1 << 29), IWM_RX_MPDU_RES_STATUS_FILTERING_MSK = (0xc00000), IWM_RX_MPDU_RES_STATUS2_FILTERING_MSK = (0xc0000000), }; /** * struct iwm_radio_version_notif - information on the radio version * ( IWM_RADIO_VERSION_NOTIFICATION = 0x68 ) * @radio_flavor: * @radio_step: * @radio_dash: */ struct iwm_radio_version_notif { uint32_t radio_flavor; uint32_t radio_step; uint32_t radio_dash; } __packed; /* IWM_RADIO_VERSION_NOTIFICATION_S_VER_1 */ enum iwm_card_state_flags { IWM_CARD_ENABLED = 0x00, IWM_HW_CARD_DISABLED = 0x01, IWM_SW_CARD_DISABLED = 0x02, IWM_CT_KILL_CARD_DISABLED = 0x04, IWM_HALT_CARD_DISABLED = 0x08, IWM_CARD_DISABLED_MSK = 0x0f, IWM_CARD_IS_RX_ON = 0x10, }; /** * struct iwm_card_state_notif - information on the card state * (IWM_CARD_STATE_NOTIFICATION = 0xa1 ) * @flags: %iwm_card_state_flags */ struct iwm_card_state_notif { uint32_t flags; } __packed; /* CARD_STATE_NTFY_API_S_VER_1 */ /** * struct iwm_missed_beacons_notif - information on missed beacons * ( IWM_MISSED_BEACONS_NOTIFICATION = 0xa2 ) * @mac_id: interface ID * @consec_missed_beacons_since_last_rx: number of consecutive missed * beacons since last RX.
 * @consec_missed_beacons: number of consecutive missed beacons * @num_expected_beacons: * @num_recvd_beacons: */ struct iwm_missed_beacons_notif { uint32_t mac_id; uint32_t consec_missed_beacons_since_last_rx; uint32_t consec_missed_beacons; uint32_t num_expected_beacons; uint32_t num_recvd_beacons; } __packed; /* IWM_MISSED_BEACON_NTFY_API_S_VER_3 */ /** * struct iwm_mfuart_load_notif - mfuart image version & status * ( IWM_MFUART_LOAD_NOTIFICATION = 0xb1 ) * @installed_ver: installed image version * @external_ver: external image version * @status: MFUART loading status * @duration: MFUART loading time */ struct iwm_mfuart_load_notif { uint32_t installed_ver; uint32_t external_ver; uint32_t status; uint32_t duration; } __packed; /* MFU_LOADER_NTFY_API_S_VER_1 */ /** * struct iwm_set_calib_default_cmd - set default value for calibration. * ( IWM_SET_CALIB_DEFAULT_CMD = 0x8e ) * @calib_index: the calibration to set value for * @length: length of the data * @data: the value to set for the calibration result */ struct iwm_set_calib_default_cmd { uint16_t calib_index; uint16_t length; uint8_t data[0]; } __packed; /* IWM_PHY_CALIB_OVERRIDE_VALUES_S */ #define IWM_MAX_PORT_ID_NUM 2 #define IWM_MAX_MCAST_FILTERING_ADDRESSES 256 /** * struct iwm_mcast_filter_cmd - configure multicast filter. * @filter_own: Set 1 to filter out multicast packets sent by station itself * @port_id: Multicast MAC addresses array specifier. This is a strange way * to identify the network interface, adopted in the host-device IF. * It is used by FW as an index into the array of addresses. This array * has IWM_MAX_PORT_ID_NUM members. * @count: Number of MAC addresses in the array * @pass_all: Set 1 to pass all multicast packets. * @bssid: current association BSSID. * @addr_list: Placeholder for the array of MAC addresses. * IMPORTANT: add padding if necessary to ensure DWORD alignment.
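 *
 * E.g. for @count addresses the total command length would be computed
 * roughly as follows (an illustrative sketch; 6 is the size of a MAC
 * address, roundup() is the sys/param.h macro):
 *	len = roundup(sizeof(struct iwm_mcast_filter_cmd) + count * 6, 4);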
*/ struct iwm_mcast_filter_cmd { uint8_t filter_own; uint8_t port_id; uint8_t count; uint8_t pass_all; uint8_t bssid[6]; uint8_t reserved[2]; uint8_t addr_list[0]; } __packed; /* IWM_MCAST_FILTERING_CMD_API_S_VER_1 */ /* * The first MAC indices (starting from 0) * are available to the driver, AUX follows */ #define IWM_MAC_INDEX_AUX 4 #define IWM_MAC_INDEX_MIN_DRIVER 0 #define IWM_NUM_MAC_INDEX_DRIVER IWM_MAC_INDEX_AUX #define IWM_NUM_MAC_INDEX (IWM_MAC_INDEX_AUX + 1) /*********************************** * Statistics API ***********************************/ struct iwm_mvm_statistics_dbg { uint32_t burst_check; uint32_t burst_count; uint32_t wait_for_silence_timeout_cnt; uint32_t reserved[3]; } __packed; /* IWM_STATISTICS_DEBUG_API_S_VER_2 */ struct iwm_mvm_statistics_div { uint32_t tx_on_a; uint32_t tx_on_b; uint32_t exec_time; uint32_t probe_time; uint32_t rssi_ant; uint32_t reserved2; } __packed; /* IWM_STATISTICS_SLOW_DIV_API_S_VER_2 */ struct iwm_mvm_statistics_rx_non_phy { uint32_t bogus_cts; /* CTS received when not expecting CTS */ uint32_t bogus_ack; /* ACK received when not expecting ACK */ uint32_t non_bssid_frames; /* number of frames with BSSID that * doesn't belong to the STA BSSID */ uint32_t filtered_frames; /* count frames that were dumped in the * filtering process */ uint32_t non_channel_beacons; /* beacons with our bss id but not on * our serving channel */ uint32_t channel_beacons; /* beacons with our bss id and in our * serving channel */ uint32_t num_missed_bcon; /* number of missed beacons */ uint32_t adc_rx_saturation_time; /* count in 0.8us units the time the * ADC was in saturation */ uint32_t ina_detection_search_time;/* total time (in 0.8us) searched * for INA */ uint32_t beacon_silence_rssi[3];/* RSSI silence after beacon frame */ uint32_t interference_data_flag; /* flag for interference data * availability. 1 when data is * available. 
*/ uint32_t channel_load; /* counts RX Enable time in uSec */ uint32_t dsp_false_alarms; /* DSP false alarm (both OFDM * and CCK) counter */ uint32_t beacon_rssi_a; uint32_t beacon_rssi_b; uint32_t beacon_rssi_c; uint32_t beacon_energy_a; uint32_t beacon_energy_b; uint32_t beacon_energy_c; uint32_t num_bt_kills; uint32_t mac_id; uint32_t directed_data_mpdu; } __packed; /* IWM_STATISTICS_RX_NON_PHY_API_S_VER_3 */ struct iwm_mvm_statistics_rx_phy { uint32_t ina_cnt; uint32_t fina_cnt; uint32_t plcp_err; uint32_t crc32_err; uint32_t overrun_err; uint32_t early_overrun_err; uint32_t crc32_good; uint32_t false_alarm_cnt; uint32_t fina_sync_err_cnt; uint32_t sfd_timeout; uint32_t fina_timeout; uint32_t unresponded_rts; uint32_t rxe_frame_limit_overrun; uint32_t sent_ack_cnt; uint32_t sent_cts_cnt; uint32_t sent_ba_rsp_cnt; uint32_t dsp_self_kill; uint32_t mh_format_err; uint32_t re_acq_main_rssi_sum; uint32_t reserved; } __packed; /* IWM_STATISTICS_RX_PHY_API_S_VER_2 */ struct iwm_mvm_statistics_rx_ht_phy { uint32_t plcp_err; uint32_t overrun_err; uint32_t early_overrun_err; uint32_t crc32_good; uint32_t crc32_err; uint32_t mh_format_err; uint32_t agg_crc32_good; uint32_t agg_mpdu_cnt; uint32_t agg_cnt; uint32_t unsupport_mcs; } __packed; /* IWM_STATISTICS_HT_RX_PHY_API_S_VER_1 */ struct iwm_mvm_statistics_tx_non_phy { uint32_t preamble_cnt; uint32_t rx_detected_cnt; uint32_t bt_prio_defer_cnt; uint32_t bt_prio_kill_cnt; uint32_t few_bytes_cnt; uint32_t cts_timeout; uint32_t ack_timeout; uint32_t expected_ack_cnt; uint32_t actual_ack_cnt; uint32_t dump_msdu_cnt; uint32_t burst_abort_next_frame_mismatch_cnt; uint32_t burst_abort_missing_next_frame_cnt; uint32_t cts_timeout_collision; uint32_t ack_or_ba_timeout_collision; } __packed; /* IWM_STATISTICS_TX_NON_PHY_API_S_VER_3 */ #define IWM_MAX_CHAINS 3 struct iwm_mvm_statistics_tx_non_phy_agg { uint32_t ba_timeout; uint32_t ba_reschedule_frames; uint32_t scd_query_agg_frame_cnt; uint32_t scd_query_no_agg; uint32_t scd_query_agg; uint32_t scd_query_mismatch; uint32_t frame_not_ready; uint32_t underrun; uint32_t bt_prio_kill; uint32_t rx_ba_rsp_cnt; int8_t txpower[IWM_MAX_CHAINS]; int8_t reserved; uint32_t reserved2; } __packed; /* IWM_STATISTICS_TX_NON_PHY_AGG_API_S_VER_1 */ struct iwm_mvm_statistics_tx_channel_width { uint32_t ext_cca_narrow_ch20[1]; uint32_t ext_cca_narrow_ch40[2]; uint32_t ext_cca_narrow_ch80[3]; uint32_t ext_cca_narrow_ch160[4]; uint32_t last_tx_ch_width_indx; uint32_t rx_detected_per_ch_width[4]; uint32_t success_per_ch_width[4]; uint32_t fail_per_ch_width[4]; }; /* IWM_STATISTICS_TX_CHANNEL_WIDTH_API_S_VER_1 */ struct iwm_mvm_statistics_tx { struct iwm_mvm_statistics_tx_non_phy general; struct iwm_mvm_statistics_tx_non_phy_agg agg; struct iwm_mvm_statistics_tx_channel_width channel_width; } __packed; /* IWM_STATISTICS_TX_API_S_VER_4 */ struct iwm_mvm_statistics_bt_activity { uint32_t hi_priority_tx_req_cnt; uint32_t hi_priority_tx_denied_cnt; uint32_t lo_priority_tx_req_cnt; uint32_t lo_priority_tx_denied_cnt; uint32_t hi_priority_rx_req_cnt; uint32_t hi_priority_rx_denied_cnt; uint32_t lo_priority_rx_req_cnt; uint32_t lo_priority_rx_denied_cnt; } __packed; /* IWM_STATISTICS_BT_ACTIVITY_API_S_VER_1 */ struct iwm_mvm_statistics_general_v8 { uint32_t radio_temperature; uint32_t radio_voltage; struct iwm_mvm_statistics_dbg dbg; uint32_t sleep_time; uint32_t slots_out; uint32_t slots_idle; uint32_t ttl_timestamp; struct iwm_mvm_statistics_div slow_div; uint32_t rx_enable_counter; /* * num_of_sos_states: * count the number of times 
we have to re-tune * in order to get out of bad PHY status */ uint32_t num_of_sos_states; uint32_t beacon_filtered; uint32_t missed_beacons; uint8_t beacon_filter_average_energy; uint8_t beacon_filter_reason; uint8_t beacon_filter_current_energy; uint8_t beacon_filter_reserved; uint32_t beacon_filter_delta_time; struct iwm_mvm_statistics_bt_activity bt_activity; uint64_t rx_time; uint64_t on_time_rf; uint64_t on_time_scan; uint64_t tx_time; uint32_t beacon_counter[IWM_NUM_MAC_INDEX]; uint8_t beacon_average_energy[IWM_NUM_MAC_INDEX]; uint8_t reserved[4 - (IWM_NUM_MAC_INDEX % 4)]; } __packed; /* IWM_STATISTICS_GENERAL_API_S_VER_8 */ struct iwm_mvm_statistics_rx { struct iwm_mvm_statistics_rx_phy ofdm; struct iwm_mvm_statistics_rx_phy cck; struct iwm_mvm_statistics_rx_non_phy general; struct iwm_mvm_statistics_rx_ht_phy ofdm_ht; } __packed; /* IWM_STATISTICS_RX_API_S_VER_3 */ /* * IWM_STATISTICS_NOTIFICATION = 0x9d (notification only, not a command) * * By default, uCode issues this notification after receiving a beacon * while associated. To disable this behavior, set DISABLE_NOTIF flag in the * IWM_STATISTICS_CMD (0x9c), below. */ struct iwm_notif_statistics_v10 { uint32_t flag; struct iwm_mvm_statistics_rx rx; struct iwm_mvm_statistics_tx tx; struct iwm_mvm_statistics_general_v8 general; } __packed; /* IWM_STATISTICS_NTFY_API_S_VER_10 */ #define IWM_STATISTICS_FLG_CLEAR 0x1 #define IWM_STATISTICS_FLG_DISABLE_NOTIF 0x2 struct iwm_statistics_cmd { uint32_t flags; } __packed; /* IWM_STATISTICS_CMD_API_S_VER_1 */ /*********************************** * Smart Fifo API ***********************************/ /* Smart Fifo state */ enum iwm_sf_state { IWM_SF_LONG_DELAY_ON = 0, /* should never be called by driver */ IWM_SF_FULL_ON, IWM_SF_UNINIT, IWM_SF_INIT_OFF, IWM_SF_HW_NUM_STATES }; /* Smart Fifo possible scenario */ enum iwm_sf_scenario { IWM_SF_SCENARIO_SINGLE_UNICAST, IWM_SF_SCENARIO_AGG_UNICAST, IWM_SF_SCENARIO_MULTICAST, IWM_SF_SCENARIO_BA_RESP, IWM_SF_SCENARIO_TX_RESP, IWM_SF_NUM_SCENARIO }; #define IWM_SF_TRANSIENT_STATES_NUMBER 2 /* IWM_SF_LONG_DELAY_ON and IWM_SF_FULL_ON */ #define IWM_SF_NUM_TIMEOUT_TYPES 2 /* Aging timer and Idle timer */ /* smart FIFO default values */ #define IWM_SF_W_MARK_SISO 4096 #define IWM_SF_W_MARK_MIMO2 8192 #define IWM_SF_W_MARK_MIMO3 6144 #define IWM_SF_W_MARK_LEGACY 4096 #define IWM_SF_W_MARK_SCAN 4096 /* SF Scenarios timers for default configuration (aligned to 32 uSec) */ #define IWM_SF_SINGLE_UNICAST_IDLE_TIMER_DEF 160 /* 150 uSec */ #define IWM_SF_SINGLE_UNICAST_AGING_TIMER_DEF 400 /* 0.4 mSec */ #define IWM_SF_AGG_UNICAST_IDLE_TIMER_DEF 160 /* 150 uSec */ #define IWM_SF_AGG_UNICAST_AGING_TIMER_DEF 400 /* 0.4 mSec */ #define IWM_SF_MCAST_IDLE_TIMER_DEF 160 /* 150 uSec */ #define IWM_SF_MCAST_AGING_TIMER_DEF 400 /* 0.4 mSec */ #define IWM_SF_BA_IDLE_TIMER_DEF 160 /* 150 uSec */ #define IWM_SF_BA_AGING_TIMER_DEF 400 /* 0.4 mSec */ #define IWM_SF_TX_RE_IDLE_TIMER_DEF 160 /* 150 uSec */ #define IWM_SF_TX_RE_AGING_TIMER_DEF 400 /* 0.4 mSec */ /* SF Scenarios timers for FULL_ON state (aligned to 32 uSec) */ #define IWM_SF_SINGLE_UNICAST_IDLE_TIMER 320 /* 300 uSec */ #define IWM_SF_SINGLE_UNICAST_AGING_TIMER 2016 /* 2 mSec */ #define IWM_SF_AGG_UNICAST_IDLE_TIMER 320 /* 300 uSec */ #define IWM_SF_AGG_UNICAST_AGING_TIMER 2016 /* 2 mSec */ #define IWM_SF_MCAST_IDLE_TIMER 2016 /* 2 mSec */ #define IWM_SF_MCAST_AGING_TIMER 10016 /* 10 mSec */ #define IWM_SF_BA_IDLE_TIMER 320 /* 300 uSec */ #define IWM_SF_BA_AGING_TIMER 2016 /* 2 mSec */ #define 
IWM_SF_TX_RE_IDLE_TIMER 320 /* 300 uSec */ #define IWM_SF_TX_RE_AGING_TIMER 2016 /* 2 mSec */ #define IWM_SF_LONG_DELAY_AGING_TIMER 1000000 /* 1 Sec */ #define IWM_SF_CFG_DUMMY_NOTIF_OFF (1 << 16) /** * Smart Fifo configuration command. * @state: smart fifo state, types listed in iwm_sf_state. * @watermark: Minimum allowed available free space in RXF for transient state. * @long_delay_timeouts: aging and idle timer values for each scenario * in long delay state. * @full_on_timeouts: timer values for each scenario in full on state. */ struct iwm_sf_cfg_cmd { uint32_t state; uint32_t watermark[IWM_SF_TRANSIENT_STATES_NUMBER]; uint32_t long_delay_timeouts[IWM_SF_NUM_SCENARIO][IWM_SF_NUM_TIMEOUT_TYPES]; uint32_t full_on_timeouts[IWM_SF_NUM_SCENARIO][IWM_SF_NUM_TIMEOUT_TYPES]; } __packed; /* IWM_SF_CFG_API_S_VER_2 */ /* * END mvm/fw-api.h */ /* * BEGIN mvm/fw-api-mac.h */ enum iwm_ac { IWM_AC_BK, IWM_AC_BE, IWM_AC_VI, IWM_AC_VO, IWM_AC_NUM, }; /** * enum iwm_mac_protection_flags - MAC context flags * @IWM_MAC_PROT_FLG_TGG_PROTECT: 11g protection when transmitting OFDM frames, * this will require CCK RTS/CTS2self. * RTS/CTS will protect full burst time. * @IWM_MAC_PROT_FLG_HT_PROT: enable HT protection * @IWM_MAC_PROT_FLG_FAT_PROT: protect 40 MHz transmissions * @IWM_MAC_PROT_FLG_SELF_CTS_EN: allow CTS2self */ enum iwm_mac_protection_flags { IWM_MAC_PROT_FLG_TGG_PROTECT = (1 << 3), IWM_MAC_PROT_FLG_HT_PROT = (1 << 23), IWM_MAC_PROT_FLG_FAT_PROT = (1 << 24), IWM_MAC_PROT_FLG_SELF_CTS_EN = (1 << 30), }; #define IWM_MAC_FLG_SHORT_SLOT (1 << 4) #define IWM_MAC_FLG_SHORT_PREAMBLE (1 << 5) /** * enum iwm_mac_types - Supported MAC types * @IWM_FW_MAC_TYPE_FIRST: lowest supported MAC type * @IWM_FW_MAC_TYPE_AUX: Auxiliary MAC (internal) * @IWM_FW_MAC_TYPE_LISTENER: monitor MAC type (?) * @IWM_FW_MAC_TYPE_PIBSS: Pseudo-IBSS * @IWM_FW_MAC_TYPE_IBSS: IBSS * @IWM_FW_MAC_TYPE_BSS_STA: BSS (managed) station * @IWM_FW_MAC_TYPE_P2P_DEVICE: P2P Device * @IWM_FW_MAC_TYPE_P2P_STA: P2P client * @IWM_FW_MAC_TYPE_GO: P2P GO * @IWM_FW_MAC_TYPE_TEST: ? 
 * @IWM_FW_MAC_TYPE_MAX: highest supported MAC type */ enum iwm_mac_types { IWM_FW_MAC_TYPE_FIRST = 1, IWM_FW_MAC_TYPE_AUX = IWM_FW_MAC_TYPE_FIRST, IWM_FW_MAC_TYPE_LISTENER, IWM_FW_MAC_TYPE_PIBSS, IWM_FW_MAC_TYPE_IBSS, IWM_FW_MAC_TYPE_BSS_STA, IWM_FW_MAC_TYPE_P2P_DEVICE, IWM_FW_MAC_TYPE_P2P_STA, IWM_FW_MAC_TYPE_GO, IWM_FW_MAC_TYPE_TEST, IWM_FW_MAC_TYPE_MAX = IWM_FW_MAC_TYPE_TEST }; /* IWM_MAC_CONTEXT_TYPE_API_E_VER_1 */ /** * enum iwm_tsf_id - TSF hw timer ID * @IWM_TSF_ID_A: use TSF A * @IWM_TSF_ID_B: use TSF B * @IWM_TSF_ID_C: use TSF C * @IWM_TSF_ID_D: use TSF D * @IWM_NUM_TSF_IDS: number of TSF timers available */ enum iwm_tsf_id { IWM_TSF_ID_A = 0, IWM_TSF_ID_B = 1, IWM_TSF_ID_C = 2, IWM_TSF_ID_D = 3, IWM_NUM_TSF_IDS = 4, }; /* IWM_TSF_ID_API_E_VER_1 */ /** * struct iwm_mac_data_ap - configuration data for AP MAC context * @beacon_time: beacon transmit time in system time * @beacon_tsf: beacon transmit time in TSF * @bi: beacon interval in TU * @bi_reciprocal: 2^32 / bi * @dtim_interval: dtim transmit time in TU * @dtim_reciprocal: 2^32 / dtim_interval * @mcast_qid: queue ID for multicast traffic * @beacon_template: beacon template ID */ struct iwm_mac_data_ap { uint32_t beacon_time; uint64_t beacon_tsf; uint32_t bi; uint32_t bi_reciprocal; uint32_t dtim_interval; uint32_t dtim_reciprocal; uint32_t mcast_qid; uint32_t beacon_template; } __packed; /* AP_MAC_DATA_API_S_VER_1 */ /** * struct iwm_mac_data_ibss - configuration data for IBSS MAC context * @beacon_time: beacon transmit time in system time * @beacon_tsf: beacon transmit time in TSF * @bi: beacon interval in TU * @bi_reciprocal: 2^32 / bi * @beacon_template: beacon template ID */ struct iwm_mac_data_ibss { uint32_t beacon_time; uint64_t beacon_tsf; uint32_t bi; uint32_t bi_reciprocal; uint32_t beacon_template; } __packed; /* IBSS_MAC_DATA_API_S_VER_1 */ /** * struct iwm_mac_data_sta - configuration data for station MAC context * @is_assoc: 1 for associated state, 0 otherwise * @dtim_time: DTIM arrival time in system time * @dtim_tsf: DTIM arrival time in TSF * @bi: beacon interval in TU, applicable only when associated * @bi_reciprocal: 2^32 / bi , applicable only when associated * @dtim_interval: DTIM interval in TU, applicable only when associated * @dtim_reciprocal: 2^32 / dtim_interval , applicable only when associated * @listen_interval: in beacon intervals, applicable only when associated * @assoc_id: unique ID assigned by the AP during association */ struct iwm_mac_data_sta { uint32_t is_assoc; uint32_t dtim_time; uint64_t dtim_tsf; uint32_t bi; uint32_t bi_reciprocal; uint32_t dtim_interval; uint32_t dtim_reciprocal; uint32_t listen_interval; uint32_t assoc_id; uint32_t assoc_beacon_arrive_time; } __packed; /* IWM_STA_MAC_DATA_API_S_VER_1 */ /** * struct iwm_mac_data_go - configuration data for P2P GO MAC context * @ap: iwm_mac_data_ap struct with most config data * @ctwin: client traffic window in TU (period after TBTT when GO is present). * 0 indicates that there is no CT window. * @opp_ps_enabled: indicates that opportunistic PS is allowed */ struct iwm_mac_data_go { struct iwm_mac_data_ap ap; uint32_t ctwin; uint32_t opp_ps_enabled; } __packed; /* GO_MAC_DATA_API_S_VER_1 */ /** * struct iwm_mac_data_p2p_sta - configuration data for P2P client MAC context * @sta: iwm_mac_data_sta struct with most config data * @ctwin: client traffic window in TU (period after TBTT when GO is present). * 0 indicates that there is no CT window.
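 *
 * (The *_reciprocal fields above hold 2^32 / value, approximated as
 * 0xFFFFFFFF / value by iwm_mvm_reciprocal() further below; e.g. a
 * beacon interval of 100 TU gives bi_reciprocal = 0xFFFFFFFF / 100.)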
*/ struct iwm_mac_data_p2p_sta { struct iwm_mac_data_sta sta; uint32_t ctwin; } __packed; /* P2P_STA_MAC_DATA_API_S_VER_1 */ /** * struct iwm_mac_data_pibss - Pseudo IBSS config data * @stats_interval: interval in TU between statistics notifications to host. */ struct iwm_mac_data_pibss { uint32_t stats_interval; } __packed; /* PIBSS_MAC_DATA_API_S_VER_1 */ /* * struct iwm_mac_data_p2p_dev - configuration data for the P2P Device MAC * context. * @is_disc_extended: if set to true, P2P Device discoverability is enabled on * other channels as well. This should be set to true only when the * device is discoverable and there is an active GO. Note that setting this * field when not needed will increase the number of interrupts and have * an effect on the platform power, as this setting opens the Rx filters on * all MACs. */ struct iwm_mac_data_p2p_dev { uint32_t is_disc_extended; } __packed; /* _P2P_DEV_MAC_DATA_API_S_VER_1 */ /** * enum iwm_mac_filter_flags - MAC context filter flags * @IWM_MAC_FILTER_IN_PROMISC: accept all data frames * @IWM_MAC_FILTER_IN_CONTROL_AND_MGMT: pass all management and * control frames to the host * @IWM_MAC_FILTER_ACCEPT_GRP: accept multicast frames * @IWM_MAC_FILTER_DIS_DECRYPT: don't decrypt unicast frames * @IWM_MAC_FILTER_DIS_GRP_DECRYPT: don't decrypt multicast frames * @IWM_MAC_FILTER_IN_BEACON: transfer foreign BSS's beacons to host * (in station mode when associated) * @IWM_MAC_FILTER_OUT_BCAST: filter out all broadcast frames * @IWM_MAC_FILTER_IN_CRC32: extract FCS and append it to frames * @IWM_MAC_FILTER_IN_PROBE_REQUEST: pass probe requests to host */ enum iwm_mac_filter_flags { IWM_MAC_FILTER_IN_PROMISC = (1 << 0), IWM_MAC_FILTER_IN_CONTROL_AND_MGMT = (1 << 1), IWM_MAC_FILTER_ACCEPT_GRP = (1 << 2), IWM_MAC_FILTER_DIS_DECRYPT = (1 << 3), IWM_MAC_FILTER_DIS_GRP_DECRYPT = (1 << 4), IWM_MAC_FILTER_IN_BEACON = (1 << 6), IWM_MAC_FILTER_OUT_BCAST = (1 << 8), IWM_MAC_FILTER_IN_CRC32 = (1 << 11), IWM_MAC_FILTER_IN_PROBE_REQUEST = (1 << 12), }; /** * enum iwm_mac_qos_flags - QoS flags * @IWM_MAC_QOS_FLG_UPDATE_EDCA: ? * @IWM_MAC_QOS_FLG_TGN: HT is enabled * @IWM_MAC_QOS_FLG_TXOP_TYPE: ? * */ enum iwm_mac_qos_flags { IWM_MAC_QOS_FLG_UPDATE_EDCA = (1 << 0), IWM_MAC_QOS_FLG_TGN = (1 << 1), IWM_MAC_QOS_FLG_TXOP_TYPE = (1 << 4), }; /** * struct iwm_ac_qos - QOS timing params for IWM_MAC_CONTEXT_CMD * @cw_min: Contention window, start value in numbers of slots. * Should be a power-of-2, minus 1. Device's default is 0x0f. * @cw_max: Contention window, max value in numbers of slots. * Should be a power-of-2, minus 1. Device's default is 0x3f. * @aifsn: Number of slots in Arbitration Interframe Space (before * performing random backoff timing prior to Tx). Device default 1. * @fifos_mask: FIFOs used by this MAC for this AC * @edca_txop: Length of Tx opportunity, in uSecs. Device default is 0. * * One instance of this config struct for each of 4 EDCA access categories * in struct iwm_qosparam_cmd. * * Device will automatically increase the contention window to (2*CW + 1) for * each transmission retry. Device uses cw_max as a bit mask, ANDed with the * new CW value, to cap the CW value.
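 *
 * Illustrative backoff sketch (not driver code): starting from
 * cw = cw_min, each retry computes
 *	cw = ((cw << 1) | 1) & cw_max;
 * so a cw_min of 0x0f grows to 0x1f, 0x3f, ... until capped by cw_max.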
*/ struct iwm_ac_qos { uint16_t cw_min; uint16_t cw_max; uint8_t aifsn; uint8_t fifos_mask; uint16_t edca_txop; } __packed; /* IWM_AC_QOS_API_S_VER_2 */ /** * struct iwm_mac_ctx_cmd - command structure to configure MAC contexts * ( IWM_MAC_CONTEXT_CMD = 0x28 ) * @id_and_color: ID and color of the MAC * @action: action to perform, one of IWM_FW_CTXT_ACTION_* * @mac_type: one of IWM_FW_MAC_TYPE_* * @tsf_id: TSF HW timer, one of IWM_TSF_ID_* * @node_addr: MAC address * @bssid_addr: BSSID * @cck_rates: basic rates available for CCK * @ofdm_rates: basic rates available for OFDM * @protection_flags: combination of IWM_MAC_PROT_FLG_FLAG_* * @cck_short_preamble: 0x20 for enabling short preamble, 0 otherwise * @short_slot: 0x10 for enabling short slots, 0 otherwise * @filter_flags: combination of IWM_MAC_FILTER_* * @qos_flags: from IWM_MAC_QOS_FLG_* * @ac: one iwm_mac_qos configuration for each AC * @mac_specific: one of struct iwm_mac_data_*, according to mac_type */ struct iwm_mac_ctx_cmd { /* COMMON_INDEX_HDR_API_S_VER_1 */ uint32_t id_and_color; uint32_t action; /* IWM_MAC_CONTEXT_COMMON_DATA_API_S_VER_1 */ uint32_t mac_type; uint32_t tsf_id; uint8_t node_addr[6]; uint16_t reserved_for_node_addr; uint8_t bssid_addr[6]; uint16_t reserved_for_bssid_addr; uint32_t cck_rates; uint32_t ofdm_rates; uint32_t protection_flags; uint32_t cck_short_preamble; uint32_t short_slot; uint32_t filter_flags; /* IWM_MAC_QOS_PARAM_API_S_VER_1 */ uint32_t qos_flags; struct iwm_ac_qos ac[IWM_AC_NUM+1]; /* IWM_MAC_CONTEXT_COMMON_DATA_API_S */ union { struct iwm_mac_data_ap ap; struct iwm_mac_data_go go; struct iwm_mac_data_sta sta; struct iwm_mac_data_p2p_sta p2p_sta; struct iwm_mac_data_p2p_dev p2p_dev; struct iwm_mac_data_pibss pibss; struct iwm_mac_data_ibss ibss; }; } __packed; /* IWM_MAC_CONTEXT_CMD_API_S_VER_1 */ static inline uint32_t iwm_mvm_reciprocal(uint32_t v) { if (!v) return 0; return 0xFFFFFFFF / v; } #define IWM_NONQOS_SEQ_GET 0x1 #define IWM_NONQOS_SEQ_SET 0x2 struct iwm_nonqos_seq_query_cmd { uint32_t get_set_flag; uint32_t mac_id_n_color; uint16_t value; uint16_t reserved; } __packed; /* IWM_NON_QOS_TX_COUNTER_GET_SET_API_S_VER_1 */ /* * END mvm/fw-api-mac.h */ /* * BEGIN mvm/fw-api-power.h */ /* Power Management Commands, Responses, Notifications */ /* Radio LP RX Energy Threshold measured in dBm */ #define IWM_POWER_LPRX_RSSI_THRESHOLD 75 #define IWM_POWER_LPRX_RSSI_THRESHOLD_MAX 94 #define IWM_POWER_LPRX_RSSI_THRESHOLD_MIN 30 /** * enum iwm_power_flags - masks for power table command flags * @IWM_POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off * receiver and transmitter. '0' - does not allow. * @IWM_POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management, * '1' Driver enables PM (use rest of parameters) * @IWM_POWER_FLAGS_SKIP_OVER_DTIM_MSK: '0' PM has to wake up every DTIM, * '1' PM could sleep over DTIM until listen interval. * @IWM_POWER_FLAGS_SNOOZE_ENA_MSK: Enable snoozing only if uAPSD is enabled and all * access categories are both delivery and trigger enabled. * @IWM_POWER_FLAGS_BT_SCO_ENA: Enable BT SCO coex only if uAPSD and * PBW Snoozing enabled * @IWM_POWER_FLAGS_ADVANCE_PM_ENA_MSK: Advanced PM (uAPSD) enable mask * @IWM_POWER_FLAGS_LPRX_ENA_MSK: Low Power RX enable.
/**
 * enum iwm_power_flags - masks for power table command flags
 * @IWM_POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off
 * receiver and transmitter. '0' - does not allow.
 * @IWM_POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management,
 * '1' Driver enables PM (use rest of parameters)
 * @IWM_POWER_FLAGS_SKIP_OVER_DTIM_MSK: '0' PM has to wake up every DTIM,
 * '1' PM could sleep over DTIM till listen interval.
 * @IWM_POWER_FLAGS_SNOOZE_ENA_MSK: Enable snoozing only if uAPSD is enabled and all
 * access categories are both delivery and trigger enabled.
 * @IWM_POWER_FLAGS_BT_SCO_ENA: Enable BT SCO coex only if uAPSD and
 * PBW Snoozing enabled
 * @IWM_POWER_FLAGS_ADVANCE_PM_ENA_MSK: Advanced PM (uAPSD) enable mask
 * @IWM_POWER_FLAGS_LPRX_ENA_MSK: Low Power RX enable.
 * @IWM_POWER_FLAGS_UAPSD_MISBEHAVING_ENA_MSK: AP/GO's uAPSD misbehaving
 * detection enablement
 */
enum iwm_power_flags {
	IWM_POWER_FLAGS_POWER_SAVE_ENA_MSK = (1 << 0),
	IWM_POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK = (1 << 1),
	IWM_POWER_FLAGS_SKIP_OVER_DTIM_MSK = (1 << 2),
	IWM_POWER_FLAGS_SNOOZE_ENA_MSK = (1 << 5),
	IWM_POWER_FLAGS_BT_SCO_ENA = (1 << 8),
	IWM_POWER_FLAGS_ADVANCE_PM_ENA_MSK = (1 << 9),
	IWM_POWER_FLAGS_LPRX_ENA_MSK = (1 << 11),
	IWM_POWER_FLAGS_UAPSD_MISBEHAVING_ENA_MSK = (1 << 12),
};

#define IWM_POWER_VEC_SIZE 5

/**
 * struct iwm_powertable_cmd - legacy power command. Besides supporting the old
 * API, it is also used with the new power API for device-wide power settings.
 * IWM_POWER_TABLE_CMD = 0x77 (command, has simple generic response)
 *
 * @flags: Power table command flags from IWM_POWER_FLAGS_*
 * @keep_alive_seconds: Keep alive period in seconds. Default - 25 sec.
 * Minimum allowed: 3 * DTIM. Keep alive period must be
 * set regardless of power scheme or current power state.
 * FW uses this value also when PM is disabled.
 * @rx_data_timeout: Minimum time (usec) from last Rx packet for AM to
 * PSM transition - legacy PM
 * @tx_data_timeout: Minimum time (usec) from last Tx packet for AM to
 * PSM transition - legacy PM
 * @sleep_interval: not in use
 * @skip_dtim_periods: Number of DTIM periods to skip if Skip over DTIM flag
 * is set. For example, if it is required to skip over
 * one DTIM, this value needs to be set to 2 (DTIM periods).
 * @lprx_rssi_threshold: Signal strength up to which LP RX can be enabled.
 * Default: 80dBm
 */
struct iwm_powertable_cmd {
	/* PM_POWER_TABLE_CMD_API_S_VER_6 */
	uint16_t flags;
	uint8_t keep_alive_seconds;
	uint8_t debug_flags;
	uint32_t rx_data_timeout;
	uint32_t tx_data_timeout;
	uint32_t sleep_interval[IWM_POWER_VEC_SIZE];
	uint32_t skip_dtim_periods;
	uint32_t lprx_rssi_threshold;
} __packed;

/**
 * enum iwm_device_power_flags - masks for device power command flags
 * @IWM_DEVICE_POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off
 * receiver and transmitter. '0' - does not allow.
 */
enum iwm_device_power_flags {
	IWM_DEVICE_POWER_FLAGS_POWER_SAVE_ENA_MSK = (1 << 0),
};

/**
 * struct iwm_device_power_cmd - device wide power command.
 * IWM_DEVICE_POWER_CMD = 0x77 (command, has simple generic response)
 *
 * @flags: Power table command flags from IWM_DEVICE_POWER_FLAGS_*
 */
struct iwm_device_power_cmd {
	/* PM_POWER_TABLE_CMD_API_S_VER_6 */
	uint16_t flags;
	uint16_t reserved;
} __packed;
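/*
 * Illustrative sketch (not part of the original header): filling the
 * device-wide power command to enable power save. The helper name is
 * hypothetical; the flags field is little-endian on the wire.
 */
static inline void
iwm_example_device_power(struct iwm_device_power_cmd *cmd)
{
	cmd->flags = htole16(IWM_DEVICE_POWER_FLAGS_POWER_SAVE_ENA_MSK);
	cmd->reserved = 0;
}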
/**
 * struct iwm_mac_power_cmd - New power command containing uAPSD support
 * IWM_MAC_PM_POWER_TABLE = 0xA9 (command, has simple generic response)
 * @id_and_color: MAC context identifier
 * @flags: Power table command flags from POWER_FLAGS_*
 * @keep_alive_seconds: Keep alive period in seconds. Default - 25 sec.
 * Minimum allowed: 3 * DTIM. Keep alive period must be
 * set regardless of power scheme or current power state.
 * FW uses this value also when PM is disabled.
 * @rx_data_timeout: Minimum time (usec) from last Rx packet for AM to
 * PSM transition - legacy PM
 * @tx_data_timeout: Minimum time (usec) from last Tx packet for AM to
 * PSM transition - legacy PM
 * @sleep_interval: not in use
 * @skip_dtim_periods: Number of DTIM periods to skip if Skip over DTIM flag
 * is set. For example, if it is required to skip over
 * one DTIM, this value needs to be set to 2 (DTIM periods).
 * @rx_data_timeout_uapsd: Minimum time (usec) from last Rx packet for AM to
 * PSM transition - uAPSD
 * @tx_data_timeout_uapsd: Minimum time (usec) from last Tx packet for AM to
 * PSM transition - uAPSD
 * @lprx_rssi_threshold: Signal strength up to which LP RX can be enabled.
 * Default: 80dBm
 * @num_skip_dtim: Number of DTIMs to skip if Skip over DTIM flag is set
 * @snooze_interval: Maximum time between attempts to retrieve buffered data
 * from the AP [msec]
 * @snooze_window: A window of time in which PBW snoozing ensures that all
 * packets are received. It is also the minimum time from last
 * received unicast RX packet, before client stops snoozing
 * for data. [msec]
 * @snooze_step: TBD
 * @qndp_tid: TID client shall use for uAPSD QNDP triggers
 * @uapsd_ac_flags: Set trigger-enabled and delivery-enabled indication for
 * each corresponding AC.
 * Use IEEE80211_WMM_IE_STA_QOSINFO_AC* for correct values.
 * @uapsd_max_sp: Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct
 * values.
 * @heavy_tx_thld_packets: TX threshold measured in number of packets
 * @heavy_rx_thld_packets: RX threshold measured in number of packets
 * @heavy_tx_thld_percentage: TX threshold measured in load's percentage
 * @heavy_rx_thld_percentage: RX threshold measured in load's percentage
 * @limited_ps_threshold:
 */
struct iwm_mac_power_cmd {
	/* CONTEXT_DESC_API_T_VER_1 */
	uint32_t id_and_color;
	/* CLIENT_PM_POWER_TABLE_S_VER_1 */
	uint16_t flags;
	uint16_t keep_alive_seconds;
	uint32_t rx_data_timeout;
	uint32_t tx_data_timeout;
	uint32_t rx_data_timeout_uapsd;
	uint32_t tx_data_timeout_uapsd;
	uint8_t lprx_rssi_threshold;
	uint8_t skip_dtim_periods;
	uint16_t snooze_interval;
	uint16_t snooze_window;
	uint8_t snooze_step;
	uint8_t qndp_tid;
	uint8_t uapsd_ac_flags;
	uint8_t uapsd_max_sp;
	uint8_t heavy_tx_thld_packets;
	uint8_t heavy_rx_thld_packets;
	uint8_t heavy_tx_thld_percentage;
	uint8_t heavy_rx_thld_percentage;
	uint8_t limited_ps_threshold;
	uint8_t reserved;
} __packed;

/*
 * struct iwm_uapsd_misbehaving_ap_notif - FW sends this notification when the
 * associated AP is identified as improperly implementing the uAPSD protocol.
 * IWM_PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION = 0x78
 * @sta_id: index of station in uCode's station table - associated AP ID in
 * this context.
 */
struct iwm_uapsd_misbehaving_ap_notif {
	uint32_t sta_id;
	uint8_t mac_id;
	uint8_t reserved[3];
} __packed;

/**
 * struct iwm_beacon_filter_cmd
 * IWM_REPLY_BEACON_FILTERING_CMD = 0xd2 (command)
 * @id_and_color: MAC context identifier
 * @bf_energy_delta: Used for RSSI filtering, if in 'normal' state. Send beacon
 * to driver if delta in Energy values calculated for this and last
 * passed beacon is greater than this threshold. Zero value means that
 * the Energy change is ignored for beacon filtering, and beacon will
 * not be forced to be sent to driver regardless of this delta. Typical
 * energy delta 5dB.
 * @bf_roaming_energy_delta: Used for RSSI filtering, if in 'roaming' state.
 * Send beacon to driver if delta in Energy values calculated for this
 * and last passed beacon is greater than this threshold. Zero value
 * means that the Energy change is ignored for beacon filtering while in
 * Roaming state, typical energy delta 1dB.
 * @bf_roaming_state: Used for RSSI filtering. If absolute Energy values
 * calculated for current beacon is less than the threshold, use
 * Roaming Energy Delta Threshold, otherwise use normal Energy Delta
 * Threshold. Typical energy threshold is -72dBm.
 * @bf_temp_threshold: This threshold determines the type of temperature
 * filtering (Slow or Fast) that is selected (Units are in Celsius):
 * If the current temperature is above this threshold - Fast filter
 * will be used, If the current temperature is below this threshold -
 * Slow filter will be used.
 * @bf_temp_fast_filter: Send Beacon to driver if delta in temperature values
 * calculated for this and the last passed beacon is greater than this
 * threshold. Zero value means that the temperature change is ignored for
 * beacon filtering; beacons will not be forced to be sent to driver
 * regardless of whether its temperature has been changed.
 * @bf_temp_slow_filter: Send Beacon to driver if delta in temperature values
 * calculated for this and the last passed beacon is greater than this
 * threshold. Zero value means that the temperature change is ignored for
 * beacon filtering; beacons will not be forced to be sent to driver
 * regardless of whether its temperature has been changed.
 * @bf_enable_beacon_filter: 1, beacon filtering is enabled; 0, disabled.
- * @bf_filter_escape_timer: Send beacons to to driver if no beacons were passed
+ * @bf_filter_escape_timer: Send beacons to the driver if no beacons were passed
 * for a specific period of time. Units: Beacons.
 * @ba_escape_timer: Fully receive and parse beacon if no beacons were passed
 * for a longer period of time than this escape-timeout. Units: Beacons.
 * @ba_enable_beacon_abort: 1, beacon abort is enabled; 0, disabled.
 */
struct iwm_beacon_filter_cmd {
	uint32_t bf_energy_delta;
	uint32_t bf_roaming_energy_delta;
	uint32_t bf_roaming_state;
	uint32_t bf_temp_threshold;
	uint32_t bf_temp_fast_filter;
	uint32_t bf_temp_slow_filter;
	uint32_t bf_enable_beacon_filter;
	uint32_t bf_debug_flag;
	uint32_t bf_escape_timer;
	uint32_t ba_escape_timer;
	uint32_t ba_enable_beacon_abort;
} __packed;

/* Beacon filtering and beacon abort */
#define IWM_BF_ENERGY_DELTA_DEFAULT 5
#define IWM_BF_ENERGY_DELTA_MAX 255
#define IWM_BF_ENERGY_DELTA_MIN 0

#define IWM_BF_ROAMING_ENERGY_DELTA_DEFAULT 1
#define IWM_BF_ROAMING_ENERGY_DELTA_MAX 255
#define IWM_BF_ROAMING_ENERGY_DELTA_MIN 0

#define IWM_BF_ROAMING_STATE_DEFAULT 72
#define IWM_BF_ROAMING_STATE_MAX 255
#define IWM_BF_ROAMING_STATE_MIN 0

#define IWM_BF_TEMP_THRESHOLD_DEFAULT 112
#define IWM_BF_TEMP_THRESHOLD_MAX 255
#define IWM_BF_TEMP_THRESHOLD_MIN 0

#define IWM_BF_TEMP_FAST_FILTER_DEFAULT 1
#define IWM_BF_TEMP_FAST_FILTER_MAX 255
#define IWM_BF_TEMP_FAST_FILTER_MIN 0

#define IWM_BF_TEMP_SLOW_FILTER_DEFAULT 5
#define IWM_BF_TEMP_SLOW_FILTER_MAX 255
#define IWM_BF_TEMP_SLOW_FILTER_MIN 0

#define IWM_BF_ENABLE_BEACON_FILTER_DEFAULT 1

#define IWM_BF_DEBUG_FLAG_DEFAULT 0

#define IWM_BF_ESCAPE_TIMER_DEFAULT 50
#define IWM_BF_ESCAPE_TIMER_MAX 1024
#define IWM_BF_ESCAPE_TIMER_MIN 0

#define IWM_BA_ESCAPE_TIMER_DEFAULT 6
#define IWM_BA_ESCAPE_TIMER_D3 9
#define IWM_BA_ESCAPE_TIMER_MAX 1024
#define IWM_BA_ESCAPE_TIMER_MIN 0

#define IWM_BA_ENABLE_BEACON_ABORT_DEFAULT 1

#define IWM_BF_CMD_CONFIG_DEFAULTS					\
	.bf_energy_delta = htole32(IWM_BF_ENERGY_DELTA_DEFAULT),	\
	.bf_roaming_energy_delta =					\
		htole32(IWM_BF_ROAMING_ENERGY_DELTA_DEFAULT),		\
	.bf_roaming_state = htole32(IWM_BF_ROAMING_STATE_DEFAULT),	\
	.bf_temp_threshold = htole32(IWM_BF_TEMP_THRESHOLD_DEFAULT),	\
	.bf_temp_fast_filter = htole32(IWM_BF_TEMP_FAST_FILTER_DEFAULT), \
	.bf_temp_slow_filter = htole32(IWM_BF_TEMP_SLOW_FILTER_DEFAULT), \
	.bf_debug_flag = htole32(IWM_BF_DEBUG_FLAG_DEFAULT),		\
	.bf_escape_timer = htole32(IWM_BF_ESCAPE_TIMER_DEFAULT),	\
	.ba_escape_timer = htole32(IWM_BA_ESCAPE_TIMER_DEFAULT)
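/*
 * Illustrative sketch (not part of the original header): the defaults macro
 * above is intended for a designated initializer, with the caller flipping
 * on the enable bits it wants. The helper name is hypothetical.
 */
static inline void
iwm_example_beacon_filter(struct iwm_beacon_filter_cmd *out)
{
	struct iwm_beacon_filter_cmd cmd = {
		IWM_BF_CMD_CONFIG_DEFAULTS,
		.bf_enable_beacon_filter = htole32(1),
		.ba_enable_beacon_abort = htole32(1),
	};

	*out = cmd;
}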
/*
 * END mvm/fw-api-power.h
 */

/*
 * BEGIN mvm/fw-api-rs.h
 */

/*
 * These serve as indexes into
 * struct iwm_rate_info fw_rate_idx_to_plcp[IWM_RATE_COUNT];
 * TODO: avoid overlap between legacy and HT rates
 */
enum {
	IWM_RATE_1M_INDEX = 0,
	IWM_FIRST_CCK_RATE = IWM_RATE_1M_INDEX,
	IWM_RATE_2M_INDEX,
	IWM_RATE_5M_INDEX,
	IWM_RATE_11M_INDEX,
	IWM_LAST_CCK_RATE = IWM_RATE_11M_INDEX,
	IWM_RATE_6M_INDEX,
	IWM_FIRST_OFDM_RATE = IWM_RATE_6M_INDEX,
	IWM_RATE_MCS_0_INDEX = IWM_RATE_6M_INDEX,
	IWM_FIRST_HT_RATE = IWM_RATE_MCS_0_INDEX,
	IWM_FIRST_VHT_RATE = IWM_RATE_MCS_0_INDEX,
	IWM_RATE_9M_INDEX,
	IWM_RATE_12M_INDEX,
	IWM_RATE_MCS_1_INDEX = IWM_RATE_12M_INDEX,
	IWM_RATE_18M_INDEX,
	IWM_RATE_MCS_2_INDEX = IWM_RATE_18M_INDEX,
	IWM_RATE_24M_INDEX,
	IWM_RATE_MCS_3_INDEX = IWM_RATE_24M_INDEX,
	IWM_RATE_36M_INDEX,
	IWM_RATE_MCS_4_INDEX = IWM_RATE_36M_INDEX,
	IWM_RATE_48M_INDEX,
	IWM_RATE_MCS_5_INDEX = IWM_RATE_48M_INDEX,
	IWM_RATE_54M_INDEX,
	IWM_RATE_MCS_6_INDEX = IWM_RATE_54M_INDEX,
	IWM_LAST_NON_HT_RATE = IWM_RATE_54M_INDEX,
	IWM_RATE_60M_INDEX,
	IWM_RATE_MCS_7_INDEX = IWM_RATE_60M_INDEX,
	IWM_LAST_HT_RATE = IWM_RATE_MCS_7_INDEX,
	IWM_RATE_MCS_8_INDEX,
	IWM_RATE_MCS_9_INDEX,
	IWM_LAST_VHT_RATE = IWM_RATE_MCS_9_INDEX,
	IWM_RATE_COUNT_LEGACY = IWM_LAST_NON_HT_RATE + 1,
	IWM_RATE_COUNT = IWM_LAST_VHT_RATE + 1,
};

#define IWM_RATE_BIT_MSK(r) (1 << (IWM_RATE_##r##M_INDEX))

/* fw API values for legacy bit rates, both OFDM and CCK */
enum {
	IWM_RATE_6M_PLCP = 13,
	IWM_RATE_9M_PLCP = 15,
	IWM_RATE_12M_PLCP = 5,
	IWM_RATE_18M_PLCP = 7,
	IWM_RATE_24M_PLCP = 9,
	IWM_RATE_36M_PLCP = 11,
	IWM_RATE_48M_PLCP = 1,
	IWM_RATE_54M_PLCP = 3,
	IWM_RATE_1M_PLCP = 10,
	IWM_RATE_2M_PLCP = 20,
	IWM_RATE_5M_PLCP = 55,
	IWM_RATE_11M_PLCP = 110,
	IWM_RATE_INVM_PLCP = -1,
};

/*
 * rate_n_flags bit fields
 *
 * The 32-bit value has different layouts in the low 8 bits depending on the
 * format. There are three formats: HT, VHT and legacy (11abg, with subformats
 * for CCK and OFDM).
 *
 * High-throughput (HT) rate format
 * bit 8 is 1, bit 26 is 0, bit 9 is 0 (OFDM)
 * Very High-throughput (VHT) rate format
 * bit 8 is 0, bit 26 is 1, bit 9 is 0 (OFDM)
 * Legacy OFDM rate format for bits 7:0
 * bit 8 is 0, bit 26 is 0, bit 9 is 0 (OFDM)
 * Legacy CCK rate format for bits 7:0:
 * bit 8 is 0, bit 26 is 0, bit 9 is 1 (CCK)
 */

/* Bit 8: (1) HT format, (0) legacy or VHT format */
#define IWM_RATE_MCS_HT_POS 8
#define IWM_RATE_MCS_HT_MSK (1 << IWM_RATE_MCS_HT_POS)

/* Bit 9: (1) CCK, (0) OFDM. HT (bit 8) must be "0" for this bit to be valid */
#define IWM_RATE_MCS_CCK_POS 9
#define IWM_RATE_MCS_CCK_MSK (1 << IWM_RATE_MCS_CCK_POS)

/* Bit 26: (1) VHT format, (0) legacy format in bits 8:0 */
#define IWM_RATE_MCS_VHT_POS 26
#define IWM_RATE_MCS_VHT_MSK (1 << IWM_RATE_MCS_VHT_POS)
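/*
 * Illustrative sketch (not part of the original header): classifying a
 * rate_n_flags word by the format bits described above. The helper name
 * is hypothetical.
 */
static inline const char *
iwm_example_rate_format(uint32_t rate_n_flags)
{
	if (rate_n_flags & IWM_RATE_MCS_HT_MSK)
		return ("HT");
	if (rate_n_flags & IWM_RATE_MCS_VHT_MSK)
		return ("VHT");
	if (rate_n_flags & IWM_RATE_MCS_CCK_MSK)
		return ("legacy CCK");
	return ("legacy OFDM");
}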
/*
 * High-throughput (HT) rate format for bits 7:0
 *
 * 2-0: MCS rate base
 * 0) 6 Mbps
 * 1) 12 Mbps
 * 2) 18 Mbps
 * 3) 24 Mbps
 * 4) 36 Mbps
 * 5) 48 Mbps
 * 6) 54 Mbps
 * 7) 60 Mbps
 * 4-3: 0) Single stream (SISO)
 * 1) Dual stream (MIMO)
 * 2) Triple stream (MIMO)
 * 5: Value of 0x20 in bits 7:0 indicates 6 Mbps HT40 duplicate data
 * (bits 7-6 are zero)
 *
 * Together the low 5 bits work out to the MCS index because we don't
 * support MCSes above 15/23, and 0-7 have one stream, 8-15 have two
 * streams and 16-23 have three streams. We could also support MCS 32
 * which is the duplicate 20 MHz MCS (bit 5 set, all others zero).
 */
#define IWM_RATE_HT_MCS_RATE_CODE_MSK 0x7
#define IWM_RATE_HT_MCS_NSS_POS 3
#define IWM_RATE_HT_MCS_NSS_MSK (3 << IWM_RATE_HT_MCS_NSS_POS)

/* Bit 10: (1) Use Green Field preamble */
#define IWM_RATE_HT_MCS_GF_POS 10
#define IWM_RATE_HT_MCS_GF_MSK (1 << IWM_RATE_HT_MCS_GF_POS)

#define IWM_RATE_HT_MCS_INDEX_MSK 0x3f

/*
 * Very High-throughput (VHT) rate format for bits 7:0
 *
 * 3-0: VHT MCS (0-9)
 * 5-4: number of streams - 1:
 * 0) Single stream (SISO)
 * 1) Dual stream (MIMO)
 * 2) Triple stream (MIMO)
 */

/* Bit 4-5: (0) SISO, (1) MIMO2 (2) MIMO3 */
#define IWM_RATE_VHT_MCS_RATE_CODE_MSK 0xf
#define IWM_RATE_VHT_MCS_NSS_POS 4
#define IWM_RATE_VHT_MCS_NSS_MSK (3 << IWM_RATE_VHT_MCS_NSS_POS)

/*
 * Legacy OFDM rate format for bits 7:0
 *
 * 3-0: 0xD) 6 Mbps
 * 0xF) 9 Mbps
 * 0x5) 12 Mbps
 * 0x7) 18 Mbps
 * 0x9) 24 Mbps
 * 0xB) 36 Mbps
 * 0x1) 48 Mbps
 * 0x3) 54 Mbps
 * (bits 7-4 are 0)
 *
 * Legacy CCK rate format for bits 7:0:
 * bit 8 is 0, bit 26 is 0, bit 9 is 1 (CCK):
 *
 * 6-0: 10) 1 Mbps
 * 20) 2 Mbps
 * 55) 5.5 Mbps
 * 110) 11 Mbps
 * (bit 7 is 0)
 */
#define IWM_RATE_LEGACY_RATE_MSK 0xff

/*
 * Bit 11-12: (0) 20MHz, (1) 40MHz, (2) 80MHz, (3) 160MHz
 * 0 and 1 are valid for HT and VHT, 2 and 3 only for VHT
 */
#define IWM_RATE_MCS_CHAN_WIDTH_POS 11
#define IWM_RATE_MCS_CHAN_WIDTH_MSK (3 << IWM_RATE_MCS_CHAN_WIDTH_POS)
#define IWM_RATE_MCS_CHAN_WIDTH_20 (0 << IWM_RATE_MCS_CHAN_WIDTH_POS)
#define IWM_RATE_MCS_CHAN_WIDTH_40 (1 << IWM_RATE_MCS_CHAN_WIDTH_POS)
#define IWM_RATE_MCS_CHAN_WIDTH_80 (2 << IWM_RATE_MCS_CHAN_WIDTH_POS)
#define IWM_RATE_MCS_CHAN_WIDTH_160 (3 << IWM_RATE_MCS_CHAN_WIDTH_POS)

/* Bit 13: (1) Short guard interval (0.4 usec), (0) normal GI (0.8 usec) */
#define IWM_RATE_MCS_SGI_POS 13
#define IWM_RATE_MCS_SGI_MSK (1 << IWM_RATE_MCS_SGI_POS)

/* Bit 14-16: Antenna selection (1) Ant A, (2) Ant B, (4) Ant C */
#define IWM_RATE_MCS_ANT_POS 14
#define IWM_RATE_MCS_ANT_A_MSK (1 << IWM_RATE_MCS_ANT_POS)
#define IWM_RATE_MCS_ANT_B_MSK (2 << IWM_RATE_MCS_ANT_POS)
#define IWM_RATE_MCS_ANT_C_MSK (4 << IWM_RATE_MCS_ANT_POS)
#define IWM_RATE_MCS_ANT_AB_MSK (IWM_RATE_MCS_ANT_A_MSK | \
	IWM_RATE_MCS_ANT_B_MSK)
#define IWM_RATE_MCS_ANT_ABC_MSK (IWM_RATE_MCS_ANT_AB_MSK | \
	IWM_RATE_MCS_ANT_C_MSK)
#define IWM_RATE_MCS_ANT_MSK IWM_RATE_MCS_ANT_ABC_MSK
#define IWM_RATE_MCS_ANT_NUM 3

/* Bit 17-18: (0) SS, (1) SS*2 */
#define IWM_RATE_MCS_STBC_POS 17
#define IWM_RATE_MCS_STBC_MSK (1 << IWM_RATE_MCS_STBC_POS)

/* Bit 19: (0) Beamforming is off, (1) Beamforming is on */
#define IWM_RATE_MCS_BF_POS 19
#define IWM_RATE_MCS_BF_MSK (1 << IWM_RATE_MCS_BF_POS)

/* Bit 20: (0) ZLF is off, (1) ZLF is on */
#define IWM_RATE_MCS_ZLF_POS 20
#define IWM_RATE_MCS_ZLF_MSK (1 << IWM_RATE_MCS_ZLF_POS)

/* Bit 24-25: (0) 20MHz (no dup), (1) 2x20MHz, (2) 4x20MHz, (3) 8x20MHz */
#define IWM_RATE_MCS_DUP_POS 24
#define IWM_RATE_MCS_DUP_MSK (3 << IWM_RATE_MCS_DUP_POS)

/* Bit 27: (1) LDPC enabled, (0) LDPC disabled */
#define IWM_RATE_MCS_LDPC_POS 27
#define IWM_RATE_MCS_LDPC_MSK (1 << IWM_RATE_MCS_LDPC_POS)

/* Link Quality definitions */

/* # entries in rate scale table to support Tx retries */
#define IWM_LQ_MAX_RETRY_NUM 16

/* Link quality command flags bit fields */

/* Bit 0: (0) Don't use RTS (1) Use RTS */
#define IWM_LQ_FLAG_USE_RTS_POS 0
#define IWM_LQ_FLAG_USE_RTS_MSK (1 << IWM_LQ_FLAG_USE_RTS_POS)
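/*
 * Illustrative sketch (not part of the original header): decoding the
 * number of spatial streams from a rate_n_flags word using the HT and VHT
 * NSS fields defined above. The helper name is hypothetical.
 */
static inline int
iwm_example_rate_nss(uint32_t rate_n_flags)
{
	if (rate_n_flags & IWM_RATE_MCS_HT_MSK)
		return (((rate_n_flags & IWM_RATE_HT_MCS_NSS_MSK) >>
		    IWM_RATE_HT_MCS_NSS_POS) + 1);
	if (rate_n_flags & IWM_RATE_MCS_VHT_MSK)
		return (((rate_n_flags & IWM_RATE_VHT_MCS_NSS_MSK) >>
		    IWM_RATE_VHT_MCS_NSS_POS) + 1);
	return (1);	/* legacy CCK/OFDM rates are single stream */
}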
/* Bit 1-3: LQ command color. Used to match responses to LQ commands */
#define IWM_LQ_FLAG_COLOR_POS 1
#define IWM_LQ_FLAG_COLOR_MSK (7 << IWM_LQ_FLAG_COLOR_POS)

/* Bit 4-5: Tx RTS BW Signalling
 * (0) No RTS BW signalling
 * (1) Static BW signalling
 * (2) Dynamic BW signalling
 */
#define IWM_LQ_FLAG_RTS_BW_SIG_POS 4
#define IWM_LQ_FLAG_RTS_BW_SIG_NONE (0 << IWM_LQ_FLAG_RTS_BW_SIG_POS)
#define IWM_LQ_FLAG_RTS_BW_SIG_STATIC (1 << IWM_LQ_FLAG_RTS_BW_SIG_POS)
#define IWM_LQ_FLAG_RTS_BW_SIG_DYNAMIC (2 << IWM_LQ_FLAG_RTS_BW_SIG_POS)

/* Bit 6: (0) No dynamic BW selection (1) Allow dynamic BW selection
 * Dynamic BW selection allows Tx with narrower BW than requested in rates
 */
#define IWM_LQ_FLAG_DYNAMIC_BW_POS 6
#define IWM_LQ_FLAG_DYNAMIC_BW_MSK (1 << IWM_LQ_FLAG_DYNAMIC_BW_POS)

/* Single Stream Tx Parameters (lq_cmd->ss_params)
 * Flags to control a smart FW decision about whether BFER/STBC/SISO will be
 * used for single stream Tx.
 */

/* Bit 0-1: Max STBC streams allowed. Can be 0-3.
 * (0) - No STBC allowed
 * (1) - 2x1 STBC allowed (HT/VHT)
 * (2) - 4x2 STBC allowed (HT/VHT)
 * (3) - 3x2 STBC allowed (HT only)
 * All our chips have at most 2 antennas so only (1) is valid for now.
 */
#define IWM_LQ_SS_STBC_ALLOWED_POS 0
#define IWM_LQ_SS_STBC_ALLOWED_MSK (3 << IWM_LQ_SS_STBC_ALLOWED_POS)

/* 2x1 STBC is allowed */
#define IWM_LQ_SS_STBC_1SS_ALLOWED (1 << IWM_LQ_SS_STBC_ALLOWED_POS)

/* Bit 2: Beamformer (VHT only) is allowed */
#define IWM_LQ_SS_BFER_ALLOWED_POS 2
#define IWM_LQ_SS_BFER_ALLOWED (1 << IWM_LQ_SS_BFER_ALLOWED_POS)

/* Bit 3: Force BFER or STBC for testing
 * If this is set:
 * If BFER is allowed then force the ucode to choose BFER else
 * If STBC is allowed then force the ucode to choose STBC over SISO
 */
#define IWM_LQ_SS_FORCE_POS 3
#define IWM_LQ_SS_FORCE (1 << IWM_LQ_SS_FORCE_POS)

/* Bit 31: ss_params field is valid. Used for FW backward compatibility
 * with other drivers which don't support the ss_params API yet
 */
#define IWM_LQ_SS_PARAMS_VALID_POS 31
#define IWM_LQ_SS_PARAMS_VALID (1 << IWM_LQ_SS_PARAMS_VALID_POS)

/**
 * struct iwm_lq_cmd - link quality command
 * @sta_id: station to update
 * @control: not used
 * @flags: combination of IWM_LQ_FLAG_*
 * @mimo_delim: the first SISO index in rs_table, which separates MIMO
 * and SISO rates
 * @single_stream_ant_msk: best antenna for SISO (can be dual in CDD).
 * Should be ANT_[ABC]
 * @dual_stream_ant_msk: best antennas for MIMO, combination of ANT_[ABC]
 * @initial_rate_index: first index from rs_table per AC category
 * @agg_time_limit: aggregation max time threshold in usec/100, meaning
 * value of 100 is one usec. Range is 100 to 8000
 * @agg_disable_start_th: try-count threshold for starting aggregation.
 * If a frame has higher try-count, it should not be selected for
 * starting an aggregation sequence.
 * @agg_frame_cnt_limit: max frame count in an aggregation.
 * 0: no limit
 * 1: no aggregation (one frame per aggregation)
 * 2 - 0x3f: maximal number of frames (up to 3f == 63)
 * @rs_table: array of rates for each TX try, each is rate_n_flags,
 * meaning it is a combination of IWM_RATE_MCS_* and IWM_RATE_*_PLCP
 * @ss_params: single stream features. declare whether STBC or BFER are allowed.
 */
struct iwm_lq_cmd {
	uint8_t sta_id;
	uint8_t reduced_tpc;
	uint16_t control;
	/* LINK_QUAL_GENERAL_PARAMS_API_S_VER_1 */
	uint8_t flags;
	uint8_t mimo_delim;
	uint8_t single_stream_ant_msk;
	uint8_t dual_stream_ant_msk;
	uint8_t initial_rate_index[IWM_AC_NUM];
	/* LINK_QUAL_AGG_PARAMS_API_S_VER_1 */
	uint16_t agg_time_limit;
	uint8_t agg_disable_start_th;
	uint8_t agg_frame_cnt_limit;
	uint32_t reserved2;
	uint32_t rs_table[IWM_LQ_MAX_RETRY_NUM];
	uint32_t ss_params;
}; /* LINK_QUALITY_CMD_API_S_VER_1 */

/*
 * END mvm/fw-api-rs.h
 */
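/*
 * Illustrative sketch (not part of the original header): a hypothetical
 * helper composing the flags byte of struct iwm_lq_cmd with RTS protection
 * and a command color, using the bit fields defined above.
 */
static inline uint8_t
iwm_example_lq_flags(uint8_t color)
{
	return (IWM_LQ_FLAG_USE_RTS_MSK |
	    ((color << IWM_LQ_FLAG_COLOR_POS) & IWM_LQ_FLAG_COLOR_MSK));
}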
/*
 * BEGIN mvm/fw-api-tx.h
 */

/**
 * enum iwm_tx_flags - bitmasks for tx_flags in TX command
 * @IWM_TX_CMD_FLG_PROT_REQUIRE: use RTS or CTS-to-self to protect the frame
 * @IWM_TX_CMD_FLG_ACK: expect ACK from receiving station
 * @IWM_TX_CMD_FLG_STA_RATE: use RS table with initial index from the TX command.
 * Otherwise, use rate_n_flags from the TX command
 * @IWM_TX_CMD_FLG_BA: this frame is a block ack
 * @IWM_TX_CMD_FLG_BAR: this frame is a BA request, immediate BAR is expected
 * Must set IWM_TX_CMD_FLG_ACK with this flag.
 * @IWM_TX_CMD_FLG_TXOP_PROT: protect frame with full TXOP protection
 * @IWM_TX_CMD_FLG_VHT_NDPA: mark frame as NDPA for VHT beamformer sequence
 * @IWM_TX_CMD_FLG_HT_NDPA: mark frame as NDPA for HT beamformer sequence
 * @IWM_TX_CMD_FLG_CSI_FDBK2HOST: mark to send feedback to host (only if good CRC)
 * @IWM_TX_CMD_FLG_BT_DIS: disable BT priority for this frame
 * @IWM_TX_CMD_FLG_SEQ_CTL: set if FW should override the sequence control.
 * Should be set for mgmt, non-QOS data, mcast, bcast and in scan command
 * @IWM_TX_CMD_FLG_MORE_FRAG: this frame is non-last MPDU
 * @IWM_TX_CMD_FLG_NEXT_FRAME: this frame includes information of the next frame
 * @IWM_TX_CMD_FLG_TSF: FW should calculate and insert TSF in the frame
 * Should be set for beacons and probe responses
 * @IWM_TX_CMD_FLG_CALIB: activate PA TX power calibrations
 * @IWM_TX_CMD_FLG_KEEP_SEQ_CTL: if seq_ctl is set, don't increase inner seq count
 * @IWM_TX_CMD_FLG_AGG_START: allow this frame to start aggregation
 * @IWM_TX_CMD_FLG_MH_PAD: driver inserted 2 byte padding after MAC header.
 * Should be set for 26/30 length MAC headers
 * @IWM_TX_CMD_FLG_RESP_TO_DRV: zero this if the response should go only to FW
 * @IWM_TX_CMD_FLG_TKIP_MIC_DONE: FW already performed TKIP MIC calculation
 * @IWM_TX_CMD_FLG_DUR: disable duration overwriting used in PS-Poll Assoc-id
 * @IWM_TX_CMD_FLG_FW_DROP: FW should mark frame to be dropped
 * @IWM_TX_CMD_FLG_EXEC_PAPD: execute PAPD
 * @IWM_TX_CMD_FLG_PAPD_TYPE: 0 for reference power, 1 for nominal power
 * @IWM_TX_CMD_FLG_HCCA_CHUNK: mark start of TSPEC chunk
 */
enum iwm_tx_flags {
	IWM_TX_CMD_FLG_PROT_REQUIRE = (1 << 0),
	IWM_TX_CMD_FLG_ACK = (1 << 3),
	IWM_TX_CMD_FLG_STA_RATE = (1 << 4),
	IWM_TX_CMD_FLG_BA = (1 << 5),
	IWM_TX_CMD_FLG_BAR = (1 << 6),
	IWM_TX_CMD_FLG_TXOP_PROT = (1 << 7),
	IWM_TX_CMD_FLG_VHT_NDPA = (1 << 8),
	IWM_TX_CMD_FLG_HT_NDPA = (1 << 9),
	IWM_TX_CMD_FLG_CSI_FDBK2HOST = (1 << 10),
	IWM_TX_CMD_FLG_BT_DIS = (1 << 12),
	IWM_TX_CMD_FLG_SEQ_CTL = (1 << 13),
	IWM_TX_CMD_FLG_MORE_FRAG = (1 << 14),
	IWM_TX_CMD_FLG_NEXT_FRAME = (1 << 15),
	IWM_TX_CMD_FLG_TSF = (1 << 16),
	IWM_TX_CMD_FLG_CALIB = (1 << 17),
	IWM_TX_CMD_FLG_KEEP_SEQ_CTL = (1 << 18),
	IWM_TX_CMD_FLG_AGG_START = (1 << 19),
	IWM_TX_CMD_FLG_MH_PAD = (1 << 20),
	IWM_TX_CMD_FLG_RESP_TO_DRV = (1 << 21),
	IWM_TX_CMD_FLG_TKIP_MIC_DONE = (1 << 23),
	IWM_TX_CMD_FLG_DUR = (1 << 25),
	IWM_TX_CMD_FLG_FW_DROP = (1 << 26),
	IWM_TX_CMD_FLG_EXEC_PAPD = (1 << 27),
	IWM_TX_CMD_FLG_PAPD_TYPE = (1 << 28),
	IWM_TX_CMD_FLG_HCCA_CHUNK = (1 << 31)
}; /* IWM_TX_FLAGS_BITS_API_S_VER_1 */

/**
 * enum iwm_tx_pm_timeouts - pm timeout values in TX command
 * @IWM_PM_FRAME_NONE: no need to suspend sleep mode
 * @IWM_PM_FRAME_MGMT: fw suspends sleep mode for 100 TU
 * @IWM_PM_FRAME_ASSOC: fw suspends sleep mode for 10 sec
 */
enum iwm_tx_pm_timeouts {
	IWM_PM_FRAME_NONE = 0,
	IWM_PM_FRAME_MGMT = 2,
	IWM_PM_FRAME_ASSOC = 3,
};

/*
 * TX command security control
 */
#define IWM_TX_CMD_SEC_WEP 0x01
#define IWM_TX_CMD_SEC_CCM 0x02
#define IWM_TX_CMD_SEC_TKIP 0x03
#define IWM_TX_CMD_SEC_EXT 0x04
#define IWM_TX_CMD_SEC_MSK 0x07
#define IWM_TX_CMD_SEC_WEP_KEY_IDX_POS 6
#define IWM_TX_CMD_SEC_WEP_KEY_IDX_MSK 0xc0
#define IWM_TX_CMD_SEC_KEY128 0x08

/* TODO: how are these values OK with only a 16-bit variable???
 */

/*
 * TX command next frame info
 *
 * bits 0:2 - security control (IWM_TX_CMD_SEC_*)
 * bit 3 - immediate ACK required
 * bit 4 - rate is taken from STA table
 * bit 5 - frame belongs to BA stream
 * bit 6 - immediate BA response expected
 * bit 7 - unused
 * bits 8:15 - Station ID
 * bits 16:31 - rate
 */
#define IWM_TX_CMD_NEXT_FRAME_ACK_MSK (0x8)
#define IWM_TX_CMD_NEXT_FRAME_STA_RATE_MSK (0x10)
#define IWM_TX_CMD_NEXT_FRAME_BA_MSK (0x20)
#define IWM_TX_CMD_NEXT_FRAME_IMM_BA_RSP_MSK (0x40)
#define IWM_TX_CMD_NEXT_FRAME_FLAGS_MSK (0xf8)
#define IWM_TX_CMD_NEXT_FRAME_STA_ID_MSK (0xff00)
#define IWM_TX_CMD_NEXT_FRAME_STA_ID_POS (8)
#define IWM_TX_CMD_NEXT_FRAME_RATE_MSK (0xffff0000)
#define IWM_TX_CMD_NEXT_FRAME_RATE_POS (16)

/*
 * TX command Frame life time in us - to be written in pm_frame_timeout
 */
#define IWM_TX_CMD_LIFE_TIME_INFINITE 0xFFFFFFFF
#define IWM_TX_CMD_LIFE_TIME_DEFAULT 2000000 /* 2000 ms */
#define IWM_TX_CMD_LIFE_TIME_PROBE_RESP 40000 /* 40 ms */
#define IWM_TX_CMD_LIFE_TIME_EXPIRED_FRAME 0

/*
 * TID for non QoS frames - to be written in tid_tspec
 */
#define IWM_TID_NON_QOS IWM_MAX_TID_COUNT

/*
 * Limits on the retransmissions - to be written in {data,rts}_retry_limit
 */
#define IWM_DEFAULT_TX_RETRY 15
#define IWM_MGMT_DFAULT_RETRY_LIMIT 3
#define IWM_RTS_DFAULT_RETRY_LIMIT 60
#define IWM_BAR_DFAULT_RETRY_LIMIT 60
#define IWM_LOW_RETRY_LIMIT 7

/* TODO: complete documentation for try_cnt and btkill_cnt */
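/*
 * Illustrative sketch (not part of the original header): packing the
 * next-frame info word laid out above. The helper name is hypothetical.
 */
static inline uint32_t
iwm_example_next_frame_info(uint8_t sec_ctl, uint8_t sta_id, uint16_t rate)
{
	return ((sec_ctl & IWM_TX_CMD_SEC_MSK) |
	    ((uint32_t)sta_id << IWM_TX_CMD_NEXT_FRAME_STA_ID_POS) |
	    ((uint32_t)rate << IWM_TX_CMD_NEXT_FRAME_RATE_POS));
}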
/**
 * struct iwm_tx_cmd - TX command struct to FW
 * ( IWM_TX_CMD = 0x1c )
 * @len: in bytes of the payload, see below for details
 * @next_frame_len: same as len, but for next frame (0 if not applicable)
 * Used for fragmentation and bursting, but not in 11n aggregation.
 * @tx_flags: combination of IWM_TX_CMD_FLG_*
 * @rate_n_flags: rate for *all* Tx attempts, if IWM_TX_CMD_FLG_STA_RATE_MSK is
 * cleared. Combination of IWM_RATE_MCS_*
 * @sta_id: index of destination station in FW station table
 * @sec_ctl: security control, IWM_TX_CMD_SEC_*
 * @initial_rate_index: index into the rate table for initial TX attempt.
 * Applied if IWM_TX_CMD_FLG_STA_RATE_MSK is set, normally 0 for data frames.
 * @key: security key
 * @next_frame_flags: IWM_TX_CMD_SEC_* and IWM_TX_CMD_NEXT_FRAME_*
 * @life_time: frame life time (usecs??)
 * @dram_lsb_ptr: Physical address of scratch area in the command (try_cnt +
 * btkill_cnt + reserved), first 32 bits. "0" disables usage.
 * @dram_msb_ptr: upper bits of the scratch physical address
 * @rts_retry_limit: max attempts for RTS
 * @data_retry_limit: max attempts to send the data packet
 * @tid_tspec: TID/tspec
 * @pm_frame_timeout: PM TX frame timeout
 * @driver_txop: duration of EDCA TXOP, in 32-usec units. Set this if not
 * specified by HCCA protocol
 *
 * The byte count (both len and next_frame_len) includes MAC header
 * (24/26/30/32 bytes)
 * + 2 bytes pad if 26/30 header size
 * + 8 byte IV for CCM or TKIP (not used for WEP)
 * + Data payload
 * + 8-byte MIC (not used for CCM/WEP)
 * It does not include post-MAC padding, i.e.,
 * MIC (CCM) 8 bytes, ICV (WEP/TKIP/CKIP) 4 bytes, CRC 4 bytes.
 * Range of len: 14-2342 bytes.
 *
 * After the struct fields the MAC header is placed, plus any padding,
 * and then the actual payload.
 */
struct iwm_tx_cmd {
	uint16_t len;
	uint16_t next_frame_len;
	uint32_t tx_flags;
	struct {
		uint8_t try_cnt;
		uint8_t btkill_cnt;
		uint16_t reserved;
	} scratch; /* DRAM_SCRATCH_API_U_VER_1 */
	uint32_t rate_n_flags;
	uint8_t sta_id;
	uint8_t sec_ctl;
	uint8_t initial_rate_index;
	uint8_t reserved2;
	uint8_t key[16];
	uint16_t next_frame_flags;
	uint16_t reserved3;
	uint32_t life_time;
	uint32_t dram_lsb_ptr;
	uint8_t dram_msb_ptr;
	uint8_t rts_retry_limit;
	uint8_t data_retry_limit;
	uint8_t tid_tspec;
	uint16_t pm_frame_timeout;
	uint16_t driver_txop;
	uint8_t payload[0];
	struct ieee80211_frame hdr[0];
} __packed; /* IWM_TX_CMD_API_S_VER_3 */

/*
 * TX response related data
 */

/*
 * enum iwm_tx_status - status that is returned by the fw after attempts to Tx
 * @IWM_TX_STATUS_SUCCESS:
 * @IWM_TX_STATUS_DIRECT_DONE:
 * @IWM_TX_STATUS_POSTPONE_DELAY:
 * @IWM_TX_STATUS_POSTPONE_FEW_BYTES:
 * @IWM_TX_STATUS_POSTPONE_BT_PRIO:
 * @IWM_TX_STATUS_POSTPONE_QUIET_PERIOD:
 * @IWM_TX_STATUS_POSTPONE_CALC_TTAK:
 * @IWM_TX_STATUS_FAIL_INTERNAL_CROSSED_RETRY:
 * @IWM_TX_STATUS_FAIL_SHORT_LIMIT:
 * @IWM_TX_STATUS_FAIL_LONG_LIMIT:
 * @IWM_TX_STATUS_FAIL_UNDERRUN:
 * @IWM_TX_STATUS_FAIL_DRAIN_FLOW:
 * @IWM_TX_STATUS_FAIL_RFKILL_FLUSH:
 * @IWM_TX_STATUS_FAIL_LIFE_EXPIRE:
 * @IWM_TX_STATUS_FAIL_DEST_PS:
 * @IWM_TX_STATUS_FAIL_HOST_ABORTED:
 * @IWM_TX_STATUS_FAIL_BT_RETRY:
 * @IWM_TX_STATUS_FAIL_STA_INVALID:
 * @IWM_TX_STATUS_FAIL_FRAG_DROPPED:
 * @IWM_TX_STATUS_FAIL_TID_DISABLE:
 * @IWM_TX_STATUS_FAIL_FIFO_FLUSHED:
 * @IWM_TX_STATUS_FAIL_SMALL_CF_POLL:
 * @IWM_TX_STATUS_FAIL_FW_DROP:
 * @IWM_TX_STATUS_FAIL_STA_COLOR_MISMATCH: mismatch between color of Tx cmd and
 * STA table
 * @IWM_TX_STATUS_INTERNAL_ABORT:
 * @IWM_TX_MODE_MSK:
 * @IWM_TX_MODE_NO_BURST:
 * @IWM_TX_MODE_IN_BURST_SEQ:
 * @IWM_TX_MODE_FIRST_IN_BURST:
 * @IWM_TX_QUEUE_NUM_MSK:
 *
 * Valid only if frame_count == 1
 * TODO: complete documentation
 */
enum iwm_tx_status {
	IWM_TX_STATUS_MSK = 0x000000ff,
	IWM_TX_STATUS_SUCCESS = 0x01,
	IWM_TX_STATUS_DIRECT_DONE = 0x02,
	/* postpone TX */
	IWM_TX_STATUS_POSTPONE_DELAY = 0x40,
	IWM_TX_STATUS_POSTPONE_FEW_BYTES = 0x41,
	IWM_TX_STATUS_POSTPONE_BT_PRIO = 0x42,
	IWM_TX_STATUS_POSTPONE_QUIET_PERIOD = 0x43,
	IWM_TX_STATUS_POSTPONE_CALC_TTAK = 0x44,
	/* abort TX */
	IWM_TX_STATUS_FAIL_INTERNAL_CROSSED_RETRY = 0x81,
	IWM_TX_STATUS_FAIL_SHORT_LIMIT = 0x82,
	IWM_TX_STATUS_FAIL_LONG_LIMIT = 0x83,
	IWM_TX_STATUS_FAIL_UNDERRUN = 0x84,
	IWM_TX_STATUS_FAIL_DRAIN_FLOW = 0x85,
	IWM_TX_STATUS_FAIL_RFKILL_FLUSH = 0x86,
	IWM_TX_STATUS_FAIL_LIFE_EXPIRE = 0x87,
	IWM_TX_STATUS_FAIL_DEST_PS = 0x88,
	IWM_TX_STATUS_FAIL_HOST_ABORTED = 0x89,
	IWM_TX_STATUS_FAIL_BT_RETRY = 0x8a,
	IWM_TX_STATUS_FAIL_STA_INVALID = 0x8b,
	IWM_TX_STATUS_FAIL_FRAG_DROPPED = 0x8c,
	IWM_TX_STATUS_FAIL_TID_DISABLE = 0x8d,
	IWM_TX_STATUS_FAIL_FIFO_FLUSHED = 0x8e,
	IWM_TX_STATUS_FAIL_SMALL_CF_POLL = 0x8f,
	IWM_TX_STATUS_FAIL_FW_DROP = 0x90,
	IWM_TX_STATUS_FAIL_STA_COLOR_MISMATCH = 0x91,
	IWM_TX_STATUS_INTERNAL_ABORT = 0x92,
	IWM_TX_MODE_MSK = 0x00000f00,
	IWM_TX_MODE_NO_BURST = 0x00000000,
	IWM_TX_MODE_IN_BURST_SEQ = 0x00000100,
	IWM_TX_MODE_FIRST_IN_BURST = 0x00000200,
	IWM_TX_QUEUE_NUM_MSK = 0x0001f000,
	IWM_TX_NARROW_BW_MSK = 0x00060000,
	IWM_TX_NARROW_BW_1DIV2 = 0x00020000,
	IWM_TX_NARROW_BW_1DIV4 = 0x00040000,
	IWM_TX_NARROW_BW_1DIV8 = 0x00060000,
};

/*
 * enum iwm_tx_agg_status - TX aggregation status
 * @IWM_AGG_TX_STATE_STATUS_MSK:
 * @IWM_AGG_TX_STATE_TRANSMITTED:
 * @IWM_AGG_TX_STATE_UNDERRUN:
 * @IWM_AGG_TX_STATE_BT_PRIO:
 * @IWM_AGG_TX_STATE_FEW_BYTES:
 * @IWM_AGG_TX_STATE_ABORT:
 * @IWM_AGG_TX_STATE_LAST_SENT_TTL:
 * @IWM_AGG_TX_STATE_LAST_SENT_TRY_CNT:
 * @IWM_AGG_TX_STATE_LAST_SENT_BT_KILL:
 * @IWM_AGG_TX_STATE_SCD_QUERY:
 * @IWM_AGG_TX_STATE_TEST_BAD_CRC32:
 * @IWM_AGG_TX_STATE_RESPONSE:
 * @IWM_AGG_TX_STATE_DUMP_TX:
 * @IWM_AGG_TX_STATE_DELAY_TX:
 * @IWM_AGG_TX_STATE_TRY_CNT_MSK: Retry count for 1st frame in aggregation (retries
 * occur if tx failed for this frame when it was a member of a previous
 * aggregation block). If rate scaling is used, retry count indicates the
 * rate table entry used for all frames in the new agg.
 * @IWM_AGG_TX_STATE_SEQ_NUM_MSK: Command ID and sequence number of Tx command for
 * this frame
 *
 * TODO: complete documentation
 */
enum iwm_tx_agg_status {
	IWM_AGG_TX_STATE_STATUS_MSK = 0x00fff,
	IWM_AGG_TX_STATE_TRANSMITTED = 0x000,
	IWM_AGG_TX_STATE_UNDERRUN = 0x001,
	IWM_AGG_TX_STATE_BT_PRIO = 0x002,
	IWM_AGG_TX_STATE_FEW_BYTES = 0x004,
	IWM_AGG_TX_STATE_ABORT = 0x008,
	IWM_AGG_TX_STATE_LAST_SENT_TTL = 0x010,
	IWM_AGG_TX_STATE_LAST_SENT_TRY_CNT = 0x020,
	IWM_AGG_TX_STATE_LAST_SENT_BT_KILL = 0x040,
	IWM_AGG_TX_STATE_SCD_QUERY = 0x080,
	IWM_AGG_TX_STATE_TEST_BAD_CRC32 = 0x0100,
	IWM_AGG_TX_STATE_RESPONSE = 0x1ff,
	IWM_AGG_TX_STATE_DUMP_TX = 0x200,
	IWM_AGG_TX_STATE_DELAY_TX = 0x400,
	IWM_AGG_TX_STATE_TRY_CNT_POS = 12,
	IWM_AGG_TX_STATE_TRY_CNT_MSK = 0xf << IWM_AGG_TX_STATE_TRY_CNT_POS,
};

#define IWM_AGG_TX_STATE_LAST_SENT_MSK (IWM_AGG_TX_STATE_LAST_SENT_TTL| \
	IWM_AGG_TX_STATE_LAST_SENT_TRY_CNT| \
	IWM_AGG_TX_STATE_LAST_SENT_BT_KILL)

/*
 * The mask below describes a status where we are absolutely sure that the MPDU
 * wasn't sent. For BA/Underrun we cannot be that sure. All we know is that
 * we've written the bytes to the TXE, but we know nothing about what the DSP
 * did.
 */
#define IWM_AGG_TX_STAT_FRAME_NOT_SENT (IWM_AGG_TX_STATE_FEW_BYTES | \
	IWM_AGG_TX_STATE_ABORT | \
	IWM_AGG_TX_STATE_SCD_QUERY)

/*
 * IWM_REPLY_TX = 0x1c (response)
 *
 * This response may be in one of two slightly different formats, indicated
 * by the frame_count field:
 *
 * 1) No aggregation (frame_count == 1). This reports Tx results for a single
 * frame. Multiple attempts, at various bit rates, may have been made for
 * this frame.
 *
 * 2) Aggregation (frame_count > 1). This reports Tx results for two or more
 * frames that used block-acknowledge. All frames were transmitted at
 * same rate. Rate scaling may have been used if first frame in this new
 * agg block failed in previous agg block(s).
 *
 * Note that, for aggregation, ACK (block-ack) status is not delivered
 * here; block-ack has not been received by the time the device records
 * this status.
 * This status relates to reasons the tx might have been blocked or aborted
 * within the device, rather than whether it was received successfully by
 * the destination station.
 */
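/*
 * Illustrative sketch (not part of the original header): the low status
 * byte of a Tx response distinguishes success from postpone/abort, as
 * described above. The helper name is hypothetical.
 */
static inline int
iwm_example_tx_status_success(uint32_t status)
{
	uint32_t s = status & IWM_TX_STATUS_MSK;

	return (s == IWM_TX_STATUS_SUCCESS || s == IWM_TX_STATUS_DIRECT_DONE);
}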
/**
 * struct iwm_agg_tx_status - per packet TX aggregation status
 * @status: enum iwm_tx_agg_status
 * @sequence: Sequence # for this frame's Tx cmd (not SSN!)
 */
struct iwm_agg_tx_status {
	uint16_t status;
	uint16_t sequence;
} __packed;

/*
 * definitions for initial rate index field
 * bits [3:0] initial rate index
 * bits [6:4] rate table color, used for the initial rate
 * bit-7 invalid rate indication
 */
#define IWM_TX_RES_INIT_RATE_INDEX_MSK 0x0f
#define IWM_TX_RES_RATE_TABLE_COLOR_MSK 0x70
#define IWM_TX_RES_INV_RATE_INDEX_MSK 0x80

#define IWM_MVM_TX_RES_GET_TID(_ra_tid) ((_ra_tid) & 0x0f)
#define IWM_MVM_TX_RES_GET_RA(_ra_tid) ((_ra_tid) >> 4)

/**
 * struct iwm_mvm_tx_resp - notifies that fw is TXing a packet
 * ( IWM_REPLY_TX = 0x1c )
 * @frame_count: 1 no aggregation, >1 aggregation
 * @bt_kill_count: num of times blocked by bluetooth (unused for agg)
 * @failure_rts: num of failures due to unsuccessful RTS
 * @failure_frame: num failures due to no ACK (unused for agg)
 * @initial_rate: for non-agg: rate of the successful Tx. For agg: rate of the
 * Tx of all the batch. IWM_RATE_MCS_*
 * @wireless_media_time: for non-agg: RTS + CTS + frame tx attempts time + ACK.
 * for agg: RTS + CTS + aggregation tx time + block-ack time.
 * in usec.
 * @pa_status: tx power info
 * @pa_integ_res_a: tx power info
 * @pa_integ_res_b: tx power info
 * @pa_integ_res_c: tx power info
 * @measurement_req_id: tx power info
 * @tfd_info: TFD information set by the FH
 * @seq_ctl: sequence control from the Tx cmd
 * @byte_cnt: byte count from the Tx cmd
 * @tlc_info: TLC rate info
 * @ra_tid: bits [3:0] = ra, bits [7:4] = tid
 * @frame_ctrl: frame control
 * @status: for non-agg: frame status IWM_TX_STATUS_*
 * for agg: status of 1st frame, IWM_AGG_TX_STATE_*; other frame status fields
 * follow this one, up to frame_count.
 *
 * After the array of statuses comes the SSN of the SCD. Look at
 * %iwm_mvm_get_scd_ssn for more details.
 */
struct iwm_mvm_tx_resp {
	uint8_t frame_count;
	uint8_t bt_kill_count;
	uint8_t failure_rts;
	uint8_t failure_frame;
	uint32_t initial_rate;
	uint16_t wireless_media_time;
	uint8_t pa_status;
	uint8_t pa_integ_res_a[3];
	uint8_t pa_integ_res_b[3];
	uint8_t pa_integ_res_c[3];
	uint16_t measurement_req_id;
	uint8_t reduced_tpc;
	uint8_t reserved;
	uint32_t tfd_info;
	uint16_t seq_ctl;
	uint16_t byte_cnt;
	uint8_t tlc_info;
	uint8_t ra_tid;
	uint16_t frame_ctrl;
	struct iwm_agg_tx_status status;
} __packed; /* IWM_TX_RSP_API_S_VER_3 */

/**
 * struct iwm_mvm_ba_notif - notifies about reception of BA
 * ( IWM_BA_NOTIF = 0xc5 )
 * @sta_addr_lo32: lower 32 bits of the MAC address
 * @sta_addr_hi16: upper 16 bits of the MAC address
 * @sta_id: Index of recipient (BA-sending) station in fw's station table
 * @tid: tid of the session
 * @seq_ctl:
 * @bitmap: the bitmap of the BA notification as seen in the air
 * @scd_flow: the tx queue this BA relates to
 * @scd_ssn: the index of the last contiguously sent packet
 * @txed: number of Txed frames in this batch
 * @txed_2_done: number of Acked frames in this batch
 */
struct iwm_mvm_ba_notif {
	uint32_t sta_addr_lo32;
	uint16_t sta_addr_hi16;
	uint16_t reserved;
	uint8_t sta_id;
	uint8_t tid;
	uint16_t seq_ctl;
	uint64_t bitmap;
	uint16_t scd_flow;
	uint16_t scd_ssn;
	uint8_t txed;
	uint8_t txed_2_done;
	uint16_t reserved1;
} __packed;
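/*
 * Illustrative sketch (not part of the original header): splitting the
 * ra_tid byte of struct iwm_mvm_tx_resp with the accessor macros above.
 * The helper name is hypothetical.
 */
static inline void
iwm_example_split_ra_tid(uint8_t ra_tid, int *ra, int *tid)
{
	*tid = IWM_MVM_TX_RES_GET_TID(ra_tid);
	*ra = IWM_MVM_TX_RES_GET_RA(ra_tid);
}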
/*
 * struct iwm_mac_beacon_cmd - beacon template command
 * @tx: the tx commands associated with the beacon frame
 * @template_id: currently equal to the mac context id of the corresponding
 * mac.
 * @tim_idx: the offset of the tim IE in the beacon
 * @tim_size: the length of the tim IE
 * @frame: the template of the beacon frame
 */
struct iwm_mac_beacon_cmd {
	struct iwm_tx_cmd tx;
	uint32_t template_id;
	uint32_t tim_idx;
	uint32_t tim_size;
	struct ieee80211_frame frame[0];
} __packed;

struct iwm_beacon_notif {
	struct iwm_mvm_tx_resp beacon_notify_hdr;
	uint64_t tsf;
	uint32_t ibss_mgr_status;
} __packed;

/**
 * enum iwm_dump_control - dump (flush) control flags
 * @IWM_DUMP_TX_FIFO_FLUSH: Dump MSDUs until the FIFO is empty
 * and the TFD queues are empty.
 */
enum iwm_dump_control {
	IWM_DUMP_TX_FIFO_FLUSH = (1 << 1),
};

/**
 * struct iwm_tx_path_flush_cmd -- queue/FIFO flush command
 * @queues_ctl: bitmap of queues to flush
 * @flush_ctl: control flags
 * @reserved: reserved
 */
struct iwm_tx_path_flush_cmd {
	uint32_t queues_ctl;
	uint16_t flush_ctl;
	uint16_t reserved;
} __packed; /* IWM_TX_PATH_FLUSH_CMD_API_S_VER_1 */

/**
 * iwm_mvm_get_scd_ssn - returns the SSN of the SCD
 * @tx_resp: the Tx response from the fw (agg or non-agg)
 *
 * When the fw sends an AMPDU, it fetches the MPDUs one after the other. Since
 * it can't know that everything will go well until the end of the AMPDU, it
 * can't know in advance the number of MPDUs that will be sent in the current
 * batch. This is why it writes the agg Tx response while it fetches the MPDUs.
 * Hence, it can't know in advance what the SSN of the SCD will be at the end
 * of the batch. This is why the SSN of the SCD is written at the end of the
 * whole struct at a variable offset. This function knows how to cope with the
 * variable offset and returns the SSN of the SCD.
 */
static inline uint32_t iwm_mvm_get_scd_ssn(struct iwm_mvm_tx_resp *tx_resp)
{
	return le32_to_cpup((uint32_t *)&tx_resp->status +
	    tx_resp->frame_count) & 0xfff;
}

/*
 * END mvm/fw-api-tx.h
 */

/*
 * BEGIN mvm/fw-api-scan.h
 */

/**
 * struct iwm_scd_txq_cfg_cmd - New txq hw scheduler config command
 * @token:
 * @sta_id: station id
 * @tid:
 * @scd_queue: scheduler queue to configure
 * @enable: 1 queue enable, 0 queue disable
 * @aggregate: 1 aggregated queue, 0 otherwise
 * @tx_fifo: %enum iwm_mvm_tx_fifo
 * @window: BA window size
 * @ssn: SSN for the BA agreement
 */
struct iwm_scd_txq_cfg_cmd {
	uint8_t token;
	uint8_t sta_id;
	uint8_t tid;
	uint8_t scd_queue;
	uint8_t enable;
	uint8_t aggregate;
	uint8_t tx_fifo;
	uint8_t window;
	uint16_t ssn;
	uint16_t reserved;
} __packed; /* SCD_QUEUE_CFG_CMD_API_S_VER_1 */

/**
 * struct iwm_scd_txq_cfg_rsp
 * @token: taken from the command
 * @sta_id: station id from the command
 * @tid: tid from the command
 * @scd_queue: scd_queue from the command
 */
struct iwm_scd_txq_cfg_rsp {
	uint8_t token;
	uint8_t sta_id;
	uint8_t tid;
	uint8_t scd_queue;
} __packed; /* SCD_QUEUE_CFG_RSP_API_S_VER_1 */

/* Scan Commands, Responses, Notifications */

/* Masks for iwm_scan_channel.type flags */
#define IWM_SCAN_CHANNEL_TYPE_ACTIVE (1 << 0)
#define IWM_SCAN_CHANNEL_NSSIDS(x) (((1 << (x)) - 1) << 1)

/* Max number of IEs for direct SSID scans in a command */
#define IWM_PROBE_OPTION_MAX 20
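/*
 * Illustrative sketch (not part of the original header): composing a scan
 * channel type word that marks the channel active and direct-scans the
 * first two SSID entries, using the masks above. The helper name and
 * combination are assumptions for demonstration.
 */
static inline uint32_t
iwm_example_scan_channel_type(void)
{
	return (IWM_SCAN_CHANNEL_TYPE_ACTIVE | IWM_SCAN_CHANNEL_NSSIDS(2));
}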
/**
 * struct iwm_ssid_ie - directed scan network information element
 *
 * Up to 20 of these may appear in IWM_REPLY_SCAN_CMD,
 * selected by "type" bit field in struct iwm_scan_channel;
 * each channel may select different ssids from among the 20 entries.
 * SSID IEs get transmitted in reverse order of entry.
 */
struct iwm_ssid_ie {
	uint8_t id;
	uint8_t len;
	uint8_t ssid[IEEE80211_NWID_LEN];
} __packed; /* IWM_SCAN_DIRECT_SSID_IE_API_S_VER_1 */

/* scan offload */
#define IWM_SCAN_MAX_BLACKLIST_LEN 64
#define IWM_SCAN_SHORT_BLACKLIST_LEN 16
#define IWM_SCAN_MAX_PROFILES 11
#define IWM_SCAN_OFFLOAD_PROBE_REQ_SIZE 512

/* Default watchdog (in MS) for scheduled scan iteration */
#define IWM_SCHED_SCAN_WATCHDOG cpu_to_le16(15000)

#define IWM_GOOD_CRC_TH_DEFAULT cpu_to_le16(1)
#define IWM_CAN_ABORT_STATUS 1

#define IWM_FULL_SCAN_MULTIPLIER 5
#define IWM_FAST_SCHED_SCAN_ITERATIONS 3
#define IWM_MAX_SCHED_SCAN_PLANS 2

/**
 * iwm_scan_schedule_lmac - schedule of scan offload
 * @delay: delay between iterations, in seconds.
 * @iterations: num of scan iterations
 * @full_scan_mul: number of partial scans before each full scan
 */
struct iwm_scan_schedule_lmac {
	uint16_t delay;
	uint8_t iterations;
	uint8_t full_scan_mul;
} __packed; /* SCAN_SCHEDULE_API_S */

/**
 * iwm_scan_req_tx_cmd - SCAN_REQ_TX_CMD_API_S
 * @tx_flags: combination of TX_CMD_FLG_*
 * @rate_n_flags: rate for *all* Tx attempts, if TX_CMD_FLG_STA_RATE_MSK is
 * cleared. Combination of RATE_MCS_*
 * @sta_id: index of destination station in FW station table
 * @reserved: for alignment and future use
 */
struct iwm_scan_req_tx_cmd {
	uint32_t tx_flags;
	uint32_t rate_n_flags;
	uint8_t sta_id;
	uint8_t reserved[3];
} __packed;

enum iwm_scan_channel_flags_lmac {
	IWM_UNIFIED_SCAN_CHANNEL_FULL = (1 << 27),
	IWM_UNIFIED_SCAN_CHANNEL_PARTIAL = (1 << 28),
};

/**
 * iwm_scan_channel_cfg_lmac - SCAN_CHANNEL_CFG_S_VER2
 * @flags: bits 1-20: directed scan to i'th ssid
 * other bits &enum iwm_scan_channel_flags_lmac
 * @channel_num: channel number 1-13 etc
 * @iter_count: scan iteration on this channel
 * @iter_interval: interval in seconds between iterations on one channel
 */
struct iwm_scan_channel_cfg_lmac {
	uint32_t flags;
	uint16_t channel_num;
	uint16_t iter_count;
	uint32_t iter_interval;
} __packed;

/*
 * iwm_scan_probe_segment - PROBE_SEGMENT_API_S_VER_1
 * @offset: offset in the data block
 * @len: length of the segment
 */
struct iwm_scan_probe_segment {
	uint16_t offset;
	uint16_t len;
} __packed;

/* iwm_scan_probe_req - PROBE_REQUEST_FRAME_API_S_VER_2
 * @mac_header: first (and common) part of the probe
 * @band_data: band specific data
 * @common_data: last (and common) part of the probe
 * @buf: raw data block
 */
struct iwm_scan_probe_req {
	struct iwm_scan_probe_segment mac_header;
	struct iwm_scan_probe_segment band_data[2];
	struct iwm_scan_probe_segment common_data;
	uint8_t buf[IWM_SCAN_OFFLOAD_PROBE_REQ_SIZE];
} __packed;

enum iwm_scan_channel_flags {
	IWM_SCAN_CHANNEL_FLAG_EBS = (1 << 0),
	IWM_SCAN_CHANNEL_FLAG_EBS_ACCURATE = (1 << 1),
	IWM_SCAN_CHANNEL_FLAG_CACHE_ADD = (1 << 2),
};

/* iwm_scan_channel_opt - CHANNEL_OPTIMIZATION_API_S
 * @flags: enum iwm_scan_channel_flags
 * @non_ebs_ratio: defines the ratio of number of scan iterations where EBS is
 * involved.
 * 1 - EBS is disabled.
 * 2 - every second scan will be full scan (and so on).
 */
struct iwm_scan_channel_opt {
	uint16_t flags;
	uint16_t non_ebs_ratio;
} __packed;

/**
 * iwm_mvm_lmac_scan_flags
 * @IWM_MVM_LMAC_SCAN_FLAG_PASS_ALL: pass all beacons and probe responses
 * without filtering.
 * @IWM_MVM_LMAC_SCAN_FLAG_PASSIVE: force passive scan on all channels
 * @IWM_MVM_LMAC_SCAN_FLAG_PRE_CONNECTION: single channel scan
 * @IWM_MVM_LMAC_SCAN_FLAG_ITER_COMPLETE: send iteration complete notification
 * @IWM_MVM_LMAC_SCAN_FLAG_MULTIPLE_SSIDS: multiple SSID matching
 * @IWM_MVM_LMAC_SCAN_FLAG_FRAGMENTED: all passive scans will be fragmented
 * @IWM_MVM_LMAC_SCAN_FLAGS_RRM_ENABLED: insert WFA vendor-specific TPC report
 * and DS parameter set IEs into probe requests.
 * @IWM_MVM_LMAC_SCAN_FLAG_EXTENDED_DWELL: use extended dwell time on channels
 * 1, 6 and 11.
 * @IWM_MVM_LMAC_SCAN_FLAG_MATCH: Send match found notification on matches
 */
enum iwm_mvm_lmac_scan_flags {
	IWM_MVM_LMAC_SCAN_FLAG_PASS_ALL = (1 << 0),
	IWM_MVM_LMAC_SCAN_FLAG_PASSIVE = (1 << 1),
	IWM_MVM_LMAC_SCAN_FLAG_PRE_CONNECTION = (1 << 2),
	IWM_MVM_LMAC_SCAN_FLAG_ITER_COMPLETE = (1 << 3),
	IWM_MVM_LMAC_SCAN_FLAG_MULTIPLE_SSIDS = (1 << 4),
	IWM_MVM_LMAC_SCAN_FLAG_FRAGMENTED = (1 << 5),
	IWM_MVM_LMAC_SCAN_FLAGS_RRM_ENABLED = (1 << 6),
	IWM_MVM_LMAC_SCAN_FLAG_EXTENDED_DWELL = (1 << 7),
	IWM_MVM_LMAC_SCAN_FLAG_MATCH = (1 << 9),
};

enum iwm_scan_priority {
	IWM_SCAN_PRIORITY_LOW,
	IWM_SCAN_PRIORITY_MEDIUM,
	IWM_SCAN_PRIORITY_HIGH,
};

/**
 * iwm_scan_req_lmac - SCAN_REQUEST_CMD_API_S_VER_1
 * @reserved1: for alignment and future use
 * @n_channels: num of channels to scan
 * @active_dwell: dwell time for active channels
 * @passive_dwell: dwell time for passive channels
 * @fragmented_dwell: dwell time for fragmented passive scan
 * @extended_dwell: dwell time for channels 1, 6 and 11 (in certain cases)
 * @reserved2: for alignment and future use
 * @rx_chain_select: PHY_RX_CHAIN_* flags
 * @scan_flags: &enum iwm_mvm_lmac_scan_flags
 * @max_out_time: max time (in TU) to be out of associated channel
 * @suspend_time: pause scan this long (TUs) when returning to service channel
 * @flags: RXON flags
 * @filter_flags: RXON filter
 * @tx_cmd: tx command for active scan; for 2GHz and for 5GHz
 * @direct_scan: list of SSIDs for directed active scan
 * @scan_prio: enum iwm_scan_priority
 * @iter_num: number of scan iterations
 * @delay: delay in seconds before first iteration
 * @schedule: two scheduling plans. The first one is finite, the second one can
 * be infinite.
 * @channel_opt: channel optimization options, for full and partial scan
 * @data: channel configuration and probe request packet.
 */
struct iwm_scan_req_lmac {
	/* SCAN_REQUEST_FIXED_PART_API_S_VER_7 */
	uint32_t reserved1;
	uint8_t n_channels;
	uint8_t active_dwell;
	uint8_t passive_dwell;
	uint8_t fragmented_dwell;
	uint8_t extended_dwell;
	uint8_t reserved2;
	uint16_t rx_chain_select;
	uint32_t scan_flags;
	uint32_t max_out_time;
	uint32_t suspend_time;
	/* RX_ON_FLAGS_API_S_VER_1 */
	uint32_t flags;
	uint32_t filter_flags;
	struct iwm_scan_req_tx_cmd tx_cmd[2];
	struct iwm_ssid_ie direct_scan[IWM_PROBE_OPTION_MAX];
	uint32_t scan_prio;
	/* SCAN_REQ_PERIODIC_PARAMS_API_S */
	uint32_t iter_num;
	uint32_t delay;
	struct iwm_scan_schedule_lmac schedule[IWM_MAX_SCHED_SCAN_PLANS];
	struct iwm_scan_channel_opt channel_opt[2];
	uint8_t data[];
} __packed;

/**
 * iwm_scan_offload_complete - PERIODIC_SCAN_COMPLETE_NTF_API_S_VER_2
 * @last_schedule_line: last schedule line executed (fast or regular)
 * @last_schedule_iteration: last scan iteration executed before scan abort
 * @status: enum iwm_scan_offload_complete_status
 * @ebs_status: EBS success status &enum iwm_scan_ebs_status
 * @time_after_last_iter: time in seconds elapsed after last iteration
 */
struct iwm_periodic_scan_complete {
	uint8_t last_schedule_line;
	uint8_t last_schedule_iteration;
	uint8_t status;
	uint8_t ebs_status;
	uint32_t time_after_last_iter;
	uint32_t reserved;
} __packed;

/* How many statistics are gathered for each channel */
#define IWM_SCAN_RESULTS_STATISTICS 1
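/*
 * Illustrative sketch (not part of the original header): the LMAC scan
 * request is followed in @data by one channel config per scanned channel
 * and a single probe request block, so a driver might size its command
 * buffer like this. The helper name is hypothetical.
 */
static inline size_t
iwm_example_scan_req_lmac_size(int n_channels)
{
	return (sizeof(struct iwm_scan_req_lmac) +
	    sizeof(struct iwm_scan_channel_cfg_lmac) * n_channels +
	    sizeof(struct iwm_scan_probe_req));
}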
/**
 * enum iwm_scan_complete_status - status codes for scan complete notifications
 * @IWM_SCAN_COMP_STATUS_OK: scan completed successfully
 * @IWM_SCAN_COMP_STATUS_ABORT: scan was aborted by user
 * @IWM_SCAN_COMP_STATUS_ERR_SLEEP: sending null sleep packet failed
 * @IWM_SCAN_COMP_STATUS_ERR_CHAN_TIMEOUT: timeout before channel is ready
 * @IWM_SCAN_COMP_STATUS_ERR_PROBE: sending probe request failed
 * @IWM_SCAN_COMP_STATUS_ERR_WAKEUP: sending null wakeup packet failed
 * @IWM_SCAN_COMP_STATUS_ERR_ANTENNAS: invalid antennas chosen at scan command
 * @IWM_SCAN_COMP_STATUS_ERR_INTERNAL: internal error caused scan abort
 * @IWM_SCAN_COMP_STATUS_ERR_COEX: medium was lost to WiMax
 * @IWM_SCAN_COMP_STATUS_P2P_ACTION_OK: P2P public action frame TX was successful
 * (not an error!)
 * @IWM_SCAN_COMP_STATUS_ITERATION_END: indicates end of one repetition the driver
 * asked for
 * @IWM_SCAN_COMP_STATUS_ERR_ALLOC_TE: scan could not allocate time events
 */
enum iwm_scan_complete_status {
	IWM_SCAN_COMP_STATUS_OK = 0x1,
	IWM_SCAN_COMP_STATUS_ABORT = 0x2,
	IWM_SCAN_COMP_STATUS_ERR_SLEEP = 0x3,
	IWM_SCAN_COMP_STATUS_ERR_CHAN_TIMEOUT = 0x4,
	IWM_SCAN_COMP_STATUS_ERR_PROBE = 0x5,
	IWM_SCAN_COMP_STATUS_ERR_WAKEUP = 0x6,
	IWM_SCAN_COMP_STATUS_ERR_ANTENNAS = 0x7,
	IWM_SCAN_COMP_STATUS_ERR_INTERNAL = 0x8,
	IWM_SCAN_COMP_STATUS_ERR_COEX = 0x9,
	IWM_SCAN_COMP_STATUS_P2P_ACTION_OK = 0xA,
	IWM_SCAN_COMP_STATUS_ITERATION_END = 0x0B,
	IWM_SCAN_COMP_STATUS_ERR_ALLOC_TE = 0x0C,
};

/**
 * struct iwm_scan_results_notif - scan results for one channel
 * ( IWM_SCAN_RESULTS_NOTIFICATION = 0x83 )
 * @channel: which channel the results are from
 * @band: 0 for 5.2 GHz, 1 for 2.4 GHz
 * @probe_status: IWM_SCAN_PROBE_STATUS_*, indicates success of probe request
 * @num_probe_not_sent: # of request that weren't sent due to not enough time
 * @duration: duration spent in channel, in usecs
 * @statistics: statistics gathered for this channel
 */
struct iwm_scan_results_notif {
	uint8_t channel;
	uint8_t band;
	uint8_t probe_status;
	uint8_t num_probe_not_sent;
	uint32_t duration;
	uint32_t statistics[IWM_SCAN_RESULTS_STATISTICS];
} __packed; /* IWM_SCAN_RESULT_NTF_API_S_VER_2 */

enum iwm_scan_framework_client {
	IWM_SCAN_CLIENT_SCHED_SCAN = (1 << 0),
	IWM_SCAN_CLIENT_NETDETECT = (1 << 1),
	IWM_SCAN_CLIENT_ASSET_TRACKING = (1 << 2),
};

/**
 * iwm_scan_offload_blacklist - IWM_SCAN_OFFLOAD_BLACKLIST_S
 * @ssid: MAC address to filter out
 * @reported_rssi: AP rssi reported to the host
 * @client_bitmap: clients ignore this entry - enum scan_framework_client
 */
struct iwm_scan_offload_blacklist {
	uint8_t ssid[IEEE80211_ADDR_LEN];
	uint8_t reported_rssi;
	uint8_t client_bitmap;
} __packed;

enum iwm_scan_offload_network_type {
	IWM_NETWORK_TYPE_BSS = 1,
	IWM_NETWORK_TYPE_IBSS = 2,
	IWM_NETWORK_TYPE_ANY = 3,
};

enum iwm_scan_offload_band_selection {
	IWM_SCAN_OFFLOAD_SELECT_2_4 = 0x4,
	IWM_SCAN_OFFLOAD_SELECT_5_2 = 0x8,
	IWM_SCAN_OFFLOAD_SELECT_ANY = 0xc,
};

/**
 * iwm_scan_offload_profile - IWM_SCAN_OFFLOAD_PROFILE_S
 * @ssid_index: index to ssid list in fixed part
 * @unicast_cipher: encryption algorithm to match - bitmap
 * @auth_alg: authentication algorithm to match - bitmap
 * @network_type: enum iwm_scan_offload_network_type
 * @band_selection: enum iwm_scan_offload_band_selection
 * @client_bitmap: clients waiting for match - enum scan_framework_client
 */
struct iwm_scan_offload_profile {
	uint8_t ssid_index;
	uint8_t unicast_cipher;
	uint8_t auth_alg;
	uint8_t network_type;
	uint8_t band_selection;
	uint8_t client_bitmap;
	uint8_t reserved[2];
} __packed;

/**
 * iwm_scan_offload_profile_cfg - IWM_SCAN_OFFLOAD_PROFILES_CFG_API_S_VER_1
 * @blacklist: AP list to filter out of scan results
 * @profiles: profiles to search for match
 * @blacklist_len: length of blacklist
 * @num_profiles: num of profiles in the list
 * @match_notify: clients waiting for match found notification
 * @pass_match: clients waiting for the results
 * @active_clients: active clients bitmap - enum scan_framework_client
 * @any_beacon_notify: clients waiting for match notification without match
 */
struct iwm_scan_offload_profile_cfg {
	struct iwm_scan_offload_profile profiles[IWM_SCAN_MAX_PROFILES];
	uint8_t blacklist_len;
	uint8_t num_profiles;
	uint8_t match_notify;
	uint8_t pass_match;
	uint8_t active_clients;
	uint8_t any_beacon_notify;
	uint8_t reserved[2];
} __packed;
enum iwm_scan_offload_complete_status {
	IWM_SCAN_OFFLOAD_COMPLETED = 1,
	IWM_SCAN_OFFLOAD_ABORTED = 2,
};

/**
 * struct iwm_lmac_scan_complete_notif - notifies end of scanning (all channels)
 * SCAN_COMPLETE_NTF_API_S_VER_3
 * @scanned_channels: number of channels scanned (and number of valid results)
 * @status: one of SCAN_COMP_STATUS_*
 * @bt_status: BT on/off status
 * @last_channel: last channel that was scanned
 * @tsf_low: TSF timer (lower half) in usecs
 * @tsf_high: TSF timer (higher half) in usecs
 * @results: an array of scan results, only "scanned_channels" of them are valid
 */
struct iwm_lmac_scan_complete_notif {
	uint8_t scanned_channels;
	uint8_t status;
	uint8_t bt_status;
	uint8_t last_channel;
	uint32_t tsf_low;
	uint32_t tsf_high;
	struct iwm_scan_results_notif results[];
} __packed;

/*
 * END mvm/fw-api-scan.h
 */

/*
 * BEGIN mvm/fw-api-sta.h
 */

/* UMAC Scan API */

/* The maximum of either of these cannot exceed 8, because we use an
 * 8-bit mask (see IWM_MVM_SCAN_MASK).
 */
#define IWM_MVM_MAX_UMAC_SCANS 8
#define IWM_MVM_MAX_LMAC_SCANS 1

enum iwm_scan_config_flags {
	IWM_SCAN_CONFIG_FLAG_ACTIVATE = (1 << 0),
	IWM_SCAN_CONFIG_FLAG_DEACTIVATE = (1 << 1),
	IWM_SCAN_CONFIG_FLAG_FORBID_CHUB_REQS = (1 << 2),
	IWM_SCAN_CONFIG_FLAG_ALLOW_CHUB_REQS = (1 << 3),
	IWM_SCAN_CONFIG_FLAG_SET_TX_CHAINS = (1 << 8),
	IWM_SCAN_CONFIG_FLAG_SET_RX_CHAINS = (1 << 9),
	IWM_SCAN_CONFIG_FLAG_SET_AUX_STA_ID = (1 << 10),
	IWM_SCAN_CONFIG_FLAG_SET_ALL_TIMES = (1 << 11),
	IWM_SCAN_CONFIG_FLAG_SET_EFFECTIVE_TIMES = (1 << 12),
	IWM_SCAN_CONFIG_FLAG_SET_CHANNEL_FLAGS = (1 << 13),
	IWM_SCAN_CONFIG_FLAG_SET_LEGACY_RATES = (1 << 14),
	IWM_SCAN_CONFIG_FLAG_SET_MAC_ADDR = (1 << 15),
	IWM_SCAN_CONFIG_FLAG_SET_FRAGMENTED = (1 << 16),
	IWM_SCAN_CONFIG_FLAG_CLEAR_FRAGMENTED = (1 << 17),
	IWM_SCAN_CONFIG_FLAG_SET_CAM_MODE = (1 << 18),
	IWM_SCAN_CONFIG_FLAG_CLEAR_CAM_MODE = (1 << 19),
	IWM_SCAN_CONFIG_FLAG_SET_PROMISC_MODE = (1 << 20),
	IWM_SCAN_CONFIG_FLAG_CLEAR_PROMISC_MODE = (1 << 21),

	/* Bits 26-31 are for num of channels in channel_array */
#define IWM_SCAN_CONFIG_N_CHANNELS(n) ((n) << 26)
};

enum iwm_scan_config_rates {
	/* OFDM basic rates */
	IWM_SCAN_CONFIG_RATE_6M = (1 << 0),
	IWM_SCAN_CONFIG_RATE_9M = (1 << 1),
	IWM_SCAN_CONFIG_RATE_12M = (1 << 2),
	IWM_SCAN_CONFIG_RATE_18M = (1 << 3),
	IWM_SCAN_CONFIG_RATE_24M = (1 << 4),
	IWM_SCAN_CONFIG_RATE_36M = (1 << 5),
	IWM_SCAN_CONFIG_RATE_48M = (1 << 6),
	IWM_SCAN_CONFIG_RATE_54M = (1 << 7),
	/* CCK basic rates */
	IWM_SCAN_CONFIG_RATE_1M = (1 << 8),
	IWM_SCAN_CONFIG_RATE_2M = (1 << 9),
	IWM_SCAN_CONFIG_RATE_5M = (1 << 10),
	IWM_SCAN_CONFIG_RATE_11M = (1 << 11),

	/* Bits 16-27 are for supported rates */
#define IWM_SCAN_CONFIG_SUPPORTED_RATE(rate) ((rate) << 16)
};

enum iwm_channel_flags {
	IWM_CHANNEL_FLAG_EBS = (1 << 0),
	IWM_CHANNEL_FLAG_ACCURATE_EBS = (1 << 1),
	IWM_CHANNEL_FLAG_EBS_ADD = (1 << 2),
	IWM_CHANNEL_FLAG_PRE_SCAN_PASSIVE2ACTIVE = (1 << 3),
};
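/*
 * Illustrative sketch (not part of the original header): composing the
 * flags word of a scan config command that activates the config, sets
 * legacy rates, MAC address and channel flags, and encodes the channel
 * count in bits 26-31. The helper name and the chosen flag combination
 * are assumptions for demonstration.
 */
static inline uint32_t
iwm_example_scan_config_flags(int nchan)
{
	return (IWM_SCAN_CONFIG_FLAG_ACTIVATE |
	    IWM_SCAN_CONFIG_FLAG_SET_LEGACY_RATES |
	    IWM_SCAN_CONFIG_FLAG_SET_MAC_ADDR |
	    IWM_SCAN_CONFIG_FLAG_SET_CHANNEL_FLAGS |
	    IWM_SCAN_CONFIG_N_CHANNELS(nchan));
}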
/**
 * struct iwm_scan_config
 * @flags: enum scan_config_flags
 * @tx_chains: valid_tx antenna - ANT_* definitions
 * @rx_chains: valid_rx antenna - ANT_* definitions
 * @legacy_rates: default legacy rates - enum scan_config_rates
 * @out_of_channel_time: default max out of serving channel time
 * @suspend_time: default max suspend time
 * @dwell_active: default dwell time for active scan
 * @dwell_passive: default dwell time for passive scan
 * @dwell_fragmented: default dwell time for fragmented scan
 * @dwell_extended: default dwell time for channels 1, 6 and 11
 * @mac_addr: default mac address to be used in probes
 * @bcast_sta_id: the index of the station in the fw
 * @channel_flags: default channel flags - enum iwm_channel_flags
 * scan_config_channel_flag
 * @channel_array: default supported channels
 */
struct iwm_scan_config {
	uint32_t flags;
	uint32_t tx_chains;
	uint32_t rx_chains;
	uint32_t legacy_rates;
	uint32_t out_of_channel_time;
	uint32_t suspend_time;
	uint8_t dwell_active;
	uint8_t dwell_passive;
	uint8_t dwell_fragmented;
	uint8_t dwell_extended;
	uint8_t mac_addr[IEEE80211_ADDR_LEN];
	uint8_t bcast_sta_id;
	uint8_t channel_flags;
	uint8_t channel_array[];
} __packed; /* SCAN_CONFIG_DB_CMD_API_S */

/**
 * iwm_umac_scan_flags
 * @IWM_UMAC_SCAN_FLAG_PREEMPTIVE: scan process triggered by this scan request
 * can be preempted by other scan requests with higher priority.
 * The low priority scan will be resumed when the higher priority scan is
 * completed.
 * @IWM_UMAC_SCAN_FLAG_START_NOTIF: notification will be sent to the driver
 * when scan starts.
 */
enum iwm_umac_scan_flags {
	IWM_UMAC_SCAN_FLAG_PREEMPTIVE = (1 << 0),
	IWM_UMAC_SCAN_FLAG_START_NOTIF = (1 << 1),
};

enum iwm_umac_scan_uid_offsets {
	IWM_UMAC_SCAN_UID_TYPE_OFFSET = 0,
	IWM_UMAC_SCAN_UID_SEQ_OFFSET = 8,
};

enum iwm_umac_scan_general_flags {
	IWM_UMAC_SCAN_GEN_FLAGS_PERIODIC = (1 << 0),
	IWM_UMAC_SCAN_GEN_FLAGS_OVER_BT = (1 << 1),
	IWM_UMAC_SCAN_GEN_FLAGS_PASS_ALL = (1 << 2),
	IWM_UMAC_SCAN_GEN_FLAGS_PASSIVE = (1 << 3),
	IWM_UMAC_SCAN_GEN_FLAGS_PRE_CONNECT = (1 << 4),
	IWM_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE = (1 << 5),
	IWM_UMAC_SCAN_GEN_FLAGS_MULTIPLE_SSID = (1 << 6),
	IWM_UMAC_SCAN_GEN_FLAGS_FRAGMENTED = (1 << 7),
	IWM_UMAC_SCAN_GEN_FLAGS_RRM_ENABLED = (1 << 8),
	IWM_UMAC_SCAN_GEN_FLAGS_MATCH = (1 << 9),
	IWM_UMAC_SCAN_GEN_FLAGS_EXTENDED_DWELL = (1 << 10),
};

/**
 * struct iwm_scan_channel_cfg_umac
 * @flags: bitmap - 0-19: directed scan to i'th ssid.
 * @channel_num: channel number 1-13 etc.
 * @iter_count: repetition count for the channel.
 * @iter_interval: interval between two scan iterations on one channel.
 */
struct iwm_scan_channel_cfg_umac {
	uint32_t flags;
#define IWM_SCAN_CHANNEL_UMAC_NSSIDS(x) ((1 << (x)) - 1)
	uint8_t channel_num;
	uint8_t iter_count;
	uint16_t iter_interval;
} __packed; /* SCAN_CHANNEL_CFG_S_VER2 */

/**
 * struct iwm_scan_umac_schedule
 * @interval: interval in seconds between scan iterations
 * @iter_count: num of scan iterations for schedule plan, 0xff for infinite loop
 * @reserved: for alignment and future use
 */
struct iwm_scan_umac_schedule {
	uint16_t interval;
	uint8_t iter_count;
	uint8_t reserved;
} __packed; /* SCAN_SCHED_PARAM_API_S_VER_1 */
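/*
 * Illustrative sketch (not part of the original header): the UID offsets
 * enum above suggests a scan UID packs a type field at bit 0 and a
 * sequence number at bit 8; how a driver combines them is an assumption
 * here, and the helper name is hypothetical.
 */
static inline uint32_t
iwm_example_umac_scan_uid(uint32_t type, uint32_t seq)
{
	return ((type << IWM_UMAC_SCAN_UID_TYPE_OFFSET) |
	    (seq << IWM_UMAC_SCAN_UID_SEQ_OFFSET));
}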
* @delay: delay in TUs before starting the first scan iteration * @reserved: for future use and alignment * @preq: probe request with IEs blocks * @direct_scan: list of SSIDs for directed active scan */ struct iwm_scan_req_umac_tail { /* SCAN_PERIODIC_PARAMS_API_S_VER_1 */ struct iwm_scan_umac_schedule schedule[IWM_MAX_SCHED_SCAN_PLANS]; uint16_t delay; uint16_t reserved; /* SCAN_PROBE_PARAMS_API_S_VER_1 */ struct iwm_scan_probe_req preq; struct iwm_ssid_ie direct_scan[IWM_PROBE_OPTION_MAX]; } __packed; /** * struct iwm_scan_req_umac * @flags: &enum iwm_umac_scan_flags * @uid: scan id, &enum iwm_umac_scan_uid_offsets * @ooc_priority: out of channel priority - &enum iwm_scan_priority * @general_flags: &enum iwm_umac_scan_general_flags * @extended_dwell: dwell time for channels 1, 6 and 11 * @active_dwell: dwell time for active scan * @passive_dwell: dwell time for passive scan * @fragmented_dwell: dwell time for fragmented passive scan * @max_out_time: max out of serving channel time * @suspend_time: max suspend time * @scan_priority: scan internal prioritization &enum iwm_scan_priority * @channel_flags: &enum iwm_scan_channel_flags * @n_channels: num of channels in scan request * @reserved: for future use and alignment * @data: &struct iwm_scan_channel_cfg_umac and * &struct iwm_scan_req_umac_tail */ struct iwm_scan_req_umac { uint32_t flags; uint32_t uid; uint32_t ooc_priority; /* SCAN_GENERAL_PARAMS_API_S_VER_1 */ uint32_t general_flags; uint8_t extended_dwell; uint8_t active_dwell; uint8_t passive_dwell; uint8_t fragmented_dwell; uint32_t max_out_time; uint32_t suspend_time; uint32_t scan_priority; /* SCAN_CHANNEL_PARAMS_API_S_VER_1 */ uint8_t channel_flags; uint8_t n_channels; uint16_t reserved; uint8_t data[]; } __packed; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_1 */ /** * struct iwm_umac_scan_abort * @uid: scan id, &enum iwm_umac_scan_uid_offsets * @flags: reserved */ struct iwm_umac_scan_abort { uint32_t uid; uint32_t flags; } __packed; /* SCAN_ABORT_CMD_UMAC_API_S_VER_1 */ /** * struct iwm_umac_scan_complete * @uid: scan id, &enum iwm_umac_scan_uid_offsets * @last_schedule: last scheduling line * @last_iter: last scan iteration number * @status: &enum iwm_scan_offload_complete_status * @ebs_status: &enum iwm_scan_ebs_status * @time_from_last_iter: time elapsed from last iteration * @reserved: for future use */ struct iwm_umac_scan_complete { uint32_t uid; uint8_t last_schedule; uint8_t last_iter; uint8_t status; uint8_t ebs_status; uint32_t time_from_last_iter; uint32_t reserved; } __packed; /* SCAN_COMPLETE_NTF_UMAC_API_S_VER_1 */ #define IWM_SCAN_OFFLOAD_MATCHING_CHANNELS_LEN 5 /** * struct iwm_scan_offload_profile_match - match information * @bssid: matched bssid * @channel: channel where the match occurred * @energy: * @matching_feature: * @matching_channels: bitmap of channels that matched, referencing * the channels passed in the scan offload request */ struct iwm_scan_offload_profile_match { uint8_t bssid[IEEE80211_ADDR_LEN]; uint16_t reserved; uint8_t channel; uint8_t energy; uint8_t matching_feature; uint8_t matching_channels[IWM_SCAN_OFFLOAD_MATCHING_CHANNELS_LEN]; } __packed; /* SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S_VER_1 */ /** * struct iwm_scan_offload_profiles_query - match results query response * @matched_profiles: bitmap of matched profiles, referencing the * matches passed in the scan offload request * @last_scan_age: age of the last offloaded scan * @n_scans_done: number of offloaded scans done * @gp2_d0u: GP2 when D0U occurred * @gp2_invoked: GP2 when scan offload
was invoked * @resume_while_scanning: not used * @self_recovery: obsolete * @reserved: reserved * @matches: array of match information, one for each match */ struct iwm_scan_offload_profiles_query { uint32_t matched_profiles; uint32_t last_scan_age; uint32_t n_scans_done; uint32_t gp2_d0u; uint32_t gp2_invoked; uint8_t resume_while_scanning; uint8_t self_recovery; uint16_t reserved; struct iwm_scan_offload_profile_match matches[IWM_SCAN_MAX_PROFILES]; } __packed; /* SCAN_OFFLOAD_PROFILES_QUERY_RSP_S_VER_2 */ /** * struct iwm_umac_scan_iter_complete_notif - notifies end of scanning iteration * @uid: scan id, &enum iwm_umac_scan_uid_offsets * @scanned_channels: number of channels scanned and number of valid elements in * results array * @status: one of SCAN_COMP_STATUS_* * @bt_status: BT on/off status * @last_channel: last channel that was scanned * @tsf_low: TSF timer (lower half) in usecs * @tsf_high: TSF timer (higher half) in usecs * @results: array of scan results, only "scanned_channels" of them are valid */ struct iwm_umac_scan_iter_complete_notif { uint32_t uid; uint8_t scanned_channels; uint8_t status; uint8_t bt_status; uint8_t last_channel; uint32_t tsf_low; uint32_t tsf_high; struct iwm_scan_results_notif results[]; } __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_1 */ /* Please keep this enum *SORTED* by hex value. * Needed for binary search, otherwise a warning will be triggered. */ enum iwm_scan_subcmd_ids { IWM_GSCAN_START_CMD = 0x0, IWM_GSCAN_STOP_CMD = 0x1, IWM_GSCAN_SET_HOTLIST_CMD = 0x2, IWM_GSCAN_RESET_HOTLIST_CMD = 0x3, IWM_GSCAN_SET_SIGNIFICANT_CHANGE_CMD = 0x4, IWM_GSCAN_RESET_SIGNIFICANT_CHANGE_CMD = 0x5, IWM_GSCAN_SIGNIFICANT_CHANGE_EVENT = 0xFD, IWM_GSCAN_HOTLIST_CHANGE_EVENT = 0xFE, IWM_GSCAN_RESULTS_AVAILABLE_EVENT = 0xFF, }; /* STA API */ /** * enum iwm_sta_flags - flags for the ADD_STA host command * @IWM_STA_FLG_REDUCED_TX_PWR_CTRL: * @IWM_STA_FLG_REDUCED_TX_PWR_DATA: * @IWM_STA_FLG_DISABLE_TX: set if TX should be disabled * @IWM_STA_FLG_PS: set if STA is in Power Save * @IWM_STA_FLG_INVALID: set if STA is invalid * @IWM_STA_FLG_DLP_EN: Direct Link Protocol is enabled * @IWM_STA_FLG_SET_ALL_KEYS: the current key applies to all key IDs * @IWM_STA_FLG_DRAIN_FLOW: drain flow * @IWM_STA_FLG_PAN: STA is for PAN interface * @IWM_STA_FLG_CLASS_AUTH: * @IWM_STA_FLG_CLASS_ASSOC: * @IWM_STA_FLG_CLASS_MIMO_PROT: * @IWM_STA_FLG_MAX_AGG_SIZE_MSK: maximal size for A-MPDU * @IWM_STA_FLG_AGG_MPDU_DENS_MSK: maximal MPDU density for Tx aggregation * @IWM_STA_FLG_FAT_EN_MSK: support for channel width (for Tx). This flag is * initialised by driver and can be updated by fw upon reception of * action frames that can change the channel width. When cleared the fw * will send all the frames in 20MHz even when FAT channel is requested. * @IWM_STA_FLG_MIMO_EN_MSK: support for MIMO. This flag is initialised by the * driver and can be updated by fw upon reception of action frames. 
* @IWM_STA_FLG_MFP_EN: Management Frame Protection */ enum iwm_sta_flags { IWM_STA_FLG_REDUCED_TX_PWR_CTRL = (1 << 3), IWM_STA_FLG_REDUCED_TX_PWR_DATA = (1 << 6), IWM_STA_FLG_DISABLE_TX = (1 << 4), IWM_STA_FLG_PS = (1 << 8), IWM_STA_FLG_DRAIN_FLOW = (1 << 12), IWM_STA_FLG_PAN = (1 << 13), IWM_STA_FLG_CLASS_AUTH = (1 << 14), IWM_STA_FLG_CLASS_ASSOC = (1 << 15), IWM_STA_FLG_RTS_MIMO_PROT = (1 << 17), IWM_STA_FLG_MAX_AGG_SIZE_SHIFT = 19, IWM_STA_FLG_MAX_AGG_SIZE_8K = (0 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_16K = (1 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_32K = (2 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_64K = (3 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_128K = (4 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_256K = (5 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_512K = (6 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_1024K = (7 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_MAX_AGG_SIZE_MSK = (7 << IWM_STA_FLG_MAX_AGG_SIZE_SHIFT), IWM_STA_FLG_AGG_MPDU_DENS_SHIFT = 23, IWM_STA_FLG_AGG_MPDU_DENS_2US = (4 << IWM_STA_FLG_AGG_MPDU_DENS_SHIFT), IWM_STA_FLG_AGG_MPDU_DENS_4US = (5 << IWM_STA_FLG_AGG_MPDU_DENS_SHIFT), IWM_STA_FLG_AGG_MPDU_DENS_8US = (6 << IWM_STA_FLG_AGG_MPDU_DENS_SHIFT), IWM_STA_FLG_AGG_MPDU_DENS_16US = (7 << IWM_STA_FLG_AGG_MPDU_DENS_SHIFT), IWM_STA_FLG_AGG_MPDU_DENS_MSK = (7 << IWM_STA_FLG_AGG_MPDU_DENS_SHIFT), IWM_STA_FLG_FAT_EN_20MHZ = (0 << 26), IWM_STA_FLG_FAT_EN_40MHZ = (1 << 26), IWM_STA_FLG_FAT_EN_80MHZ = (2 << 26), IWM_STA_FLG_FAT_EN_160MHZ = (3 << 26), IWM_STA_FLG_FAT_EN_MSK = (3 << 26), IWM_STA_FLG_MIMO_EN_SISO = (0 << 28), IWM_STA_FLG_MIMO_EN_MIMO2 = (1 << 28), IWM_STA_FLG_MIMO_EN_MIMO3 = (2 << 28), IWM_STA_FLG_MIMO_EN_MSK = (3 << 28), }; /** * enum iwm_sta_key_flag - key flags for the ADD_STA host command * @IWM_STA_KEY_FLG_NO_ENC: no encryption * @IWM_STA_KEY_FLG_WEP: WEP encryption algorithm * @IWM_STA_KEY_FLG_CCM: CCMP encryption algorithm * @IWM_STA_KEY_FLG_TKIP: TKIP encryption algorithm * @IWM_STA_KEY_FLG_EXT: extended cipher algorithm (depends on the FW support) * @IWM_STA_KEY_FLG_CMAC: CMAC encryption algorithm * @IWM_STA_KEY_FLG_ENC_UNKNOWN: unknown encryption algorithm * @IWM_STA_KEY_FLG_EN_MSK: mask for encryption algorithm value * @IWM_STA_KEY_FLG_WEP_KEY_MAP: wep is either a group key (0 - legacy WEP) or from * station info array (1 - n 1X mode) * @IWM_STA_KEY_FLG_KEYID_MSK: the index of the key * @IWM_STA_KEY_NOT_VALID: key is invalid * @IWM_STA_KEY_FLG_WEP_13BYTES: set for 13 bytes WEP key * @IWM_STA_KEY_MULTICAST: set for multicast key * @IWM_STA_KEY_MFP: key is used for Management Frame Protection */ enum iwm_sta_key_flag { IWM_STA_KEY_FLG_NO_ENC = (0 << 0), IWM_STA_KEY_FLG_WEP = (1 << 0), IWM_STA_KEY_FLG_CCM = (2 << 0), IWM_STA_KEY_FLG_TKIP = (3 << 0), IWM_STA_KEY_FLG_EXT = (4 << 0), IWM_STA_KEY_FLG_CMAC = (6 << 0), IWM_STA_KEY_FLG_ENC_UNKNOWN = (7 << 0), IWM_STA_KEY_FLG_EN_MSK = (7 << 0), IWM_STA_KEY_FLG_WEP_KEY_MAP = (1 << 3), IWM_STA_KEY_FLG_KEYID_POS = 8, IWM_STA_KEY_FLG_KEYID_MSK = (3 << IWM_STA_KEY_FLG_KEYID_POS), IWM_STA_KEY_NOT_VALID = (1 << 11), IWM_STA_KEY_FLG_WEP_13BYTES = (1 << 12), IWM_STA_KEY_MULTICAST = (1 << 14), IWM_STA_KEY_MFP = (1 << 15), }; /** * enum iwm_sta_modify_flag - indicate to the fw what flags are being changed * @IWM_STA_MODIFY_QUEUE_REMOVAL: this command removes a queue * @IWM_STA_MODIFY_TID_DISABLE_TX: this command modifies %tid_disable_tx * @IWM_STA_MODIFY_TX_RATE: unused *
@IWM_STA_MODIFY_ADD_BA_TID: this command modifies %add_immediate_ba_tid * @IWM_STA_MODIFY_REMOVE_BA_TID: this command modifies %remove_immediate_ba_tid * @IWM_STA_MODIFY_SLEEPING_STA_TX_COUNT: this command modifies %sleep_tx_count * @IWM_STA_MODIFY_PROT_TH: * @IWM_STA_MODIFY_QUEUES: modify the queues used by this station */ enum iwm_sta_modify_flag { IWM_STA_MODIFY_QUEUE_REMOVAL = (1 << 0), IWM_STA_MODIFY_TID_DISABLE_TX = (1 << 1), IWM_STA_MODIFY_TX_RATE = (1 << 2), IWM_STA_MODIFY_ADD_BA_TID = (1 << 3), IWM_STA_MODIFY_REMOVE_BA_TID = (1 << 4), IWM_STA_MODIFY_SLEEPING_STA_TX_COUNT = (1 << 5), IWM_STA_MODIFY_PROT_TH = (1 << 6), IWM_STA_MODIFY_QUEUES = (1 << 7), }; #define IWM_STA_MODE_MODIFY 1 /** * enum iwm_sta_sleep_flag - type of sleep of the station * @IWM_STA_SLEEP_STATE_AWAKE: * @IWM_STA_SLEEP_STATE_PS_POLL: * @IWM_STA_SLEEP_STATE_UAPSD: * @IWM_STA_SLEEP_STATE_MOREDATA: set more-data bit on * (last) released frame */ enum iwm_sta_sleep_flag { IWM_STA_SLEEP_STATE_AWAKE = 0, IWM_STA_SLEEP_STATE_PS_POLL = (1 << 0), IWM_STA_SLEEP_STATE_UAPSD = (1 << 1), IWM_STA_SLEEP_STATE_MOREDATA = (1 << 2), }; /* STA ID and color bits definitions */ #define IWM_STA_ID_SEED (0x0f) #define IWM_STA_ID_POS (0) #define IWM_STA_ID_MSK (IWM_STA_ID_SEED << IWM_STA_ID_POS) #define IWM_STA_COLOR_SEED (0x7) #define IWM_STA_COLOR_POS (4) #define IWM_STA_COLOR_MSK (IWM_STA_COLOR_SEED << IWM_STA_COLOR_POS) #define IWM_STA_ID_N_COLOR_GET_COLOR(id_n_color) \ (((id_n_color) & IWM_STA_COLOR_MSK) >> IWM_STA_COLOR_POS) #define IWM_STA_ID_N_COLOR_GET_ID(id_n_color) \ (((id_n_color) & IWM_STA_ID_MSK) >> IWM_STA_ID_POS) #define IWM_STA_KEY_MAX_NUM (16) #define IWM_STA_KEY_IDX_INVALID (0xff) #define IWM_STA_KEY_MAX_DATA_KEY_NUM (4) #define IWM_MAX_GLOBAL_KEYS (4) #define IWM_STA_KEY_LEN_WEP40 (5) #define IWM_STA_KEY_LEN_WEP104 (13) /** * struct iwm_mvm_keyinfo - key information * @key_flags: type %iwm_sta_key_flag * @tkip_rx_tsc_byte2: TSC[2] for key mix ph1 detection * @tkip_rx_ttak: 10-byte unicast TKIP TTAK for Rx * @key_offset: key offset in the fw's key table * @key: 16-byte unicast decryption key * @tx_secur_seq_cnt: initial RSC / PN needed for replay check * @hw_tkip_mic_rx_key: byte: MIC Rx Key - used for TKIP only * @hw_tkip_mic_tx_key: byte: MIC Tx Key - used for TKIP only */ struct iwm_mvm_keyinfo { uint16_t key_flags; uint8_t tkip_rx_tsc_byte2; uint8_t reserved1; uint16_t tkip_rx_ttak[5]; uint8_t key_offset; uint8_t reserved2; uint8_t key[16]; uint64_t tx_secur_seq_cnt; uint64_t hw_tkip_mic_rx_key; uint64_t hw_tkip_mic_tx_key; } __packed; #define IWM_ADD_STA_STATUS_MASK 0xFF #define IWM_ADD_STA_BAID_VALID_MASK 0x8000 #define IWM_ADD_STA_BAID_MASK 0x7F00 #define IWM_ADD_STA_BAID_SHIFT 8 /** * struct iwm_mvm_add_sta_cmd - Add/modify a station in the fw's sta table. * ( REPLY_ADD_STA = 0x18 ) * @add_modify: 1: modify existing, 0: add new station * @awake_acs: * @tid_disable_tx: is tid BIT(tid) enabled for Tx. Clear BIT(x) to enable * AMPDU for tid x. Set %IWM_STA_MODIFY_TID_DISABLE_TX to change this field. * @mac_id_n_color: the Mac context this station belongs to * @addr[IEEE80211_ADDR_LEN]: station's MAC address * @sta_id: index of station in uCode's station table * @modify_mask: IWM_STA_MODIFY_*, selects which parameters to modify vs. leave * alone. 1 - modify, 0 - don't change. 
* @station_flags: look at %iwm_sta_flags * @station_flags_msk: what of %station_flags have changed * @add_immediate_ba_tid: tid for which to add block-ack support (Rx) * Set %IWM_STA_MODIFY_ADD_BA_TID to use this field, and also set * add_immediate_ba_ssn. * @remove_immediate_ba_tid: tid for which to remove block-ack support (Rx) * Set %IWM_STA_MODIFY_REMOVE_BA_TID to use this field * @add_immediate_ba_ssn: ssn for the Rx block-ack session. Used together with * add_immediate_ba_tid. * @sleep_tx_count: number of packets to transmit to station even though it is * asleep. Used to synchronise PS-poll and u-APSD responses while ucode * keeps track of STA sleep state. * @sleep_state_flags: Look at %iwm_sta_sleep_flag. * @assoc_id: assoc_id to be sent in VHT PLCP (9-bit), for grp use 0, for AP * mac-addr. * @beamform_flags: beam forming controls * @tfd_queue_msk: tfd queues used by this station * * The device contains an internal table of per-station information, with info * on security keys, aggregation parameters, and Tx rates for initial Tx * attempt and any retries (set by IWM_REPLY_TX_LINK_QUALITY_CMD). * * ADD_STA sets up the table entry for one station, either creating a new * entry, or modifying a pre-existing one. */ struct iwm_mvm_add_sta_cmd { uint8_t add_modify; uint8_t awake_acs; uint16_t tid_disable_tx; uint32_t mac_id_n_color; uint8_t addr[IEEE80211_ADDR_LEN]; /* _STA_ID_MODIFY_INFO_API_S_VER_1 */ uint16_t reserved2; uint8_t sta_id; uint8_t modify_mask; uint16_t reserved3; uint32_t station_flags; uint32_t station_flags_msk; uint8_t add_immediate_ba_tid; uint8_t remove_immediate_ba_tid; uint16_t add_immediate_ba_ssn; uint16_t sleep_tx_count; uint16_t sleep_state_flags; uint16_t assoc_id; uint16_t beamform_flags; uint32_t tfd_queue_msk; } __packed; /* ADD_STA_CMD_API_S_VER_7 */ /** * struct iwm_mvm_add_sta_key_cmd - add/modify sta key * ( IWM_REPLY_ADD_STA_KEY = 0x17 ) * @sta_id: index of station in uCode's station table * @key_offset: key offset in key storage * @key_flags: type %iwm_sta_key_flag * @key: key material data * @key2: key material data * @rx_secur_seq_cnt: RX security sequence counter for the key * @tkip_rx_tsc_byte2: TSC[2] for key mix ph1 detection * @tkip_rx_ttak: 10-byte unicast TKIP TTAK for Rx */ struct iwm_mvm_add_sta_key_cmd { uint8_t sta_id; uint8_t key_offset; uint16_t key_flags; uint8_t key[16]; uint8_t key2[16]; uint8_t rx_secur_seq_cnt[16]; uint8_t tkip_rx_tsc_byte2; uint8_t reserved; uint16_t tkip_rx_ttak[5]; } __packed; /* IWM_ADD_MODIFY_STA_KEY_API_S_VER_1 */ /** * enum iwm_mvm_add_sta_rsp_status - status in the response to ADD_STA command * @IWM_ADD_STA_SUCCESS: operation was executed successfully * @IWM_ADD_STA_STATIONS_OVERLOAD: no room left in the fw's station table * @IWM_ADD_STA_IMMEDIATE_BA_FAILURE: can't add Rx block ack session * @IWM_ADD_STA_MODIFY_NON_EXISTING_STA: driver requested to modify a station * that doesn't exist. 
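* * Illustrative sketch (not part of the original header; "status" is a hypothetical local holding the 32-bit response word): a driver is expected to mask the response with IWM_ADD_STA_STATUS_MASK before comparing against these values, roughly: * * switch (le32toh(status) & IWM_ADD_STA_STATUS_MASK) { * case IWM_ADD_STA_SUCCESS: * break; * case IWM_ADD_STA_STATIONS_OVERLOAD: * default: * error = EIO; * break; * }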
*/ enum iwm_mvm_add_sta_rsp_status { IWM_ADD_STA_SUCCESS = 0x1, IWM_ADD_STA_STATIONS_OVERLOAD = 0x2, IWM_ADD_STA_IMMEDIATE_BA_FAILURE = 0x4, IWM_ADD_STA_MODIFY_NON_EXISTING_STA = 0x8, }; /** * struct iwm_mvm_rm_sta_cmd - Remove a station from the fw's station table * ( IWM_REMOVE_STA = 0x19 ) * @sta_id: the station id of the station to be removed */ struct iwm_mvm_rm_sta_cmd { uint8_t sta_id; uint8_t reserved[3]; } __packed; /* IWM_REMOVE_STA_CMD_API_S_VER_2 */ /** * struct iwm_mvm_mgmt_mcast_key_cmd * ( IWM_MGMT_MCAST_KEY = 0x1f ) * @ctrl_flags: %iwm_sta_key_flag * @IGTK: * @K1: IGTK master key * @K2: IGTK sub key * @sta_id: station ID that supports IGTK * @key_id: * @receive_seq_cnt: initial RSC/PN needed for replay check */ struct iwm_mvm_mgmt_mcast_key_cmd { uint32_t ctrl_flags; uint8_t IGTK[16]; uint8_t K1[16]; uint8_t K2[16]; uint32_t key_id; uint32_t sta_id; uint64_t receive_seq_cnt; } __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ struct iwm_mvm_wep_key { uint8_t key_index; uint8_t key_offset; uint16_t reserved1; uint8_t key_size; uint8_t reserved2[3]; uint8_t key[16]; } __packed; struct iwm_mvm_wep_key_cmd { uint32_t mac_id_n_color; uint8_t num_keys; uint8_t decryption_type; uint8_t flags; uint8_t reserved; struct iwm_mvm_wep_key wep_key[0]; } __packed; /* SEC_CURR_WEP_KEY_CMD_API_S_VER_2 */ /* * END mvm/fw-api-sta.h */ /* * BT coex */ enum iwm_bt_coex_mode { IWM_BT_COEX_DISABLE = 0x0, IWM_BT_COEX_NW = 0x1, IWM_BT_COEX_BT = 0x2, IWM_BT_COEX_WIFI = 0x3, }; /* BT_COEX_MODES_E */ enum iwm_bt_coex_enabled_modules { IWM_BT_COEX_MPLUT_ENABLED = (1 << 0), IWM_BT_COEX_MPLUT_BOOST_ENABLED = (1 << 1), IWM_BT_COEX_SYNC2SCO_ENABLED = (1 << 2), IWM_BT_COEX_CORUN_ENABLED = (1 << 3), IWM_BT_COEX_HIGH_BAND_RET = (1 << 4), }; /* BT_COEX_MODULES_ENABLE_E_VER_1 */ /** * struct iwm_bt_coex_cmd - bt coex configuration command * @mode: enum %iwm_bt_coex_mode * @enabled_modules: enum %iwm_bt_coex_enabled_modules * * The structure is used for the BT_COEX command. */ struct iwm_bt_coex_cmd { uint32_t mode; uint32_t enabled_modules; } __packed; /* BT_COEX_CMD_API_S_VER_6 */ /* * Location Aware Regulatory (LAR) API - MCC updates */ /** * struct iwm_mcc_update_cmd_v1 - Request the device to update geographic * regulatory profile according to the given MCC (Mobile Country Code). * The MCC is a two-letter code, ASCII upper case [A-Z], or '00' for world domain. * 'ZZ' MCC will be used to switch to NVM default profile; in this case, the * MCC in the cmd response will be the relevant MCC in the NVM. * @mcc: given mobile country code * @source_id: the source from where we got the MCC, see iwm_mcc_source * @reserved: reserved for alignment */ struct iwm_mcc_update_cmd_v1 { uint16_t mcc; uint8_t source_id; uint8_t reserved; } __packed; /* LAR_UPDATE_MCC_CMD_API_S_VER_1 */ /** * struct iwm_mcc_update_cmd - Request the device to update geographic * regulatory profile according to the given MCC (Mobile Country Code). * The MCC is a two-letter code, ASCII upper case [A-Z], or '00' for world domain. * 'ZZ' MCC will be used to switch to NVM default profile; in this case, the * MCC in the cmd response will be the relevant MCC in the NVM.
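* (Illustrative note, not upstream wording: the two ASCII letters pack into the 16-bit mcc field high byte first, so a request for "US" would carry mcc = htole16(('U' << 8) | 'S') = htole16(0x5553).)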
* @mcc: given mobile country code * @source_id: the source from where we got the MCC, see iwm_mcc_source * @reserved: reserved for alignment * @key: integrity key for MCC API OEM testing * @reserved2: reserved */ struct iwm_mcc_update_cmd { uint16_t mcc; uint8_t source_id; uint8_t reserved; uint32_t key; uint32_t reserved2[5]; } __packed; /* LAR_UPDATE_MCC_CMD_API_S_VER_2 */ /** * iwm_mcc_update_resp_v1 - response to MCC_UPDATE_CMD. * Contains the new channel control profile map, if changed, and the new MCC * (mobile country code). * The new MCC may be different than what was requested in MCC_UPDATE_CMD. * @status: see &enum iwm_mcc_update_status * @mcc: the new applied MCC * @cap: capabilities for all channels which match the MCC * @source_id: the MCC source, see iwm_mcc_source * @n_channels: number of channels in @channels_data (may be 14, 39, 50 or 51 * channels, depending on platform) * @channels: channel control data map, DWORD for each channel. Only the first * 16bits are used. */ struct iwm_mcc_update_resp_v1 { uint32_t status; uint16_t mcc; uint8_t cap; uint8_t source_id; uint32_t n_channels; uint32_t channels[0]; } __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_1 */ /** * iwm_mcc_update_resp - response to MCC_UPDATE_CMD. * Contains the new channel control profile map, if changed, and the new MCC * (mobile country code). * The new MCC may be different than what was requested in MCC_UPDATE_CMD. * @status: see &enum iwm_mcc_update_status * @mcc: the new applied MCC * @cap: capabilities for all channels which match the MCC * @source_id: the MCC source, see iwm_mcc_source * @time: time elapsed from the MCC test start (in 30 seconds TU) * @reserved: reserved. * @n_channels: number of channels in @channels_data (may be 14, 39, 50 or 51 * channels, depending on platform) * @channels: channel control data map, DWORD for each channel. Only the first * 16bits are used. */ struct iwm_mcc_update_resp { uint32_t status; uint16_t mcc; uint8_t cap; uint8_t source_id; uint16_t time; uint16_t reserved; uint32_t n_channels; uint32_t channels[0]; } __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_2 */ /** * struct iwm_mcc_chub_notif - chub notifies of mcc change * (MCC_CHUB_UPDATE_CMD = 0xc9) * The Chub (Communication Hub, CommsHUB) is a HW component that connects to * the cellular and connectivity cores that gets updates of the mcc, and * notifies the ucode directly of any mcc change. * The ucode requests the driver to request the device to update geographic * regulatory profile according to the given MCC (Mobile Country Code). * The MCC is a two-letter code, ASCII upper case [A-Z], or '00' for world domain. * 'ZZ' MCC will be used to switch to NVM default profile; in this case, the * MCC in the cmd response will be the relevant MCC in the NVM.
* @mcc: given mobile country code * @source_id: identity of the change originator, see iwm_mcc_source * @reserved1: reserved for alignment */ struct iwm_mcc_chub_notif { uint16_t mcc; uint8_t source_id; uint8_t reserved1; } __packed; /* LAR_MCC_NOTIFY_S */ enum iwm_mcc_update_status { IWM_MCC_RESP_NEW_CHAN_PROFILE, IWM_MCC_RESP_SAME_CHAN_PROFILE, IWM_MCC_RESP_INVALID, IWM_MCC_RESP_NVM_DISABLED, IWM_MCC_RESP_ILLEGAL, IWM_MCC_RESP_LOW_PRIORITY, IWM_MCC_RESP_TEST_MODE_ACTIVE, IWM_MCC_RESP_TEST_MODE_NOT_ACTIVE, IWM_MCC_RESP_TEST_MODE_DENIAL_OF_SERVICE, }; enum iwm_mcc_source { IWM_MCC_SOURCE_OLD_FW = 0, IWM_MCC_SOURCE_ME = 1, IWM_MCC_SOURCE_BIOS = 2, IWM_MCC_SOURCE_3G_LTE_HOST = 3, IWM_MCC_SOURCE_3G_LTE_DEVICE = 4, IWM_MCC_SOURCE_WIFI = 5, IWM_MCC_SOURCE_RESERVED = 6, IWM_MCC_SOURCE_DEFAULT = 7, IWM_MCC_SOURCE_UNINITIALIZED = 8, IWM_MCC_SOURCE_MCC_API = 9, IWM_MCC_SOURCE_GET_CURRENT = 0x10, IWM_MCC_SOURCE_GETTING_MCC_TEST_MODE = 0x11, }; /** * struct iwm_dts_measurement_notif_v1 - measurements notification * * @temp: the measured temperature * @voltage: the measured voltage */ struct iwm_dts_measurement_notif_v1 { int32_t temp; int32_t voltage; } __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_NTFY_S_VER_1 */ /** * struct iwm_dts_measurement_notif_v2 - measurements notification * * @temp: the measured temperature * @voltage: the measured voltage * @threshold_idx: the trip index that was crossed */ struct iwm_dts_measurement_notif_v2 { int32_t temp; int32_t voltage; int32_t threshold_idx; } __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_NTFY_S_VER_2 */ /* * Some cherry-picked definitions */ #define IWM_FRAME_LIMIT 64 /* * These functions retrieve the command opcode, the group id and the * command version from the composite id field used in the iwm_host_cmd * struct; iwm_cmd_id() performs the inverse packing. */ static inline uint8_t iwm_cmd_opcode(uint32_t cmdid) { return cmdid & 0xff; } static inline uint8_t iwm_cmd_groupid(uint32_t cmdid) { return ((cmdid & 0xff00) >> 8); } static inline uint8_t iwm_cmd_version(uint32_t cmdid) { return ((cmdid & 0xff0000) >> 16); } static inline uint32_t iwm_cmd_id(uint8_t opcode, uint8_t groupid, uint8_t version) { return opcode + (groupid << 8) + (version << 16); } /* make uint16_t wide id out of uint8_t group and opcode */ #define IWM_WIDE_ID(grp, opcode) ((grp << 8) | opcode) /* due to the conversion, this group is special */ #define IWM_ALWAYS_LONG_GROUP 1 struct iwm_cmd_header { uint8_t code; uint8_t flags; uint8_t idx; uint8_t qid; } __packed; struct iwm_cmd_header_wide { uint8_t opcode; uint8_t group_id; uint8_t idx; uint8_t qid; uint16_t length; uint8_t reserved; uint8_t version; } __packed; /** * enum iwm_power_scheme * @IWM_POWER_SCHEME_CAM - Continuously Active Mode * @IWM_POWER_SCHEME_BPS - Balanced Power Save (default) * @IWM_POWER_SCHEME_LP - Low Power */ enum iwm_power_scheme { IWM_POWER_SCHEME_CAM = 1, IWM_POWER_SCHEME_BPS, IWM_POWER_SCHEME_LP }; #define IWM_DEF_CMD_PAYLOAD_SIZE 320 #define IWM_MAX_CMD_PAYLOAD_SIZE ((4096 - 4) - sizeof(struct iwm_cmd_header)) #define IWM_CMD_FAILED_MSK 0x40 /** * struct iwm_device_cmd * * For allocation of the command and tx queues, this establishes the overall * size of the largest command we send to uCode, except for commands that * aren't fully copied and use other TFD space.
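* * Worked example of the id helpers above (arithmetic only): iwm_cmd_id(0x18, IWM_ALWAYS_LONG_GROUP, 0) packs to 0x0118, and iwm_cmd_opcode(0x0118) == 0x18, iwm_cmd_groupid(0x0118) == 1 and iwm_cmd_version(0x0118) == 0 recover the individual fields.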
*/ struct iwm_device_cmd { union { struct { struct iwm_cmd_header hdr; uint8_t data[IWM_DEF_CMD_PAYLOAD_SIZE]; }; struct { struct iwm_cmd_header_wide hdr_wide; uint8_t data_wide[IWM_DEF_CMD_PAYLOAD_SIZE - sizeof(struct iwm_cmd_header_wide) + sizeof(struct iwm_cmd_header)]; }; }; } __packed; struct iwm_rx_packet { /* * The first 4 bytes of the RX frame header contain both the RX frame * size and some flags. * Bit fields: * 31: flag flush RB request * 30: flag ignore TC (terminal counter) request * 29: flag fast IRQ request * 28-14: Reserved * 13-00: RX frame size */ uint32_t len_n_flags; struct iwm_cmd_header hdr; uint8_t data[]; } __packed; #define IWM_FH_RSCSR_FRAME_SIZE_MSK 0x00003fff #define IWM_FH_RSCSR_FRAME_INVALID 0x55550000 #define IWM_FH_RSCSR_FRAME_ALIGN 0x40 static inline uint32_t iwm_rx_packet_len(const struct iwm_rx_packet *pkt) { return le32toh(pkt->len_n_flags) & IWM_FH_RSCSR_FRAME_SIZE_MSK; } static inline uint32_t iwm_rx_packet_payload_len(const struct iwm_rx_packet *pkt) { return iwm_rx_packet_len(pkt) - sizeof(pkt->hdr); } #define IWM_MIN_DBM -100 #define IWM_MAX_DBM -33 /* realistic guess */ #define IWM_READ(sc, reg) \ bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (reg)) #define IWM_WRITE(sc, reg, val) \ bus_space_write_4((sc)->sc_st, (sc)->sc_sh, (reg), (val)) #define IWM_WRITE_1(sc, reg, val) \ bus_space_write_1((sc)->sc_st, (sc)->sc_sh, (reg), (val)) #define IWM_SETBITS(sc, reg, mask) \ IWM_WRITE(sc, reg, IWM_READ(sc, reg) | (mask)) #define IWM_CLRBITS(sc, reg, mask) \ IWM_WRITE(sc, reg, IWM_READ(sc, reg) & ~(mask)) #define IWM_BARRIER_WRITE(sc) \ bus_space_barrier((sc)->sc_st, (sc)->sc_sh, 0, (sc)->sc_sz, \ BUS_SPACE_BARRIER_WRITE) #define IWM_BARRIER_READ_WRITE(sc) \ bus_space_barrier((sc)->sc_st, (sc)->sc_sh, 0, (sc)->sc_sz, \ BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE) #endif /* __IF_IWM_REG_H__ */ Index: stable/11/sys/dev/mvs/mvs_pci.c =================================================================== --- stable/11/sys/dev/mvs/mvs_pci.c (revision 330445) +++ stable/11/sys/dev/mvs/mvs_pci.c (revision 330446) @@ -1,524 +1,524 @@ /*- * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mvs.h" /* local prototypes */ static int mvs_setup_interrupt(device_t dev); static void mvs_intr(void *data); static int mvs_suspend(device_t dev); static int mvs_resume(device_t dev); static int mvs_ctlr_setup(device_t dev); static struct { uint32_t id; uint8_t rev; const char *name; int ports; int quirks; } mvs_ids[] = { {0x504011ab, 0x00, "Marvell 88SX5040", 4, MVS_Q_GENI}, {0x504111ab, 0x00, "Marvell 88SX5041", 4, MVS_Q_GENI}, {0x508011ab, 0x00, "Marvell 88SX5080", 8, MVS_Q_GENI}, {0x508111ab, 0x00, "Marvell 88SX5081", 8, MVS_Q_GENI}, {0x604011ab, 0x00, "Marvell 88SX6040", 4, MVS_Q_GENII}, {0x604111ab, 0x00, "Marvell 88SX6041", 4, MVS_Q_GENII}, {0x604211ab, 0x00, "Marvell 88SX6042", 4, MVS_Q_GENIIE}, {0x608011ab, 0x00, "Marvell 88SX6080", 8, MVS_Q_GENII}, {0x608111ab, 0x00, "Marvell 88SX6081", 8, MVS_Q_GENII}, {0x704211ab, 0x00, "Marvell 88SX7042", 4, MVS_Q_GENIIE|MVS_Q_CT}, {0x02419005, 0x00, "Adaptec 1420SA", 4, MVS_Q_GENII}, {0x02439005, 0x00, "Adaptec 1430SA", 4, MVS_Q_GENIIE|MVS_Q_CT}, {0x00000000, 0x00, NULL, 0, 0} }; static int mvs_probe(device_t dev) { char buf[64]; int i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); for (i = 0; mvs_ids[i].id != 0; i++) { if (mvs_ids[i].id == devid && mvs_ids[i].rev <= revid) { snprintf(buf, sizeof(buf), "%s SATA controller", mvs_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int mvs_attach(device_t dev) { struct mvs_controller *ctlr = device_get_softc(dev); device_t child; int error, unit, i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); ctlr->dev = dev; i = 0; while (mvs_ids[i].id != 0 && (mvs_ids[i].id != devid || mvs_ids[i].rev > revid)) i++; ctlr->channels = mvs_ids[i].ports; ctlr->quirks = mvs_ids[i].quirks; ctlr->ccc = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "ccc", &ctlr->ccc); ctlr->cccc = 8; resource_int_value(device_get_name(dev), device_get_unit(dev), "cccc", &ctlr->cccc); if (ctlr->ccc == 0 || ctlr->cccc == 0) { ctlr->ccc = 0; ctlr->cccc = 0; } if (ctlr->ccc > 100000) ctlr->ccc = 100000; device_printf(dev, "Gen-%s, %d %sGbps ports, Port Multiplier %s%s\n", ((ctlr->quirks & MVS_Q_GENI) ? "I" : ((ctlr->quirks & MVS_Q_GENII) ? "II" : "IIe")), ctlr->channels, ((ctlr->quirks & MVS_Q_GENI) ? "1.5" : "3"), ((ctlr->quirks & MVS_Q_GENI) ? "not supported" : "supported"), ((ctlr->quirks & MVS_Q_GENIIE) ? " with FBS" : "")); mtx_init(&ctlr->mtx, "MVS controller lock", NULL, MTX_DEF); /* We should have a memory BAR(0). */ ctlr->r_rid = PCIR_BAR(0); if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_rid, RF_ACTIVE))) return ENXIO; /* Setup our own memory management for channels. */ ctlr->sc_iomem.rm_start = rman_get_start(ctlr->r_mem); ctlr->sc_iomem.rm_end = rman_get_end(ctlr->r_mem); ctlr->sc_iomem.rm_type = RMAN_ARRAY; ctlr->sc_iomem.rm_descr = "I/O memory addresses"; if ((error = rman_init(&ctlr->sc_iomem)) != 0) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); return (error); } if ((error = rman_manage_region(&ctlr->sc_iomem, rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); rman_fini(&ctlr->sc_iomem); return (error); } pci_enable_busmaster(dev); mvs_ctlr_setup(dev); /* Setup interrupts. 
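* mvs_setup_interrupt() below prefers a single MSI vector when the "msi" hint allows one and falls back to the shared INTx line otherwise.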
*/ if (mvs_setup_interrupt(dev)) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); rman_fini(&ctlr->sc_iomem); return ENXIO; } /* Attach all channels on this controller */ for (unit = 0; unit < ctlr->channels; unit++) { child = device_add_child(dev, "mvsch", -1); if (child == NULL) device_printf(dev, "failed to add channel device\n"); else device_set_ivars(child, (void *)(intptr_t)unit); } bus_generic_attach(dev); return 0; } static int mvs_detach(device_t dev) { struct mvs_controller *ctlr = device_get_softc(dev); /* Detach & delete all children */ device_delete_children(dev); /* Free interrupt. */ if (ctlr->irq.r_irq) { bus_teardown_intr(dev, ctlr->irq.r_irq, ctlr->irq.handle); bus_release_resource(dev, SYS_RES_IRQ, ctlr->irq.r_irq_rid, ctlr->irq.r_irq); } pci_release_msi(dev); /* Free memory. */ rman_fini(&ctlr->sc_iomem); if (ctlr->r_mem) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); mtx_destroy(&ctlr->mtx); return (0); } static int mvs_ctlr_setup(device_t dev) { struct mvs_controller *ctlr = device_get_softc(dev); int i, ccc = ctlr->ccc, cccc = ctlr->cccc, ccim = 0; /* Mask chip interrupts */ ATA_OUTL(ctlr->r_mem, CHIP_MIM, 0x00000000); /* Mask PCI interrupts */ ATA_OUTL(ctlr->r_mem, CHIP_PCIIM, 0x00000000); /* Clear PCI interrupts */ ATA_OUTL(ctlr->r_mem, CHIP_PCIIC, 0x00000000); if (ccc && bootverbose) { device_printf(dev, "CCC with %dus/%dcmd enabled\n", ctlr->ccc, ctlr->cccc); } ccc *= 150; /* Configure chip-global CCC */ if (ctlr->channels > 4 && (ctlr->quirks & MVS_Q_GENI) == 0) { ATA_OUTL(ctlr->r_mem, CHIP_ICT, cccc); ATA_OUTL(ctlr->r_mem, CHIP_ITT, ccc); ATA_OUTL(ctlr->r_mem, CHIP_ICC, ~CHIP_ICC_ALL_PORTS); if (ccc) ccim |= IC_ALL_PORTS_COAL_DONE; ccc = 0; cccc = 0; } for (i = 0; i < ctlr->channels / 4; i++) { /* Configure per-HC CCC */ ATA_OUTL(ctlr->r_mem, HC_BASE(i) + HC_ICT, cccc); ATA_OUTL(ctlr->r_mem, HC_BASE(i) + HC_ITT, ccc); if (ccc) ccim |= (IC_HC0_COAL_DONE << (i * IC_HC_SHIFT)); /* Clear HC interrupts */ ATA_OUTL(ctlr->r_mem, HC_BASE(i) + HC_IC, 0x00000000); } /* Enable chip interrupts */ ctlr->gmim = (ccim ? ccim : (IC_DONE_HC0 | IC_DONE_HC1)) | IC_ERR_HC0 | IC_ERR_HC1; ctlr->mim = ctlr->gmim | ctlr->pmim; ATA_OUTL(ctlr->r_mem, CHIP_MIM, ctlr->mim); /* Enable PCI interrupts */ ATA_OUTL(ctlr->r_mem, CHIP_PCIIM, 0x007fffff); return (0); } static void mvs_edma(device_t dev, device_t child, int mode) { struct mvs_controller *ctlr = device_get_softc(dev); int unit = ((struct mvs_channel *)device_get_softc(child))->unit; int bit = IC_DONE_IRQ << (unit * 2 + unit / 4) ; if (ctlr->ccc == 0) return; /* CCC is not working for non-EDMA mode. Unmask device interrupts. */ mtx_lock(&ctlr->mtx); if (mode == MVS_EDMA_OFF) ctlr->pmim |= bit; else ctlr->pmim &= ~bit; ctlr->mim = ctlr->gmim | ctlr->pmim; if (!ctlr->msia) ATA_OUTL(ctlr->r_mem, CHIP_MIM, ctlr->mim); mtx_unlock(&ctlr->mtx); } static int mvs_suspend(device_t dev) { struct mvs_controller *ctlr = device_get_softc(dev); bus_generic_suspend(dev); /* Mask chip interrupts */ ATA_OUTL(ctlr->r_mem, CHIP_MIM, 0x00000000); /* Mask PCI interrupts */ ATA_OUTL(ctlr->r_mem, CHIP_PCIIM, 0x00000000); return 0; } static int mvs_resume(device_t dev) { mvs_ctlr_setup(dev); return (bus_generic_resume(dev)); } static int mvs_setup_interrupt(device_t dev) { struct mvs_controller *ctlr = device_get_softc(dev); int msi = 0; /* Process hints. 
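* A hint value of zero or less disables MSI; any positive value requests a single vector, capped by what pci_msi_count() reports.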
*/ resource_int_value(device_get_name(dev), device_get_unit(dev), "msi", &msi); if (msi < 0) msi = 0; else if (msi > 0) msi = min(1, pci_msi_count(dev)); /* Allocate MSI if needed/present. */ if (msi && pci_alloc_msi(dev, &msi) != 0) msi = 0; ctlr->msi = msi; /* Allocate all IRQs. */ ctlr->irq.r_irq_rid = msi ? 1 : 0; if (!(ctlr->irq.r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ctlr->irq.r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "unable to map interrupt\n"); return (ENXIO); } if ((bus_setup_intr(dev, ctlr->irq.r_irq, ATA_INTR_FLAGS, NULL, mvs_intr, ctlr, &ctlr->irq.handle))) { device_printf(dev, "unable to setup interrupt\n"); bus_release_resource(dev, SYS_RES_IRQ, ctlr->irq.r_irq_rid, ctlr->irq.r_irq); ctlr->irq.r_irq = NULL; return (ENXIO); } return (0); } /* * Common case interrupt handler. */ static void mvs_intr(void *data) { struct mvs_controller *ctlr = data; struct mvs_intr_arg arg; void (*function)(void *); int p; u_int32_t ic, aic; ic = ATA_INL(ctlr->r_mem, CHIP_MIC); if (ctlr->msi) { - /* We have to to mask MSI during processing. */ + /* We have to mask MSI during processing. */ mtx_lock(&ctlr->mtx); ATA_OUTL(ctlr->r_mem, CHIP_MIM, 0); ctlr->msia = 1; /* Deny MIM update during processing. */ mtx_unlock(&ctlr->mtx); } else if (ic == 0) return; /* Acknowledge all-ports CCC interrupt. */ if (ic & IC_ALL_PORTS_COAL_DONE) ATA_OUTL(ctlr->r_mem, CHIP_ICC, ~CHIP_ICC_ALL_PORTS); for (p = 0; p < ctlr->channels; p++) { if ((p & 3) == 0) { if (p != 0) ic >>= 1; if ((ic & IC_HC0) == 0) { p += 3; ic >>= 8; continue; } /* Acknowledge interrupts of this HC. */ aic = 0; if (ic & (IC_DONE_IRQ << 0)) aic |= HC_IC_DONE(0) | HC_IC_DEV(0); if (ic & (IC_DONE_IRQ << 2)) aic |= HC_IC_DONE(1) | HC_IC_DEV(1); if (ic & (IC_DONE_IRQ << 4)) aic |= HC_IC_DONE(2) | HC_IC_DEV(2); if (ic & (IC_DONE_IRQ << 6)) aic |= HC_IC_DONE(3) | HC_IC_DEV(3); if (ic & IC_HC0_COAL_DONE) aic |= HC_IC_COAL; ATA_OUTL(ctlr->r_mem, HC_BASE(p == 4) + HC_IC, ~aic); } /* Call per-port interrupt handler. */ arg.cause = ic & (IC_ERR_IRQ|IC_DONE_IRQ); if ((arg.cause != 0) && (function = ctlr->interrupt[p].function)) { arg.arg = ctlr->interrupt[p].argument; function(&arg); } ic >>= 2; } if (ctlr->msi) { /* Unmasking MSI triggers next interrupt, if needed. */ mtx_lock(&ctlr->mtx); ctlr->msia = 0; /* Allow MIM update. 
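* Restoring the accumulated mask re-arms the MSI; if more events arrived while it was masked, this write retriggers the interrupt.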
*/ ATA_OUTL(ctlr->r_mem, CHIP_MIM, ctlr->mim); mtx_unlock(&ctlr->mtx); } } static struct resource * mvs_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct mvs_controller *ctlr = device_get_softc(dev); int unit = ((struct mvs_channel *)device_get_softc(child))->unit; struct resource *res = NULL; int offset = HC_BASE(unit >> 2) + PORT_BASE(unit & 0x03); rman_res_t st; switch (type) { case SYS_RES_MEMORY: st = rman_get_start(ctlr->r_mem); res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, st + offset + PORT_SIZE - 1, PORT_SIZE, RF_ACTIVE, child); if (res) { bus_space_handle_t bsh; bus_space_tag_t bst; bsh = rman_get_bushandle(ctlr->r_mem); bst = rman_get_bustag(ctlr->r_mem); bus_space_subregion(bst, bsh, offset, PORT_SIZE, &bsh); rman_set_bushandle(res, bsh); rman_set_bustag(res, bst); } break; case SYS_RES_IRQ: if (*rid == ATA_IRQ_RID) res = ctlr->irq.r_irq; break; } return (res); } static int mvs_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { switch (type) { case SYS_RES_MEMORY: rman_release_resource(r); return (0); case SYS_RES_IRQ: if (rid != ATA_IRQ_RID) return ENOENT; return (0); } return (EINVAL); } static int mvs_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *function, void *argument, void **cookiep) { struct mvs_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); if (filter != NULL) { printf("mvs.c: we cannot use a filter here\n"); return (EINVAL); } ctlr->interrupt[unit].function = function; ctlr->interrupt[unit].argument = argument; return (0); } static int mvs_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { struct mvs_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); ctlr->interrupt[unit].function = NULL; ctlr->interrupt[unit].argument = NULL; return (0); } static int mvs_print_child(device_t dev, device_t child) { int retval; retval = bus_print_child_header(dev, child); retval += printf(" at channel %d", (int)(intptr_t)device_get_ivars(child)); retval += bus_print_child_footer(dev, child); return (retval); } static int mvs_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { snprintf(buf, buflen, "channel=%d", (int)(intptr_t)device_get_ivars(child)); return (0); } static bus_dma_tag_t mvs_get_dma_tag(device_t bus, device_t child) { return (bus_get_dma_tag(bus)); } static device_method_t mvs_methods[] = { DEVMETHOD(device_probe, mvs_probe), DEVMETHOD(device_attach, mvs_attach), DEVMETHOD(device_detach, mvs_detach), DEVMETHOD(device_suspend, mvs_suspend), DEVMETHOD(device_resume, mvs_resume), DEVMETHOD(bus_print_child, mvs_print_child), DEVMETHOD(bus_alloc_resource, mvs_alloc_resource), DEVMETHOD(bus_release_resource, mvs_release_resource), DEVMETHOD(bus_setup_intr, mvs_setup_intr), DEVMETHOD(bus_teardown_intr,mvs_teardown_intr), DEVMETHOD(bus_child_location_str, mvs_child_location_str), DEVMETHOD(bus_get_dma_tag, mvs_get_dma_tag), DEVMETHOD(mvs_edma, mvs_edma), { 0, 0 } }; static driver_t mvs_driver = { "mvs", mvs_methods, sizeof(struct mvs_controller) }; DRIVER_MODULE(mvs, pci, mvs_driver, mvs_devclass, 0, 0); MODULE_VERSION(mvs, 1); MODULE_DEPEND(mvs, cam, 1, 1, 1); Index: stable/11/sys/dev/mwl/if_mwl.c =================================================================== --- stable/11/sys/dev/mwl/if_mwl.c (revision 330445) +++ 
stable/11/sys/dev/mwl/if_mwl.c (revision 330446) @@ -1,4834 +1,4834 @@ /*- * Copyright (c) 2007-2009 Sam Leffler, Errno Consulting * Copyright (c) 2007-2008 Marvell Semiconductor, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Marvell 88W8363 Wireless LAN controller. */ #include "opt_inet.h" #include "opt_mwl.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include /* idiomatic shorthands: MS = mask+shift, SM = shift+mask */ #define MS(v,x) (((v) & x) >> x##_S) #define SM(v,x) (((v) << x##_S) & x) static struct ieee80211vap *mwl_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_vap_delete(struct ieee80211vap *); static int mwl_setupdma(struct mwl_softc *); static int mwl_hal_reset(struct mwl_softc *sc); static int mwl_init(struct mwl_softc *); static void mwl_parent(struct ieee80211com *); static int mwl_reset(struct ieee80211vap *, u_long); static void mwl_stop(struct mwl_softc *); static void mwl_start(struct mwl_softc *); static int mwl_transmit(struct ieee80211com *, struct mbuf *); static int mwl_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int mwl_media_change(struct ifnet *); static void mwl_watchdog(void *); static int mwl_ioctl(struct ieee80211com *, u_long, void *); static void mwl_radar_proc(void *, int); static void mwl_chanswitch_proc(void *, int); static void mwl_bawatchdog_proc(void *, int); static int mwl_key_alloc(struct ieee80211vap *, struct ieee80211_key *, ieee80211_keyix *, ieee80211_keyix *); static int mwl_key_delete(struct ieee80211vap *, const struct ieee80211_key *); static int mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *); static int _mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *, 
const uint8_t mac[IEEE80211_ADDR_LEN]); static int mwl_mode_init(struct mwl_softc *); static void mwl_update_mcast(struct ieee80211com *); static void mwl_update_promisc(struct ieee80211com *); static void mwl_updateslot(struct ieee80211com *); static int mwl_beacon_setup(struct ieee80211vap *); static void mwl_beacon_update(struct ieee80211vap *, int); #ifdef MWL_HOST_PS_SUPPORT static void mwl_update_ps(struct ieee80211vap *, int); static int mwl_set_tim(struct ieee80211_node *, int); #endif static int mwl_dma_setup(struct mwl_softc *); static void mwl_dma_cleanup(struct mwl_softc *); static struct ieee80211_node *mwl_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_node_cleanup(struct ieee80211_node *); static void mwl_node_drain(struct ieee80211_node *); static void mwl_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void mwl_node_getmimoinfo(const struct ieee80211_node *, struct ieee80211_mimo_info *); static int mwl_rxbuf_init(struct mwl_softc *, struct mwl_rxbuf *); static void mwl_rx_proc(void *, int); static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *, int); static int mwl_tx_setup(struct mwl_softc *, int, int); static int mwl_wme_update(struct ieee80211com *); static void mwl_tx_cleanupq(struct mwl_softc *, struct mwl_txq *); static void mwl_tx_cleanup(struct mwl_softc *); static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *); static int mwl_tx_start(struct mwl_softc *, struct ieee80211_node *, struct mwl_txbuf *, struct mbuf *); static void mwl_tx_proc(void *, int); static int mwl_chan_set(struct mwl_softc *, struct ieee80211_channel *); static void mwl_draintxq(struct mwl_softc *); static void mwl_cleartxq(struct mwl_softc *, struct ieee80211vap *); static int mwl_recv_action(struct ieee80211_node *, const struct ieee80211_frame *, const uint8_t *, const uint8_t *); static int mwl_addba_request(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int dialogtoken, int baparamset, int batimeout); static int mwl_addba_response(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int status, int baparamset, int batimeout); static void mwl_addba_stop(struct ieee80211_node *, struct ieee80211_tx_ampdu *); static int mwl_startrecv(struct mwl_softc *); static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *, struct ieee80211_channel *); static int mwl_setapmode(struct ieee80211vap *, struct ieee80211_channel*); static void mwl_scan_start(struct ieee80211com *); static void mwl_scan_end(struct ieee80211com *); static void mwl_set_channel(struct ieee80211com *); static int mwl_peerstadb(struct ieee80211_node *, int aid, int staid, MWL_HAL_PEERINFO *pi); static int mwl_localstadb(struct ieee80211vap *); static int mwl_newstate(struct ieee80211vap *, enum ieee80211_state, int); static int allocstaid(struct mwl_softc *sc, int aid); static void delstaid(struct mwl_softc *sc, int staid); static void mwl_newassoc(struct ieee80211_node *, int); static void mwl_agestations(void *); static int mwl_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void mwl_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int mwl_getchannels(struct mwl_softc *); static void mwl_sysctlattach(struct mwl_softc *); static void mwl_announce(struct mwl_softc *); SYSCTL_NODE(_hw, OID_AUTO, mwl, CTLFLAG_RD, 0, "Marvell driver parameters"); static int mwl_rxdesc = MWL_RXDESC; /* # rx desc's to allocate */ SYSCTL_INT(_hw_mwl, 
OID_AUTO, rxdesc, CTLFLAG_RW, &mwl_rxdesc, 0, "rx descriptors allocated"); static int mwl_rxbuf = MWL_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &mwl_rxbuf, 0, "rx buffers allocated"); static int mwl_txbuf = MWL_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, txbuf, CTLFLAG_RWTUN, &mwl_txbuf, 0, "tx buffers allocated"); static int mwl_txcoalesce = 8; /* # tx packets to q before poking f/w*/ SYSCTL_INT(_hw_mwl, OID_AUTO, txcoalesce, CTLFLAG_RWTUN, &mwl_txcoalesce, 0, "tx buffers to send at once"); static int mwl_rxquota = MWL_RXBUF; /* # max buffers to process */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxquota, CTLFLAG_RWTUN, &mwl_rxquota, 0, "max rx buffers to process per interrupt"); static int mwl_rxdmalow = 3; /* # min buffers for wakeup */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdmalow, CTLFLAG_RWTUN, &mwl_rxdmalow, 0, "min free rx buffers before restarting traffic"); #ifdef MWL_DEBUG static int mwl_debug = 0; SYSCTL_INT(_hw_mwl, OID_AUTO, debug, CTLFLAG_RWTUN, &mwl_debug, 0, "control debugging printfs"); enum { MWL_DEBUG_XMIT = 0x00000001, /* basic xmit operation */ MWL_DEBUG_XMIT_DESC = 0x00000002, /* xmit descriptors */ MWL_DEBUG_RECV = 0x00000004, /* basic recv operation */ MWL_DEBUG_RECV_DESC = 0x00000008, /* recv descriptors */ MWL_DEBUG_RESET = 0x00000010, /* reset processing */ MWL_DEBUG_BEACON = 0x00000020, /* beacon handling */ MWL_DEBUG_INTR = 0x00000040, /* ISR */ MWL_DEBUG_TX_PROC = 0x00000080, /* tx ISR proc */ MWL_DEBUG_RX_PROC = 0x00000100, /* rx ISR proc */ MWL_DEBUG_KEYCACHE = 0x00000200, /* key cache management */ MWL_DEBUG_STATE = 0x00000400, /* 802.11 state transitions */ MWL_DEBUG_NODE = 0x00000800, /* node management */ MWL_DEBUG_RECV_ALL = 0x00001000, /* trace all frames (beacons) */ MWL_DEBUG_TSO = 0x00002000, /* TSO processing */ MWL_DEBUG_AMPDU = 0x00004000, /* BA stream handling */ MWL_DEBUG_ANY = 0xffffffff }; #define IS_BEACON(wh) \ ((wh->i_fc[0] & (IEEE80211_FC0_TYPE_MASK|IEEE80211_FC0_SUBTYPE_MASK)) == \ (IEEE80211_FC0_TYPE_MGT|IEEE80211_FC0_SUBTYPE_BEACON)) #define IFF_DUMPPKTS_RECV(sc, wh) \ ((sc->sc_debug & MWL_DEBUG_RECV) && \ ((sc->sc_debug & MWL_DEBUG_RECV_ALL) || !IS_BEACON(wh))) #define IFF_DUMPPKTS_XMIT(sc) \ (sc->sc_debug & MWL_DEBUG_XMIT) #define DPRINTF(sc, m, fmt, ...) do { \ if (sc->sc_debug & (m)) \ printf(fmt, __VA_ARGS__); \ } while (0) #define KEYPRINTF(sc, hk, mac) do { \ if (sc->sc_debug & MWL_DEBUG_KEYCACHE) \ mwl_keyprint(sc, __func__, hk, mac); \ } while (0) static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix); static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix); #else #define IFF_DUMPPKTS_RECV(sc, wh) 0 #define IFF_DUMPPKTS_XMIT(sc) 0 #define DPRINTF(sc, m, fmt, ...) do { (void )sc; } while (0) #define KEYPRINTF(sc, k, mac) do { (void )sc; } while (0) #endif static MALLOC_DEFINE(M_MWLDEV, "mwldev", "mwl driver dma buffers"); /* * Each packet has fixed front matter: a 2-byte length * of the payload, followed by a 4-address 802.11 header * (regardless of the actual header and always w/o any * QoS header). The payload then follows. */ struct mwltxrec { uint16_t fwlen; struct ieee80211_frame_addr4 wh; } __packed; /* * Read/Write shorthands for accesses to BAR 0. Note * that all BAR 1 operations are done in the "hal" and * there should be no reference to them here. 
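* RD4()/WR4() wrap bus_space_read_4()/bus_space_write_4() on the BAR 0 handle; RD4() is compiled only under MWL_DEBUG since the driver proper only writes BAR 0 directly.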
*/ #ifdef MWL_DEBUG static __inline uint32_t RD4(struct mwl_softc *sc, bus_size_t off) { return bus_space_read_4(sc->sc_io0t, sc->sc_io0h, off); } #endif static __inline void WR4(struct mwl_softc *sc, bus_size_t off, uint32_t val) { bus_space_write_4(sc->sc_io0t, sc->sc_io0h, off, val); } int mwl_attach(uint16_t devid, struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh; int error = 0; DPRINTF(sc, MWL_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); /* * Setup the RX free list lock early, so it can be consistently * removed. */ MWL_RXFREE_INIT(sc); mh = mwl_hal_attach(sc->sc_dev, devid, sc->sc_io1h, sc->sc_io1t, sc->sc_dmat); if (mh == NULL) { device_printf(sc->sc_dev, "unable to attach HAL\n"); error = EIO; goto bad; } sc->sc_mh = mh; /* * Load firmware so we can get setup. We arbitrarily * pick station firmware; we'll re-load firmware as * needed so setting up the wrong mode isn't a big deal. */ if (mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to setup builtin firmware\n"); error = EIO; goto bad1; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); error = EIO; goto bad1; } error = mwl_getchannels(sc); if (error != 0) goto bad1; sc->sc_txantenna = 0; /* h/w default */ sc->sc_rxantenna = 0; /* h/w default */ sc->sc_invalid = 0; /* ready to go, enable int handling */ sc->sc_ageinterval = MWL_AGEINTERVAL; /* * Allocate tx+rx descriptors and populate the lists. * We immediately push the information to the firmware * as otherwise it gets upset. */ error = mwl_dma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to setup descriptors: %d\n", error); goto bad1; } error = mwl_setupdma(sc); /* push to firmware */ if (error != 0) /* NB: mwl_setupdma prints msg */ goto bad1; callout_init(&sc->sc_timer, 1); callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); mbufq_init(&sc->sc_snd, ifqmaxlen); sc->sc_tq = taskqueue_create("mwl_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, mwl_rx_proc, sc); TASK_INIT(&sc->sc_radartask, 0, mwl_radar_proc, sc); TASK_INIT(&sc->sc_chanswitchtask, 0, mwl_chanswitch_proc, sc); TASK_INIT(&sc->sc_bawatchdogtask, 0, mwl_bawatchdog_proc, sc); /* NB: insure BK queue is the lowest priority h/w queue */ if (!mwl_tx_setup(sc, WME_AC_BK, MWL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!mwl_tx_setup(sc, WME_AC_BE, MWL_WME_AC_BE) || !mwl_tx_setup(sc, WME_AC_VI, MWL_WME_AC_VI) || !mwl_tx_setup(sc, WME_AC_VO, MWL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. * We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. 
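* Until then, every access category below simply aliases the BK queue set up earlier.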
*/ if (sc->sc_ac2q[WME_AC_VI] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } TASK_INIT(&sc->sc_txtask, 0, mwl_tx_proc, sc); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ #if 0 | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ #endif | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_WDS /* WDS supported */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WME /* WME/WMM supported */ | IEEE80211_C_BURST /* xmit bursting supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_TXFRAG /* handle tx frags */ | IEEE80211_C_TXPMGT /* capable of txpow mgt */ | IEEE80211_C_DFS /* DFS supported */ ; ic->ic_htcaps = IEEE80211_HTCAP_SMPS_ENA /* SM PS mode enabled */ | IEEE80211_HTCAP_CHWIDTH40 /* 40MHz channel width */ | IEEE80211_HTCAP_SHORTGI20 /* short GI in 20MHz */ | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */ | IEEE80211_HTCAP_RXSTBC_2STREAM/* 1-2 spatial streams */ #if MWL_AGGR_SIZE == 7935 | IEEE80211_HTCAP_MAXAMSDU_7935 /* max A-MSDU length */ #else | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ #endif #if 0 | IEEE80211_HTCAP_PSMP /* PSMP supported */ | IEEE80211_HTCAP_40INTOLERANT /* 40MHz intolerant */ #endif /* s/w capabilities */ | IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* tx A-MPDU */ | IEEE80211_HTC_AMSDU /* tx A-MSDU */ | IEEE80211_HTC_SMPS /* SMPS available */ ; /* * Mark h/w crypto support. * XXX no way to query h/w support. */ ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_AES_CCM | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_TKIPMIC ; /* * Transmit requires space in the packet for a special * format transmit record and optional padding between * this record and the payload. Ask the net80211 layer * to arrange this when encapsulating packets so we can * add it efficiently. */ ic->ic_headroom = sizeof(struct mwltxrec) - sizeof(struct ieee80211_frame); IEEE80211_ADDR_COPY(ic->ic_macaddr, sc->sc_hwspecs.macAddr); /* call MI attach routine. 
*/ ieee80211_ifattach(ic); ic->ic_setregdomain = mwl_setregdomain; ic->ic_getradiocaps = mwl_getradiocaps; /* override default methods */ ic->ic_raw_xmit = mwl_raw_xmit; ic->ic_newassoc = mwl_newassoc; ic->ic_updateslot = mwl_updateslot; ic->ic_update_mcast = mwl_update_mcast; ic->ic_update_promisc = mwl_update_promisc; ic->ic_wme.wme_update = mwl_wme_update; ic->ic_transmit = mwl_transmit; ic->ic_ioctl = mwl_ioctl; ic->ic_parent = mwl_parent; ic->ic_node_alloc = mwl_node_alloc; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = mwl_node_cleanup; sc->sc_node_drain = ic->ic_node_drain; ic->ic_node_drain = mwl_node_drain; ic->ic_node_getsignal = mwl_node_getsignal; ic->ic_node_getmimoinfo = mwl_node_getmimoinfo; ic->ic_scan_start = mwl_scan_start; ic->ic_scan_end = mwl_scan_end; ic->ic_set_channel = mwl_set_channel; sc->sc_recv_action = ic->ic_recv_action; ic->ic_recv_action = mwl_recv_action; sc->sc_addba_request = ic->ic_addba_request; ic->ic_addba_request = mwl_addba_request; sc->sc_addba_response = ic->ic_addba_response; ic->ic_addba_response = mwl_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; ic->ic_addba_stop = mwl_addba_stop; ic->ic_vap_create = mwl_vap_create; ic->ic_vap_delete = mwl_vap_delete; ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), MWL_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), MWL_RX_RADIOTAP_PRESENT); /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ mwl_sysctlattach(sc); if (bootverbose) ieee80211_announce(ic); mwl_announce(sc); return 0; bad2: mwl_dma_cleanup(sc); bad1: mwl_hal_detach(mh); bad: MWL_RXFREE_DESTROY(sc); sc->sc_invalid = 1; return error; } int mwl_detach(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); /* * NB: the order of these is important: * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ ieee80211_ifdetach(ic); callout_drain(&sc->sc_watchdog); mwl_dma_cleanup(sc); MWL_RXFREE_DESTROY(sc); mwl_tx_cleanup(sc); mwl_hal_detach(sc->sc_mh); mbufq_drain(&sc->sc_snd); return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. 
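 * A minimal standalone sketch of this derivation follows.
 */

/*
 * Illustrative sketch only, not part of the driver: derive the MAC
 * address for the vap holding bssid slot i from the base (EEPROM)
 * address, per the U/L-bit-plus-index scheme described above.  The
 * helper name is an assumption made for illustration.
 */
static __inline void
example_derive_vap_mac(uint8_t mac[IEEE80211_ADDR_LEN], int i)
{
	if (i != 0)				/* slot 0 keeps the EEPROM address */
		mac[0] |= (i << 2) | 0x2;	/* U/L bit plus 6-bit index */
}

/*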
*/ static void assign_address(struct mwl_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone) { int i; if (clone && mwl_hal_ismbsscapable(sc->sc_mh)) { /* NB: we only do this if h/w supports multiple bssid */ for (i = 0; i < 32; i++) if ((sc->sc_bssidmask & (1<<i)) == 0) break; if (i != 0) mac[0] |= (i << 2)|0x2; } else i = 0; sc->sc_bssidmask |= 1<<i; if (i == 0) sc->sc_nbssid0++; } static void reclaim_address(struct mwl_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN]) { int i = mac[0] >> 2; if (i != 0 || --sc->sc_nbssid0 == 0) sc->sc_bssidmask &= ~(1<<i); } static struct ieee80211vap * mwl_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac0[IEEE80211_ADDR_LEN]) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; struct ieee80211vap *vap, *apvap; struct mwl_hal_vap *hvap; struct mwl_vap *mvp; uint8_t mac[IEEE80211_ADDR_LEN]; IEEE80211_ADDR_COPY(mac, mac0); switch (opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: if ((flags & IEEE80211_CLONE_MACADDR) == 0) assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); hvap = mwl_hal_newvap(mh, MWL_HAL_AP, mac); if (hvap == NULL) { if ((flags & IEEE80211_CLONE_MACADDR) == 0) reclaim_address(sc, mac); return NULL; } break; case IEEE80211_M_STA: if ((flags & IEEE80211_CLONE_MACADDR) == 0) assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); hvap = mwl_hal_newvap(mh, MWL_HAL_STA, mac); if (hvap == NULL) { if ((flags & IEEE80211_CLONE_MACADDR) == 0) reclaim_address(sc, mac); return NULL; } /* no h/w beacon miss support; always use s/w */ flags |= IEEE80211_CLONE_NOBEACONS; break; case IEEE80211_M_WDS: hvap = NULL; /* NB: we use associated AP vap */ if (sc->sc_napvaps == 0) return NULL; /* no existing AP vap */ break; case IEEE80211_M_MONITOR: hvap = NULL; break; case IEEE80211_M_IBSS: case IEEE80211_M_AHDEMO: default: return NULL; } mvp = malloc(sizeof(struct mwl_vap), M_80211_VAP, M_WAITOK | M_ZERO); mvp->mv_hvap = hvap; if (opmode == IEEE80211_M_WDS) { /* * WDS vaps must have an associated AP vap; find one. * XXX not right. */ TAILQ_FOREACH(apvap, &ic->ic_vaps, iv_next) if (apvap->iv_opmode == IEEE80211_M_HOSTAP) { mvp->mv_ap_hvap = MWL_VAP(apvap)->mv_hvap; break; } KASSERT(mvp->mv_ap_hvap != NULL, ("no ap vap")); } vap = &mvp->mv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ mvp->mv_newstate = vap->iv_newstate; vap->iv_newstate = mwl_newstate; vap->iv_max_keyix = 0; /* XXX */ vap->iv_key_alloc = mwl_key_alloc; vap->iv_key_delete = mwl_key_delete; vap->iv_key_set = mwl_key_set; #ifdef MWL_HOST_PS_SUPPORT if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) { vap->iv_update_ps = mwl_update_ps; mvp->mv_set_tim = vap->iv_set_tim; vap->iv_set_tim = mwl_set_tim; } #endif vap->iv_reset = mwl_reset; vap->iv_update_beacon = mwl_beacon_update; /* override max aid so sta's cannot assoc when we're out of sta id's */ vap->iv_max_aid = MWL_MAXSTAID; /* override default A-MPDU rx parameters */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K; vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_4; /* complete setup */ ieee80211_vap_attach(vap, mwl_media_change, ieee80211_media_status, mac); switch (vap->iv_opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: case IEEE80211_M_STA: /* * Setup sta db entry for local address. */ mwl_localstadb(vap); if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) sc->sc_napvaps++; else sc->sc_nstavaps++; break; case IEEE80211_M_WDS: sc->sc_nwdsvaps++; break; default: break; } /* * Setup overall operating mode.
*/ if (sc->sc_napvaps) ic->ic_opmode = IEEE80211_M_HOSTAP; else if (sc->sc_nstavaps) ic->ic_opmode = IEEE80211_M_STA; else ic->ic_opmode = opmode; return vap; } static void mwl_vap_delete(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; struct mwl_hal_vap *hvap = mvp->mv_hvap; enum ieee80211_opmode opmode = vap->iv_opmode; /* XXX disallow ap vap delete if WDS still present */ if (sc->sc_running) { /* quiesce h/w while we remove the vap */ mwl_hal_intrset(mh, 0); /* disable interrupts */ } ieee80211_vap_detach(vap); switch (opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: case IEEE80211_M_STA: KASSERT(hvap != NULL, ("no hal vap handle")); (void) mwl_hal_delstation(hvap, vap->iv_myaddr); mwl_hal_delvap(hvap); if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) sc->sc_napvaps--; else sc->sc_nstavaps--; /* XXX don't do it for IEEE80211_CLONE_MACADDR */ reclaim_address(sc, vap->iv_myaddr); break; case IEEE80211_M_WDS: sc->sc_nwdsvaps--; break; default: break; } mwl_cleartxq(sc, vap); free(mvp, M_80211_VAP); if (sc->sc_running) mwl_hal_intrset(mh, sc->sc_imask); } void mwl_suspend(struct mwl_softc *sc) { MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); } void mwl_resume(struct mwl_softc *sc) { int error = EDOOFUS; MWL_LOCK(sc); if (sc->sc_ic.ic_nrunning > 0) error = mwl_init(sc); MWL_UNLOCK(sc); if (error == 0) ieee80211_start_all(&sc->sc_ic); /* start all vap's */ } void mwl_shutdown(void *arg) { struct mwl_softc *sc = arg; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); } /* * Interrupt handler. Most of the actual processing is deferred. */ void mwl_intr(void *arg) { struct mwl_softc *sc = arg; struct mwl_hal *mh = sc->sc_mh; uint32_t status; if (sc->sc_invalid) { /* * The hardware is not ready/present, don't touch anything. * Note this can happen early on if the IRQ is shared. */ DPRINTF(sc, MWL_DEBUG_ANY, "%s: invalid; ignored\n", __func__); return; } /* * Figure out the reason(s) for the interrupt. */ mwl_hal_getisr(mh, &status); /* NB: clears ISR too */ if (status == 0) /* must be a shared irq */ return; DPRINTF(sc, MWL_DEBUG_INTR, "%s: status 0x%x imask 0x%x\n", __func__, status, sc->sc_imask); if (status & MACREG_A2HRIC_BIT_RX_RDY) taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); if (status & MACREG_A2HRIC_BIT_TX_DONE) taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask); if (status & MACREG_A2HRIC_BIT_BA_WATCHDOG) taskqueue_enqueue(sc->sc_tq, &sc->sc_bawatchdogtask); if (status & MACREG_A2HRIC_BIT_OPC_DONE) mwl_hal_cmddone(mh); if (status & MACREG_A2HRIC_BIT_MAC_EVENT) { ; } if (status & MACREG_A2HRIC_BIT_ICV_ERROR) { /* TKIP ICV error */ sc->sc_stats.mst_rx_badtkipicv++; } if (status & MACREG_A2HRIC_BIT_QUEUE_EMPTY) { /* 11n aggregation queue is empty, re-fill */ ; } if (status & MACREG_A2HRIC_BIT_QUEUE_FULL) { ; } if (status & MACREG_A2HRIC_BIT_RADAR_DETECT) { /* radar detected, process event */ taskqueue_enqueue(sc->sc_tq, &sc->sc_radartask); } if (status & MACREG_A2HRIC_BIT_CHAN_SWITCH) { /* DFS channel switch */ taskqueue_enqueue(sc->sc_tq, &sc->sc_chanswitchtask); } } static void mwl_radar_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, MWL_DEBUG_ANY, "%s: radar detected, pending %u\n", __func__, pending); sc->sc_stats.mst_radardetect++; /* XXX stop h/w BA streams? 
*/ IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, ic->ic_curchan); IEEE80211_UNLOCK(ic); } static void mwl_chanswitch_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, MWL_DEBUG_ANY, "%s: channel switch notice, pending %u\n", __func__, pending); IEEE80211_LOCK(ic); sc->sc_csapending = 0; ieee80211_csa_completeswitch(ic); IEEE80211_UNLOCK(ic); } static void mwl_bawatchdog(const MWL_HAL_BASTREAM *sp) { struct ieee80211_node *ni = sp->data[0]; /* send DELBA and drop the stream */ ieee80211_ampdu_stop(ni, sp->data[1], IEEE80211_REASON_UNSPECIFIED); } static void mwl_bawatchdog_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct mwl_hal *mh = sc->sc_mh; const MWL_HAL_BASTREAM *sp; uint8_t bitmap, n; sc->sc_stats.mst_bawatchdog++; if (mwl_hal_getwatchdogbitmap(mh, &bitmap) != 0) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: could not get bitmap\n", __func__); sc->sc_stats.mst_bawatchdog_failed++; return; } DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bitmap 0x%x\n", __func__, bitmap); if (bitmap == 0xff) { n = 0; /* disable all ba streams */ for (bitmap = 0; bitmap < 8; bitmap++) { sp = mwl_hal_bastream_lookup(mh, bitmap); if (sp != NULL) { mwl_bawatchdog(sp); n++; } } if (n == 0) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA streams found\n", __func__); sc->sc_stats.mst_bawatchdog_empty++; } } else if (bitmap != 0xaa) { /* disable a single ba stream */ sp = mwl_hal_bastream_lookup(mh, bitmap); if (sp != NULL) { mwl_bawatchdog(sp); } else { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA stream %d\n", __func__, bitmap); sc->sc_stats.mst_bawatchdog_notfound++; } } } /* * Convert net80211 channel to a HAL channel. */ static void mwl_mapchan(MWL_HAL_CHANNEL *hc, const struct ieee80211_channel *chan) { hc->channel = chan->ic_ieee; *(uint32_t *)&hc->channelFlags = 0; if (IEEE80211_IS_CHAN_2GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_2DOT4GHZ; else if (IEEE80211_IS_CHAN_5GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_5GHZ; if (IEEE80211_IS_CHAN_HT40(chan)) { hc->channelFlags.ChnlWidth = MWL_CH_40_MHz_WIDTH; if (IEEE80211_IS_CHAN_HT40U(chan)) hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_ABOVE_CTRL_CH; else hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_BELOW_CTRL_CH; } else hc->channelFlags.ChnlWidth = MWL_CH_20_MHz_WIDTH; /* XXX 10MHz channels */ } /* * Inform firmware of our tx/rx dma setup. The BAR 0 * writes below are for compatibility with older firmware. * For current firmware we send this information with a * cmd block via mwl_hal_sethwdma. */ static int mwl_setupdma(struct mwl_softc *sc) { int error, i; sc->sc_hwdma.rxDescRead = sc->sc_rxdma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.rxDescRead, sc->sc_hwdma.rxDescRead); WR4(sc, sc->sc_hwspecs.rxDescWrite, sc->sc_hwdma.rxDescRead); for (i = 0; i < MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; i++) { struct mwl_txq *txq = &sc->sc_txq[i]; sc->sc_hwdma.wcbBase[i] = txq->dma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.wcbBase[i], sc->sc_hwdma.wcbBase[i]); } sc->sc_hwdma.maxNumTxWcb = mwl_txbuf; sc->sc_hwdma.maxNumWCB = MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; error = mwl_hal_sethwdma(sc->sc_mh, &sc->sc_hwdma); if (error != 0) { device_printf(sc->sc_dev, "unable to setup tx/rx dma; hal status %u\n", error); /* XXX */ } return error; } /* * Inform firmware of tx rate parameters. * Called after a channel change. 
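 * A short sketch of the rate-code convention used below follows.
 */

/*
 * Illustrative sketch, not driver code: net80211 rate codes are in
 * 0.5 Mb/s units with the high bit flagging basic rates, and
 * IEEE80211_RATE_VAL masks that bit off.  mwl_setcurchanrates() below
 * takes the first rate of the channel's rateset this way; e.g. the
 * code 0x82 yields 2, i.e. 1 Mb/s.  The helper name is an assumption.
 */
static __inline uint8_t
example_mgt_rate(const struct ieee80211_rateset *rs)
{
	return (rs->rs_rates[0] & IEEE80211_RATE_VAL);
}

/*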
*/ static int mwl_setcurchanrates(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; const struct ieee80211_rateset *rs; MWL_HAL_TXRATE rates; memset(&rates, 0, sizeof(rates)); rs = ieee80211_get_suprates(ic, ic->ic_curchan); /* rate used to send management frames */ rates.MgtRate = rs->rs_rates[0] & IEEE80211_RATE_VAL; /* rate used to send multicast frames */ rates.McastRate = rates.MgtRate; return mwl_hal_settxrate_auto(sc->sc_mh, &rates); } /* * Inform firmware of tx rate parameters. Called whenever * user-settable params change and after a channel change. */ static int mwl_setrates(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct ieee80211_node *ni = vap->iv_bss; const struct ieee80211_txparam *tp = ni->ni_txparms; MWL_HAL_TXRATE rates; KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state)); /* * Update the h/w rate map. * NB: 0x80 for MCS is passed through unchanged */ memset(&rates, 0, sizeof(rates)); /* rate used to send management frames */ rates.MgtRate = tp->mgmtrate; /* rate used to send multicast frames */ rates.McastRate = tp->mcastrate; /* while here calculate EAPOL fixed rate cookie */ mvp->mv_eapolformat = htole16(mwl_calcformat(rates.MgtRate, ni)); return mwl_hal_settxrate(mvp->mv_hvap, tp->ucastrate != IEEE80211_FIXED_RATE_NONE ? RATE_FIXED : RATE_AUTO, &rates); } /* * Setup a fixed xmit rate cookie for EAPOL frames. */ static void mwl_seteapolformat(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct ieee80211_node *ni = vap->iv_bss; enum ieee80211_phymode mode; uint8_t rate; KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state)); mode = ieee80211_chan2mode(ni->ni_chan); /* * Use legacy rates when operating a mixed HT+non-HT bss. * NB: this may violate POLA for sta and wds vap's. */ if (mode == IEEE80211_MODE_11NA && (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0) rate = vap->iv_txparms[IEEE80211_MODE_11A].mgmtrate; else if (mode == IEEE80211_MODE_11NG && (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0) rate = vap->iv_txparms[IEEE80211_MODE_11G].mgmtrate; else rate = vap->iv_txparms[mode].mgmtrate; mvp->mv_eapolformat = htole16(mwl_calcformat(rate, ni)); } /* * Map SKU+country code to region code for radar bin'ing. */ static int mwl_map2regioncode(const struct ieee80211_regdomain *rd) { switch (rd->regdomain) { case SKU_FCC: case SKU_FCC3: return DOMAIN_CODE_FCC; case SKU_CA: return DOMAIN_CODE_IC; case SKU_ETSI: case SKU_ETSI2: case SKU_ETSI3: if (rd->country == CTRY_SPAIN) return DOMAIN_CODE_SPAIN; if (rd->country == CTRY_FRANCE || rd->country == CTRY_FRANCE2) return DOMAIN_CODE_FRANCE; /* XXX force 1.3.1 radar type */ return DOMAIN_CODE_ETSI_131; case SKU_JAPAN: return DOMAIN_CODE_MKK; case SKU_ROW: return DOMAIN_CODE_DGT; /* Taiwan */ case SKU_APAC: case SKU_APAC2: case SKU_APAC3: return DOMAIN_CODE_AUS; /* Australia */ } /* XXX KOREA? */ return DOMAIN_CODE_FCC; /* XXX? 
*/ } static int mwl_hal_reset(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh = sc->sc_mh; mwl_hal_setantenna(mh, WL_ANTENNATYPE_RX, sc->sc_rxantenna); mwl_hal_setantenna(mh, WL_ANTENNATYPE_TX, sc->sc_txantenna); mwl_hal_setradio(mh, 1, WL_AUTO_PREAMBLE); mwl_hal_setwmm(sc->sc_mh, (ic->ic_flags & IEEE80211_F_WME) != 0); mwl_chan_set(sc, ic->ic_curchan); /* NB: RF/RA performance tuned for indoor mode */ mwl_hal_setrateadaptmode(mh, 0); mwl_hal_setoptimizationlevel(mh, (ic->ic_flags & IEEE80211_F_BURST) != 0); mwl_hal_setregioncode(mh, mwl_map2regioncode(&ic->ic_regdomain)); mwl_hal_setaggampduratemode(mh, 1, 80); /* XXX */ mwl_hal_setcfend(mh, 0); /* XXX */ return 1; } static int mwl_init(struct mwl_softc *sc) { struct mwl_hal *mh = sc->sc_mh; int error = 0; MWL_LOCK_ASSERT(sc); /* * Stop anything previously setup. This is safe * whether this is the first time through or not. */ mwl_stop(sc); /* * Push vap-independent state to the firmware. */ if (!mwl_hal_reset(sc)) { device_printf(sc->sc_dev, "unable to reset hardware\n"); return EIO; } /* * Setup recv (once); transmit is already good to go. */ error = mwl_startrecv(sc); if (error != 0) { device_printf(sc->sc_dev, "unable to start recv logic\n"); return error; } /* * Enable interrupts. */ sc->sc_imask = MACREG_A2HRIC_BIT_RX_RDY | MACREG_A2HRIC_BIT_TX_DONE | MACREG_A2HRIC_BIT_OPC_DONE #if 0 | MACREG_A2HRIC_BIT_MAC_EVENT #endif | MACREG_A2HRIC_BIT_ICV_ERROR | MACREG_A2HRIC_BIT_RADAR_DETECT | MACREG_A2HRIC_BIT_CHAN_SWITCH #if 0 | MACREG_A2HRIC_BIT_QUEUE_EMPTY #endif | MACREG_A2HRIC_BIT_BA_WATCHDOG | MACREQ_A2HRIC_BIT_TX_ACK ; sc->sc_running = 1; mwl_hal_intrset(mh, sc->sc_imask); callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); return 0; } static void mwl_stop(struct mwl_softc *sc) { MWL_LOCK_ASSERT(sc); if (sc->sc_running) { /* * Shutdown the hardware and driver. */ sc->sc_running = 0; callout_stop(&sc->sc_watchdog); sc->sc_tx_timer = 0; mwl_draintxq(sc); } } static int mwl_reset_vap(struct ieee80211vap *vap, int state) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211com *ic = vap->iv_ic; if (state == IEEE80211_S_RUN) mwl_setrates(vap); /* XXX off by 1? */ mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold); /* XXX auto? 20/40 split? */ mwl_hal_sethtgi(hvap, (vap->iv_flags_ht & (IEEE80211_FHT_SHORTGI20|IEEE80211_FHT_SHORTGI40)) ? 1 : 0); mwl_hal_setnprot(hvap, ic->ic_htprotmode == IEEE80211_PROT_NONE ? HTPROTECT_NONE : HTPROTECT_AUTO); /* XXX txpower cap */ /* re-setup beacons */ if (state == IEEE80211_S_RUN && (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS || vap->iv_opmode == IEEE80211_M_IBSS)) { mwl_setapmode(vap, vap->iv_bss->ni_chan); mwl_hal_setnprotmode(hvap, MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE)); return mwl_beacon_setup(vap); } return 0; } /* * Reset the hardware w/o losing operational state. - * Used to to reset or reload hardware state for a vap. + * Used to reset or reload hardware state for a vap. */ static int mwl_reset(struct ieee80211vap *vap, u_long cmd) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; int error = 0; if (hvap != NULL) { /* WDS, MONITOR, etc. */ struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; /* XXX handle DWDS sta vap change */ /* XXX do we need to disable interrupts? 
*/ mwl_hal_intrset(mh, 0); /* disable interrupts */ error = mwl_reset_vap(vap, vap->iv_state); mwl_hal_intrset(mh, sc->sc_imask); } return error; } /* * Allocate a tx buffer for sending a frame. The * packet is assumed to have the WME AC stored so * we can use it to select the appropriate h/w queue. */ static struct mwl_txbuf * mwl_gettxbuf(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; /* * Grab a TX buffer and associated resources. */ MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->free); if (bf != NULL) { STAILQ_REMOVE_HEAD(&txq->free, bf_list); txq->nfree--; } MWL_TXQ_UNLOCK(txq); if (bf == NULL) DPRINTF(sc, MWL_DEBUG_XMIT, "%s: out of xmit buffers on q %d\n", __func__, txq->qnum); return bf; } /* * Return a tx buffer to the queue it came from. Note there * are two cases because we must preserve the order of buffers * as it reflects the fixed order of descriptors in memory * (the firmware pre-fetches descriptors so we cannot reorder). */ static void mwl_puttxbuf_head(struct mwl_txq *txq, struct mwl_txbuf *bf) { bf->bf_m = NULL; bf->bf_node = NULL; MWL_TXQ_LOCK(txq); STAILQ_INSERT_HEAD(&txq->free, bf, bf_list); txq->nfree++; MWL_TXQ_UNLOCK(txq); } static void mwl_puttxbuf_tail(struct mwl_txq *txq, struct mwl_txbuf *bf) { bf->bf_m = NULL; bf->bf_node = NULL; MWL_TXQ_LOCK(txq); STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree++; MWL_TXQ_UNLOCK(txq); } static int mwl_transmit(struct ieee80211com *ic, struct mbuf *m) { struct mwl_softc *sc = ic->ic_softc; int error; MWL_LOCK(sc); if (!sc->sc_running) { MWL_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { MWL_UNLOCK(sc); return (error); } mwl_start(sc); MWL_UNLOCK(sc); return (0); } static void mwl_start(struct mwl_softc *sc) { struct ieee80211_node *ni; struct mwl_txbuf *bf; struct mbuf *m; struct mwl_txq *txq = NULL; /* XXX silence gcc */ int nqueued; MWL_LOCK_ASSERT(sc); if (!sc->sc_running || sc->sc_invalid) return; nqueued = 0; while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { /* * Grab the node for the destination. */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; KASSERT(ni != NULL, ("no node")); m->m_pkthdr.rcvif = NULL; /* committed, clear ref */ /* * Grab a TX buffer and associated resources. * We honor the classification by the 802.11 layer. */ txq = sc->sc_ac2q[M_WME_GETAC(m)]; bf = mwl_gettxbuf(sc, txq); if (bf == NULL) { m_freem(m); ieee80211_free_node(ni); #ifdef MWL_TX_NODROP sc->sc_stats.mst_tx_qstop++; break; #else DPRINTF(sc, MWL_DEBUG_XMIT, "%s: tail drop on q %d\n", __func__, txq->qnum); sc->sc_stats.mst_tx_qdrop++; continue; #endif /* MWL_TX_NODROP */ } /* * Pass the frame to the h/w for transmission. */ if (mwl_tx_start(sc, ni, bf, m)) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); mwl_puttxbuf_head(txq, bf); ieee80211_free_node(ni); continue; } nqueued++; if (nqueued >= mwl_txcoalesce) { /* * Poke the firmware to process queued frames; * see below about (lack of) locking. */ nqueued = 0; mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); } } if (nqueued) { /* * NB: We don't need to lock against tx done because * this just prods the firmware to check the transmit * descriptors. The firmware will also start fetching * descriptors by itself if it notices new ones are * present when it goes to deliver a tx done interrupt * to the host. So if we race with tx done processing * it's ok. Delivering the kick here rather than in * mwl_tx_start is an optimization to avoid poking the * firmware for each packet. * * NB: the queue id isn't used so 0 is ok. 
*/ mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); } } static int mwl_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_txbuf *bf; struct mwl_txq *txq; if (!sc->sc_running || sc->sc_invalid) { m_freem(m); return ENETDOWN; } /* * Grab a TX buffer and associated resources. * Note that we depend on the classification * by the 802.11 layer to get to the right h/w * queue. Management frames must ALWAYS go on * queue 1 but we cannot just force that here * because we may receive non-mgt frames. */ txq = sc->sc_ac2q[M_WME_GETAC(m)]; bf = mwl_gettxbuf(sc, txq); if (bf == NULL) { sc->sc_stats.mst_tx_qstop++; m_freem(m); return ENOBUFS; } /* * Pass the frame to the h/w for transmission. */ if (mwl_tx_start(sc, ni, bf, m)) { mwl_puttxbuf_head(txq, bf); return EIO; /* XXX */ } /* * NB: We don't need to lock against tx done because * this just prods the firmware to check the transmit * descriptors. The firmware will also start fetching * descriptors by itself if it notices new ones are * present when it goes to deliver a tx done interrupt * to the host. So if we race with tx done processing * it's ok. Delivering the kick here rather than in * mwl_tx_start is an optimization to avoid poking the * firmware for each packet. * * NB: the queue id isn't used so 0 is ok. */ mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); return 0; } static int mwl_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; int error; error = ieee80211_media_change(ifp); /* NB: only the fixed rate can change and that doesn't need a reset */ if (error == ENETRESET) { mwl_setrates(vap); error = 0; } return error; } #ifdef MWL_DEBUG static void mwl_keyprint(struct mwl_softc *sc, const char *tag, const MWL_HAL_KEYVAL *hk, const uint8_t mac[IEEE80211_ADDR_LEN]) { static const char *ciphers[] = { "WEP", "TKIP", "AES-CCM", }; int i, n; printf("%s: [%u] %-7s", tag, hk->keyIndex, ciphers[hk->keyTypeId]); for (i = 0, n = hk->keyLen; i < n; i++) printf(" %02x", hk->key.aes[i]); printf(" mac %s", ether_sprintf(mac)); if (hk->keyTypeId == KEY_TYPE_ID_TKIP) { printf(" %s", "rxmic"); for (i = 0; i < sizeof(hk->key.tkip.rxMic); i++) printf(" %02x", hk->key.tkip.rxMic[i]); printf(" txmic"); for (i = 0; i < sizeof(hk->key.tkip.txMic); i++) printf(" %02x", hk->key.tkip.txMic[i]); } printf(" flags 0x%x\n", hk->keyFlags); } #endif /* * Allocate a key cache slot for a unicast key. The * firmware handles key allocation and every station is * guaranteed key space so we are always successful. */ static int mwl_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k, ieee80211_keyix *keyix, ieee80211_keyix *rxkeyix) { struct mwl_softc *sc = vap->iv_ic->ic_softc; if (k->wk_keyix != IEEE80211_KEYIX_NONE || (k->wk_flags & IEEE80211_KEY_GROUP)) { if (!(&vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID])) { /* should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: bogus group key\n", __func__); return 0; } /* give the caller what they requested */ *keyix = *rxkeyix = k - vap->iv_nw_keys; } else { /* * Firmware handles key allocation. */ *keyix = *rxkeyix = 0; } return 1; } /* * Delete a key entry allocated by mwl_key_alloc. 
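 * A sketch of how the slot index was derived at alloc time follows.
 */

/*
 * Illustrative sketch, not driver code: group/static WEP keys live in
 * the vap's iv_nw_keys[] array, so mwl_key_alloc() above derives the
 * slot index by pointer arithmetic; a key at &iv_nw_keys[2] gets
 * index 2.  The helper name is an assumption.
 */
static __inline ieee80211_keyix
example_group_keyix(const struct ieee80211vap *vap,
	const struct ieee80211_key *k)
{
	return (k - vap->iv_nw_keys);
}

/*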
*/ static int mwl_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; MWL_HAL_KEYVAL hk; const uint8_t bcastaddr[IEEE80211_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; if (hvap == NULL) { if (vap->iv_opmode != IEEE80211_M_WDS) { /* XXX monitor mode? */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: no hvap for opmode %d\n", __func__, vap->iv_opmode); return 0; } hvap = MWL_VAP(vap)->mv_ap_hvap; } DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: delete key %u\n", __func__, k->wk_keyix); memset(&hk, 0, sizeof(hk)); hk.keyIndex = k->wk_keyix; switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: hk.keyTypeId = KEY_TYPE_ID_WEP; break; case IEEE80211_CIPHER_TKIP: hk.keyTypeId = KEY_TYPE_ID_TKIP; break; case IEEE80211_CIPHER_AES_CCM: hk.keyTypeId = KEY_TYPE_ID_AES; break; default: /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n", __func__, k->wk_cipher->ic_cipher); return 0; } return (mwl_hal_keyreset(hvap, &hk, bcastaddr) == 0); /*XXX*/ } static __inline int addgroupflags(MWL_HAL_KEYVAL *hk, const struct ieee80211_key *k) { if (k->wk_flags & IEEE80211_KEY_GROUP) { if (k->wk_flags & IEEE80211_KEY_XMIT) hk->keyFlags |= KEY_FLAG_TXGROUPKEY; if (k->wk_flags & IEEE80211_KEY_RECV) hk->keyFlags |= KEY_FLAG_RXGROUPKEY; return 1; } else return 0; } /* * Set the key cache contents for the specified key. Key cache * slot(s) must already have been allocated by mwl_key_alloc. */ static int mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { return (_mwl_key_set(vap, k, k->wk_macaddr)); } static int _mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k, const uint8_t mac[IEEE80211_ADDR_LEN]) { #define GRPXMIT (IEEE80211_KEY_XMIT | IEEE80211_KEY_GROUP) /* NB: static wep keys are marked GROUP+tx/rx; GTK will be tx or rx */ #define IEEE80211_IS_STATICKEY(k) \ (((k)->wk_flags & (GRPXMIT|IEEE80211_KEY_RECV)) == \ (GRPXMIT|IEEE80211_KEY_RECV)) struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; const struct ieee80211_cipher *cip = k->wk_cipher; const uint8_t *macaddr; MWL_HAL_KEYVAL hk; KASSERT((k->wk_flags & IEEE80211_KEY_SWCRYPT) == 0, ("s/w crypto set?")); if (hvap == NULL) { if (vap->iv_opmode != IEEE80211_M_WDS) { /* XXX monitor mode? 
*/ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: no hvap for opmode %d\n", __func__, vap->iv_opmode); return 0; } hvap = MWL_VAP(vap)->mv_ap_hvap; } memset(&hk, 0, sizeof(hk)); hk.keyIndex = k->wk_keyix; switch (cip->ic_cipher) { case IEEE80211_CIPHER_WEP: hk.keyTypeId = KEY_TYPE_ID_WEP; hk.keyLen = k->wk_keylen; if (k->wk_keyix == vap->iv_def_txkey) hk.keyFlags = KEY_FLAG_WEP_TXKEY; if (!IEEE80211_IS_STATICKEY(k)) { /* NB: WEP is never used for the PTK */ (void) addgroupflags(&hk, k); } break; case IEEE80211_CIPHER_TKIP: hk.keyTypeId = KEY_TYPE_ID_TKIP; hk.key.tkip.tsc.high = (uint32_t)(k->wk_keytsc >> 16); hk.key.tkip.tsc.low = (uint16_t)k->wk_keytsc; hk.keyFlags = KEY_FLAG_TSC_VALID | KEY_FLAG_MICKEY_VALID; hk.keyLen = k->wk_keylen + IEEE80211_MICBUF_SIZE; if (!addgroupflags(&hk, k)) hk.keyFlags |= KEY_FLAG_PAIRWISE; break; case IEEE80211_CIPHER_AES_CCM: hk.keyTypeId = KEY_TYPE_ID_AES; hk.keyLen = k->wk_keylen; if (!addgroupflags(&hk, k)) hk.keyFlags |= KEY_FLAG_PAIRWISE; break; default: /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n", __func__, k->wk_cipher->ic_cipher); return 0; } /* * NB: tkip mic keys get copied here too; the layout * just happens to match that in ieee80211_key. */ memcpy(hk.key.aes, k->wk_key, hk.keyLen); /* * Locate address of sta db entry for writing key; * the convention unfortunately is somewhat different * than how net80211, hostapd, and wpa_supplicant think. */ if (vap->iv_opmode == IEEE80211_M_STA) { /* * NB: keys plumbed before the sta reaches AUTH state * will be discarded or written to the wrong sta db * entry because iv_bss is meaningless. This is ok * (right now) because we handle deferred plumbing of * WEP keys when the sta reaches AUTH state. */ macaddr = vap->iv_bss->ni_bssid; if ((k->wk_flags & IEEE80211_KEY_GROUP) == 0) { /* XXX plumb to local sta db too for static key wep */ mwl_hal_keyset(hvap, &hk, vap->iv_myaddr); } } else if (vap->iv_opmode == IEEE80211_M_WDS && vap->iv_state != IEEE80211_S_RUN) { /* * Prior to RUN state a WDS vap will not have its BSS node * setup so we will plumb the key to the wrong mac * address (it'll be our local address). Work around * this for the moment by grabbing the correct address. */ macaddr = vap->iv_des_bssid; } else if ((k->wk_flags & GRPXMIT) == GRPXMIT) macaddr = vap->iv_myaddr; else macaddr = mac; KEYPRINTF(sc, &hk, macaddr); return (mwl_hal_keyset(hvap, &hk, macaddr) == 0); #undef IEEE80211_IS_STATICKEY #undef GRPXMIT } /* * Set the multicast filter contents into the hardware. * XXX f/w has no support; just defer to the os. */ static void mwl_setmcastfilter(struct mwl_softc *sc) { #if 0 struct ether_multi *enm; struct ether_multistep estep; uint8_t macs[IEEE80211_ADDR_LEN*MWL_HAL_MCAST_MAX];/* XXX stack use */ uint8_t *mp; int nmc; mp = macs; nmc = 0; ETHER_FIRST_MULTI(estep, &sc->sc_ec, enm); while (enm != NULL) { /* XXX Punt on ranges. */ if (nmc == MWL_HAL_MCAST_MAX || !IEEE80211_ADDR_EQ(enm->enm_addrlo, enm->enm_addrhi)) { ifp->if_flags |= IFF_ALLMULTI; return; } IEEE80211_ADDR_COPY(mp, enm->enm_addrlo); mp += IEEE80211_ADDR_LEN, nmc++; ETHER_NEXT_MULTI(estep, enm); } ifp->if_flags &= ~IFF_ALLMULTI; mwl_hal_setmcast(sc->sc_mh, nmc, macs); #endif } static int mwl_mode_init(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh = sc->sc_mh; mwl_hal_setpromisc(mh, ic->ic_promisc > 0); mwl_setmcastfilter(sc); return 0; } /* * Callback from the 802.11 layer after a multicast state change.
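 * A sketch of the flat address-table format used by the hal follows.
 */

/*
 * Illustrative sketch, not driver code: mwl_hal_setmcast() takes a
 * count plus a flat array of 6-byte addresses holding at most
 * MWL_HAL_MCAST_MAX entries; on overflow the driver would fall back
 * to all-multicast, as the disabled code above does.  The helper and
 * its argument list are assumptions for illustration.
 */
static int
example_pack_mcast(uint8_t macs[IEEE80211_ADDR_LEN * MWL_HAL_MCAST_MAX],
	const uint8_t addrs[][IEEE80211_ADDR_LEN], int naddrs)
{
	int i;

	if (naddrs > MWL_HAL_MCAST_MAX)
		return (-1);		/* caller should fall back to ALLMULTI */
	for (i = 0; i < naddrs; i++)
		IEEE80211_ADDR_COPY(&macs[i * IEEE80211_ADDR_LEN], addrs[i]);
	return (naddrs);
}

/*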
*/ static void mwl_update_mcast(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; mwl_setmcastfilter(sc); } /* * Callback from the 802.11 layer after a promiscuous mode change. * Note this interface does not check the operating mode as this * is an internal callback and we are expected to honor the current * state (e.g. this is used for setting the interface in promiscuous * mode when operating in hostap mode to do ACS). */ static void mwl_update_promisc(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; mwl_hal_setpromisc(sc->sc_mh, ic->ic_promisc > 0); } /* * Callback from the 802.11 layer to update the slot time * based on the current setting. We use it to notify the * firmware of ERP changes and the f/w takes care of things * like slot time and preamble. */ static void mwl_updateslot(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; int prot; /* NB: can be called early; suppress needless cmds */ if (!sc->sc_running) return; /* * Calculate the ERP flags. The firmware will use * this to carry out the appropriate measures. */ prot = 0; if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) { if ((ic->ic_flags & IEEE80211_F_SHSLOT) == 0) prot |= IEEE80211_ERP_NON_ERP_PRESENT; if (ic->ic_flags & IEEE80211_F_USEPROT) prot |= IEEE80211_ERP_USE_PROTECTION; if (ic->ic_flags & IEEE80211_F_USEBARKER) prot |= IEEE80211_ERP_LONG_PREAMBLE; } DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x %s slot, (prot 0x%x ic_flags 0x%x)\n", __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags, ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", prot, ic->ic_flags); mwl_hal_setgprot(mh, prot); } /* * Setup the beacon frame. */ static int mwl_beacon_setup(struct ieee80211vap *vap) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211_node *ni = vap->iv_bss; struct mbuf *m; m = ieee80211_beacon_alloc(ni); if (m == NULL) return ENOBUFS; mwl_hal_setbeacon(hvap, mtod(m, const void *), m->m_len); m_free(m); return 0; } /* * Update the beacon frame in response to a change. */ static void mwl_beacon_update(struct ieee80211vap *vap, int item) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211com *ic = vap->iv_ic; KASSERT(hvap != NULL, ("no beacon")); switch (item) { case IEEE80211_BEACON_ERP: mwl_updateslot(ic); break; case IEEE80211_BEACON_HTINFO: mwl_hal_setnprotmode(hvap, MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE)); break; case IEEE80211_BEACON_CAPS: case IEEE80211_BEACON_WME: case IEEE80211_BEACON_APPIE: case IEEE80211_BEACON_CSA: break; case IEEE80211_BEACON_TIM: /* NB: firmware always forms TIM */ return; } /* XXX retain beacon frame and update */ mwl_beacon_setup(vap); } static void mwl_load_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; KASSERT(error == 0, ("error %u on bus_dma callback", error)); *paddr = segs->ds_addr; } #ifdef MWL_HOST_PS_SUPPORT /* * Handle power save station occupancy changes. */ static void mwl_update_ps(struct ieee80211vap *vap, int nsta) { struct mwl_vap *mvp = MWL_VAP(vap); if (nsta == 0 || mvp->mv_last_ps_sta == 0) mwl_hal_setpowersave_bss(mvp->mv_hvap, nsta); mvp->mv_last_ps_sta = nsta; } /* * Handle associated station power save state changes.
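 * The 0 <-> non-zero trigger used by mwl_update_ps() above is
 * sketched below.
 */

/*
 * Illustrative sketch, not driver code: mwl_update_ps() only issues a
 * firmware command when the count of power-save stations crosses
 * zero in either direction; changes such as 2 -> 3 are no-ops.  The
 * helper name is an assumption.
 */
static __inline int
example_ps_cmd_needed(int last_nsta, int nsta)
{
	return (nsta == 0 || last_nsta == 0);
}

/*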
*/ static int mwl_set_tim(struct ieee80211_node *ni, int set) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_vap *mvp = MWL_VAP(vap); if (mvp->mv_set_tim(ni, set)) { /* NB: state change */ mwl_hal_setpowersave_sta(mvp->mv_hvap, IEEE80211_AID(ni->ni_associd), set); return 1; } else return 0; } #endif /* MWL_HOST_PS_SUPPORT */ static int mwl_desc_setup(struct mwl_softc *sc, const char *name, struct mwl_descdma *dd, int nbuf, size_t bufsize, int ndesc, size_t descsize) { uint8_t *ds; int error; DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA: %u bufs (%ju) %u desc/buf (%ju)\n", __func__, name, nbuf, (uintmax_t) bufsize, ndesc, (uintmax_t) descsize); dd->dd_name = name; dd->dd_desc_len = nbuf * ndesc * descsize; /* * Setup DMA descriptor area. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dd->dd_desc_len, /* maxsize */ 1, /* nsegments */ dd->dd_desc_len, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dd->dd_dmat); if (error != 0) { device_printf(sc->sc_dev, "cannot allocate %s DMA tag\n", dd->dd_name); return error; } /* allocate descriptors */ error = bus_dmamem_alloc(dd->dd_dmat, (void**) &dd->dd_desc, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dd->dd_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to alloc memory for %u %s descriptors, " "error %u\n", nbuf * ndesc, dd->dd_name, error); goto fail1; } error = bus_dmamap_load(dd->dd_dmat, dd->dd_dmamap, dd->dd_desc, dd->dd_desc_len, mwl_load_cb, &dd->dd_desc_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "unable to map %s descriptors, error %u\n", dd->dd_name, error); goto fail2; } ds = dd->dd_desc; memset(ds, 0, dd->dd_desc_len); DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA map: %p (%lu) -> 0x%jx (%lu)\n", __func__, dd->dd_name, ds, (u_long) dd->dd_desc_len, (uintmax_t) dd->dd_desc_paddr, /*XXX*/ (u_long) dd->dd_desc_len); return 0; fail2: bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); fail1: bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); return error; #undef DS2PHYS } static void mwl_desc_cleanup(struct mwl_softc *sc, struct mwl_descdma *dd) { bus_dmamap_unload(dd->dd_dmat, dd->dd_dmamap); bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); } /* * Construct a tx q's free list. The order of entries on * the list must reflect the physical layout of tx descriptors * because the firmware pre-fetches descriptors. * * XXX might be better to use indices into the buffer array. 
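 * The descriptor address translation relied on below is sketched
 * first.
 */

/*
 * Illustrative sketch, not driver code: descriptors sit in a single
 * contiguous busdma block, so a descriptor's bus address is the
 * block's bus base plus the descriptor's byte offset from the
 * block's KVA base -- the same arithmetic as the DS2PHYS() macro
 * below.  The helper name is an assumption.
 */
static __inline bus_addr_t
example_desc_paddr(const struct mwl_descdma *dd, const void *ds)
{
	return (dd->dd_desc_paddr +
	    ((uintptr_t)ds - (uintptr_t)dd->dd_desc));
}

/*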
*/ static void mwl_txq_reset(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; STAILQ_INIT(&txq->free); for (i = 0; i < mwl_txbuf; i++, bf++) STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree = i; } #define DS2PHYS(_dd, _ds) \ ((_dd)->dd_desc_paddr + ((caddr_t)(_ds) - (caddr_t)(_dd)->dd_desc)) static int mwl_txdma_setup(struct mwl_softc *sc, struct mwl_txq *txq) { int error, bsize, i; struct mwl_txbuf *bf; struct mwl_txdesc *ds; error = mwl_desc_setup(sc, "tx", &txq->dma, mwl_txbuf, sizeof(struct mwl_txbuf), MWL_TXDESC, sizeof(struct mwl_txdesc)); if (error != 0) return error; /* allocate and setup tx buffers */ bsize = mwl_txbuf * sizeof(struct mwl_txbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u tx buffers failed\n", mwl_txbuf); return ENOMEM; } txq->dma.dd_bufptr = bf; ds = txq->dma.dd_desc; for (i = 0; i < mwl_txbuf; i++, bf++, ds += MWL_TXDESC) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&txq->dma, ds); error = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &bf->bf_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to create dmamap for tx " "buffer %u, error %u\n", i, error); return error; } } mwl_txq_reset(sc, txq); return 0; } static void mwl_txdma_cleanup(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; for (i = 0; i < mwl_txbuf; i++, bf++) { KASSERT(bf->bf_m == NULL, ("mbuf on free list")); KASSERT(bf->bf_node == NULL, ("node on free list")); if (bf->bf_dmamap != NULL) bus_dmamap_destroy(sc->sc_dmat, bf->bf_dmamap); } STAILQ_INIT(&txq->free); txq->nfree = 0; if (txq->dma.dd_bufptr != NULL) { free(txq->dma.dd_bufptr, M_MWLDEV); txq->dma.dd_bufptr = NULL; } if (txq->dma.dd_desc_len != 0) mwl_desc_cleanup(sc, &txq->dma); } static int mwl_rxdma_setup(struct mwl_softc *sc) { int error, jumbosize, bsize, i; struct mwl_rxbuf *bf; struct mwl_jumbo *rbuf; struct mwl_rxdesc *ds; caddr_t data; error = mwl_desc_setup(sc, "rx", &sc->sc_rxdma, mwl_rxdesc, sizeof(struct mwl_rxbuf), 1, sizeof(struct mwl_rxdesc)); if (error != 0) return error; /* * Receive is done to a private pool of jumbo buffers. * This allows us to attach to mbuf's and avoid re-mapping * memory on each rx we post. We allocate a large chunk * of memory and manage it in the driver. The mbuf free * callback method is used to reclaim frames after sending * them up the stack. By default we allocate 2x the number of * rx descriptors configured so we have some slop to hold * us while frames are processed. 
*/ if (mwl_rxbuf < 2*mwl_rxdesc) { device_printf(sc->sc_dev, "too few rx dma buffers (%d); increasing to %d\n", mwl_rxbuf, 2*mwl_rxdesc); mwl_rxbuf = 2*mwl_rxdesc; } jumbosize = roundup(MWL_AGGR_SIZE, PAGE_SIZE); sc->sc_rxmemsize = mwl_rxbuf*jumbosize; error = bus_dma_tag_create(sc->sc_dmat, /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->sc_rxmemsize, /* maxsize */ 1, /* nsegments */ sc->sc_rxmemsize, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &sc->sc_rxdmat); if (error != 0) { device_printf(sc->sc_dev, "could not create rx DMA tag\n"); return error; } error = bus_dmamem_alloc(sc->sc_rxdmat, (void**) &sc->sc_rxmem, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->sc_rxmap); if (error != 0) { device_printf(sc->sc_dev, "could not alloc %ju bytes of rx DMA memory\n", (uintmax_t) sc->sc_rxmemsize); return error; } error = bus_dmamap_load(sc->sc_rxdmat, sc->sc_rxmap, sc->sc_rxmem, sc->sc_rxmemsize, mwl_load_cb, &sc->sc_rxmem_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "could not load rx DMA map\n"); return error; } /* * Allocate rx buffers and set them up. */ bsize = mwl_rxdesc * sizeof(struct mwl_rxbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u rx buffers failed\n", bsize); return ENOMEM; } sc->sc_rxdma.dd_bufptr = bf; STAILQ_INIT(&sc->sc_rxbuf); ds = sc->sc_rxdma.dd_desc; for (i = 0; i < mwl_rxdesc; i++, bf++, ds++) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&sc->sc_rxdma, ds); /* pre-assign dma buffer */ bf->bf_data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); /* NB: tail is intentional to preserve descriptor order */ STAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); } /* * Place remainder of dma memory buffers on the free list.
*/ SLIST_INIT(&sc->sc_rxfree); for (; i < mwl_rxbuf; i++) { data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); rbuf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, rbuf, next); sc->sc_nrxfree++; } return 0; } #undef DS2PHYS static void mwl_rxdma_cleanup(struct mwl_softc *sc) { if (sc->sc_rxmem_paddr != 0) { bus_dmamap_unload(sc->sc_rxdmat, sc->sc_rxmap); sc->sc_rxmem_paddr = 0; } if (sc->sc_rxmem != NULL) { bus_dmamem_free(sc->sc_rxdmat, sc->sc_rxmem, sc->sc_rxmap); sc->sc_rxmem = NULL; } if (sc->sc_rxdma.dd_bufptr != NULL) { free(sc->sc_rxdma.dd_bufptr, M_MWLDEV); sc->sc_rxdma.dd_bufptr = NULL; } if (sc->sc_rxdma.dd_desc_len != 0) mwl_desc_cleanup(sc, &sc->sc_rxdma); } static int mwl_dma_setup(struct mwl_softc *sc) { int error, i; error = mwl_rxdma_setup(sc); if (error != 0) { mwl_rxdma_cleanup(sc); return error; } for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { error = mwl_txdma_setup(sc, &sc->sc_txq[i]); if (error != 0) { mwl_dma_cleanup(sc); return error; } } return 0; } static void mwl_dma_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txdma_cleanup(sc, &sc->sc_txq[i]); mwl_rxdma_cleanup(sc); } static struct ieee80211_node * mwl_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; const size_t space = sizeof(struct mwl_node); struct mwl_node *mn; mn = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (mn == NULL) { /* XXX stat+msg */ return NULL; } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mn %p\n", __func__, mn); return &mn->mn_node; } static void mwl_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p ic %p staid %d\n", __func__, ni, ni->ni_ic, mn->mn_staid); if (mn->mn_staid != 0) { struct ieee80211vap *vap = ni->ni_vap; if (mn->mn_hvap != NULL) { if (vap->iv_opmode == IEEE80211_M_STA) mwl_hal_delstation(mn->mn_hvap, vap->iv_myaddr); else mwl_hal_delstation(mn->mn_hvap, ni->ni_macaddr); } /* * NB: legacy WDS peer sta db entry is installed using * the associate ap's hvap; use it again to delete it. * XXX can vap be NULL? */ else if (vap->iv_opmode == IEEE80211_M_WDS && MWL_VAP(vap)->mv_ap_hvap != NULL) mwl_hal_delstation(MWL_VAP(vap)->mv_ap_hvap, ni->ni_macaddr); delstaid(sc, mn->mn_staid); mn->mn_staid = 0; } sc->sc_node_cleanup(ni); } /* * Reclaim rx dma buffers from packets sitting on the ampdu * reorder queue for a station. We replace buffers with a * system cluster (if available). */ static void mwl_ampdu_rxdma_reclaim(struct ieee80211_rx_ampdu *rap) { #if 0 int i, n, off; struct mbuf *m; void *cl; n = rap->rxa_qframes; for (i = 0; i < rap->rxa_wnd && n > 0; i++) { m = rap->rxa_m[i]; if (m == NULL) continue; n--; /* our dma buffers have a well-known free routine */ if ((m->m_flags & M_EXT) == 0 || m->m_ext.ext_free != mwl_ext_free) continue; /* * Try to allocate a cluster and move the data. */ off = m->m_data - m->m_ext.ext_buf; if (off + m->m_pkthdr.len > MCLBYTES) { /* XXX no AMSDU for now */ continue; } cl = pool_cache_get_paddr(&mclpool_cache, 0, &m->m_ext.ext_paddr); if (cl != NULL) { /* * Copy the existing data to the cluster, remove * the rx dma buffer, and attach the cluster in * its place. Note we preserve the offset to the * data so frames being bridged can still prepend * their headers without adding another mbuf. 
*/ memcpy((caddr_t) cl + off, m->m_data, m->m_pkthdr.len); MEXTREMOVE(m); MEXTADD(m, cl, MCLBYTES, 0, NULL, &mclpool_cache); /* setup mbuf like _MCLGET does */ m->m_flags |= M_CLUSTER | M_EXT_RW; _MOWNERREF(m, M_EXT | M_CLUSTER); /* NB: m_data is clobbered by MEXTADD, adjust */ m->m_data += off; } } #endif } /* * Callback to reclaim resources. We first let the * net80211 layer do its thing, then if we are still * blocked by a lack of rx dma buffers we walk the ampdu * reorder q's to reclaim buffers by copying to a system * cluster. */ static void mwl_node_drain(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p vap %p staid %d\n", __func__, ni, ni->ni_vap, mn->mn_staid); /* NB: call up first to age out ampdu q's */ sc->sc_node_drain(ni); /* XXX better to not check low water mark? */ if (sc->sc_rxblocked && mn->mn_staid != 0 && (ni->ni_flags & IEEE80211_NODE_HT)) { uint8_t tid; /* * Walk the reorder q and reclaim rx dma buffers by copying * the packet contents into clusters. */ for (tid = 0; tid < WME_NUM_TID; tid++) { struct ieee80211_rx_ampdu *rap; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) continue; if (rap->rxa_qframes) mwl_ampdu_rxdma_reclaim(rap); } } } static void mwl_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { *rssi = ni->ni_ic->ic_node_getrssi(ni); #ifdef MWL_ANT_INFO_SUPPORT #if 0 /* XXX need to smooth data */ *noise = -MWL_NODE_CONST(ni)->mn_ai.nf; #else *noise = -95; /* XXX */ #endif #else *noise = -95; /* XXX */ #endif } /* * Convert Hardware per-antenna rssi info to common format: * Let a1, a2, a3 represent the amplitudes per chain * Let amax represent max[a1, a2, a3] * Rssi1_dBm = RSSI_dBm + 20*log10(a1/amax) * Rssi1_dBm = RSSI_dBm + 20*log10(a1) - 20*log10(amax) * We store a table that is 4*20*log10(idx) - the extra 4 is to store or * maintain some extra precision. * * Values are stored in .5 db format capped at 127. */ static void mwl_node_getmimoinfo(const struct ieee80211_node *ni, struct ieee80211_mimo_info *mi) { #define CVT(_dst, _src) do { \ (_dst) = rssi + ((logdbtbl[_src] - logdbtbl[rssi_max]) >> 2); \ (_dst) = (_dst) > 64 ? 127 : ((_dst) << 1); \ } while (0) static const int8_t logdbtbl[32] = { 0, 0, 24, 38, 48, 56, 62, 68, 72, 76, 80, 83, 86, 89, 92, 94, 96, 98, 100, 102, 104, 106, 107, 109, 110, 112, 113, 115, 116, 117, 118, 119 }; const struct mwl_node *mn = MWL_NODE_CONST(ni); uint8_t rssi = mn->mn_ai.rsvd1/2; /* XXX */ uint32_t rssi_max; rssi_max = mn->mn_ai.rssi_a; if (mn->mn_ai.rssi_b > rssi_max) rssi_max = mn->mn_ai.rssi_b; if (mn->mn_ai.rssi_c > rssi_max) rssi_max = mn->mn_ai.rssi_c; CVT(mi->rssi[0], mn->mn_ai.rssi_a); CVT(mi->rssi[1], mn->mn_ai.rssi_b); CVT(mi->rssi[2], mn->mn_ai.rssi_c); mi->noise[0] = mn->mn_ai.nf_a; mi->noise[1] = mn->mn_ai.nf_b; mi->noise[2] = mn->mn_ai.nf_c; #undef CVT } static __inline void * mwl_getrxdma(struct mwl_softc *sc) { struct mwl_jumbo *buf; void *data; /* * Allocate from jumbo pool.
*/ MWL_RXFREE_LOCK(sc); buf = SLIST_FIRST(&sc->sc_rxfree); if (buf == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: out of rx dma buffers\n", __func__); sc->sc_stats.mst_rx_nodmabuf++; data = NULL; } else { SLIST_REMOVE_HEAD(&sc->sc_rxfree, next); sc->sc_nrxfree--; data = MWL_JUMBO_BUF2DATA(buf); } MWL_RXFREE_UNLOCK(sc); return data; } static __inline void mwl_putrxdma(struct mwl_softc *sc, void *data) { struct mwl_jumbo *buf; /* XXX bounds check data */ MWL_RXFREE_LOCK(sc); buf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, buf, next); sc->sc_nrxfree++; MWL_RXFREE_UNLOCK(sc); } static int mwl_rxbuf_init(struct mwl_softc *sc, struct mwl_rxbuf *bf) { struct mwl_rxdesc *ds; ds = bf->bf_desc; if (bf->bf_data == NULL) { bf->bf_data = mwl_getrxdma(sc); if (bf->bf_data == NULL) { /* mark descriptor to be skipped */ ds->RxControl = EAGLE_RXD_CTRL_OS_OWN; /* NB: don't need PREREAD */ MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREWRITE); sc->sc_stats.mst_rxbuf_failed++; return ENOMEM; } } /* * NB: DMA buffer contents is known to be unmodified * so there's no need to flush the data cache. */ /* * Setup descriptor. */ ds->QosCtrl = 0; ds->RSSI = 0; ds->Status = EAGLE_RXD_STATUS_IDLE; ds->Channel = 0; ds->PktLen = htole16(MWL_AGGR_SIZE); ds->SQ2 = 0; ds->pPhysBuffData = htole32(MWL_JUMBO_DMA_ADDR(sc, bf->bf_data)); /* NB: don't touch pPhysNext, set once */ ds->RxControl = EAGLE_RXD_CTRL_DRIVER_OWN; MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return 0; } static void mwl_ext_free(struct mbuf *m, void *data, void *arg) { struct mwl_softc *sc = arg; /* XXX bounds check data */ mwl_putrxdma(sc, data); /* * If we were previously blocked by a lack of rx dma buffers * check if we now have enough to restart rx interrupt handling. * NB: we know we are called at splvm which is above splnet. */ if (sc->sc_rxblocked && sc->sc_nrxfree > mwl_rxdmalow) { sc->sc_rxblocked = 0; mwl_hal_intrset(sc->sc_mh, sc->sc_imask); } } struct mwl_frame_bar { u_int8_t i_fc[2]; u_int8_t i_dur[2]; u_int8_t i_ra[IEEE80211_ADDR_LEN]; u_int8_t i_ta[IEEE80211_ADDR_LEN]; /* ctl, seq, FCS */ } __packed; /* * Like ieee80211_anyhdrsize, but handles BAR frames * specially so the logic below to piece the 802.11 * header together works. */ static __inline int mwl_anyhdrsize(const void *data) { const struct ieee80211_frame *wh = data; if ((wh->i_fc[0]&IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) { switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_CTS: case IEEE80211_FC0_SUBTYPE_ACK: return sizeof(struct ieee80211_frame_ack); case IEEE80211_FC0_SUBTYPE_BAR: return sizeof(struct mwl_frame_bar); } return sizeof(struct ieee80211_frame_min); } else return ieee80211_hdrsize(data); } static void mwl_handlemicerror(struct ieee80211com *ic, const uint8_t *data) { const struct ieee80211_frame *wh; struct ieee80211_node *ni; wh = (const struct ieee80211_frame *)(data + sizeof(uint16_t)); ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { ieee80211_notify_michael_failure(ni->ni_vap, wh, 0); ieee80211_free_node(ni); } } /* * Convert hardware signal strength to rssi. The value * provided by the device has the noise floor added in; * we need to compensate for this but we don't have that * so we use a fixed value. * * The offset of 8 is good for both 2.4 and 5GHz. The LNA * offset is already set as part of the initial gain. This * will give at least +/- 3dB for 2.4GHz and +/- 5dB for 5GHz. 
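 * A worked example of the conversion follows.
 */

/*
 * Worked example, illustrative only: for a raw ssi of 50 the fixed
 * offset gives 50 + 8 = 58 and the conversion below then yields
 * 2 * (87 - 58) = 58 in the .5 dBm units net80211 expects (clamped
 * to 0..127).  The helper name is an assumption.
 */
static __inline int
example_cvtrssi_values(void)
{
	int ssi = 50;
	int rssi = ssi + 8;		/* compensate for missing noise floor */
	return (2 * (87 - rssi));	/* 58 half-dB steps */
}

/*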
*/ static __inline int cvtrssi(uint8_t ssi) { int rssi = (int) ssi + 8; /* XXX hack guess until we have a real noise floor */ rssi = 2*(87 - rssi); /* NB: .5 dBm units */ return (rssi < 0 ? 0 : rssi > 127 ? 127 : rssi); } static void mwl_rx_proc(void *arg, int npending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct mwl_rxbuf *bf; struct mwl_rxdesc *ds; struct mbuf *m; struct ieee80211_qosframe *wh; struct ieee80211_qosframe_addr4 *wh4; struct ieee80211_node *ni; struct mwl_node *mn; int off, len, hdrlen, pktlen, rssi, ntodo; uint8_t *data, status; void *newdata; int16_t nf; DPRINTF(sc, MWL_DEBUG_RX_PROC, "%s: pending %u rdptr 0x%x wrptr 0x%x\n", __func__, npending, RD4(sc, sc->sc_hwspecs.rxDescRead), RD4(sc, sc->sc_hwspecs.rxDescWrite)); nf = -96; /* XXX */ bf = sc->sc_rxnext; for (ntodo = mwl_rxquota; ntodo > 0; ntodo--) { if (bf == NULL) bf = STAILQ_FIRST(&sc->sc_rxbuf); ds = bf->bf_desc; data = bf->bf_data; if (data == NULL) { /* * If data allocation failed previously there * will be no buffer; try again to re-populate it. * Note the firmware will not advance to the next * descriptor with a dma buffer so we must mimic * this or we'll get out of sync. */ DPRINTF(sc, MWL_DEBUG_ANY, "%s: rx buf w/o dma memory\n", __func__); (void) mwl_rxbuf_init(sc, bf); sc->sc_stats.mst_rx_dmabufmissing++; break; } MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (ds->RxControl != EAGLE_RXD_CTRL_DMA_OWN) break; #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_RECV_DESC) mwl_printrxbuf(bf, 0); #endif status = ds->Status; if (status & EAGLE_RXD_STATUS_DECRYPT_ERR_MASK) { counter_u64_add(ic->ic_ierrors, 1); sc->sc_stats.mst_rx_crypto++; /* * NB: Check EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR * for backwards compatibility. */ if (status != EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR && (status & EAGLE_RXD_STATUS_TKIP_MIC_DECRYPT_ERR)) { /* * MIC error, notify upper layers. */ bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD); mwl_handlemicerror(ic, data); sc->sc_stats.mst_rx_tkipmic++; } /* XXX too painful to tap packets */ goto rx_next; } /* * Sync the data buffer. */ len = le16toh(ds->PktLen); bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD); /* * The 802.11 header is provided all or in part at the front; * use it to calculate the true size of the header that we'll * construct below. We use this to figure out where to copy * payload prior to constructing the header. */ hdrlen = mwl_anyhdrsize(data + sizeof(uint16_t)); off = sizeof(uint16_t) + sizeof(struct ieee80211_frame_addr4); /* calculate rssi early so we can re-use for each aggregate */ rssi = cvtrssi(ds->RSSI); pktlen = hdrlen + (len - off); /* * NB: we know our frame is at least as large as * IEEE80211_MIN_LEN because there is a 4-address * frame at the front. Hence there's no need to * vet the packet length. If the frame in fact * is too small it should be discarded at the * net80211 layer. */ /* * Attach dma buffer to an mbuf. We tried * doing this based on the packet size (i.e. * copying small packets) but it turns out to * be a net loss. The tradeoff might be system * dependent (cache architecture is important). */ MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: no rx mbuf\n", __func__); sc->sc_stats.mst_rx_nombuf++; goto rx_next; } /* * Acquire the replacement dma buffer before * processing the frame. 
If we're out of dma * buffers we disable rx interrupts and wait * for the free pool to reach mwl_rxdmalow buffers * before starting to do work again. If the firmware * runs out of descriptors then it will toss frames * which is better than our doing it as that can * starve our processing. It is also important that * we always process rx'd frames in case they are * A-MPDU as otherwise the host's view of the BA * window may get out of sync with the firmware. */ newdata = mwl_getrxdma(sc); if (newdata == NULL) { /* NB: stat+msg in mwl_getrxdma */ m_free(m); /* disable RX interrupt and mark state */ mwl_hal_intrset(sc->sc_mh, sc->sc_imask &~ MACREG_A2HRIC_BIT_RX_RDY); sc->sc_rxblocked = 1; ieee80211_drain(ic); /* XXX check rxblocked and immediately start again? */ goto rx_stop; } bf->bf_data = newdata; /* * Attach the dma buffer to the mbuf; * mwl_rxbuf_init will re-setup the rx * descriptor using the replacement dma * buffer we just installed above. */ MEXTADD(m, data, MWL_AGGR_SIZE, mwl_ext_free, data, sc, 0, EXT_NET_DRV); m->m_data += off - hdrlen; m->m_pkthdr.len = m->m_len = pktlen; /* NB: dma buffer assumed read-only */ /* * Piece 802.11 header together. */ wh = mtod(m, struct ieee80211_qosframe *); /* NB: don't need to do this sometimes but ... */ /* XXX special case so we can memcpy after m_devget? */ ovbcopy(data + sizeof(uint16_t), wh, hdrlen); if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) { wh4 = mtod(m, struct ieee80211_qosframe_addr4*); *(uint16_t *)wh4->i_qos = ds->QosCtrl; } else { *(uint16_t *)wh->i_qos = ds->QosCtrl; } } /* * The f/w strips WEP header but doesn't clear * the WEP bit; mark the packet with M_WEP so * net80211 will treat the data as decrypted. * While here also clear the PWR_MGT bit since * power save is handled by the firmware and * passing this up will potentially cause the * upper layer to put a station in power save * (except when configured with MWL_HOST_PS_SUPPORT).
*/ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) m->m_flags |= M_WEP; #ifdef MWL_HOST_PS_SUPPORT wh->i_fc[1] &= ~IEEE80211_FC1_PROTECTED; #else wh->i_fc[1] &= ~(IEEE80211_FC1_PROTECTED | IEEE80211_FC1_PWR_MGT); #endif if (ieee80211_radiotap_active(ic)) { struct mwl_rx_radiotap_header *tap = &sc->sc_rx_th; tap->wr_flags = 0; tap->wr_rate = ds->Rate; tap->wr_antsignal = rssi + nf; tap->wr_antnoise = nf; } if (IFF_DUMPPKTS_RECV(sc, wh)) { ieee80211_dump_pkt(ic, mtod(m, caddr_t), len, ds->Rate, rssi); } /* dispatch */ ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { mn = MWL_NODE(ni); #ifdef MWL_ANT_INFO_SUPPORT mn->mn_ai.rssi_a = ds->ai.rssi_a; mn->mn_ai.rssi_b = ds->ai.rssi_b; mn->mn_ai.rssi_c = ds->ai.rssi_c; mn->mn_ai.rsvd1 = rssi; #endif /* tag AMPDU aggregates for reorder processing */ if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; (void) ieee80211_input(ni, m, rssi, nf); ieee80211_free_node(ni); } else (void) ieee80211_input_all(ic, m, rssi, nf); rx_next: /* NB: ignore ENOMEM so we process more descriptors */ (void) mwl_rxbuf_init(sc, bf); bf = STAILQ_NEXT(bf, bf_list); } rx_stop: sc->sc_rxnext = bf; if (mbufq_first(&sc->sc_snd) != NULL) { /* NB: kick fw; the tx thread may have been preempted */ mwl_hal_txstart(sc->sc_mh, 0); mwl_start(sc); } } static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *txq, int qnum) { struct mwl_txbuf *bf, *bn; struct mwl_txdesc *ds; MWL_TXQ_LOCK_INIT(sc, txq); txq->qnum = qnum; txq->txpri = 0; /* XXX */ #if 0 /* NB: q setup by mwl_txdma_setup XXX */ STAILQ_INIT(&txq->free); #endif STAILQ_FOREACH(bf, &txq->free, bf_list) { bf->bf_txq = txq; ds = bf->bf_desc; bn = STAILQ_NEXT(bf, bf_list); if (bn == NULL) bn = STAILQ_FIRST(&txq->free); ds->pPhysNext = htole32(bn->bf_daddr); } STAILQ_INIT(&txq->active); } /* * Setup a hardware data transmit queue for the specified * access category (AC). We record the mapping from ac's * to h/w queues for use by mwl_tx_start. */ static int mwl_tx_setup(struct mwl_softc *sc, int ac, int mvtype) { struct mwl_txq *txq; if (ac >= nitems(sc->sc_ac2q)) { device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n", ac, nitems(sc->sc_ac2q)); return 0; } if (mvtype >= MWL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "mvtype %u out of range, max %u!\n", mvtype, MWL_NUM_TX_QUEUES); return 0; } txq = &sc->sc_txq[mvtype]; mwl_txq_init(sc, txq, mvtype); sc->sc_ac2q[ac] = txq; return 1; } /* * Update WME parameters for a transmit queue. */ static int mwl_txq_update(struct mwl_softc *sc, int ac) { #define MWL_EXPONENT_TO_VALUE(v) ((1<<v)-1) struct ieee80211com *ic = &sc->sc_ic; struct mwl_txq *txq = sc->sc_ac2q[ac]; struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac]; struct mwl_hal *mh = sc->sc_mh; int aifs, cwmin, cwmax, txoplim; aifs = wmep->wmep_aifsn; /* XXX in sta mode need to pass log values for cwmin/max */ cwmin = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); cwmax = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); txoplim = wmep->wmep_txopLimit; /* NB: units of 32us */ if (mwl_hal_setedcaparams(mh, txq->qnum, cwmin, cwmax, aifs, txoplim)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } return 1; #undef MWL_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ static int mwl_wme_update(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; return !mwl_txq_update(sc, WME_AC_BE) || !mwl_txq_update(sc, WME_AC_BK) || !mwl_txq_update(sc, WME_AC_VI) || !mwl_txq_update(sc, WME_AC_VO) ?
EIO : 0; } /* * Reclaim resources for a setup queue. */ static void mwl_tx_cleanupq(struct mwl_softc *sc, struct mwl_txq *txq) { /* XXX hal work? */ MWL_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. */ static void mwl_tx_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_cleanupq(sc, &sc->sc_txq[i]); } static int mwl_tx_dmasetup(struct mwl_softc *sc, struct mwl_txbuf *bf, struct mbuf *m0) { struct mbuf *m; int error; /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. */ error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { /* XXX packet requires too many descriptors */ bf->bf_nseg = MWL_TXDESC+1; } else if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } /* * Discard null packets and check for packets that * require too many TX descriptors. We try to convert * the latter to a cluster. */ if (error == EFBIG) { /* too many desc's, linearize */ sc->sc_stats.mst_tx_linear++; #if MWL_TXDESC > 1 m = m_collapse(m0, M_NOWAIT, MWL_TXDESC); #else m = m_defrag(m0, M_NOWAIT); #endif if (m == NULL) { m_freem(m0); sc->sc_stats.mst_tx_nombuf++; return ENOMEM; } m0 = m; error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } KASSERT(bf->bf_nseg <= MWL_TXDESC, ("too many segments after defrag; nseg %u", bf->bf_nseg)); } else if (bf->bf_nseg == 0) { /* null packet, discard */ sc->sc_stats.mst_tx_nodata++; m_freem(m0); return EIO; } DPRINTF(sc, MWL_DEBUG_XMIT, "%s: m %p len %u\n", __func__, m0, m0->m_pkthdr.len); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); bf->bf_m = m0; return 0; } static __inline int mwl_cvtlegacyrate(int rate) { switch (rate) { case 2: return 0; case 4: return 1; case 11: return 2; case 22: return 3; case 44: return 4; case 12: return 5; case 18: return 6; case 24: return 7; case 36: return 8; case 48: return 9; case 72: return 10; case 96: return 11; case 108:return 12; } return 0; } /* * Calculate fixed tx rate information per client state; * this value is suitable for writing to the Format field * of a tx descriptor. */ static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *ni) { uint16_t fmt; fmt = SM(3, EAGLE_TXD_ANTENNA) | (IEEE80211_IS_CHAN_HT40D(ni->ni_chan) ? EAGLE_TXD_EXTCHAN_LO : EAGLE_TXD_EXTCHAN_HI); if (rate & IEEE80211_RATE_MCS) { /* HT MCS */ fmt |= EAGLE_TXD_FORMAT_HT /* NB: 0x80 implicitly stripped from ucastrate */ | SM(rate, EAGLE_TXD_RATE); /* XXX short/long GI may be wrong; re-check */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { fmt |= EAGLE_TXD_CHW_40 | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40 ? EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG); } else { fmt |= EAGLE_TXD_CHW_20 | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20 ? EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG); } } else { /* legacy rate */ fmt |= EAGLE_TXD_FORMAT_LEGACY | SM(mwl_cvtlegacyrate(rate), EAGLE_TXD_RATE) | EAGLE_TXD_CHW_20 /* XXX iv_flags & IEEE80211_F_SHPREAMBLE? */ | (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE ? 
EAGLE_TXD_PREAMBLE_SHORT : EAGLE_TXD_PREAMBLE_LONG); } return fmt; } static int mwl_tx_start(struct mwl_softc *sc, struct ieee80211_node *ni, struct mwl_txbuf *bf, struct mbuf *m0) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; int error, iswep, ismcast; int hdrlen, copyhdrlen, pktlen; struct mwl_txdesc *ds; struct mwl_txq *txq; struct ieee80211_frame *wh; struct mwltxrec *tr; struct mwl_node *mn; uint16_t qos; #if MWL_TXDESC > 1 int i; #endif wh = mtod(m0, struct ieee80211_frame *); iswep = wh->i_fc[1] & IEEE80211_FC1_PROTECTED; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); hdrlen = ieee80211_anyhdrsize(wh); copyhdrlen = hdrlen; pktlen = m0->m_pkthdr.len; if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) { qos = *(uint16_t *) (((struct ieee80211_qosframe_addr4 *) wh)->i_qos); copyhdrlen -= sizeof(qos); } else qos = *(uint16_t *) (((struct ieee80211_qosframe *) wh)->i_qos); } else qos = 0; if (iswep) { const struct ieee80211_cipher *cip; struct ieee80211_key *k; /* * Construct the 802.11 header+trailer for an encrypted * frame. The only reason this can fail is because of an * unknown or unsupported cipher/key type. * * NB: we do this even though the firmware will ignore * what we've done for WEP and TKIP as we need the * ExtIV filled in for CCMP and this also adjusts * the headers which simplifies our work below. */ k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { /* * This can happen when the key is yanked after the * frame was queued. Just discard the frame; the * 802.11 layer counts failures and provides * debugging/diagnostics. */ m_freem(m0); return EIO; } /* * Adjust the packet length for the crypto additions * done during encap and any other bits that the f/w * will add later on. */ cip = k->wk_cipher; pktlen += cip->ic_header + cip->ic_miclen + cip->ic_trailer; /* packet header may have moved, reset our local pointer */ wh = mtod(m0, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { sc->sc_tx_th.wt_flags = 0; /* XXX */ if (iswep) sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP; #if 0 sc->sc_tx_th.wt_rate = ds->DataRate; #endif sc->sc_tx_th.wt_txpower = ni->ni_txpower; sc->sc_tx_th.wt_antenna = sc->sc_txantenna; ieee80211_radiotap_tx(vap, m0); } /* * Copy up/down the 802.11 header; the firmware requires * we present a 2-byte payload length followed by a * 4-address header (w/o QoS), followed (optionally) by * any WEP/ExtIV header (but only filled in for CCMP). * We are assured the mbuf has sufficient headroom to * prepend in-place by the setup of ic_headroom in * mwl_attach. */ if (hdrlen < sizeof(struct mwltxrec)) { const int space = sizeof(struct mwltxrec) - hdrlen; if (M_LEADINGSPACE(m0) < space) { /* NB: should never happen */ device_printf(sc->sc_dev, "not enough headroom, need %d found %zd, " "m_flags 0x%x m_len %d\n", space, M_LEADINGSPACE(m0), m0->m_flags, m0->m_len); ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *), m0->m_len, 0, -1); m_freem(m0); sc->sc_stats.mst_tx_noheadroom++; return EIO; } M_PREPEND(m0, space, M_NOWAIT); } tr = mtod(m0, struct mwltxrec *); if (wh != (struct ieee80211_frame *) &tr->wh) ovbcopy(wh, &tr->wh, hdrlen); /* * Note: the "firmware length" is actually the length * of the fully formed "802.11 payload". That is, it's * everything except for the 802.11 header. In particular * this includes all crypto material including the MIC! */ tr->fwlen = htole16(pktlen - hdrlen); /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. 
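* As an illustration (not driver-specific): an mbuf chain of three clusters loads with bf_nseg == 3 and one (address, length) pair per segment, while a build with MWL_TXDESC == 1 would get EFBIG back for the same chain and mwl_tx_dmasetup falls back to m_defrag() to obtain a single contiguous buffer.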
*/ error = mwl_tx_dmasetup(sc, bf, m0); if (error != 0) { /* NB: stat collected in mwl_tx_dmasetup */ DPRINTF(sc, MWL_DEBUG_XMIT, "%s: unable to setup dma\n", __func__); return error; } bf->bf_node = ni; /* NB: held reference */ m0 = bf->bf_m; /* NB: may have changed */ tr = mtod(m0, struct mwltxrec *); wh = (struct ieee80211_frame *)&tr->wh; /* * Formulate tx descriptor. */ ds = bf->bf_desc; txq = bf->bf_txq; ds->QosCtrl = qos; /* NB: already little-endian */ #if MWL_TXDESC == 1 /* * NB: multiframes should be zero because the descriptors * are initialized to zero. This should handle the case * where the driver is built with MWL_TXDESC=1 but we are * using firmware with multi-segment support. */ ds->PktPtr = htole32(bf->bf_segs[0].ds_addr); ds->PktLen = htole16(bf->bf_segs[0].ds_len); #else ds->multiframes = htole32(bf->bf_nseg); ds->PktLen = htole16(m0->m_pkthdr.len); for (i = 0; i < bf->bf_nseg; i++) { ds->PktPtrArray[i] = htole32(bf->bf_segs[i].ds_addr); ds->PktLenArray[i] = htole16(bf->bf_segs[i].ds_len); } #endif /* NB: pPhysNext, DataRate, and SapPktInfo setup once, don't touch */ ds->Format = 0; ds->pad = 0; ds->ack_wcb_addr = 0; mn = MWL_NODE(ni); /* * Select transmit rate. */ switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_MGT: sc->sc_stats.mst_tx_mgmt++; /* fall thru... */ case IEEE80211_FC0_TYPE_CTL: /* NB: assign to BE q to avoid bursting */ ds->TxPriority = MWL_WME_AC_BE; break; case IEEE80211_FC0_TYPE_DATA: if (!ismcast) { const struct ieee80211_txparam *tp = ni->ni_txparms; /* * EAPOL frames get forced to a fixed rate and w/o * aggregation; otherwise check for any fixed rate * for the client (may depend on association state). */ if (m0->m_flags & M_EAPOL) { const struct mwl_vap *mvp = MWL_VAP_CONST(vap); ds->Format = mvp->mv_eapolformat; ds->pad = htole16( EAGLE_TXD_FIXED_RATE | EAGLE_TXD_DONT_AGGR); } else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) { /* XXX pre-calculate per node */ ds->Format = htole16( mwl_calcformat(tp->ucastrate, ni)); ds->pad = htole16(EAGLE_TXD_FIXED_RATE); } /* NB: EAPOL frames will never have qos set */ if (qos == 0) ds->TxPriority = txq->qnum; #if MWL_MAXBA > 3 else if (mwl_bastream_match(&mn->mn_ba[3], qos)) ds->TxPriority = mn->mn_ba[3].txq; #endif #if MWL_MAXBA > 2 else if (mwl_bastream_match(&mn->mn_ba[2], qos)) ds->TxPriority = mn->mn_ba[2].txq; #endif #if MWL_MAXBA > 1 else if (mwl_bastream_match(&mn->mn_ba[1], qos)) ds->TxPriority = mn->mn_ba[1].txq; #endif #if MWL_MAXBA > 0 else if (mwl_bastream_match(&mn->mn_ba[0], qos)) ds->TxPriority = mn->mn_ba[0].txq; #endif else ds->TxPriority = txq->qnum; } else ds->TxPriority = txq->qnum; break; default: device_printf(sc->sc_dev, "bogus frame type 0x%x (%s)\n", wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__); sc->sc_stats.mst_tx_badframetype++; m_freem(m0); return EIO; } if (IFF_DUMPPKTS_XMIT(sc)) ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *)+sizeof(uint16_t), m0->m_len - sizeof(uint16_t), ds->DataRate, -1); MWL_TXQ_LOCK(txq); ds->Status = htole32(EAGLE_TXD_STATUS_FW_OWNED); STAILQ_INSERT_TAIL(&txq->active, bf, bf_list); MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->sc_tx_timer = 5; MWL_TXQ_UNLOCK(txq); return 0; } static __inline int mwl_cvtlegacyrix(int rix) { static const int ieeerates[] = { 2, 4, 11, 22, 44, 12, 18, 24, 36, 48, 72, 96, 108 }; return (rix < nitems(ieeerates) ? ieeerates[rix] : 0); } /* * Process completed xmit descriptors from the specified queue. 
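* As a worked example of the rate readback done here: a completed legacy frame whose Format RATE field is 2 yields ni_txrate == 11 via mwl_cvtlegacyrix above, i.e. 5.5 Mb/s in net80211's half-Mb/s encoding, while an HT frame keeps its MCS index and has IEEE80211_RATE_MCS or'd in.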
*/ static int mwl_tx_processq(struct mwl_softc *sc, struct mwl_txq *txq) { #define EAGLE_TXD_STATUS_MCAST \ (EAGLE_TXD_STATUS_MULTICAST_TX | EAGLE_TXD_STATUS_BROADCAST_TX) struct ieee80211com *ic = &sc->sc_ic; struct mwl_txbuf *bf; struct mwl_txdesc *ds; struct ieee80211_node *ni; struct mwl_node *an; int nreaped; uint32_t status; DPRINTF(sc, MWL_DEBUG_TX_PROC, "%s: tx queue %u\n", __func__, txq->qnum); for (nreaped = 0;; nreaped++) { MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->active); if (bf == NULL) { MWL_TXQ_UNLOCK(txq); break; } ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (ds->Status & htole32(EAGLE_TXD_STATUS_FW_OWNED)) { MWL_TXQ_UNLOCK(txq); break; } STAILQ_REMOVE_HEAD(&txq->active, bf_list); MWL_TXQ_UNLOCK(txq); #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_XMIT_DESC) mwl_printtxbuf(bf, txq->qnum, nreaped); #endif ni = bf->bf_node; if (ni != NULL) { an = MWL_NODE(ni); status = le32toh(ds->Status); if (status & EAGLE_TXD_STATUS_OK) { uint16_t Format = le16toh(ds->Format); uint8_t txant = MS(Format, EAGLE_TXD_ANTENNA); sc->sc_stats.mst_ant_tx[txant]++; if (status & EAGLE_TXD_STATUS_OK_RETRY) sc->sc_stats.mst_tx_retries++; if (status & EAGLE_TXD_STATUS_OK_MORE_RETRY) sc->sc_stats.mst_tx_mretries++; if (txq->qnum >= MWL_WME_AC_VO) ic->ic_wme.wme_hipri_traffic++; ni->ni_txrate = MS(Format, EAGLE_TXD_RATE); if ((Format & EAGLE_TXD_FORMAT_HT) == 0) { ni->ni_txrate = mwl_cvtlegacyrix( ni->ni_txrate); } else ni->ni_txrate |= IEEE80211_RATE_MCS; sc->sc_stats.mst_tx_rate = ni->ni_txrate; } else { if (status & EAGLE_TXD_STATUS_FAILED_LINK_ERROR) sc->sc_stats.mst_tx_linkerror++; if (status & EAGLE_TXD_STATUS_FAILED_XRETRY) sc->sc_stats.mst_tx_xretries++; if (status & EAGLE_TXD_STATUS_FAILED_AGING) sc->sc_stats.mst_tx_aging++; if (bf->bf_m->m_flags & M_FF) sc->sc_stats.mst_ff_txerr++; } if (bf->bf_m->m_flags & M_TXCB) /* XXX strip fw len in case header inspected */ m_adj(bf->bf_m, sizeof(uint16_t)); ieee80211_tx_complete(ni, bf->bf_m, (status & EAGLE_TXD_STATUS_OK) == 0); } else m_freem(bf->bf_m); ds->Status = htole32(EAGLE_TXD_STATUS_IDLE); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); mwl_puttxbuf_tail(txq, bf); } return nreaped; #undef EAGLE_TXD_STATUS_MCAST } /* * Deferred processing of transmit interrupt; special-cased * for four hardware queues, 0-3. */ static void mwl_tx_proc(void *arg, int npending) { struct mwl_softc *sc = arg; int nreaped; /* * Process each active queue. 
*/ nreaped = 0; if (!STAILQ_EMPTY(&sc->sc_txq[0].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[0]); if (!STAILQ_EMPTY(&sc->sc_txq[1].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[1]); if (!STAILQ_EMPTY(&sc->sc_txq[2].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[2]); if (!STAILQ_EMPTY(&sc->sc_txq[3].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[3]); if (nreaped != 0) { sc->sc_tx_timer = 0; if (mbufq_first(&sc->sc_snd) != NULL) { /* NB: kick fw; the tx thread may have been preempted */ mwl_hal_txstart(sc->sc_mh, 0); mwl_start(sc); } } } static void mwl_tx_draintxq(struct mwl_softc *sc, struct mwl_txq *txq) { struct ieee80211_node *ni; struct mwl_txbuf *bf; u_int ix; /* * NB: this assumes output has been stopped and * we do not need to block mwl_tx_tasklet */ for (ix = 0;; ix++) { MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->active); if (bf == NULL) { MWL_TXQ_UNLOCK(txq); break; } STAILQ_REMOVE_HEAD(&txq->active, bf_list); MWL_TXQ_UNLOCK(txq); #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_RESET) { struct ieee80211com *ic = &sc->sc_ic; const struct mwltxrec *tr = mtod(bf->bf_m, const struct mwltxrec *); mwl_printtxbuf(bf, txq->qnum, ix); ieee80211_dump_pkt(ic, (const uint8_t *)&tr->wh, bf->bf_m->m_len - sizeof(tr->fwlen), 0, -1); } #endif /* MWL_DEBUG */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); ni = bf->bf_node; if (ni != NULL) { /* * Reclaim node reference. */ ieee80211_free_node(ni); } m_freem(bf->bf_m); mwl_puttxbuf_tail(txq, bf); } } /* * Drain the transmit queues and reclaim resources. */ static void mwl_draintxq(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_draintxq(sc, &sc->sc_txq[i]); sc->sc_tx_timer = 0; } #ifdef MWL_DIAGAPI /* * Reset the transmit queues to a pristine state after a fw download. */ static void mwl_resettxq(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txq_reset(sc, &sc->sc_txq[i]); } #endif /* MWL_DIAGAPI */ /* * Clear the transmit queues of any frames submitted for the * specified vap. This is done when the vap is deleted so we * don't potentially reference the vap after it is gone. * Note we cannot remove the frames; we only reclaim the node * reference. 
*/ static void mwl_cleartxq(struct mwl_softc *sc, struct ieee80211vap *vap) { struct mwl_txq *txq; struct mwl_txbuf *bf; int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { txq = &sc->sc_txq[i]; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct ieee80211_node *ni = bf->bf_node; if (ni != NULL && ni->ni_vap == vap) { bf->bf_node = NULL; ieee80211_free_node(ni); } } MWL_TXQ_UNLOCK(txq); } } static int mwl_recv_action(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct mwl_softc *sc = ni->ni_ic->ic_softc; const struct ieee80211_action *ia; ia = (const struct ieee80211_action *) frm; if (ia->ia_category == IEEE80211_ACTION_CAT_HT && ia->ia_action == IEEE80211_ACTION_HT_MIMOPWRSAVE) { const struct ieee80211_action_ht_mimopowersave *mps = (const struct ieee80211_action_ht_mimopowersave *) ia; mwl_hal_setmimops(sc->sc_mh, ni->ni_macaddr, mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA, MS(mps->am_control, IEEE80211_A_HT_MIMOPWRSAVE_MODE)); return 0; } else return sc->sc_recv_action(ni, wh, frm, efrm); } static int mwl_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct ieee80211vap *vap = ni->ni_vap; struct mwl_node *mn = MWL_NODE(ni); struct mwl_bastate *bas; bas = tap->txa_private; if (bas == NULL) { const MWL_HAL_BASTREAM *sp; /* * Check for a free BA stream slot. */ #if MWL_MAXBA > 3 if (mn->mn_ba[3].bastream == NULL) bas = &mn->mn_ba[3]; else #endif #if MWL_MAXBA > 2 if (mn->mn_ba[2].bastream == NULL) bas = &mn->mn_ba[2]; else #endif #if MWL_MAXBA > 1 if (mn->mn_ba[1].bastream == NULL) bas = &mn->mn_ba[1]; else #endif #if MWL_MAXBA > 0 if (mn->mn_ba[0].bastream == NULL) bas = &mn->mn_ba[0]; else #endif { /* sta already has max BA streams */ /* XXX assign BA stream to highest priority tid */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: already has max bastreams\n", __func__); sc->sc_stats.mst_ampdu_reject++; return 0; } /* NB: no held reference to ni */ sp = mwl_hal_bastream_alloc(MWL_VAP(vap)->mv_hvap, (baparamset & IEEE80211_BAPS_POLICY_IMMEDIATE) != 0, ni->ni_macaddr, tap->txa_tid, ni->ni_htparam, ni, tap); if (sp == NULL) { /* * No available stream, return 0 so no * a-mpdu aggregation will be done. */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no bastream available\n", __func__); sc->sc_stats.mst_ampdu_nostream++; return 0; } DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: alloc bastream %p\n", __func__, sp); /* NB: qos is left zero so we won't match in mwl_tx_start */ bas->bastream = sp; tap->txa_private = bas; } /* fetch current seq# from the firmware; if available */ if (mwl_hal_bastream_get_seqno(sc->sc_mh, bas->bastream, vap->iv_opmode == IEEE80211_M_STA ? 
vap->iv_myaddr : ni->ni_macaddr, &tap->txa_start) != 0) tap->txa_start = 0; return sc->sc_addba_request(ni, tap, dialogtoken, baparamset, batimeout); } static int mwl_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int code, int baparamset, int batimeout) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct mwl_bastate *bas; bas = tap->txa_private; if (bas == NULL) { /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA stream allocated, TID %d\n", __func__, tap->txa_tid); sc->sc_stats.mst_addba_nostream++; return 0; } if (code == IEEE80211_STATUS_SUCCESS) { struct ieee80211vap *vap = ni->ni_vap; int bufsiz, error; /* * Tell the firmware to setup the BA stream; * we know resources are available because we * pre-allocated one before forming the request. */ bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); if (bufsiz == 0) bufsiz = IEEE80211_AGGR_BAWMAX; error = mwl_hal_bastream_create(MWL_VAP(vap)->mv_hvap, bas->bastream, bufsiz, bufsiz, tap->txa_start); if (error != 0) { /* * Setup failed, return immediately so no a-mpdu * aggregation will be done. */ mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: create failed, error %d, bufsiz %d TID %d " "htparam 0x%x\n", __func__, error, bufsiz, tap->txa_tid, ni->ni_htparam); sc->sc_stats.mst_bacreate_failed++; return 0; } /* NB: cache txq to avoid ptr indirect */ mwl_bastream_setup(bas, tap->txa_tid, bas->bastream->txq); DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bastream %p assigned to txq %d TID %d bufsiz %d " "htparam 0x%x\n", __func__, bas->bastream, bas->txq, tap->txa_tid, bufsiz, ni->ni_htparam); } else { /* * Other side NAK'd us; return the resources. */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: request failed with code %d, destroy bastream %p\n", __func__, code, bas->bastream); mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; } /* NB: firmware sends BAR so we don't need to */ return sc->sc_addba_response(ni, tap, code, baparamset, batimeout); } static void mwl_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct mwl_bastate *bas; bas = tap->txa_private; if (bas != NULL) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: destroy bastream %p\n", __func__, bas->bastream); mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; } sc->sc_addba_stop(ni, tap); } /* * Setup the rx data structures. This should only be * done once or we may get out of sync with the firmware. */ static int mwl_startrecv(struct mwl_softc *sc) { if (!sc->sc_recvsetup) { struct mwl_rxbuf *bf, *prev; struct mwl_rxdesc *ds; prev = NULL; STAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { int error = mwl_rxbuf_init(sc, bf); if (error != 0) { DPRINTF(sc, MWL_DEBUG_RECV, "%s: mwl_rxbuf_init failed %d\n", __func__, error); return error; } if (prev != NULL) { ds = prev->bf_desc; ds->pPhysNext = htole32(bf->bf_daddr); } prev = bf; } if (prev != NULL) { ds = prev->bf_desc; ds->pPhysNext = htole32(STAILQ_FIRST(&sc->sc_rxbuf)->bf_daddr); } sc->sc_recvsetup = 1; } mwl_mode_init(sc); /* set filters, etc. 
*/ return 0; } static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *vap, struct ieee80211_channel *chan) { MWL_HAL_APMODE mode; if (IEEE80211_IS_CHAN_HT(chan)) { if (vap->iv_flags_ht & IEEE80211_FHT_PUREN) mode = AP_MODE_N_ONLY; else if (IEEE80211_IS_CHAN_5GHZ(chan)) mode = AP_MODE_AandN; else if (vap->iv_flags & IEEE80211_F_PUREG) mode = AP_MODE_GandN; else mode = AP_MODE_BandGandN; } else if (IEEE80211_IS_CHAN_ANYG(chan)) { if (vap->iv_flags & IEEE80211_F_PUREG) mode = AP_MODE_G_ONLY; else mode = AP_MODE_MIXED; } else if (IEEE80211_IS_CHAN_B(chan)) mode = AP_MODE_B_ONLY; else if (IEEE80211_IS_CHAN_A(chan)) mode = AP_MODE_A_ONLY; else mode = AP_MODE_MIXED; /* XXX should not happen? */ return mode; } static int mwl_setapmode(struct ieee80211vap *vap, struct ieee80211_channel *chan) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; return mwl_hal_setapmode(hvap, mwl_getapmode(vap, chan)); } /* * Set/change channels. */ static int mwl_chan_set(struct mwl_softc *sc, struct ieee80211_channel *chan) { struct mwl_hal *mh = sc->sc_mh; struct ieee80211com *ic = &sc->sc_ic; MWL_HAL_CHANNEL hchan; int maxtxpow; DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x\n", __func__, chan->ic_freq, chan->ic_flags); /* * Convert to a HAL channel description with * the flags constrained to reflect the current * operating mode. */ mwl_mapchan(&hchan, chan); mwl_hal_intrset(mh, 0); /* disable interrupts */ #if 0 mwl_draintxq(sc); /* clear pending tx frames */ #endif mwl_hal_setchannel(mh, &hchan); /* * Tx power is cap'd by the regulatory setting and * possibly a user-set limit. We pass the min of * these to the hal to apply them to the cal data * for this channel. * XXX min bound? */ maxtxpow = 2*chan->ic_maxregpower; if (maxtxpow > ic->ic_txpowlimit) maxtxpow = ic->ic_txpowlimit; mwl_hal_settxpower(mh, &hchan, maxtxpow / 2); /* NB: potentially change mcast/mgt rates */ mwl_setcurchanrates(sc); /* * Update internal state. */ sc->sc_tx_th.wt_chan_freq = htole16(chan->ic_freq); sc->sc_rx_th.wr_chan_freq = htole16(chan->ic_freq); if (IEEE80211_IS_CHAN_A(chan)) { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_A); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_A); } else if (IEEE80211_IS_CHAN_ANYG(chan)) { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_G); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_G); } else { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_B); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_B); } sc->sc_curchan = hchan; mwl_hal_intrset(mh, sc->sc_imask); return 0; } static void mwl_scan_start(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__); } static void mwl_scan_end(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__); } static void mwl_set_channel(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; (void) mwl_chan_set(sc, ic->ic_curchan); } /* * Handle a channel switch request. We inform the firmware * and mark the global state to suppress various actions. * NB: we issue only one request to the fw; we may be called * multiple times if there are multiple vap's. 
*/ static void mwl_startcsa(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; MWL_HAL_CHANNEL hchan; if (sc->sc_csapending) return; mwl_mapchan(&hchan, ic->ic_csa_newchan); /* 1 =>'s quiet channel */ mwl_hal_setchannelswitchie(sc->sc_mh, &hchan, 1, ic->ic_csa_count); sc->sc_csapending = 1; } /* * Plumb any static WEP key for the station. This is * necessary as we must propagate the key from the * global key table of the vap to each sta db entry. */ static void mwl_setanywepkey(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { if ((vap->iv_flags & (IEEE80211_F_PRIVACY|IEEE80211_F_WPA)) == IEEE80211_F_PRIVACY && vap->iv_def_txkey != IEEE80211_KEYIX_NONE && vap->iv_nw_keys[vap->iv_def_txkey].wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, &vap->iv_nw_keys[vap->iv_def_txkey], mac); } static int mwl_peerstadb(struct ieee80211_node *ni, int aid, int staid, MWL_HAL_PEERINFO *pi) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct ieee80211vap *vap = ni->ni_vap; struct mwl_hal_vap *hvap; int error; if (vap->iv_opmode == IEEE80211_M_WDS) { /* * WDS vap's do not have a f/w vap; instead they piggyback * on an AP vap and we must install the sta db entry and * crypto state using that AP's handle (the WDS vap has none). */ hvap = MWL_VAP(vap)->mv_ap_hvap; } else hvap = MWL_VAP(vap)->mv_hvap; error = mwl_hal_newstation(hvap, ni->ni_macaddr, aid, staid, pi, ni->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT), ni->ni_ies.wme_ie != NULL ? WME(ni->ni_ies.wme_ie)->wme_info : 0); if (error == 0) { /* * Setup security for this station. For sta mode this is * needed even though we do the same thing on transition to * AUTH state because the call to mwl_hal_newstation * clobbers the crypto state we setup. */ mwl_setanywepkey(vap, ni->ni_macaddr); } return error; #undef WME } static void mwl_setglobalkeys(struct ieee80211vap *vap) { struct ieee80211_key *wk; wk = &vap->iv_nw_keys[0]; for (; wk < &vap->iv_nw_keys[IEEE80211_WEP_NKID]; wk++) if (wk->wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, wk, vap->iv_myaddr); } /* * Convert a legacy rate set to a firmware bitmask. */ static uint32_t get_rate_bitmap(const struct ieee80211_rateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) switch (rs->rs_rates[i] & IEEE80211_RATE_VAL) { case 2: rates |= 0x001; break; case 4: rates |= 0x002; break; case 11: rates |= 0x004; break; case 22: rates |= 0x008; break; case 44: rates |= 0x010; break; case 12: rates |= 0x020; break; case 18: rates |= 0x040; break; case 24: rates |= 0x080; break; case 36: rates |= 0x100; break; case 48: rates |= 0x200; break; case 72: rates |= 0x400; break; case 96: rates |= 0x800; break; case 108: rates |= 0x1000; break; } return rates; } /* * Construct an HT firmware bitmask from an HT rate set. */ static uint32_t get_htrate_bitmap(const struct ieee80211_htrateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) { if (rs->rs_rates[i] < 16) rates |= 1<<rs->rs_rates[i]; } return rates; } /* * Craft station database entry for station. * NB: use host byte order here, the hal handles byte swapping.
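* For example, a peer advertising the 11b rate set {1, 2, 5.5, 11} Mb/s (net80211 rate codes {2, 4, 11, 22}) yields LegacyRateBitMap 0x00f from get_rate_bitmap above, and an HT peer offering MCS 0-7 yields HTRateBitMap 0x00ff from get_htrate_bitmap.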
*/ static MWL_HAL_PEERINFO * mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni) { const struct ieee80211vap *vap = ni->ni_vap; memset(pi, 0, sizeof(*pi)); pi->LegacyRateBitMap = get_rate_bitmap(&ni->ni_rates); pi->CapInfo = ni->ni_capinfo; if (ni->ni_flags & IEEE80211_NODE_HT) { /* HT capabilities, etc */ pi->HTCapabilitiesInfo = ni->ni_htcap; /* XXX pi.HTCapabilitiesInfo */ pi->MacHTParamInfo = ni->ni_htparam; pi->HTRateBitMap = get_htrate_bitmap(&ni->ni_htrates); pi->AddHtInfo.ControlChan = ni->ni_htctlchan; pi->AddHtInfo.AddChan = ni->ni_ht2ndchan; pi->AddHtInfo.OpMode = ni->ni_htopmode; pi->AddHtInfo.stbc = ni->ni_htstbc; /* constrain according to local configuration */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20; if (ni->ni_chw != 40) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40; } return pi; } /* * Re-create the local sta db entry for a vap to ensure * up to date WME state is pushed to the firmware. Because * this resets crypto state this must be followed by a * reload of any keys in the global key table. */ static int mwl_localstadb(struct ieee80211vap *vap) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211_node *bss; MWL_HAL_PEERINFO pi; int error; switch (vap->iv_opmode) { case IEEE80211_M_STA: bss = vap->iv_bss; error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0, vap->iv_state == IEEE80211_S_RUN ? mkpeerinfo(&pi, bss) : NULL, (bss->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT)), bss->ni_ies.wme_ie != NULL ? WME(bss->ni_ies.wme_ie)->wme_info : 0); if (error == 0) mwl_setglobalkeys(vap); break; case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0, NULL, vap->iv_flags & IEEE80211_F_WME, 0); if (error == 0) mwl_setglobalkeys(vap); break; default: error = 0; break; } return error; #undef WME } static int mwl_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct mwl_vap *mvp = MWL_VAP(vap); struct mwl_hal_vap *hvap = mvp->mv_hvap; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = NULL; struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; enum ieee80211_state ostate = vap->iv_state; int error; DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: %s -> %s\n", vap->iv_ifp->if_xname, __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); callout_stop(&sc->sc_timer); /* * Clear current radar detection state. */ if (ostate == IEEE80211_S_CAC) { /* stop quiet mode radar detection */ mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_STOP); } else if (sc->sc_radarena) { /* stop in-service radar detection */ mwl_hal_setradardetection(mh, DR_DFS_DISABLE); sc->sc_radarena = 0; } /* * Carry out per-state actions before doing net80211 work. */ if (nstate == IEEE80211_S_INIT) { /* NB: only ap+sta vap's have a fw entity */ if (hvap != NULL) mwl_hal_stop(hvap); } else if (nstate == IEEE80211_S_SCAN) { mwl_hal_start(hvap); /* NB: this disables beacon frames */ mwl_hal_setinframode(hvap); } else if (nstate == IEEE80211_S_AUTH) { /* * Must create a sta db entry in case a WEP key needs to * be plumbed. This entry will be overwritten if we * associate; otherwise it will be reclaimed on node free. 
*/ ni = vap->iv_bss; MWL_NODE(ni)->mn_hvap = hvap; (void) mwl_peerstadb(ni, 0, 0, NULL); } else if (nstate == IEEE80211_S_CSA) { /* XXX move to below? */ if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) mwl_startcsa(vap); } else if (nstate == IEEE80211_S_CAC) { /* XXX move to below? */ /* stop ap xmit and enable quiet mode radar detection */ mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_START); } /* * Invoke the parent method to do net80211 work. */ error = mvp->mv_newstate(vap, nstate, arg); /* * Carry out work that must be done after net80211 runs; * this work requires up to date state (e.g. iv_bss). */ if (error == 0 && nstate == IEEE80211_S_RUN) { /* NB: collect bss node again, it may have changed */ ni = vap->iv_bss; DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s(RUN): iv_flags 0x%08x bintvl %d bssid %s " "capinfo 0x%04x chan %d\n", vap->iv_ifp->if_xname, __func__, vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan)); /* * Recreate local sta db entry to update WME/HT state. */ mwl_localstadb(vap); switch (vap->iv_opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: if (ostate == IEEE80211_S_CAC) { /* enable in-service radar detection */ mwl_hal_setradardetection(mh, DR_IN_SERVICE_MONITOR_START); sc->sc_radarena = 1; } /* * Allocate and setup the beacon frame * (and related state). */ error = mwl_reset_vap(vap, IEEE80211_S_RUN); if (error != 0) { DPRINTF(sc, MWL_DEBUG_STATE, "%s: beacon setup failed, error %d\n", __func__, error); goto bad; } /* NB: must be after setting up beacon */ mwl_hal_start(hvap); break; case IEEE80211_M_STA: DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: aid 0x%x\n", vap->iv_ifp->if_xname, __func__, ni->ni_associd); /* * Set state now that we're associated. */ mwl_hal_setassocid(hvap, ni->ni_bssid, ni->ni_associd); mwl_setrates(vap); mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold); if ((vap->iv_flags & IEEE80211_F_DWDS) && sc->sc_ndwdsvaps++ == 0) mwl_hal_setdwds(mh, 1); break; case IEEE80211_M_WDS: DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: bssid %s\n", vap->iv_ifp->if_xname, __func__, ether_sprintf(ni->ni_bssid)); mwl_seteapolformat(vap); break; default: break; } /* * Set CS mode according to operating channel; * this is mostly an optimization for 5GHz. * * NB: must follow mwl_hal_start which resets csmode */ if (IEEE80211_IS_CHAN_5GHZ(ic->ic_bsschan)) mwl_hal_setcsmode(mh, CSMODE_AGGRESSIVE); else mwl_hal_setcsmode(mh, CSMODE_AUTO_ENA); /* * Start timer to prod firmware. */ if (sc->sc_ageinterval != 0) callout_reset(&sc->sc_timer, sc->sc_ageinterval*hz, mwl_agestations, sc); } else if (nstate == IEEE80211_S_SLEEP) { /* XXX set chip in power save */ } else if ((vap->iv_flags & IEEE80211_F_DWDS) && --sc->sc_ndwdsvaps == 0) mwl_hal_setdwds(mh, 0); bad: return error; } /* * Manage station id's; these are separate from AID's * as AID's may have values out of the range of possible * station id's acceptable to the firmware. */ static int allocstaid(struct mwl_softc *sc, int aid) { int staid; if (!(0 < aid && aid < MWL_MAXSTAID) || isset(sc->sc_staid, aid)) { /* NB: don't use 0 */ for (staid = 1; staid < MWL_MAXSTAID; staid++) if (isclr(sc->sc_staid, staid)) break; } else staid = aid; setbit(sc->sc_staid, staid); return staid; } static void delstaid(struct mwl_softc *sc, int staid) { clrbit(sc->sc_staid, staid); } /* * Setup driver-specific state for a newly associated node. * Note that we're also called on a re-associate; the isnew * param tells us if this is the first time or not.
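* To make the staid handling concrete: an association id that falls in (0, MWL_MAXSTAID) and is still clear in sc_staid is used directly as the station id by allocstaid above, while an out-of-range or already-taken aid falls back to the first clear bit at or above 1 (staid 0 is never used).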
*/ static void mwl_newassoc(struct ieee80211_node *ni, int isnew) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); MWL_HAL_PEERINFO pi; uint16_t aid; int error; aid = IEEE80211_AID(ni->ni_associd); if (isnew) { mn->mn_staid = allocstaid(sc, aid); mn->mn_hvap = MWL_VAP(vap)->mv_hvap; } else { mn = MWL_NODE(ni); /* XXX reset BA stream? */ } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mac %s isnew %d aid %d staid %d\n", __func__, ether_sprintf(ni->ni_macaddr), isnew, aid, mn->mn_staid); error = mwl_peerstadb(ni, aid, mn->mn_staid, mkpeerinfo(&pi, ni)); if (error != 0) { DPRINTF(sc, MWL_DEBUG_NODE, "%s: error %d creating sta db entry\n", __func__, error); /* XXX how to deal with error? */ } } /* * Periodically poke the firmware to age out station state * (power save queues, pending tx aggregates). */ static void mwl_agestations(void *arg) { struct mwl_softc *sc = arg; mwl_hal_setkeepalive(sc->sc_mh); if (sc->sc_ageinterval != 0) /* NB: catch dynamic changes */ callout_schedule(&sc->sc_timer, sc->sc_ageinterval*hz); } static const struct mwl_hal_channel * findhalchannel(const MWL_HAL_CHANNELINFO *ci, int ieee) { int i; for (i = 0; i < ci->nchannels; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; if (hc->ieee == ieee) return hc; } return NULL; } static int mwl_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *rd, int nchan, struct ieee80211_channel chans[]) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; const MWL_HAL_CHANNELINFO *ci; int i; for (i = 0; i < nchan; i++) { struct ieee80211_channel *c = &chans[i]; const struct mwl_hal_channel *hc; if (IEEE80211_IS_CHAN_2GHZ(c)) { mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_2DOT4GHZ, IEEE80211_IS_CHAN_HT40(c) ? MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci); } else if (IEEE80211_IS_CHAN_5GHZ(c)) { mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_5GHZ, IEEE80211_IS_CHAN_HT40(c) ? MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci); } else { device_printf(sc->sc_dev, "%s: channel %u freq %u/0x%x not 2.4/5GHz\n", __func__, c->ic_ieee, c->ic_freq, c->ic_flags); return EINVAL; } /* * Verify channel has cal data and cap tx power. */ hc = findhalchannel(ci, c->ic_ieee); if (hc != NULL) { if (c->ic_maxpower > 2*hc->maxTxPow) c->ic_maxpower = 2*hc->maxTxPow; goto next; } if (IEEE80211_IS_CHAN_HT40(c)) { /* * Look for the extension channel since the * hal table only has the primary channel. 
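* For example (hypothetical numbers): an HT40+ channel whose primary is IEEE channel 36 carries ic_extieee == 40, so when the cal table lists only channel 40 the tx power cap is taken from that entry instead.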
*/ hc = findhalchannel(ci, c->ic_extieee); if (hc != NULL) { if (c->ic_maxpower > 2*hc->maxTxPow) c->ic_maxpower = 2*hc->maxTxPow; goto next; } } device_printf(sc->sc_dev, "%s: no cal data for channel %u ext %u freq %u/0x%x\n", __func__, c->ic_ieee, c->ic_extieee, c->ic_freq, c->ic_flags); return EINVAL; next: ; } return 0; } #define IEEE80211_CHAN_HTG (IEEE80211_CHAN_HT|IEEE80211_CHAN_G) #define IEEE80211_CHAN_HTA (IEEE80211_CHAN_HT|IEEE80211_CHAN_A) static void addht40channels(struct ieee80211_channel chans[], int maxchans, int *nchans, const MWL_HAL_CHANNELINFO *ci, int flags) { int i, error; for (i = 0; i < ci->nchannels; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; error = ieee80211_add_channel_ht40(chans, maxchans, nchans, hc->ieee, hc->maxTxPow, flags); if (error != 0 && error != ENOENT) break; } } static void addchannels(struct ieee80211_channel chans[], int maxchans, int *nchans, const MWL_HAL_CHANNELINFO *ci, const uint8_t bands[]) { int i, error; error = 0; for (i = 0; i < ci->nchannels && error == 0; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; error = ieee80211_add_channel(chans, maxchans, nchans, hc->ieee, hc->freq, hc->maxTxPow, 0, bands); } } static void getchannels(struct mwl_softc *sc, int maxchans, int *nchans, struct ieee80211_channel chans[]) { const MWL_HAL_CHANNELINFO *ci; uint8_t bands[IEEE80211_MODE_BYTES]; /* * Use the channel info from the hal to craft the * channel list. Note that we pass back an unsorted * list; the caller is required to sort it for us * (if desired). */ *nchans = 0; if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) { memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); setbit(bands, IEEE80211_MODE_11NG); addchannels(chans, maxchans, nchans, ci, bands); } if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_5GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) { memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11A); setbit(bands, IEEE80211_MODE_11NA); addchannels(chans, maxchans, nchans, ci, bands); } if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0) addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTG); if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_5GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0) addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTA); } static void mwl_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct mwl_softc *sc = ic->ic_softc; getchannels(sc, maxchans, nchans, chans); } static int mwl_getchannels(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; /* * Use the channel info from the hal to craft the * channel list for net80211. Note that we pass up * an unsorted list; net80211 will sort it for us. */ memset(ic->ic_channels, 0, sizeof(ic->ic_channels)); ic->ic_nchans = 0; getchannels(sc, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ic->ic_regdomain.regdomain = SKU_DEBUG; ic->ic_regdomain.country = CTRY_DEFAULT; ic->ic_regdomain.location = 'I'; ic->ic_regdomain.isocc[0] = ' '; /* XXX? */ ic->ic_regdomain.isocc[1] = ' '; return (ic->ic_nchans == 0 ? 
EIO : 0); } #undef IEEE80211_CHAN_HTA #undef IEEE80211_CHAN_HTG #ifdef MWL_DEBUG static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix) { const struct mwl_rxdesc *ds = bf->bf_desc; uint32_t status = le32toh(ds->Status); printf("R[%2u] (DS.V:%p DS.P:0x%jx) NEXT:%08x DATA:%08x RC:%02x%s\n" " STAT:%02x LEN:%04x RSSI:%02x CHAN:%02x RATE:%02x QOS:%04x HT:%04x\n", ix, ds, (uintmax_t)bf->bf_daddr, le32toh(ds->pPhysNext), le32toh(ds->pPhysBuffData), ds->RxControl, ds->RxControl != EAGLE_RXD_CTRL_DRIVER_OWN ? "" : (status & EAGLE_RXD_STATUS_OK) ? " *" : " !", ds->Status, le16toh(ds->PktLen), ds->RSSI, ds->Channel, ds->Rate, le16toh(ds->QosCtrl), le16toh(ds->HtSig2)); } static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix) { const struct mwl_txdesc *ds = bf->bf_desc; uint32_t status = le32toh(ds->Status); printf("Q%u[%3u]", qnum, ix); printf(" (DS.V:%p DS.P:0x%jx)\n", ds, (uintmax_t)bf->bf_daddr); printf(" NEXT:%08x DATA:%08x LEN:%04x STAT:%08x%s\n", le32toh(ds->pPhysNext), le32toh(ds->PktPtr), le16toh(ds->PktLen), status, status & EAGLE_TXD_STATUS_USED ? "" : (status & 3) != 0 ? " *" : " !"); printf(" RATE:%02x PRI:%x QOS:%04x SAP:%08x FORMAT:%04x\n", ds->DataRate, ds->TxPriority, le16toh(ds->QosCtrl), le32toh(ds->SapPktInfo), le16toh(ds->Format)); #if MWL_TXDESC > 1 printf(" MULTIFRAMES:%u LEN:%04x %04x %04x %04x %04x %04x\n" , le32toh(ds->multiframes) , le16toh(ds->PktLenArray[0]), le16toh(ds->PktLenArray[1]) , le16toh(ds->PktLenArray[2]), le16toh(ds->PktLenArray[3]) , le16toh(ds->PktLenArray[4]), le16toh(ds->PktLenArray[5]) ); printf(" DATA:%08x %08x %08x %08x %08x %08x\n" , le32toh(ds->PktPtrArray[0]), le32toh(ds->PktPtrArray[1]) , le32toh(ds->PktPtrArray[2]), le32toh(ds->PktPtrArray[3]) , le32toh(ds->PktPtrArray[4]), le32toh(ds->PktPtrArray[5]) ); #endif #if 0 { const uint8_t *cp = (const uint8_t *) ds; int i; for (i = 0; i < sizeof(struct mwl_txdesc); i++) { printf("%02x ", cp[i]); if (((i+1) % 16) == 0) printf("\n"); } printf("\n"); } #endif } #endif /* MWL_DEBUG */ #if 0 static void mwl_txq_dump(struct mwl_txq *txq) { struct mwl_txbuf *bf; int i = 0; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct mwl_txdesc *ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef MWL_DEBUG mwl_printtxbuf(bf, txq->qnum, i); #endif i++; } MWL_TXQ_UNLOCK(txq); } #endif static void mwl_watchdog(void *arg) { struct mwl_softc *sc = arg; callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); if (sc->sc_tx_timer == 0 || --sc->sc_tx_timer > 0) return; if (sc->sc_running && !sc->sc_invalid) { if (mwl_hal_setkeepalive(sc->sc_mh)) device_printf(sc->sc_dev, "transmit timeout (firmware hung?)\n"); else device_printf(sc->sc_dev, "transmit timeout\n"); #if 0 mwl_reset(sc); mwl_txq_dump(&sc->sc_txq[0]);/*XXX*/ #endif counter_u64_add(sc->sc_ic.ic_oerrors, 1); sc->sc_stats.mst_watchdog++; } } #ifdef MWL_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. */ static int mwl_ioctl_diag(struct mwl_softc *sc, struct mwl_diag *md) { struct mwl_hal *mh = sc->sc_mh; u_int id = md->md_id & MWL_DIAG_ID; void *indata = NULL; void *outdata = NULL; u_int32_t insize = md->md_in_size; u_int32_t outsize = md->md_out_size; int error = 0; if (md->md_id & MWL_DIAG_IN) { /* * Copy in data. 
*/ indata = malloc(insize, M_TEMP, M_NOWAIT); if (indata == NULL) { error = ENOMEM; goto bad; } error = copyin(md->md_in_data, indata, insize); if (error) goto bad; } if (md->md_id & MWL_DIAG_DYN) { /* * Allocate a buffer for the results (otherwise the HAL * returns a pointer to a buffer where we can read the * results). Note that we depend on the HAL leaving this * pointer for us to use below in reclaiming the buffer; * may want to be more defensive. */ outdata = malloc(outsize, M_TEMP, M_NOWAIT); if (outdata == NULL) { error = ENOMEM; goto bad; } } if (mwl_hal_getdiagstate(mh, id, indata, insize, &outdata, &outsize)) { if (outsize < md->md_out_size) md->md_out_size = outsize; if (outdata != NULL) error = copyout(outdata, md->md_out_data, md->md_out_size); } else { error = EINVAL; } bad: if ((md->md_id & MWL_DIAG_IN) && indata != NULL) free(indata, M_TEMP); if ((md->md_id & MWL_DIAG_DYN) && outdata != NULL) free(outdata, M_TEMP); return error; } static int mwl_ioctl_reset(struct mwl_softc *sc, struct mwl_diag *md) { struct mwl_hal *mh = sc->sc_mh; int error; MWL_LOCK_ASSERT(sc); if (md->md_id == 0 && mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to load firmware\n"); return EIO; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); return EIO; } error = mwl_setupdma(sc); if (error != 0) { /* NB: mwl_setupdma prints a msg */ return error; } /* * Reset tx/rx data structures; after reload we must * re-start the driver's notion of the next xmit/recv. */ mwl_draintxq(sc); /* clear pending frames */ mwl_resettxq(sc); /* rebuild tx q lists */ sc->sc_rxnext = NULL; /* force rx to start at the list head */ return 0; } #endif /* MWL_DIAGAPI */ static void mwl_parent(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; int startall = 0; MWL_LOCK(sc); if (ic->ic_nrunning > 0) { if (sc->sc_running) { /* * To avoid rescanning another access point, * do not call mwl_init() here. Instead, * only reflect promisc mode settings. */ mwl_mode_init(sc); } else { /* * Beware of being called during attach/detach * to reset promiscuous mode. In that case we * will still be marked UP but not RUNNING. * However trying to re-init the interface * is the wrong thing to do as we've already * torn down much of our state. There's * probably a better way to deal with this. */ if (!sc->sc_invalid) { mwl_init(sc); /* XXX lose error */ startall = 1; } } } else mwl_stop(sc); MWL_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } static int mwl_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct mwl_softc *sc = ic->ic_softc; struct ifreq *ifr = data; int error = 0; switch (cmd) { case SIOCGMVSTATS: mwl_hal_gethwstats(sc->sc_mh, &sc->sc_stats.hw_stats); #if 0 /* NB: embed these numbers to get a consistent view */ sc->sc_stats.mst_tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); sc->sc_stats.mst_rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); #endif /* * NB: Drop the softc lock in case of a page fault; * we'll accept any potential inconsistency in the * statistics. The alternative is to copy the data * to a local structure.
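* A minimal sketch of that alternative (hypothetical; it assumes the softc lock is held here and uses the driver's MWL_LOCK/MWL_UNLOCK mutex macros): * struct mwl_stats tmp; * MWL_LOCK(sc); * tmp = sc->sc_stats; * MWL_UNLOCK(sc); * return (copyout(&tmp, ifr->ifr_data, sizeof(tmp))); * so any page fault taken in copyout happens with the lock already dropped.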
*/ return (copyout(&sc->sc_stats, ifr->ifr_data, sizeof (sc->sc_stats))); #ifdef MWL_DIAGAPI case SIOCGMVDIAG: /* XXX check privs */ return mwl_ioctl_diag(sc, (struct mwl_diag *) ifr); case SIOCGMVRESET: /* XXX check privs */ MWL_LOCK(sc); error = mwl_ioctl_reset(sc,(struct mwl_diag *) ifr); MWL_UNLOCK(sc); break; #endif /* MWL_DIAGAPI */ default: error = ENOTTY; break; } return (error); } #ifdef MWL_DEBUG static int mwl_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct mwl_softc *sc = arg1; int debug, error; debug = sc->sc_debug | (mwl_hal_getdebug(sc->sc_mh) << 24); error = sysctl_handle_int(oidp, &debug, 0, req); if (error || !req->newptr) return error; mwl_hal_setdebug(sc->sc_mh, debug >> 24); sc->sc_debug = debug & 0x00ffffff; return 0; } #endif /* MWL_DEBUG */ static void mwl_sysctlattach(struct mwl_softc *sc) { #ifdef MWL_DEBUG struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_debug = mwl_debug; SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW, sc, 0, mwl_sysctl_debug, "I", "control debugging printfs"); #endif } /* * Announce various information on device/driver attach. */ static void mwl_announce(struct mwl_softc *sc) { device_printf(sc->sc_dev, "Rev A%d hardware, v%d.%d.%d.%d firmware (regioncode %d)\n", sc->sc_hwspecs.hwVersion, (sc->sc_hwspecs.fwReleaseNumber>>24) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>16) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>8) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>0) & 0xff, sc->sc_hwspecs.regionCode); sc->sc_fwrelease = sc->sc_hwspecs.fwReleaseNumber; if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct mwl_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->qnum, ieee80211_wme_acnames[i]); } } if (bootverbose || mwl_rxdesc != MWL_RXDESC) device_printf(sc->sc_dev, "using %u rx descriptors\n", mwl_rxdesc); if (bootverbose || mwl_rxbuf != MWL_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", mwl_rxbuf); if (bootverbose || mwl_txbuf != MWL_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", mwl_txbuf); if (bootverbose && mwl_hal_ismbsscapable(sc->sc_mh)) device_printf(sc->sc_dev, "multi-bss support\n"); #ifdef MWL_TX_NODROP if (bootverbose) device_printf(sc->sc_dev, "no tx drop\n"); #endif } Index: stable/11/sys/dev/sfxge/common/siena_nvram.c =================================================================== --- stable/11/sys/dev/sfxge/common/siena_nvram.c (revision 330445) +++ stable/11/sys/dev/sfxge/common/siena_nvram.c (revision 330446) @@ -1,737 +1,737 @@ /*- * Copyright (c) 2009-2016 Solarflare Communications Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are * those of the authors and should not be interpreted as representing official * policies, either expressed or implied, of the FreeBSD Project. */ #include __FBSDID("$FreeBSD$"); #include "efx.h" #include "efx_impl.h" #if EFSYS_OPT_SIENA #if EFSYS_OPT_VPD || EFSYS_OPT_NVRAM __checkReturn efx_rc_t siena_nvram_partn_size( __in efx_nic_t *enp, __in uint32_t partn, __out size_t *sizep) { efx_rc_t rc; if ((1 << partn) & ~enp->en_u.siena.enu_partn_mask) { rc = ENOTSUP; goto fail1; } if ((rc = efx_mcdi_nvram_info(enp, partn, sizep, NULL, NULL, NULL)) != 0) { goto fail2; } return (0); fail2: EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_lock( __in efx_nic_t *enp, __in uint32_t partn) { efx_rc_t rc; if ((rc = efx_mcdi_nvram_update_start(enp, partn)) != 0) { goto fail1; } return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_read( __in efx_nic_t *enp, __in uint32_t partn, __in unsigned int offset, __out_bcount(size) caddr_t data, __in size_t size) { size_t chunk; efx_rc_t rc; while (size > 0) { chunk = MIN(size, SIENA_NVRAM_CHUNK); if ((rc = efx_mcdi_nvram_read(enp, partn, offset, data, chunk, MC_CMD_NVRAM_READ_IN_V2_DEFAULT)) != 0) { goto fail1; } size -= chunk; data += chunk; offset += chunk; } return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_erase( __in efx_nic_t *enp, __in uint32_t partn, __in unsigned int offset, __in size_t size) { efx_rc_t rc; if ((rc = efx_mcdi_nvram_erase(enp, partn, offset, size)) != 0) { goto fail1; } return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_write( __in efx_nic_t *enp, __in uint32_t partn, __in unsigned int offset, __out_bcount(size) caddr_t data, __in size_t size) { size_t chunk; efx_rc_t rc; while (size > 0) { chunk = MIN(size, SIENA_NVRAM_CHUNK); if ((rc = efx_mcdi_nvram_write(enp, partn, offset, data, chunk)) != 0) { goto fail1; } size -= chunk; data += chunk; offset += chunk; } return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_unlock( __in efx_nic_t *enp, __in uint32_t partn) { boolean_t reboot; efx_rc_t rc; /* * Reboot into the new image only for PHYs. The driver has to * explicitly cope with an MC reboot after a firmware update. 
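* Concretely, per the partition table that follows: a PHY image update on port 1 targets MC_CMD_NVRAM_TYPE_PHY_PORT0, so update_finish is issued with reboot set and the PHY starts running the new image right away; an EFX_NVRAM_MC_FIRMWARE update instead leaves reboot clear, and the eventual MC reboot is left for the driver to cope with itself.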
*/ reboot = (partn == MC_CMD_NVRAM_TYPE_PHY_PORT0 || partn == MC_CMD_NVRAM_TYPE_PHY_PORT1 || partn == MC_CMD_NVRAM_TYPE_DISABLED_CALLISTO); rc = efx_mcdi_nvram_update_finish(enp, partn, reboot, NULL); if (rc != 0) goto fail1; return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } #endif /* EFSYS_OPT_VPD || EFSYS_OPT_NVRAM */ #if EFSYS_OPT_NVRAM typedef struct siena_parttbl_entry_s { unsigned int partn; unsigned int port; efx_nvram_type_t nvtype; } siena_parttbl_entry_t; static siena_parttbl_entry_t siena_parttbl[] = { {MC_CMD_NVRAM_TYPE_DISABLED_CALLISTO, 1, EFX_NVRAM_NULLPHY}, {MC_CMD_NVRAM_TYPE_DISABLED_CALLISTO, 2, EFX_NVRAM_NULLPHY}, {MC_CMD_NVRAM_TYPE_MC_FW, 1, EFX_NVRAM_MC_FIRMWARE}, {MC_CMD_NVRAM_TYPE_MC_FW, 2, EFX_NVRAM_MC_FIRMWARE}, {MC_CMD_NVRAM_TYPE_MC_FW_BACKUP, 1, EFX_NVRAM_MC_GOLDEN}, {MC_CMD_NVRAM_TYPE_MC_FW_BACKUP, 2, EFX_NVRAM_MC_GOLDEN}, {MC_CMD_NVRAM_TYPE_EXP_ROM, 1, EFX_NVRAM_BOOTROM}, {MC_CMD_NVRAM_TYPE_EXP_ROM, 2, EFX_NVRAM_BOOTROM}, {MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT0, 1, EFX_NVRAM_BOOTROM_CFG}, {MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT1, 2, EFX_NVRAM_BOOTROM_CFG}, {MC_CMD_NVRAM_TYPE_PHY_PORT0, 1, EFX_NVRAM_PHY}, {MC_CMD_NVRAM_TYPE_PHY_PORT1, 2, EFX_NVRAM_PHY}, {MC_CMD_NVRAM_TYPE_FPGA, 1, EFX_NVRAM_FPGA}, {MC_CMD_NVRAM_TYPE_FPGA, 2, EFX_NVRAM_FPGA}, {MC_CMD_NVRAM_TYPE_FPGA_BACKUP, 1, EFX_NVRAM_FPGA_BACKUP}, {MC_CMD_NVRAM_TYPE_FPGA_BACKUP, 2, EFX_NVRAM_FPGA_BACKUP}, {MC_CMD_NVRAM_TYPE_FC_FW, 1, EFX_NVRAM_FCFW}, {MC_CMD_NVRAM_TYPE_FC_FW, 2, EFX_NVRAM_FCFW}, {MC_CMD_NVRAM_TYPE_CPLD, 1, EFX_NVRAM_CPLD}, {MC_CMD_NVRAM_TYPE_CPLD, 2, EFX_NVRAM_CPLD}, {MC_CMD_NVRAM_TYPE_LICENSE, 1, EFX_NVRAM_LICENSE}, {MC_CMD_NVRAM_TYPE_LICENSE, 2, EFX_NVRAM_LICENSE} }; __checkReturn efx_rc_t siena_nvram_type_to_partn( __in efx_nic_t *enp, __in efx_nvram_type_t type, __out uint32_t *partnp) { efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip); unsigned int i; EFSYS_ASSERT3U(type, <, EFX_NVRAM_NTYPES); EFSYS_ASSERT(partnp != NULL); for (i = 0; i < EFX_ARRAY_SIZE(siena_parttbl); i++) { siena_parttbl_entry_t *entry = &siena_parttbl[i]; if (entry->port == emip->emi_port && entry->nvtype == type) { *partnp = entry->partn; return (0); } } return (ENOTSUP); } #if EFSYS_OPT_DIAG __checkReturn efx_rc_t siena_nvram_test( __in efx_nic_t *enp) { efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip); siena_parttbl_entry_t *entry; unsigned int i; efx_rc_t rc; /* * Iterate over the list of supported partition types * applicable to *this* port */ for (i = 0; i < EFX_ARRAY_SIZE(siena_parttbl); i++) { entry = &siena_parttbl[i]; if (entry->port != emip->emi_port || !(enp->en_u.siena.enu_partn_mask & (1 << entry->partn))) continue; if ((rc = efx_mcdi_nvram_test(enp, entry->partn)) != 0) { goto fail1; } } return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } #endif /* EFSYS_OPT_DIAG */ #define SIENA_DYNAMIC_CFG_SIZE(_nitems) \ (sizeof (siena_mc_dynamic_config_hdr_t) + ((_nitems) * \ sizeof (((siena_mc_dynamic_config_hdr_t *)NULL)->fw_version[0]))) __checkReturn efx_rc_t siena_nvram_get_dynamic_cfg( __in efx_nic_t *enp, __in uint32_t partn, __in boolean_t vpd, __out siena_mc_dynamic_config_hdr_t **dcfgp, __out size_t *sizep) { siena_mc_dynamic_config_hdr_t *dcfg = NULL; size_t size; uint8_t cksum; unsigned int vpd_offset; unsigned int vpd_length; unsigned int hdr_length; unsigned int nversions; unsigned int pos; unsigned int region; efx_rc_t rc; EFSYS_ASSERT(partn == MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT0 || partn == MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT1); /* * Allocate sufficient memory for the 
entire dynamic cfg area, even * if we're not actually going to read in the VPD. */ if ((rc = siena_nvram_partn_size(enp, partn, &size)) != 0) goto fail1; EFSYS_KMEM_ALLOC(enp->en_esip, size, dcfg); if (dcfg == NULL) { rc = ENOMEM; goto fail2; } if ((rc = siena_nvram_partn_read(enp, partn, 0, (caddr_t)dcfg, SIENA_NVRAM_CHUNK)) != 0) goto fail3; /* Verify the magic */ if (EFX_DWORD_FIELD(dcfg->magic, EFX_DWORD_0) != SIENA_MC_DYNAMIC_CONFIG_MAGIC) goto invalid1; /* All future versions of the structure must be backwards compatible */ EFX_STATIC_ASSERT(SIENA_MC_DYNAMIC_CONFIG_VERSION == 0); hdr_length = EFX_WORD_FIELD(dcfg->length, EFX_WORD_0); nversions = EFX_DWORD_FIELD(dcfg->num_fw_version_items, EFX_DWORD_0); vpd_offset = EFX_DWORD_FIELD(dcfg->dynamic_vpd_offset, EFX_DWORD_0); vpd_length = EFX_DWORD_FIELD(dcfg->dynamic_vpd_length, EFX_DWORD_0); /* Verify the hdr doesn't overflow the partn size */ if (hdr_length > size || vpd_offset > size || vpd_length > size || vpd_length + vpd_offset > size) goto invalid2; /* Verify the header has room for all its versions */ if (hdr_length < SIENA_DYNAMIC_CFG_SIZE(0) || hdr_length < SIENA_DYNAMIC_CFG_SIZE(nversions)) goto invalid3; /* * Read the remaining portion of the dcfg, either including * the whole of VPD (there is no vpd length in this structure, * so we have to parse each tag), or just the dcfg header itself */ region = vpd ? vpd_offset + vpd_length : hdr_length; if (region > SIENA_NVRAM_CHUNK) { if ((rc = siena_nvram_partn_read(enp, partn, SIENA_NVRAM_CHUNK, (caddr_t)dcfg + SIENA_NVRAM_CHUNK, region - SIENA_NVRAM_CHUNK)) != 0) goto fail4; } /* Verify checksum */ cksum = 0; for (pos = 0; pos < hdr_length; pos++) cksum += ((uint8_t *)dcfg)[pos]; if (cksum != 0) goto invalid4; goto done; invalid4: EFSYS_PROBE(invalid4); invalid3: EFSYS_PROBE(invalid3); invalid2: EFSYS_PROBE(invalid2); invalid1: EFSYS_PROBE(invalid1); /* * Construct a new "null" dcfg, with an empty version vector, * and an empty VPD chunk trailing. This has the neat side effect * of testing the exception paths in the write path.
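 * (Note: concretely, the code below writes the magic and version, sets the
 * header length to exactly sizeof (*dcfg), points dynamic_vpd_offset just
 * past that header, and zeroes dynamic_vpd_length and
 * num_fw_version_items -- an empty but well-formed structure.)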
*/ EFX_POPULATE_DWORD_1(dcfg->magic, EFX_DWORD_0, SIENA_MC_DYNAMIC_CONFIG_MAGIC); EFX_POPULATE_WORD_1(dcfg->length, EFX_WORD_0, sizeof (*dcfg)); EFX_POPULATE_BYTE_1(dcfg->version, EFX_BYTE_0, SIENA_MC_DYNAMIC_CONFIG_VERSION); EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_offset, EFX_DWORD_0, sizeof (*dcfg)); EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_length, EFX_DWORD_0, 0); EFX_POPULATE_DWORD_1(dcfg->num_fw_version_items, EFX_DWORD_0, 0); done: *dcfgp = dcfg; *sizep = size; return (0); fail4: EFSYS_PROBE(fail4); fail3: EFSYS_PROBE(fail3); EFSYS_KMEM_FREE(enp->en_esip, size, dcfg); fail2: EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_get_subtype( __in efx_nic_t *enp, __in uint32_t partn, __out uint32_t *subtypep) { efx_mcdi_req_t req; uint8_t payload[MAX(MC_CMD_GET_BOARD_CFG_IN_LEN, MC_CMD_GET_BOARD_CFG_OUT_LENMAX)]; efx_word_t *fw_list; efx_rc_t rc; (void) memset(payload, 0, sizeof (payload)); req.emr_cmd = MC_CMD_GET_BOARD_CFG; req.emr_in_buf = payload; req.emr_in_length = MC_CMD_GET_BOARD_CFG_IN_LEN; req.emr_out_buf = payload; req.emr_out_length = MC_CMD_GET_BOARD_CFG_OUT_LENMAX; efx_mcdi_execute(enp, &req); if (req.emr_rc != 0) { rc = req.emr_rc; goto fail1; } if (req.emr_out_length_used < MC_CMD_GET_BOARD_CFG_OUT_LENMIN) { rc = EMSGSIZE; goto fail2; } if (req.emr_out_length_used < MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST + (partn + 1) * sizeof (efx_word_t)) { rc = ENOENT; goto fail3; } fw_list = MCDI_OUT2(req, efx_word_t, GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST); *subtypep = EFX_WORD_FIELD(fw_list[partn], EFX_WORD_0); return (0); fail3: EFSYS_PROBE(fail3); fail2: EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_get_version( __in efx_nic_t *enp, __in uint32_t partn, __out uint32_t *subtypep, __out_ecount(4) uint16_t version[4]) { siena_mc_dynamic_config_hdr_t *dcfg; siena_parttbl_entry_t *entry; uint32_t dcfg_partn; unsigned int i; efx_rc_t rc; if ((1 << partn) & ~enp->en_u.siena.enu_partn_mask) { rc = ENOTSUP; goto fail1; } if ((rc = siena_nvram_get_subtype(enp, partn, subtypep)) != 0) goto fail2; /* * Some partitions are accessible from both ports (for instance BOOTROM). * Find the highest version reported by all dcfg structures on ports * that have access to this partition. */ version[0] = version[1] = version[2] = version[3] = 0; for (i = 0; i < EFX_ARRAY_SIZE(siena_parttbl); i++) { siena_mc_fw_version_t *verp; unsigned int nitems; uint16_t temp[4]; size_t length; entry = &siena_parttbl[i]; if (entry->partn != partn) continue; dcfg_partn = (entry->port == 1) ? MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT0 : MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT1; /* * Ignore missing partitions on port 2, assuming they're due - * to to running on a single port part. + * to running on a single port part.
*/ if ((1 << dcfg_partn) & ~enp->en_u.siena.enu_partn_mask) { if (entry->port == 2) continue; } if ((rc = siena_nvram_get_dynamic_cfg(enp, dcfg_partn, B_FALSE, &dcfg, &length)) != 0) goto fail3; nitems = EFX_DWORD_FIELD(dcfg->num_fw_version_items, EFX_DWORD_0); if (nitems < entry->partn) goto done; verp = &dcfg->fw_version[partn]; temp[0] = EFX_WORD_FIELD(verp->version_w, EFX_WORD_0); temp[1] = EFX_WORD_FIELD(verp->version_x, EFX_WORD_0); temp[2] = EFX_WORD_FIELD(verp->version_y, EFX_WORD_0); temp[3] = EFX_WORD_FIELD(verp->version_z, EFX_WORD_0); if (memcmp(version, temp, sizeof (temp)) < 0) memcpy(version, temp, sizeof (temp)); done: EFSYS_KMEM_FREE(enp->en_esip, length, dcfg); } return (0); fail3: EFSYS_PROBE(fail3); fail2: EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_rw_start( __in efx_nic_t *enp, __in uint32_t partn, __out size_t *chunk_sizep) { efx_rc_t rc; if ((rc = siena_nvram_partn_lock(enp, partn)) != 0) goto fail1; if (chunk_sizep != NULL) *chunk_sizep = SIENA_NVRAM_CHUNK; return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_rw_finish( __in efx_nic_t *enp, __in uint32_t partn) { efx_rc_t rc; if ((rc = siena_nvram_partn_unlock(enp, partn)) != 0) goto fail1; return (0); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } __checkReturn efx_rc_t siena_nvram_partn_set_version( __in efx_nic_t *enp, __in uint32_t partn, __in_ecount(4) uint16_t version[4]) { efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip); siena_mc_dynamic_config_hdr_t *dcfg = NULL; siena_mc_fw_version_t *fwverp; uint32_t dcfg_partn; size_t dcfg_size; unsigned int hdr_length; unsigned int vpd_length; unsigned int vpd_offset; unsigned int nitems; unsigned int required_hdr_length; unsigned int pos; uint8_t cksum; uint32_t subtype; size_t length; efx_rc_t rc; dcfg_partn = (emip->emi_port == 1) ? MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT0 : MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT1; if ((rc = siena_nvram_partn_size(enp, dcfg_partn, &dcfg_size)) != 0) goto fail1; if ((rc = siena_nvram_partn_lock(enp, dcfg_partn)) != 0) goto fail2; if ((rc = siena_nvram_get_dynamic_cfg(enp, dcfg_partn, B_TRUE, &dcfg, &length)) != 0) goto fail3; hdr_length = EFX_WORD_FIELD(dcfg->length, EFX_WORD_0); nitems = EFX_DWORD_FIELD(dcfg->num_fw_version_items, EFX_DWORD_0); vpd_length = EFX_DWORD_FIELD(dcfg->dynamic_vpd_length, EFX_DWORD_0); vpd_offset = EFX_DWORD_FIELD(dcfg->dynamic_vpd_offset, EFX_DWORD_0); /* * NOTE: This function will blatt any fields trailing the version * vector, or the VPD chunk. 
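 *
 * Aside: the dcfg header checksum is additive -- the read path in
 * siena_nvram_get_dynamic_cfg() rejects an image whose header bytes do not
 * sum to 0 mod 256, and the code below rebalances after editing by
 * subtracting the new byte sum from the csum byte.  A sketch of that
 * invariant follows; it is not part of the driver, and the csum_off
 * parameter is hypothetical.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

/* Sketch only: restore the zero-sum property after a header edit. */
static void
rebalance_cksum(uint8_t *hdr, size_t hdr_length, size_t csum_off)
{
	uint8_t cksum = 0;
	size_t pos;

	for (pos = 0; pos < hdr_length; pos++)
		cksum += hdr[pos];
	hdr[csum_off] -= cksum;		/* bytes now sum to 0 (mod 256) */
}
#endif
/*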
*/ required_hdr_length = SIENA_DYNAMIC_CFG_SIZE(partn + 1); if (required_hdr_length + vpd_length > length) { rc = ENOSPC; goto fail4; } if (vpd_offset < required_hdr_length) { (void) memmove((caddr_t)dcfg + required_hdr_length, (caddr_t)dcfg + vpd_offset, vpd_length); vpd_offset = required_hdr_length; EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_offset, EFX_DWORD_0, vpd_offset); } if (hdr_length < required_hdr_length) { (void) memset((caddr_t)dcfg + hdr_length, 0, required_hdr_length - hdr_length); hdr_length = required_hdr_length; EFX_POPULATE_WORD_1(dcfg->length, EFX_WORD_0, hdr_length); } /* Get the subtype to insert into the fw_subtype array */ if ((rc = siena_nvram_get_subtype(enp, partn, &subtype)) != 0) goto fail5; /* Fill out the new version */ fwverp = &dcfg->fw_version[partn]; EFX_POPULATE_DWORD_1(fwverp->fw_subtype, EFX_DWORD_0, subtype); EFX_POPULATE_WORD_1(fwverp->version_w, EFX_WORD_0, version[0]); EFX_POPULATE_WORD_1(fwverp->version_x, EFX_WORD_0, version[1]); EFX_POPULATE_WORD_1(fwverp->version_y, EFX_WORD_0, version[2]); EFX_POPULATE_WORD_1(fwverp->version_z, EFX_WORD_0, version[3]); /* Update the version count */ if (nitems < partn + 1) { nitems = partn + 1; EFX_POPULATE_DWORD_1(dcfg->num_fw_version_items, EFX_DWORD_0, nitems); } /* Update the checksum */ cksum = 0; for (pos = 0; pos < hdr_length; pos++) cksum += ((uint8_t *)dcfg)[pos]; dcfg->csum.eb_u8[0] -= cksum; /* Erase and write the new partition */ if ((rc = siena_nvram_partn_erase(enp, dcfg_partn, 0, dcfg_size)) != 0) goto fail6; /* Write out the new structure to nvram */ if ((rc = siena_nvram_partn_write(enp, dcfg_partn, 0, (caddr_t)dcfg, vpd_offset + vpd_length)) != 0) goto fail7; EFSYS_KMEM_FREE(enp->en_esip, length, dcfg); siena_nvram_partn_unlock(enp, dcfg_partn); return (0); fail7: EFSYS_PROBE(fail7); fail6: EFSYS_PROBE(fail6); fail5: EFSYS_PROBE(fail5); fail4: EFSYS_PROBE(fail4); EFSYS_KMEM_FREE(enp->en_esip, length, dcfg); fail3: EFSYS_PROBE(fail3); fail2: EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); return (rc); } #endif /* EFSYS_OPT_NVRAM */ #endif /* EFSYS_OPT_SIENA */ Index: stable/11/sys/dev/sio/sio.c =================================================================== --- stable/11/sys/dev/sio/sio.c (revision 330445) +++ stable/11/sys/dev/sio/sio.c (revision 330446) @@ -1,2642 +1,2642 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_gdb.h" #include "opt_kdb.h" #include "opt_sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. * * Changes for PC Card integration: * - Added PC Card driver table and handlers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COM_ESP #include #endif #include #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #define COM_NOTAST4(flags) ((flags) & 0x04) #else #define COM_ISMULTIPORT(flags) (0) #endif /* COM_MULTIPORT */ #define COM_C_IIR_TXRDYBUG 0x80000 #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_DEBUGGER(flags) ((flags) & 0x80) #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #define COM_NOPROBE(flags) ((flags) & 0x40000) #define COM_NOSCR(flags) ((flags) & 0x100000) #define COM_PPSCTS(flags) ((flags) & 0x10000) #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_TI16754(flags) ((flags) & 0x200000) #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). 
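 *
 * (Note: as the defines below show, CS_BUSY, CS_TTGO and CS_ODEVREADY
 * occupy the three high bits (0xe0) while every other state bit is below
 * 0x20, so `com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)' is true
 * exactly when all three are set, with no masking.  A throwaway host-side
 * check of that equivalence -- a sketch, not part of the driver:)
 */
#if 0
#include <assert.h>

/* Sketch: the unmasked ">=" test equals a masked all-bits-set test. */
int
main(void)
{
	unsigned int s;

	for (s = 0; s < 0x100; s++)
		assert((s >= 0xe0) == ((s & 0xe0) == 0xe0));
	return (0);
}
#endif
/*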
*/ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_char state; /* miscellaneous flag bits */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ bool_t st16650a; /* nonzero if Startech 16650A compatible */ int unit; /* unit number */ u_int flags; /* copy of device flags */ u_int tx_fifo_size; /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_ctl_port; Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; struct tty *tp; /* cross reference */ struct pps_state pps; int pps_bit; #ifdef KDB int alt_brk_state; #endif u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; u_long rclk; struct resource *irqres; struct resource *ioportres; int ioportrid; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. 
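 * (Note: as comstart() below shows, output is staged through the two
 * fixed lbq buffers obufs[0] and obufs[1]: each is filled from the tty
 * output queue with q_to_b(), linked onto com->obufq, and drained by
 * siointr1(), so data is copied once per buffer rather than per
 * character.)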
*/ u_char obuf1[256]; u_char obuf2[256]; }; #ifdef COM_ESP static int espattach(struct com_s *com, Port_t esp_port); #endif static void combreak(struct tty *tp, int sig); static timeout_t siobusycheck; static u_int siodivisor(u_long rclk, speed_t speed); static void comclose(struct tty *tp); static int comopen(struct tty *tp, struct cdev *dev); static void sioinput(struct com_s *com); static void siointr1(struct com_s *com); static int siointr(void *arg); static int commodem(struct tty *tp, int sigon, int sigoff); static int comparam(struct tty *tp, struct termios *t); static void siopoll(void *); static void siosettimeout(void); static int siosetwater(struct com_s *com, speed_t speed); static void comstart(struct tty *tp); static void comstop(struct tty *tp, int rw); static timeout_t comwakeup; char sio_driver_name[] = "sio"; static struct mtx sio_lock; static int sio_inited; /* table and macro for fast conversion from a unit number to its com struct */ devclass_t sio_devclass; /* * XXX Assumes that devclass_get_device, devclass_get_softc and * device_get_softc are fast interrupt safe. The current implementations * of these functions are. */ #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) /* XXX */ int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; static u_long comdefaultrclk = DEFAULT_RCLK; SYSCTL_ULONG(_machdep, OID_AUTO, conrclk, CTLFLAG_RW, &comdefaultrclk, 0, ""); static speed_t gdbdefaultrate = GDBSPEED; SYSCTL_UINT(_machdep, OID_AUTO, gdbspeed, CTLFLAG_RW, &gdbdefaultrate, GDBSPEED, ""); static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit = -1; static void *sio_slow_ih; static void *sio_fast_ih; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; #ifdef GDB static Port_t siogdbiobase = 0; #endif #ifdef COM_ESP /* XXX configure this properly. */ /* XXX quite broken for new-bus. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been opened, change the speed on the open running port * itself. */ static int sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected?
*/ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); tp = com->tp; if (tp == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ tp->t_init_in.c_ispeed = tp->t_init_in.c_ospeed = tp->t_lock_in.c_ispeed = tp->t_lock_in.c_ospeed = tp->t_init_out.c_ispeed = tp->t_init_out.c_ospeed = tp->t_lock_out.c_ispeed = tp->t_lock_out.c_ospeed = comdefaultrate; if (tp->t_state & TS_ISOPEN) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); TUNABLE_INT("machdep.conspeed", __DEVOLATILE(int *, &comdefaultrate)); #define SET_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) | (bit)) #define CLR_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) & ~(bit)) /* * Unload the driver and clear the table. * XXX this is mostly wrong. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a kldunload of a controller driver. * The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ int siodetach(device_t dev) { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = TRUE; if (com->tp) ttyfree(com->tp); if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, com->ioportrid, com->ioportres); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (0); } int sioprobe(dev, xrid, rclk, noprobe) device_t dev; int xrid; u_long rclk; int noprobe; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; u_int divisor; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; rid = xrid; port = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); com = malloc(sizeof(*com), M_DEVBUF, M_NOWAIT | M_ZERO); if (com == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } device_set_softc(dev, com); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); if (rclk == 0) rclk = DEFAULT_RCLK; com->rclk = rclk; while (sio_inited != 2) if (atomic_cmpset_int(&sio_inited, 0, 1)) { mtx_init(&sio_lock, sio_driver_name, NULL, (comconsole != -1) ? MTX_SPIN | MTX_QUIET : MTX_SPIN); atomic_store_rel_int(&sio_inited, 2); } #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. 
*/ device_t *devs; int count, i, xioport; devclass_get_devices(sio_devclass, &devs, &count); for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (ENXIO); } /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. */ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { Port_t xiobase; u_long io; idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ mtx_lock_spin(&sio_lock); /* EXTRA DELAY? */ /* * For the TI16754 chips, set prescaler to 1 (4 is often the * default after-reset value) as otherwise it's impossible to * get highest baudrates. */ if (COM_TI16754(flags)) { u_char cfcr, efr; cfcr = sio_getreg(com, com_cfcr); sio_setreg(com, com_cfcr, CFCR_EFR_ENABLE); efr = sio_getreg(com, com_efr); /* Unlock extended features to turn off prescaler. */ sio_setreg(com, com_efr, efr | EFR_EFE); /* Disable EFR. */ sio_setreg(com, com_cfcr, (cfcr != CFCR_EFR_ENABLE) ? cfcr : 0); /* Turn off prescaler. */ sio_setreg(com, com_mcr, sio_getreg(com, com_mcr) & ~MCR_PRESCALE); sio_setreg(com, com_cfcr, CFCR_EFR_ENABLE); sio_setreg(com, com_efr, efr); sio_setreg(com, com_cfcr, cfcr); } /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? - * We don't want to to wait long enough to drain at 2 bps. + * We don't want to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); divisor = siodivisor(rclk, SIO_TEST_SPEED); sio_setreg(com, com_dlbl, divisor & 0xff); sio_setreg(com, com_dlbh, divisor >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interrupts. 
This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); if (iobase == siocniobase) DELAY((1 + 2) * 1000000 / (comdefaultrate / 10)); else DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * It seems my Xircom CBEM56G Cardbus modem wants to be reset * to 8 bits *again*, or else probe test 0 will fail. * gwk@sgi.com, 4/19/2001 */ sio_setreg(com, com_cfcr, CFCR_8BITS); /* * Some PCMCIA cards (Palido 321s, DC-1S, ...) have the "TXRDY bug", * so we probe for a buggy IIR_TXRDY implementation even in the * noprobe case. We don't probe for it in the !noprobe case because * noprobe is always set for PCMCIA cards and the problem is not * known to affect any other cards. */ if (noprobe) { /* Read IIR a few times. */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* IIR_TXRDY should be clear. Is it? */ result = 0; if (failures[6] & IIR_TXRDY) { /* * No. We seem to have the bug. Does our fix for * it work? */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Yes. We discovered the TXRDY bug! */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* No. Just fail. XXX */ result = ENXIO; sio_setreg(com, com_mcr, 0); } } else { /* Yes. No bug. */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); mtx_unlock_spin(&sio_lock); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (iobase == siocniobase) result = 0; /* * XXX: Since we don't return 0, we shouldn't be relying on * the softc that we set to persist to the call to attach * since other probe routines may be called, and the malloc * here causes subr_bus to not allocate anything for the * other probes. Instead, this softc is preserved and other * probe routines can corrupt it. */ if (result != 0) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return (result == 0 ? BUS_PROBE_DEFAULT + 1 : result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). 
* o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? */ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. * (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; mtx_unlock_spin(&sio_lock); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) { printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); printf( "sio%d: port may not be enabled\n", device_get_unit(dev)); } if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (iobase == siocniobase) result = 0; /* * XXX: Since we don't return 0, we shouldn't be relying on the softc * that we set to persist to the call to attach since other probe * routines may be called, and the malloc here causes subr_bus to not * allocate anything for the other probes. Instead, this softc is * preserved and other probe routines can corrupt it. */ if (result != 0) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return (result == 0 ? BUS_PROBE_DEFAULT + 1 : result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); /* * Bits 0,1 of dips say which COM port we are. */ if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. 
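 *
 * (Note: 0x70 masks bits 4-6; the pattern 010 in those bits reads back
 * as 0x20, so the `(val & 0x70) < 0x20' test below rejects boards
 * reporting an older revision encoding.)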
*/ outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. */ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ int sioattach(dev, xrid, rclk) device_t dev; int xrid; u_long rclk; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; int error; struct tty *tp; rid = xrid; port = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. */ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->ioportrid = rid; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; com->data_port = iobase + com_data; com->int_ctl_port = iobase + com_ier; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; tp = com->tp = ttyalloc(); tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_modem = commodem; tp->t_break = combreak; tp->t_close = comclose; tp->t_open = comopen; tp->t_sc = com; if (rclk == 0) rclk = DEFAULT_RCLK; com->rclk = rclk; if (unit == comconsole) ttyconsolemode(tp, comdefaultrate); error = siosetwater(com, tp->t_init_in.c_ispeed); mtx_unlock_spin(&sio_lock); if (error) { /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. 
*/ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } /* attempt to determine UART type */ printf("sio%d: type", unit); if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags) && !COM_NOSCR(flags)) { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250 or not responding"); goto determined_type; } } sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); break; } com->hasfifo = TRUE; if (COM_ST16650A(flags)) { printf(" ST16650A"); com->st16650a = TRUE; com->tx_fifo_size = 32; break; } if (COM_TI16754(flags)) { printf(" TI16754"); com->tx_fifo_size = 64; break; } printf(" 16550A"); #ifdef COM_ESP for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } if (com->esp) break; #endif com->tx_fifo_size = COM_FIFOSIZE(flags); if (com->tx_fifo_size == 0) com->tx_fifo_size = 16; else printf(" lookalike with %u bytes FIFO", com->tx_fifo_size); break; } #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); /* Set RTS/CTS flow control. */ outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); /* Set flow-control levels. 
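 * (An assumption from the command sequence alone: the two 16-bit values
 * written after ESP_SETRXFLOW presumably program the receive flow-off and
 * flow-on marks -- 768 and 512 bytes of the board's 1024-byte buffer.)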
*/ outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); determined_type: ; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a buggy IIR_TXRDY implementation"); printf("\n"); if (sio_fast_ih == NULL) { swi_add(&tty_intr_event, "sio", siopoll, NULL, SWI_TTY, 0, &sio_fast_ih); swi_add(&clk_intr_event, "sio", siopoll, NULL, SWI_CLOCK, 0, &sio_slow_ih); } com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; tp->t_pps = &com->pps; if (COM_PPSCTS(flags)) com->pps_bit = MSR_CTS; else com->pps_bit = MSR_DCD; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (com->irqres) { ret = bus_setup_intr(dev, com->irqres, INTR_TYPE_TTY, siointr, NULL, com, &com->cookie); if (ret) { ret = bus_setup_intr(dev, com->irqres, INTR_TYPE_TTY, NULL, (driver_intr_t *)siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode\n"); } if (ret) device_printf(dev, "could not activate interrupt\n"); #if defined(KDB) /* * Enable interrupts for early break-to-debugger support * on the console. */ if (ret == 0 && unit == comconsole) outb(siocniobase + com_ier, IER_ERXRDY | IER_ERLS | IER_EMSC); #endif } /* We're ready, open the doors... */ ttycreate(tp, TS_CALLOUT, "d%r", unit); return (0); } static int comopen(struct tty *tp, struct cdev *dev) { struct com_s *com; int i; com = tp->t_sc; com->poll = com->no_irq; com->poll_output = com->loses_outints; if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ for (i = 0; i < 500; i++) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); if (!(inb(com->line_status_port) & LSR_RXRDY)) break; sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } if (i == 500) return (EIO); } mtx_lock_spin(&sio_lock); (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); outb(com->int_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC | (COM_IIR_TXRDYBUG(com->flags) ? 0 : IER_ETXRDY)); mtx_unlock_spin(&sio_lock); siosettimeout(); /* XXX: should be generic ? 
*/ if (com->prev_modem_status & MSR_DCD || ISCALLOUT(dev)) ttyld_modem(tp, 1); return (0); } static void comclose(tp) struct tty *tp; { int s; struct com_s *com; s = spltty(); com = tp->t_sc; com->poll = FALSE; com->poll_output = FALSE; sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); #if defined(KDB) /* * Leave interrupts enabled and don't clear DTR if this is the * console. This allows us to detect break-to-debugger events * while the console device is closed. */ if (com->unit != comconsole) #endif { sio_setreg(com, com_ier, 0); if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!tp->t_actout && !(com->prev_modem_status & MSR_DCD) && !(tp->t_init_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { (void)commodem(tp, 0, SER_DTR); ttydtrwaitstart(tp); } } if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } tp->t_actout = FALSE; wakeup(&tp->t_actout); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ siosettimeout(); splx(s); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. * If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static u_int siodivisor(rclk, speed) u_long rclk; speed_t speed; { long actual_speed; u_int divisor; int error; if (speed == 0) return (0); #if UINT_MAX > (ULONG_MAX - 1) / 8 if (speed > (ULONG_MAX - 1) / 8) return (0); #endif divisor = (rclk / (8UL * speed) + 1) / 2; if (divisor == 0 || divisor >= 65536) return (0); actual_speed = rclk / (16UL * divisor); /* 10 times error in percent: */ error = ((actual_speed - (long)speed) * 2000 / (long)speed + 1) / 2; /* 3.0% maximum error tolerance: */ if (error < -30 || error > 30) return (0); return (divisor); } /* * Call this function with the sio_lock mutex held. It will return with the * lock still held. */ static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { /* * This may look odd, but it is using save-and-enable * semantics instead of the save-and-disable semantics * that are used everywhere else. 
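 *
 * (Aside on siodivisor() above, a worked example: with the standard rclk
 * of 1843200 Hz and speed 9600, divisor = (1843200 / (8 * 9600) + 1) / 2 =
 * (24 + 1) / 2 = 12, and the actual speed 1843200 / (16 * 12) is exactly
 * 9600, so the 3.0% error check passes.  A sketch of the same
 * nearest-integer rounding follows; it is not the driver's siodivisor()
 * itself, and the overflow and error checks are omitted:)
 */
#if 0
/* Sketch only: 16550 divisor = rclk / (16 * speed), rounded to nearest. */
static unsigned int
divisor_rounded(unsigned long rclk, unsigned long speed)
{
	return ((unsigned int)((rclk / (8 * speed) + 1) / 2));
}
#endif
/*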
*/ mtx_unlock_spin(&sio_lock); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } mtx_lock_spin(&sio_lock); } while (buf < com->iptr); } else { do { /* * This may look odd, but it is using save-and-enable * semantics instead of the save-and-disable semantics * that are used everywhere else. */ mtx_unlock_spin(&sio_lock); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } ttyld_rint(tp, recv_data); mtx_lock_spin(&sio_lock); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } static int siointr(arg) void *arg; { struct com_s *com; #ifndef COM_MULTIPORT com = (struct com_s *)arg; mtx_lock_spin(&sio_lock); siointr1(com); mtx_unlock_spin(&sio_lock); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ mtx_lock_spin(&sio_lock); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? 
*/ if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); mtx_unlock_spin(&sio_lock); #endif /* COM_MULTIPORT */ return(FILTER_HANDLED); } static struct timespec siots[8]; static int siotso; static int volatile siotsunit = -1; static int sysctl_siots(SYSCTL_HANDLER_ARGS) { char buf[128]; long long delta; size_t len; int error, i, tso; for (i = 1, tso = siotso; i < tso; i++) { delta = (long long)(siots[i].tv_sec - siots[i - 1].tv_sec) * 1000000000 + (siots[i].tv_nsec - siots[i - 1].tv_nsec); len = sprintf(buf, "%lld\n", delta); if (delta >= 110000) len += sprintf(buf + len - 1, ": *** %ld.%09ld\n", (long)siots[i].tv_sec, siots[i].tv_nsec) - 1; if (i == tso - 1) buf[len - 1] = '\0'; error = SYSCTL_OUT(req, buf, len); if (error != 0) return (error); } return (0); } SYSCTL_PROC(_machdep, OID_AUTO, siots, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_siots, "A", "sio timestamps"); static void siointr1(com) struct com_s *com; { u_char int_ctl; u_char int_ctl_new; u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; #ifdef KDB again: #endif if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl = inb(com->int_ctl_port); int_ctl_new = int_ctl; } else { int_ctl = 0; int_ctl_new = 0; } while (!com->gone) { if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & com->pps_bit) { pps_capture(&com->pps); pps_event(&com->pps, (modem_status & com->pps_bit) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); /* input event? (check first to help avoid overruns) */ while (line_status & LSR_RCV_MASK) { /* break/unattached error bits or real input? */ if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); #ifdef KDB if (com->unit == comconsole && kdb_alt_break(recv_data, &com->alt_brk_state) != 0) goto again; #endif /* KDB */ if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI.
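 *
 * (Summary of the cases handled below:
 *	BI, IGNBRK set		-> discarded
 *	BI, IGNBRK clear	-> kept; 0 if bypassing (the console gets
 *				   kdb_break() instead)
 *	FE, IGNPAR set		-> discarded
 *	FE, IGNPAR clear	-> kept; 0 if bypassing
 *	PE, IGNPAR set		-> discarded
 *	PE, IGNPAR clear	-> kept; 0 if bypassing and INPCK is set)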
*/ if (line_status & LSR_BI) { #if defined(KDB) if (com->unit == comconsole) { kdb_break(); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->tp != NULL && com->tp->t_hotchar != 0 && recv_data == com->tp->t_hotchar) swi_sched(sio_fast_ih, 0); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->tp != NULL && com->tp->t_do_timestamp) microtime(&com->tp->t_timestamp); ++com_events; swi_sched(sio_slow_ih, SWI_DELAY); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) swi_sched(sio_fast_ih, 0); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) goto txrdy; /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ line_status = inb(com->line_status_port) & 0x7F; } /* modem status change? (always check before doing output) */ modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. */ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; swi_sched(sio_fast_ih, 0); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } txrdy: /* output queued and everything ready? */ if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1 && com->unit != siotsunit) { u_int ocount; ocount = com->obufq.l_tail - ioptr; if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do outb(com->data_port, *ioptr++); while (--ocount != 0); } else { outb(com->data_port, *ioptr++); ++com->bytes_out; if (com->unit == siotsunit && siotso < nitems(siots)) nanouptime(&siots[siotso++]); } com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) int_ctl_new = int_ctl | IER_ETXRDY; if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) int_ctl_new = int_ctl & ~IER_ETXRDY; com->state &= ~CS_BUSY; } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; /* handle at high level ASAP */ swi_sched(sio_fast_ih, 0); } } if (COM_IIR_TXRDYBUG(com->flags) && int_ctl != int_ctl_new) outb(com->int_ctl_port, int_ctl_new); } /* finished? 
*/ #ifndef COM_MULTIPORT if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } /* software interrupt handler for SWI_TTY */ static void siopoll(void *dummy) { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ mtx_lock_spin(&sio_lock); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; mtx_unlock_spin(&sio_lock); continue; } if (com->iptr != com->ibuf) { mtx_lock_spin(&sio_lock); sioinput(com); mtx_unlock_spin(&sio_lock); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; mtx_lock_spin(&sio_lock); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; mtx_unlock_spin(&sio_lock); if (delta_modem_status & MSR_DCD) ttyld_modem(tp, com->prev_modem_status & MSR_DCD); } if (com->state & CS_ODONE) { mtx_lock_spin(&sio_lock); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; mtx_unlock_spin(&sio_lock); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } ttyld_start(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static void combreak(tp, sig) struct tty *tp; int sig; { struct com_s *com; com = tp->t_sc; if (sig) sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); else sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; u_int divisor; u_char dlbh; u_char dlbl; u_char efr_flowbits; int s; com = tp->t_sc; if (com == NULL) return (ENODEV); /* check requested parameters */ if (t->c_ispeed != (t->c_ospeed != 0 ? t->c_ospeed : tp->t_ospeed)) return (EINVAL); divisor = siodivisor(com->rclk, t->c_ispeed); if (divisor == 0) return (EINVAL); /* parameters are OK, convert them to the com struct and the device */ s = spltty(); if (t->c_ospeed == 0) (void)commodem(tp, 0, SER_DTR); /* hang up line */ else (void)commodem(tp, SER_DTR, 0); cflag = t->c_cflag; switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. * * The fifo trigger level cannot be set at RX_HIGH for high * speed connections without further work on reducing * interrupt disablement times in other parts of the system, * without producing silo overflow errors. */ com->fifo_image = com->unit == siotsunit ? 0 : t->c_ispeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_MEDH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. 
If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (UMC8669F), setting them while input * is arriving loses sync until data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); efr_flowbits = 0; if (cflag & CRTS_IFLOW) { com->state |= CS_RTS_IFLOW; efr_flowbits |= EFR_AUTORTS; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. */ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; efr_flowbits |= EFR_AUTOCTS; if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; } if (com->st16650a) { sio_setreg(com, com_lcr, LCR_EFR_ENABLE); sio_setreg(com, com_efr, (sio_getreg(com, com_efr) & ~(EFR_AUTOCTS | EFR_AUTORTS)) | efr_flowbits); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); /* XXX shouldn't call functions while intrs are disabled. */ ttyldoptim(tp); mtx_unlock_spin(&sio_lock); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } /* * This function must be called with the sio_lock mutex released and will * return with it obtained. */ static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; if (ibufsize == com->ibufsize) { mtx_lock_spin(&sio_lock); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { mtx_lock_spin(&sio_lock); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ mtx_lock_spin(&sio_lock); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. 
* The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. */ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; com = tp->t_sc; if (com == NULL) return; s = spltty(); mtx_lock_spin(&sio_lock); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } mtx_unlock_spin(&sio_lock); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, sizeof com->obuf1); com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; mtx_lock_spin(&sio_lock); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } mtx_unlock_spin(&sio_lock); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, sizeof com->obuf2); com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; mtx_lock_spin(&sio_lock); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } mtx_unlock_spin(&sio_lock); } tp->t_state |= TS_BUSY; } mtx_lock_spin(&sio_lock); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ mtx_unlock_spin(&sio_lock); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; com = tp->t_sc; if (com == NULL || com->gone) return; mtx_lock_spin(&sio_lock); if (rw & FWRITE) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. 
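* The failure itself is not documented here; presumably writing the FIFO * reset bits misbehaves on the Hayes ESP in compatibility mode, so the * reset is skipped when com->esp is set, just as in the FWRITE path above.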
*/ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } mtx_unlock_spin(&sio_lock); comstart(tp); } static int commodem(struct tty *tp, int sigon, int sigoff) { struct com_s *com; int bitand, bitor, msr; com = tp->t_sc; if (com->gone) return(0); if (sigon != 0 || sigoff != 0) { bitand = bitor = 0; if (sigoff & SER_DTR) bitand |= MCR_DTR; if (sigoff & SER_RTS) bitand |= MCR_RTS; if (sigon & SER_DTR) bitor |= MCR_DTR; if (sigon & SER_RTS) bitor |= MCR_RTS; bitand = ~bitand; mtx_lock_spin(&sio_lock); com->mcr_image &= bitand; com->mcr_image |= bitor; outb(com->modem_ctl_port, com->mcr_image); mtx_unlock_spin(&sio_lock); return (0); } else { bitor = 0; if (com->mcr_image & MCR_DTR) bitor |= SER_DTR; if (com->mcr_image & MCR_RTS) bitor |= SER_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bitor |= SER_CTS; if (msr & MSR_DCD) bitor |= SER_DCD; if (msr & MSR_DSR) bitor |= SER_DSR; if (msr & (MSR_RI | MSR_TERI)) bitor |= SER_RI; return (bitor); } } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. */ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { mtx_lock_spin(&sio_lock); siointr1(com); mtx_unlock_spin(&sio_lock); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; mtx_lock_spin(&sio_lock); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; mtx_unlock_spin(&sio_lock); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } /* * Following are all routines needed for SIO to act as console */ struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; /* * This is a function in order to not replicate "ttyd%d" more * places than absolutely necessary.
*/ static void siocnset(struct consdev *cd, int unit) { cd->cn_unit = unit; sprintf(cd->cn_name, "ttyd%d", unit); } static speed_t siocngetspeed(Port_t, u_long rclk); static void siocnclose(struct siocnstate *sp, Port_t iobase); static void siocnopen(struct siocnstate *sp, Port_t iobase, int speed); static void siocntxwait(Port_t iobase); static cn_probe_t sio_cnprobe; static cn_init_t sio_cninit; static cn_term_t sio_cnterm; static cn_getc_t sio_cngetc; static cn_putc_t sio_cnputc; static cn_grab_t sio_cngrab; static cn_ungrab_t sio_cnungrab; CONSOLE_DRIVER(sio); static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basically idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. */ static speed_t siocngetspeed(iobase, rclk) Port_t iobase; u_long rclk; { u_int divisor; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); divisor = dlbh << 8 | dlbl; /* XXX there should be more sanity checking. */ if (divisor == 0) return (CONSPEED); return (rclk / (16UL * divisor)); } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { u_int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = siodivisor(comdefaultrclk, speed); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. 
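* Restoring the saved MCR verbatim could briefly drop DTR and RTS on every * console character, since siocnopen() forces them on; OR-ing them in below * keeps the modem signals steady across open/close pairs.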
*/ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void sio_cnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; u_int divisor; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. */ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; if (resource_disabled("sio", unit)) continue; if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if ((boothowto & RB_SERIAL) && COM_CONSOLE(flags)) { boot_speed = siocngetspeed(iobase, comdefaultrclk); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); divisor = siodivisor(comdefaultrclk, comdefaultrate); outb(iobase + com_dlbl, divisor & 0xff); outb(iobase + com_dlbh, divisor >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { siocnset(cp, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? 
CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } #ifdef GDB if (COM_DEBUGGER(flags)) siogdbiobase = iobase; #endif } } } static void sio_cninit(cp) struct consdev *cp; { comconsole = cp->cn_unit; } static void sio_cnterm(cp) struct consdev *cp; { comconsole = -1; } static void sio_cngrab(struct consdev *cp) { } static void sio_cnungrab(struct consdev *cp) { } static int sio_cngetc(struct consdev *cd) { int c; Port_t iobase; int s; struct siocnstate sp; speed_t speed; if (cd != NULL && cd->cn_unit == siocnunit) { iobase = siocniobase; speed = comdefaultrate; } else { #ifdef GDB iobase = siogdbiobase; speed = gdbdefaultrate; #else return (-1); #endif } s = spltty(); siocnopen(&sp, iobase, speed); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } static void sio_cnputc(struct consdev *cd, int c) { int need_unlock; int s; struct siocnstate sp; Port_t iobase; speed_t speed; if (cd != NULL && cd->cn_unit == siocnunit) { iobase = siocniobase; speed = comdefaultrate; } else { #ifdef GDB iobase = siogdbiobase; speed = gdbdefaultrate; #else return; #endif } s = spltty(); need_unlock = 0; if (!kdb_active && sio_inited == 2 && !mtx_owned(&sio_lock)) { mtx_lock_spin(&sio_lock); need_unlock = 1; } siocnopen(&sp, iobase, speed); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); if (need_unlock) mtx_unlock_spin(&sio_lock); splx(s); } /* * Remote gdb(1) support. */ #if defined(GDB) #include static gdb_probe_f siogdbprobe; static gdb_init_f siogdbinit; static gdb_term_f siogdbterm; static gdb_getc_f siogdbgetc; static gdb_putc_f siogdbputc; GDB_DBGPORT(sio, siogdbprobe, siogdbinit, siogdbterm, siogdbgetc, siogdbputc); static int siogdbprobe(void) { return ((siogdbiobase != 0) ? 0 : -1); } static void siogdbinit(void) { } static void siogdbterm(void) { } static void siogdbputc(int c) { sio_cnputc(NULL, c); } static int siogdbgetc(void) { return (sio_cngetc(NULL)); } #endif Index: stable/11/sys/dev/sound/isa/mss.h =================================================================== --- stable/11/sys/dev/sound/isa/mss.h (revision 330445) +++ stable/11/sys/dev/sound/isa/mss.h (revision 330446) @@ -1,421 +1,421 @@ /*- * Copyright (c) 1999 Doug Rabson * Copyright (c) 1997 Luigi Rizzo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file contains information and macro definitions for * AD1848-compatible devices, used in the MSS/WSS compatible boards. */ /* * The codec part of the board is seen as a set of 4 registers mapped at the base address for the board (default 0x534). Note that some (early) boards implemented 4 additional registers 4 locations before (usually 0x530) to store configuration information. This is a source of confusion in that one never knows what address to specify. The (current) convention is to use the old address (0x530) in the kernel configuration file and consider that the MSS registers start four locations ahead. * */ struct mixer_def { u_int regno:7; u_int polarity:1; /* 1 means reversed */ u_int bitoffs:4; u_int nbits:4; }; typedef struct mixer_def mixer_ent; typedef struct mixer_def mixer_tab[32][2]; #define MIX_ENT(name, reg_l, pol_l, pos_l, len_l, reg_r, pol_r, pos_r, len_r) \ {{reg_l, pol_l, pos_l, len_l}, {reg_r, pol_r, pos_r, len_r}} #define PMIX_ENT(name, reg_l, pos_l, len_l, reg_r, pos_r, len_r) \ {{reg_l, 0, pos_l, len_l}, {reg_r, 0, pos_r, len_r}} #define MIX_NONE(name) MIX_ENT(name, 0,0,0,0, 0,0,0,0) /* * The four visible registers of the MSS : * */ #define MSS_INDEX (0 + 4) #define MSS_IDXBUSY 0x80 /* readonly, set when busy */ #define MSS_MCE 0x40 /* the MCE bit. */ /* * the MCE bit must be set whenever the current mode of the * codec is changed; this in particular is true for the * Data Format (I8, I28) and Interface Config (I9) registers. * The only exceptions are CEN and PEN, which can be changed on the fly. * The DAC output is muted when MCE is set. */ #define MSS_TRD 0x20 /* Transfer request disable */ /* * When TRD is set, DMA transfers cease when the INT bit in * the MSS status reg is set. Must be cleared for automode * DMA, set otherwise. */ #define MSS_IDXMASK 0x1f /* mask for indirect address */ #define MSS_IDATA (1 + 4) /* * data to be transferred to the indirect register addressed * by index addr. During init and sw. powerdown, cannot be * written to, and is always read as 0x80 (consistent with the * busy flag). */ #define MSS_STATUS (2 + 4) #define IS_CUL 0x80 /* capture upper/lower */ #define IS_CLR 0x40 /* capture left/right */ #define IS_CRDY 0x20 /* capture ready for programmed i/o */ #define IS_SER 0x10 /* sample error (overrun/underrun) */ #define IS_PUL 0x08 /* playback upper/lower */ #define IS_PLR 0x04 /* playback left/right */ #define IS_PRDY 0x02 /* playback ready for programmed i/o */ #define IS_INT 0x01 /* int status (1 = active) */ /* * IS_INT is cleared by any write to the status register. */ #if 0 #define io_Polled_IO(d) ((d)->io_base+3+4) /* * this register is used in case of polled i/o */ #endif /* * The MSS has a set of 16 (or 32 depending on the model) indirect * registers accessible through the data port by specifying the * appropriate address in the address register. * * The 16 low registers are uniformly handled in AD1848/CS4248 compatible * mode (often called MODE1).
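* (A typical indirect access, pieced together from the definitions above: * wait for MSS_IDXBUSY to clear, write the register index to MSS_INDEX, * setting MSS_MCE as well when changing data formats, then transfer the * value through MSS_IDATA.)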
For the upper 16 registers there are * some differences among different products, mainly Crystal uses them * differently from OPTi. * */ /* * volume registers */ #define I6_MUTE 0x80 /* * register I9 -- interface configuration. */ #define I9_PEN 0x01 /* playback enable */ #define I9_CEN 0x02 /* capture enable */ /* * values used in bd_flags */ #define BD_F_MCE_BIT 0x0001 #define BD_F_IRQ_OK 0x0002 #define BD_F_TMR_RUN 0x0004 #define BD_F_MSS_OFFSET 0x0008 /* offset mss writes by -4 */ #define BD_F_DUPLEX 0x0010 #define BD_F_924PNP 0x0020 /* OPTi924 is in PNP mode */ /* * sound/ad1848_mixer.h * * Definitions for the mixer of AD1848 and compatible codecs. * * Copyright by Hannu Savolainen 1994 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 2. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The AD1848 codec has generic input lines called Line, Aux1 and Aux2. * Soundcard manufacturers have connected actual inputs (CD, synth, line, * etc) to these inputs in different order. Therefore it's difficult - * to assign mixer channels to to these inputs correctly. The following + * to assign mixer channels to these inputs correctly. The following * contains two alternative mappings. The first one is for GUS MAX and * the second is just a generic one (line1, line2 and line3). * (Actually this is not a mapping but rather some kind of interleaving * solution). */ #define MSS_REC_DEVICES \ (SOUND_MASK_LINE | SOUND_MASK_MIC | SOUND_MASK_CD|SOUND_MASK_IMIX) /* * Table of mixer registers. There is a default table for the * AD1848/CS423x clones, one for the OPTI931 and one for the * OPTi930. As more MSS clones come out, there ought to be * more tables. * * Fields in the table are : polarity, register, offset, bits * * The channel numbering used by individual soundcards is not fixed. * Some cards have assigned different meanings for the AUX1, AUX2 * and LINE inputs. Some have different features... * * Following there is a macro ...MIXER_DEVICES which is a bitmap * of all non-zero fields in the table. 
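* As a worked example from the table below, MIX_ENT(SOUND_MIXER_PCM, 6, 1, * 0, 6, 7, 1, 0, 6) places PCM volume in registers I6 (left) and I7 * (right), with reversed polarity and 6 significant bits starting at bit 0 * on each channel.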
* MODE1_MIXER_DEVICES is the basic mixer of the 1848 in mode 1 * registers I0..I15) * */ mixer_ent mix_devices[32][2] = { MIX_NONE(SOUND_MIXER_VOLUME), MIX_NONE(SOUND_MIXER_BASS), MIX_NONE(SOUND_MIXER_TREBLE), #ifdef PC98 /* PC98's synth is assigned to AUX#2 */ MIX_ENT(SOUND_MIXER_SYNTH, 4, 1, 0, 5, 5, 1, 0, 5), #else /* AT386's synth is assigned to AUX#1 */ MIX_ENT(SOUND_MIXER_SYNTH, 2, 1, 0, 5, 3, 1, 0, 5), #endif MIX_ENT(SOUND_MIXER_PCM, 6, 1, 0, 6, 7, 1, 0, 6), MIX_ENT(SOUND_MIXER_SPEAKER, 26, 1, 0, 4, 0, 0, 0, 0), MIX_ENT(SOUND_MIXER_LINE, 18, 1, 0, 5, 19, 1, 0, 5), MIX_ENT(SOUND_MIXER_MIC, 0, 0, 5, 1, 1, 0, 5, 1), #ifdef PC98 /* PC98's cd-audio is assigned to AUX#1 */ MIX_ENT(SOUND_MIXER_CD, 2, 1, 0, 5, 3, 1, 0, 5), #else /* AT386's cd-audio is assigned to AUX#2 */ MIX_ENT(SOUND_MIXER_CD, 4, 1, 0, 5, 5, 1, 0, 5), #endif MIX_ENT(SOUND_MIXER_IMIX, 13, 1, 2, 6, 0, 0, 0, 0), MIX_NONE(SOUND_MIXER_ALTPCM), MIX_NONE(SOUND_MIXER_RECLEV), MIX_ENT(SOUND_MIXER_IGAIN, 0, 0, 0, 4, 1, 0, 0, 4), MIX_NONE(SOUND_MIXER_OGAIN), MIX_NONE(SOUND_MIXER_LINE1), MIX_NONE(SOUND_MIXER_LINE2), MIX_NONE(SOUND_MIXER_LINE3), }; #define MODE2_MIXER_DEVICES \ (SOUND_MASK_SYNTH | SOUND_MASK_PCM | SOUND_MASK_SPEAKER | \ SOUND_MASK_LINE | SOUND_MASK_MIC | SOUND_MASK_CD | \ SOUND_MASK_IMIX | SOUND_MASK_IGAIN ) #define MODE1_MIXER_DEVICES \ (SOUND_MASK_SYNTH | SOUND_MASK_PCM | SOUND_MASK_MIC | \ SOUND_MASK_CD | SOUND_MASK_IMIX | SOUND_MASK_IGAIN ) mixer_ent opti930_devices[32][2] = { MIX_ENT(SOUND_MIXER_VOLUME, 22, 1, 0, 4, 23, 1, 0, 4), MIX_NONE(SOUND_MIXER_BASS), MIX_NONE(SOUND_MIXER_TREBLE), MIX_ENT(SOUND_MIXER_SYNTH, 4, 1, 0, 4, 5, 1, 0, 4), MIX_ENT(SOUND_MIXER_PCM, 6, 1, 1, 5, 7, 1, 1, 5), MIX_ENT(SOUND_MIXER_LINE, 18, 1, 1, 4, 19, 1, 1, 4), MIX_NONE(SOUND_MIXER_SPEAKER), MIX_ENT(SOUND_MIXER_MIC, 21, 1, 0, 4, 22, 1, 0, 4), MIX_ENT(SOUND_MIXER_CD, 2, 1, 1, 4, 3, 1, 1, 4), MIX_NONE(SOUND_MIXER_IMIX), MIX_NONE(SOUND_MIXER_ALTPCM), MIX_NONE(SOUND_MIXER_RECLEV), MIX_NONE(SOUND_MIXER_IGAIN), MIX_NONE(SOUND_MIXER_OGAIN), MIX_NONE(SOUND_MIXER_LINE1), MIX_NONE(SOUND_MIXER_LINE2), MIX_NONE(SOUND_MIXER_LINE3), }; #define OPTI930_MIXER_DEVICES \ (SOUND_MASK_VOLUME | SOUND_MASK_SYNTH | SOUND_MASK_PCM | \ SOUND_MASK_LINE | SOUND_MASK_MIC | SOUND_MASK_CD ) /* * entries for the opti931... */ mixer_ent opti931_devices[32][2] = { /* for the opti931 */ MIX_ENT(SOUND_MIXER_VOLUME, 22, 1, 1, 5, 23, 1, 1, 5), MIX_NONE(SOUND_MIXER_BASS), MIX_NONE(SOUND_MIXER_TREBLE), MIX_ENT(SOUND_MIXER_SYNTH, 4, 1, 1, 4, 5, 1, 1, 4), MIX_ENT(SOUND_MIXER_PCM, 6, 1, 0, 5, 7, 1, 0, 5), MIX_NONE(SOUND_MIXER_SPEAKER), MIX_ENT(SOUND_MIXER_LINE, 18, 1, 1, 4, 19, 1, 1, 4), MIX_ENT(SOUND_MIXER_MIC, 0, 0, 5, 1, 1, 0, 5, 1), MIX_ENT(SOUND_MIXER_CD, 2, 1, 1, 4, 3, 1, 1, 4), MIX_NONE(SOUND_MIXER_IMIX), MIX_NONE(SOUND_MIXER_ALTPCM), MIX_NONE(SOUND_MIXER_RECLEV), MIX_ENT(SOUND_MIXER_IGAIN, 0, 0, 0, 4, 1, 0, 0, 4), MIX_NONE(SOUND_MIXER_OGAIN), MIX_ENT(SOUND_MIXER_LINE1, 16, 1, 1, 4, 17, 1, 1, 4), MIX_NONE(SOUND_MIXER_LINE2), MIX_NONE(SOUND_MIXER_LINE3), }; #define OPTI931_MIXER_DEVICES \ (SOUND_MASK_VOLUME | SOUND_MASK_SYNTH | SOUND_MASK_PCM | \ SOUND_MASK_LINE | SOUND_MASK_MIC | SOUND_MASK_CD | \ SOUND_MASK_IGAIN | SOUND_MASK_LINE1 ) /* * Register definitions for the Yamaha OPL3-SA[23x]. 
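* The values below are indices into the chip's indirect control register * file; the flag macros grouped under each register give its bit layout.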
*/ #define OPL3SAx_POWER 0x01 /* Power Management (R/W) */ #define OPL3SAx_POWER_PDX 0x01 /* Set to 1 to halt oscillator */ #define OPL3SAx_POWER_PDN 0x02 /* Set to 1 to power down */ #define OPL3SAx_POWER_PSV 0x04 /* Set to 1 to power save */ #define OPL3SAx_POWER_ADOWN 0x20 /* Analog power (?) */ #define OPL3SAx_SYSTEM 0x02 /* System control (R/W) */ #define OPL3SAx_SYSTEM_VZE 0x01 /* I2S audio routing */ #define OPL3SAx_SYSTEM_IDSEL 0x03 /* SB compat version select */ #define OPL3SAx_SYSTEM_SBHE 0x80 /* 0 for AT bus, 1 for XT bus */ #define OPL3SAx_IRQCONF 0x03 /* Interrupt configuration (R/W) */ #define OPL3SAx_IRQCONF_WSSA 0x01 /* WSS interrupts through IRQA */ #define OPL3SAx_IRQCONF_SBA 0x02 /* SB interrupts through IRQA */ #define OPL3SAx_IRQCONF_MPUA 0x04 /* MPU401 interrupts through IRQA */ #define OPL3SAx_IRQCONF_OPL3A 0x08 /* OPL3 interrupts through IRQA */ #define OPL3SAx_IRQCONF_WSSB 0x10 /* WSS interrupts through IRQB */ #define OPL3SAx_IRQCONF_SBB 0x20 /* SB interrupts through IRQB */ #define OPL3SAx_IRQCONF_MPUB 0x40 /* MPU401 interrupts through IRQB */ #define OPL3SAx_IRQCONF_OPL3B 0x80 /* OPL3 interrupts through IRQB */ #define OPL3SAx_IRQSTATUSA 0x04 /* Interrupt (IRQ-A) Status (RO) */ #define OPL3SAx_IRQSTATUSB 0x05 /* Interrupt (IRQ-B) Status (RO) */ #define OPL3SAx_IRQSTATUS_PI 0x01 /* Playback Flag of CODEC */ #define OPL3SAx_IRQSTATUS_CI 0x02 /* Recording Flag of CODEC */ #define OPL3SAx_IRQSTATUS_TI 0x04 /* Timer Flag of CODEC */ #define OPL3SAx_IRQSTATUS_SB 0x08 /* SB compat Playback Interrupt Flag */ #define OPL3SAx_IRQSTATUS_MPU 0x10 /* MPU401 Interrupt Flag */ #define OPL3SAx_IRQSTATUS_OPL3 0x20 /* Internal FM Timer Flag */ #define OPL3SAx_IRQSTATUS_MV 0x40 /* HW Volume Interrupt Flag */ #define OPL3SAx_DMACONF 0x06 /* DMA configuration (R/W) */ #define OPL3SAx_DMACONF_WSSPA 0x01 /* WSS Playback on DMA-A */ #define OPL3SAx_DMACONF_WSSRA 0x02 /* WSS Recording on DMA-A */ #define OPL3SAx_DMACONF_SBA 0x02 /* SB Playback on DMA-A */ #define OPL3SAx_DMACONF_WSSPB 0x10 /* WSS Playback on DMA-B */ #define OPL3SAx_DMACONF_WSSRB 0x20 /* WSS Recording on DMA-B */ #define OPL3SAx_DMACONF_SBB 0x20 /* SB Playback on DMA-B */ #define OPL3SAx_VOLUMEL 0x07 /* Master Volume Left (R/W) */ #define OPL3SAx_VOLUMEL_MVL 0x0f /* Attenuation level */ #define OPL3SAx_VOLUMEL_MVLM 0x80 /* Mute */ #define OPL3SAx_VOLUMER 0x08 /* Master Volume Right (R/W) */ #define OPL3SAx_VOLUMER_MVR 0x0f /* Attenuation level */ #define OPL3SAx_VOLUMER_MVRM 0x80 /* Mute */ #define OPL3SAx_MIC 0x09 /* MIC Volume (R/W) */ #define OPL3SAx_VOLUMER_MCV 0x1f /* Attenuation level */ #define OPL3SAx_VOLUMER_MICM 0x80 /* Mute */ #define OPL3SAx_MISC 0x0a /* Miscellaneous */ #define OPL3SAx_MISC_VER 0x07 /* Version */ #define OPL3SAx_MISC_MODE 0x08 /* SB or WSS mode */ #define OPL3SAx_MISC_MCSW 0x10 /* */ #define OPL3SAx_MISC_VEN 0x80 /* Enable hardware volume control */ #define OPL3SAx_WSSDMA 0x0b /* WSS DMA Counter (RW) (4 regs) */ #define OPL3SAx_WSSIRQSCAN 0x0f /* WSS Interrupt Scan out/in (R/W) */ #define OPL3SAx_WSSIRQSCAN_SPI 0x01 #define OPL3SAx_WSSIRQSCAN_SCI 0x02
#define OPL3SAx_WSSIRQSCAN_STI 0x04 #define OPL3SAx_SBSTATE 0x10 /* SB compat Internal State (R/W) */ #define OPL3SAx_SBSTATE_SBPDR 0x01 /* SB Power Down Request */ #define OPL3SAx_SBSTATE_SE 0x02 /* Scan Enable */ #define OPL3SAx_SBSTATE_SM 0x04 /* Scan Mode */ #define OPL3SAx_SBSTATE_SS 0x08 /* Scan Select */ #define OPL3SAx_SBSTATE_SBPDA 0x80 /* SB Power Down Acknowledge */ #define OPL3SAx_SBDATA 0x11 /* SB compat State Scan Data (R/W) */ #define OPL3SAx_DIGITALPOWER 0x12 /* Digital Partial Power Down (R/W) */ #define OPL3SAx_DIGITALPOWER_PnP 0x01 #define OPL3SAx_DIGITALPOWER_SB 0x02 #define OPL3SAx_DIGITALPOWER_WSSP 0x04 #define OPL3SAx_DIGITALPOWER_WSSR 0x08 #define OPL3SAx_DIGITALPOWER_FM 0x10 #define OPL3SAx_DIGITALPOWER_MCLK0 0x20 #define OPL3SAx_DIGITALPOWER_MPU 0x40 #define OPL3SAx_DIGITALPOWER_JOY 0x80 #define OPL3SAx_ANALOGPOWER 0x13 /* Analog Partial Power Down (R/W) */ #define OPL3SAx_ANALOGPOWER_WIDE 0x01 #define OPL3SAx_ANALOGPOWER_SBDAC 0x02 #define OPL3SAx_ANALOGPOWER_DA 0x04 #define OPL3SAx_ANALOGPOWER_AD 0x08 #define OPL3SAx_ANALOGPOWER_FMDAC 0x10 #define OPL3SAx_WIDE 0x14 /* Enhanced control(WIDE) (R/W) */ #define OPL3SAx_WIDE_WIDEL 0x07 /* Wide level on Left Channel */ #define OPL3SAx_WIDE_WIDER 0x70 /* Wide level on Right Channel */ #define OPL3SAx_BASS 0x15 /* Enhanced control(BASS) (R/W) */ #define OPL3SAx_BASS_BASSL 0x07 /* Bass level on Left Channel */ #define OPL3SAx_BASS_BASSR 0x70 /* Bass level on Right Channel */ #define OPL3SAx_TREBLE 0x16 /* Enhanced control(TREBLE) (R/W) */ #define OPL3SAx_TREBLE_TREBLEL 0x07 /* Treble level on Left Channel */ #define OPL3SAx_TREBLE_TREBLER 0x70 /* Treble level on Right Channel */ #define OPL3SAx_HWVOL 0x17 /* HW Volume IRQ Configuration (R/W) */ #define OPL3SAx_HWVOL_IRQA 0x10 /* HW Volume IRQ on IRQ-A */ #define OPL3SAx_HWVOL_IRQB 0x20 /* HW Volume IRQ on IRQ-B */ Index: stable/11/sys/dev/syscons/scvgarndr.c =================================================================== --- stable/11/sys/dev/syscons/scvgarndr.c (revision 330445) +++ stable/11/sys/dev/syscons/scvgarndr.c (revision 330446) @@ -1,1282 +1,1282 @@ /*- * Copyright (c) 1999 Kazutaka YOKOTA * All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Sascha Wildner * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_syscons.h" #include "opt_vga.h" #include #include #include #include #include #include #include #include #include #include #include #ifndef SC_RENDER_DEBUG #define SC_RENDER_DEBUG 0 #endif static vr_clear_t vga_txtclear; static vr_draw_border_t vga_txtborder; static vr_draw_t vga_txtdraw; static vr_set_cursor_t vga_txtcursor_shape; static vr_draw_cursor_t vga_txtcursor; static vr_blink_cursor_t vga_txtblink; #ifndef SC_NO_CUTPASTE static vr_draw_mouse_t vga_txtmouse; #else #define vga_txtmouse (vr_draw_mouse_t *)vga_nop #endif #ifdef SC_PIXEL_MODE static vr_init_t vga_rndrinit; static vr_clear_t vga_pxlclear_direct; static vr_clear_t vga_pxlclear_planar; static vr_draw_border_t vga_pxlborder_direct; static vr_draw_border_t vga_pxlborder_planar; static vr_draw_t vga_egadraw; static vr_draw_t vga_vgadraw_direct; static vr_draw_t vga_vgadraw_planar; static vr_set_cursor_t vga_pxlcursor_shape; static vr_draw_cursor_t vga_pxlcursor_direct; static vr_draw_cursor_t vga_pxlcursor_planar; static vr_blink_cursor_t vga_pxlblink_direct; static vr_blink_cursor_t vga_pxlblink_planar; #ifndef SC_NO_CUTPASTE static vr_draw_mouse_t vga_pxlmouse_direct; static vr_draw_mouse_t vga_pxlmouse_planar; #else #define vga_pxlmouse_direct (vr_draw_mouse_t *)vga_nop #define vga_pxlmouse_planar (vr_draw_mouse_t *)vga_nop #endif #endif /* SC_PIXEL_MODE */ #ifndef SC_NO_MODE_CHANGE static vr_draw_border_t vga_grborder; #endif static void vga_nop(scr_stat *scp); static sc_rndr_sw_t txtrndrsw = { (vr_init_t *)vga_nop, vga_txtclear, vga_txtborder, vga_txtdraw, vga_txtcursor_shape, vga_txtcursor, vga_txtblink, (vr_set_mouse_t *)vga_nop, vga_txtmouse, }; RENDERER(mda, 0, txtrndrsw, vga_set); RENDERER(cga, 0, txtrndrsw, vga_set); RENDERER(ega, 0, txtrndrsw, vga_set); RENDERER(vga, 0, txtrndrsw, vga_set); #ifdef SC_PIXEL_MODE static sc_rndr_sw_t egarndrsw = { (vr_init_t *)vga_nop, vga_pxlclear_planar, vga_pxlborder_planar, vga_egadraw, vga_pxlcursor_shape, vga_pxlcursor_planar, vga_pxlblink_planar, (vr_set_mouse_t *)vga_nop, vga_pxlmouse_planar, }; RENDERER(ega, PIXEL_MODE, egarndrsw, vga_set); static sc_rndr_sw_t vgarndrsw = { vga_rndrinit, (vr_clear_t *)vga_nop, (vr_draw_border_t *)vga_nop, (vr_draw_t *)vga_nop, vga_pxlcursor_shape, (vr_draw_cursor_t *)vga_nop, (vr_blink_cursor_t *)vga_nop, (vr_set_mouse_t *)vga_nop, (vr_draw_mouse_t *)vga_nop, }; RENDERER(vga, PIXEL_MODE, vgarndrsw, vga_set); #endif /* SC_PIXEL_MODE */ #ifndef SC_NO_MODE_CHANGE static sc_rndr_sw_t grrndrsw = { (vr_init_t *)vga_nop, (vr_clear_t *)vga_nop, vga_grborder, (vr_draw_t *)vga_nop, (vr_set_cursor_t *)vga_nop, (vr_draw_cursor_t *)vga_nop, (vr_blink_cursor_t *)vga_nop, (vr_set_mouse_t *)vga_nop, (vr_draw_mouse_t *)vga_nop, }; RENDERER(cga, GRAPHICS_MODE, grrndrsw, vga_set); RENDERER(ega, GRAPHICS_MODE, grrndrsw, vga_set); RENDERER(vga, GRAPHICS_MODE, grrndrsw, vga_set); #endif /* SC_NO_MODE_CHANGE */ RENDERER_MODULE(vga, vga_set); #ifndef SC_NO_CUTPASTE #if !defined(SC_ALT_MOUSE_IMAGE) || defined(SC_PIXEL_MODE) static u_short mouse_and_mask[16] = { 0xc000, 0xe000, 0xf000, 0xf800, 0xfc00, 0xfe00, 0xff00, 0xff80, 0xfe00, 0x1e00, 0x1f00, 0x0f00, 0x0f00, 0x0000, 0x0000, 0x0000 }; static u_short mouse_or_mask[16] = { 0x0000, 0x4000, 0x6000, 0x7000, 0x7800, 0x7c00, 0x7e00, 0x6800, 0x0c00, 0x0c00, 0x0600, 0x0600, 0x0000, 0x0000, 0x0000, 0x0000 }; #endif #endif #ifdef SC_PIXEL_MODE #define GET_PIXEL(scp, pos, x, w) \ ({ \ (scp)->sc->adp->va_window + \ (x) * (scp)->xoff + \ (scp)->yoff * (scp)->font_size * (w) 
+ \ (x) * ((pos) % (scp)->xsize) + \ (scp)->font_size * (w) * ((pos) / (scp)->xsize); \ }) #define DRAW_PIXEL(scp, pos, color) do { \ switch ((scp)->sc->adp->va_info.vi_depth) { \ case 32: \ writel((pos), vga_palette32[color]); \ break; \ case 24: \ if (((pos) & 1) == 0) { \ writew((pos), vga_palette32[color]); \ writeb((pos) + 2, vga_palette32[color] >> 16); \ } else { \ writeb((pos), vga_palette32[color]); \ writew((pos) + 1, vga_palette32[color] >> 8); \ } \ break; \ case 16: \ if ((scp)->sc->adp->va_info.vi_pixel_fsizes[1] == 5) \ writew((pos), vga_palette15[color]); \ else \ writew((pos), vga_palette16[color]); \ break; \ case 15: \ writew((pos), vga_palette15[color]); \ break; \ case 8: \ writeb((pos), (uint8_t)(color)); \ } \ } while (0) static uint32_t vga_palette32[16] = { 0x000000, 0x0000ad, 0x00ad00, 0x00adad, 0xad0000, 0xad00ad, 0xad5200, 0xadadad, 0x525252, 0x5252ff, 0x52ff52, 0x52ffff, 0xff5252, 0xff52ff, 0xffff52, 0xffffff }; static uint16_t vga_palette16[16] = { 0x0000, 0x0016, 0x0560, 0x0576, 0xb000, 0xb016, 0xb2a0, 0xb576, 0x52aa, 0x52bf, 0x57ea, 0x57ff, 0xfaaa, 0xfabf, 0xffea, 0xffff }; static uint16_t vga_palette15[16] = { 0x0000, 0x0016, 0x02c0, 0x02d6, 0x5800, 0x5816, 0x5940, 0x5ad6, 0x294a, 0x295f, 0x2bea, 0x2bff, 0x7d4a, 0x7d5f, 0x7fea, 0x7fff }; #ifndef SC_NO_CUTPASTE static uint32_t mouse_buf32[256]; static uint16_t mouse_buf16[256]; static uint8_t mouse_buf8[256]; #endif #endif static void vga_nop(scr_stat *scp) { } /* text mode renderer */ static void vga_txtclear(scr_stat *scp, int c, int attr) { sc_vtb_clear(&scp->scr, c, attr); } static void vga_txtborder(scr_stat *scp, int color) { vidd_set_border(scp->sc->adp, color); } static void vga_txtdraw(scr_stat *scp, int from, int count, int flip) { vm_offset_t p; int c; int a; if (from + count > scp->xsize*scp->ysize) count = scp->xsize*scp->ysize - from; if (flip) { for (p = sc_vtb_pointer(&scp->scr, from); count-- > 0; ++from) { c = sc_vtb_getc(&scp->vtb, from); a = sc_vtb_geta(&scp->vtb, from); a = (a & 0x8800) | ((a & 0x7000) >> 4) | ((a & 0x0700) << 4); p = sc_vtb_putchar(&scp->scr, p, c, a); } } else { sc_vtb_copy(&scp->vtb, from, &scp->scr, from, count); } } static void vga_txtcursor_shape(scr_stat *scp, int base, int height, int blink) { if (base < 0 || base >= scp->font_size) return; /* the caller may set height <= 0 in order to disable the cursor */ #if 0 scp->curs_attr.base = base; scp->curs_attr.height = height; #endif vidd_set_hw_cursor_shape(scp->sc->adp, base, height, scp->font_size, blink); } static void draw_txtcharcursor(scr_stat *scp, int at, u_short c, u_short a, int flip) { sc_softc_t *sc; sc = scp->sc; scp->cursor_saveunder_char = c; scp->cursor_saveunder_attr = a; #ifndef SC_NO_FONT_LOADING if (scp->curs_attr.flags & CONS_CHAR_CURSOR) { unsigned char *font; int h; int i; if (scp->font_size < 14) { font = sc->font_8; h = 8; } else if (scp->font_size >= 16) { font = sc->font_16; h = 16; } else { font = sc->font_14; h = 14; } if (scp->curs_attr.base >= h) return; if (flip) a = (a & 0x8800) | ((a & 0x7000) >> 4) | ((a & 0x0700) << 4); bcopy(font + c*h, font + sc->cursor_char*h, h); font = font + sc->cursor_char*h; for (i = imax(h - scp->curs_attr.base - scp->curs_attr.height, 0); i < h - scp->curs_attr.base; ++i) { font[i] ^= 0xff; } /* XXX */ vidd_load_font(sc->adp, 0, h, 8, font, sc->cursor_char, 1); sc_vtb_putc(&scp->scr, at, sc->cursor_char, a); } else #endif /* SC_NO_FONT_LOADING */ { if ((a & 0x7000) == 0x7000) { a &= 0x8f00; if ((a & 0x0700) == 0) a |= 0x0700; } else { a |= 0x7000; if ((a 
& 0x0700) == 0x0700) a &= 0xf000; } if (flip) a = (a & 0x8800) | ((a & 0x7000) >> 4) | ((a & 0x0700) << 4); sc_vtb_putc(&scp->scr, at, c, a); } } static void vga_txtcursor(scr_stat *scp, int at, int blink, int on, int flip) { video_adapter_t *adp; int cursor_attr; if (scp->curs_attr.height <= 0) /* the text cursor is disabled */ return; adp = scp->sc->adp; if (blink) { scp->status |= VR_CURSOR_BLINK; if (on) { scp->status |= VR_CURSOR_ON; vidd_set_hw_cursor(adp, at%scp->xsize, at/scp->xsize); } else { if (scp->status & VR_CURSOR_ON) vidd_set_hw_cursor(adp, -1, -1); scp->status &= ~VR_CURSOR_ON; } } else { scp->status &= ~VR_CURSOR_BLINK; if (on) { scp->status |= VR_CURSOR_ON; draw_txtcharcursor(scp, at, sc_vtb_getc(&scp->scr, at), sc_vtb_geta(&scp->scr, at), flip); } else { cursor_attr = scp->cursor_saveunder_attr; if (flip) cursor_attr = (cursor_attr & 0x8800) | ((cursor_attr & 0x7000) >> 4) | ((cursor_attr & 0x0700) << 4); if (scp->status & VR_CURSOR_ON) sc_vtb_putc(&scp->scr, at, scp->cursor_saveunder_char, cursor_attr); scp->status &= ~VR_CURSOR_ON; } } } static void vga_txtblink(scr_stat *scp, int at, int flip) { } int sc_txtmouse_no_retrace_wait; #ifndef SC_NO_CUTPASTE static void draw_txtmouse(scr_stat *scp, int x, int y) { #ifndef SC_ALT_MOUSE_IMAGE if (ISMOUSEAVAIL(scp->sc->adp->va_flags)) { u_char font_buf[128]; u_short cursor[32]; u_char c; int pos; int xoffset, yoffset; int crtc_addr; int i; /* prepare mousepointer char's bitmaps */ pos = (y/scp->font_size - scp->yoff)*scp->xsize + x/8 - scp->xoff; bcopy(scp->font + sc_vtb_getc(&scp->scr, pos)*scp->font_size, &font_buf[0], scp->font_size); bcopy(scp->font + sc_vtb_getc(&scp->scr, pos + 1)*scp->font_size, &font_buf[32], scp->font_size); bcopy(scp->font + sc_vtb_getc(&scp->scr, pos + scp->xsize)*scp->font_size, &font_buf[64], scp->font_size); bcopy(scp->font + sc_vtb_getc(&scp->scr, pos + scp->xsize + 1)*scp->font_size, &font_buf[96], scp->font_size); for (i = 0; i < scp->font_size; ++i) { cursor[i] = font_buf[i]<<8 | font_buf[i+32]; cursor[i + scp->font_size] = font_buf[i+64]<<8 | font_buf[i+96]; } /* now and-or in the mousepointer image */ xoffset = x%8; yoffset = y%scp->font_size; for (i = 0; i < 16; ++i) { cursor[i + yoffset] = (cursor[i + yoffset] & ~(mouse_and_mask[i] >> xoffset)) | (mouse_or_mask[i] >> xoffset); } for (i = 0; i < scp->font_size; ++i) { font_buf[i] = (cursor[i] & 0xff00) >> 8; font_buf[i + 32] = cursor[i] & 0xff; font_buf[i + 64] = (cursor[i + scp->font_size] & 0xff00) >> 8; font_buf[i + 96] = cursor[i + scp->font_size] & 0xff; } #if 1 /* wait for vertical retrace to avoid jitter on some videocards */ crtc_addr = scp->sc->adp->va_crtc_addr; while (!sc_txtmouse_no_retrace_wait && !(inb(crtc_addr + 6) & 0x08)) /* idle */ ; #endif c = scp->sc->mouse_char; vidd_load_font(scp->sc->adp, 0, 32, 8, font_buf, c, 4); sc_vtb_putc(&scp->scr, pos, c, sc_vtb_geta(&scp->scr, pos)); /* FIXME: may be out of range! 
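* When the pointer is on the bottom text row, pos + scp->xsize indexes one * row past the end of the screen buffer, and the writes below do not guard * against that.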
*/ sc_vtb_putc(&scp->scr, pos + scp->xsize, c + 2, sc_vtb_geta(&scp->scr, pos + scp->xsize)); if (x < (scp->xsize - 1)*8) { sc_vtb_putc(&scp->scr, pos + 1, c + 1, sc_vtb_geta(&scp->scr, pos + 1)); sc_vtb_putc(&scp->scr, pos + scp->xsize + 1, c + 3, sc_vtb_geta(&scp->scr, pos + scp->xsize + 1)); } } else #endif /* SC_ALT_MOUSE_IMAGE */ { - /* Red, magenta and brown are mapped to green to to keep it readable */ + /* Red, magenta and brown are mapped to green to keep it readable */ static const int col_conv[16] = { 6, 6, 6, 6, 2, 2, 2, 6, 14, 14, 14, 14, 10, 10, 10, 14 }; int pos; int color; int a; pos = (y/scp->font_size - scp->yoff)*scp->xsize + x/8 - scp->xoff; a = sc_vtb_geta(&scp->scr, pos); if (scp->sc->adp->va_flags & V_ADP_COLOR) color = (col_conv[(a & 0xf000) >> 12] << 12) | ((a & 0x0f00) | 0x0800); else color = ((a & 0xf000) >> 4) | ((a & 0x0f00) << 4); sc_vtb_putc(&scp->scr, pos, sc_vtb_getc(&scp->scr, pos), color); } } static void remove_txtmouse(scr_stat *scp, int x, int y) { } static void vga_txtmouse(scr_stat *scp, int x, int y, int on) { if (on) draw_txtmouse(scp, x, y); else remove_txtmouse(scp, x, y); } #endif /* SC_NO_CUTPASTE */ #ifdef SC_PIXEL_MODE /* pixel (raster text) mode renderer */ static void vga_rndrinit(scr_stat *scp) { if (scp->sc->adp->va_info.vi_mem_model == V_INFO_MM_PLANAR) { scp->rndr->clear = vga_pxlclear_planar; scp->rndr->draw_border = vga_pxlborder_planar; scp->rndr->draw = vga_vgadraw_planar; scp->rndr->draw_cursor = vga_pxlcursor_planar; scp->rndr->blink_cursor = vga_pxlblink_planar; scp->rndr->draw_mouse = vga_pxlmouse_planar; } else if (scp->sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT || scp->sc->adp->va_info.vi_mem_model == V_INFO_MM_PACKED) { scp->rndr->clear = vga_pxlclear_direct; scp->rndr->draw_border = vga_pxlborder_direct; scp->rndr->draw = vga_vgadraw_direct; scp->rndr->draw_cursor = vga_pxlcursor_direct; scp->rndr->blink_cursor = vga_pxlblink_direct; scp->rndr->draw_mouse = vga_pxlmouse_direct; } } static void vga_pxlclear_direct(scr_stat *scp, int c, int attr) { vm_offset_t p; int line_width; int pixel_size; int lines; int i; line_width = scp->sc->adp->va_line_width; pixel_size = scp->sc->adp->va_info.vi_pixel_size; lines = scp->ysize * scp->font_size; p = scp->sc->adp->va_window + line_width * scp->yoff * scp->font_size + scp->xoff * 8 * pixel_size; for (i = 0; i < lines; ++i) { bzero_io((void *)p, scp->xsize * 8 * pixel_size); p += line_width; } } static void vga_pxlclear_planar(scr_stat *scp, int c, int attr) { vm_offset_t p; int line_width; int lines; int i; /* XXX: we are just filling the screen with the background color... 
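* With set/reset enabled for all four planes and the set/reset value * holding the attribute's background color, the bzero_io() below paints * every plane in that color regardless of the data actually written.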
*/ outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, ((attr & 0xf000) >> 4) | 0x00); /* set/reset */ line_width = scp->sc->adp->va_line_width; lines = scp->ysize*scp->font_size; p = scp->sc->adp->va_window + line_width*scp->yoff*scp->font_size + scp->xoff; for (i = 0; i < lines; ++i) { bzero_io((void *)p, scp->xsize); p += line_width; } outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ } static void vga_pxlborder_direct(scr_stat *scp, int color) { vm_offset_t s; vm_offset_t e; vm_offset_t f; int line_width; int pixel_size; int x; int y; int i; line_width = scp->sc->adp->va_line_width; pixel_size = scp->sc->adp->va_info.vi_pixel_size; if (scp->yoff > 0) { s = scp->sc->adp->va_window; e = s + line_width * scp->yoff * scp->font_size; for (f = s; f < e; f += pixel_size) DRAW_PIXEL(scp, f, color); } y = (scp->yoff + scp->ysize) * scp->font_size; if (scp->ypixel > y) { s = scp->sc->adp->va_window + line_width * y; e = s + line_width * (scp->ypixel - y); for (f = s; f < e; f += pixel_size) DRAW_PIXEL(scp, f, color); } y = scp->yoff * scp->font_size; x = scp->xpixel / 8 - scp->xoff - scp->xsize; for (i = 0; i < scp->ysize * scp->font_size; ++i) { if (scp->xoff > 0) { s = scp->sc->adp->va_window + line_width * (y + i); e = s + scp->xoff * 8 * pixel_size; for (f = s; f < e; f += pixel_size) DRAW_PIXEL(scp, f, color); } if (x > 0) { s = scp->sc->adp->va_window + line_width * (y + i) + scp->xoff * 8 * pixel_size + scp->xsize * 8 * pixel_size; e = s + x * 8 * pixel_size; for (f = s; f < e; f += pixel_size) DRAW_PIXEL(scp, f, color); } } } static void vga_pxlborder_planar(scr_stat *scp, int color) { vm_offset_t p; int line_width; int x; int y; int i; vidd_set_border(scp->sc->adp, color); outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, (color << 8) | 0x00); /* set/reset */ line_width = scp->sc->adp->va_line_width; p = scp->sc->adp->va_window; if (scp->yoff > 0) bzero_io((void *)p, line_width*scp->yoff*scp->font_size); y = (scp->yoff + scp->ysize)*scp->font_size; if (scp->ypixel > y) bzero_io((void *)(p + line_width*y), line_width*(scp->ypixel - y)); y = scp->yoff*scp->font_size; x = scp->xpixel/8 - scp->xoff - scp->xsize; for (i = 0; i < scp->ysize*scp->font_size; ++i) { if (scp->xoff > 0) bzero_io((void *)(p + line_width*(y + i)), scp->xoff); if (x > 0) bzero_io((void *)(p + line_width*(y + i) + scp->xoff + scp->xsize), x); } outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ } static void vga_egadraw(scr_stat *scp, int from, int count, int flip) { vm_offset_t d; vm_offset_t e; u_char *f; u_short bg; u_short col1, col2; int line_width; int i, j; int a; u_char c; line_width = scp->sc->adp->va_line_width; d = GET_PIXEL(scp, from, 1, line_width); outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ bg = -1; if (from + count > scp->xsize*scp->ysize) count = scp->xsize*scp->ysize - from; for (i = from; count-- > 0; ++i) { a = sc_vtb_geta(&scp->vtb, i); if (flip) { col1 = ((a & 0x7000) >> 4) | (a & 0x0800); col2 = ((a & 0x8000) >> 4) | (a & 0x0700); } else { col1 = (a & 0x0f00); col2 = (a & 0xf000) >> 4; } /* set background color in 
EGA/VGA latch */ if (bg != col2) { bg = col2; outw(GDCIDX, bg | 0x00); /* set/reset */ outw(GDCIDX, 0xff08); /* bit mask */ writeb(d, 0); c = readb(d); /* set bg color in the latch */ } /* foreground color */ outw(GDCIDX, col1 | 0x00); /* set/reset */ e = d; f = &(scp->font[sc_vtb_getc(&scp->vtb, i)*scp->font_size]); for (j = 0; j < scp->font_size; ++j, ++f) { outw(GDCIDX, (*f << 8) | 0x08); /* bit mask */ writeb(e, 0); e += line_width; } ++d; if ((i % scp->xsize) == scp->xsize - 1) d += scp->font_size * line_width - scp->xsize; } outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ } static void vga_vgadraw_direct(scr_stat *scp, int from, int count, int flip) { vm_offset_t d; vm_offset_t e; u_char *f; u_short col1, col2, color; int line_width, pixel_size; int i, j, k; int a; line_width = scp->sc->adp->va_line_width; pixel_size = scp->sc->adp->va_info.vi_pixel_size; d = GET_PIXEL(scp, from, 8 * pixel_size, line_width); if (from + count > scp->xsize * scp->ysize) count = scp->xsize * scp->ysize - from; for (i = from; count-- > 0; ++i) { a = sc_vtb_geta(&scp->vtb, i); if (flip) { col1 = (((a & 0x7000) >> 4) | (a & 0x0800)) >> 8; col2 = (((a & 0x8000) >> 4) | (a & 0x0700)) >> 8; } else { col1 = (a & 0x0f00) >> 8; col2 = (a & 0xf000) >> 12; } e = d; f = &(scp->font[sc_vtb_getc(&scp->vtb, i) * scp->font_size]); for (j = 0; j < scp->font_size; ++j, ++f) { for (k = 0; k < 8; ++k) { color = *f & (1 << (7 - k)) ? col1 : col2; DRAW_PIXEL(scp, e + pixel_size * k, color); } e += line_width; } d += 8 * pixel_size; if ((i % scp->xsize) == scp->xsize - 1) d += scp->font_size * line_width - scp->xsize * 8 * pixel_size; } } static void vga_vgadraw_planar(scr_stat *scp, int from, int count, int flip) { vm_offset_t d; vm_offset_t e; u_char *f; u_short bg; u_short col1, col2; int line_width; int i, j; int a; u_char c; line_width = scp->sc->adp->va_line_width; d = GET_PIXEL(scp, from, 1, line_width); outw(GDCIDX, 0x0305); /* read mode 0, write mode 3 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ bg = -1; if (from + count > scp->xsize*scp->ysize) count = scp->xsize*scp->ysize - from; for (i = from; count-- > 0; ++i) { a = sc_vtb_geta(&scp->vtb, i); if (flip) { col1 = ((a & 0x7000) >> 4) | (a & 0x0800); col2 = ((a & 0x8000) >> 4) | (a & 0x0700); } else { col1 = (a & 0x0f00); col2 = (a & 0xf000) >> 4; } /* set background color in EGA/VGA latch */ if (bg != col2) { bg = col2; outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, bg | 0x00); /* set/reset */ writeb(d, 0); c = readb(d); /* set bg color in the latch */ outw(GDCIDX, 0x0305); /* read mode 0, write mode 3 */ } /* foreground color */ outw(GDCIDX, col1 | 0x00); /* set/reset */ e = d; f = &(scp->font[sc_vtb_getc(&scp->vtb, i)*scp->font_size]); for (j = 0; j < scp->font_size; ++j, ++f) { writeb(e, *f); e += line_width; } ++d; if ((i % scp->xsize) == scp->xsize - 1) d += scp->font_size * line_width - scp->xsize; } outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ } static void vga_pxlcursor_shape(scr_stat *scp, int base, int height, int blink) { if (base < 0 || base >= scp->font_size) return; /* the caller may set height <= 0 in order to disable the cursor */ #if 0 scp->curs_attr.base = base; scp->curs_attr.height = height; #endif } static void draw_pxlcursor_direct(scr_stat *scp, int at, int on, int 
flip) { vm_offset_t d; u_char *f; int line_width, pixel_size; int height; int col1, col2, color; int a; int i, j; line_width = scp->sc->adp->va_line_width; pixel_size = scp->sc->adp->va_info.vi_pixel_size; d = GET_PIXEL(scp, at, 8 * pixel_size, line_width) + (scp->font_size - scp->curs_attr.base - 1) * line_width; a = sc_vtb_geta(&scp->vtb, at); if (flip) { col1 = ((on) ? (a & 0x0f00) : ((a & 0xf000) >> 4)) >> 8; col2 = ((on) ? ((a & 0xf000) >> 4) : (a & 0x0f00)) >> 8; } else { col1 = ((on) ? ((a & 0xf000) >> 4) : (a & 0x0f00)) >> 8; col2 = ((on) ? (a & 0x0f00) : ((a & 0xf000) >> 4)) >> 8; } f = &(scp->font[sc_vtb_getc(&scp->vtb, at) * scp->font_size + scp->font_size - scp->curs_attr.base - 1]); height = imin(scp->curs_attr.height, scp->font_size); for (i = 0; i < height; ++i, --f) { for (j = 0; j < 8; ++j) { color = *f & (1 << (7 - j)) ? col1 : col2; DRAW_PIXEL(scp, d + pixel_size * j, color); } d -= line_width; } } static void draw_pxlcursor_planar(scr_stat *scp, int at, int on, int flip) { vm_offset_t d; u_char *f; int line_width; int height; int col; int a; int i; u_char c; line_width = scp->sc->adp->va_line_width; d = GET_PIXEL(scp, at, 1, line_width) + (scp->font_size - scp->curs_attr.base - 1) * line_width; outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ /* set background color in EGA/VGA latch */ a = sc_vtb_geta(&scp->vtb, at); if (flip) col = (on) ? ((a & 0xf000) >> 4) : (a & 0x0f00); else col = (on) ? (a & 0x0f00) : ((a & 0xf000) >> 4); outw(GDCIDX, col | 0x00); /* set/reset */ outw(GDCIDX, 0xff08); /* bit mask */ writeb(d, 0); c = readb(d); /* set bg color in the latch */ /* foreground color */ if (flip) col = (on) ? (a & 0x0f00) : ((a & 0xf000) >> 4); else col = (on) ? 
((a & 0xf000) >> 4) : (a & 0x0f00); outw(GDCIDX, col | 0x00); /* set/reset */ f = &(scp->font[sc_vtb_getc(&scp->vtb, at)*scp->font_size + scp->font_size - scp->curs_attr.base - 1]); height = imin(scp->curs_attr.height, scp->font_size); for (i = 0; i < height; ++i, --f) { outw(GDCIDX, (*f << 8) | 0x08); /* bit mask */ writeb(d, 0); d -= line_width; } outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ } static int pxlblinkrate = 0; static void vga_pxlcursor_direct(scr_stat *scp, int at, int blink, int on, int flip) { if (scp->curs_attr.height <= 0) /* the text cursor is disabled */ return; if (on) { if (!blink) { scp->status |= VR_CURSOR_ON; draw_pxlcursor_direct(scp, at, on, flip); } else if (++pxlblinkrate & 4) { pxlblinkrate = 0; scp->status ^= VR_CURSOR_ON; draw_pxlcursor_direct(scp, at, scp->status & VR_CURSOR_ON, flip); } } else { if (scp->status & VR_CURSOR_ON) draw_pxlcursor_direct(scp, at, on, flip); scp->status &= ~VR_CURSOR_ON; } if (blink) scp->status |= VR_CURSOR_BLINK; else scp->status &= ~VR_CURSOR_BLINK; } static void vga_pxlcursor_planar(scr_stat *scp, int at, int blink, int on, int flip) { if (scp->curs_attr.height <= 0) /* the text cursor is disabled */ return; if (on) { if (!blink) { scp->status |= VR_CURSOR_ON; draw_pxlcursor_planar(scp, at, on, flip); } else if (++pxlblinkrate & 4) { pxlblinkrate = 0; scp->status ^= VR_CURSOR_ON; draw_pxlcursor_planar(scp, at, scp->status & VR_CURSOR_ON, flip); } } else { if (scp->status & VR_CURSOR_ON) draw_pxlcursor_planar(scp, at, on, flip); scp->status &= ~VR_CURSOR_ON; } if (blink) scp->status |= VR_CURSOR_BLINK; else scp->status &= ~VR_CURSOR_BLINK; } static void vga_pxlblink_direct(scr_stat *scp, int at, int flip) { if (!(scp->status & VR_CURSOR_BLINK)) return; if (!(++pxlblinkrate & 4)) return; pxlblinkrate = 0; scp->status ^= VR_CURSOR_ON; draw_pxlcursor_direct(scp, at, scp->status & VR_CURSOR_ON, flip); } static void vga_pxlblink_planar(scr_stat *scp, int at, int flip) { if (!(scp->status & VR_CURSOR_BLINK)) return; if (!(++pxlblinkrate & 4)) return; pxlblinkrate = 0; scp->status ^= VR_CURSOR_ON; draw_pxlcursor_planar(scp, at, scp->status & VR_CURSOR_ON, flip); } #ifndef SC_NO_CUTPASTE static void draw_pxlmouse_planar(scr_stat *scp, int x, int y) { vm_offset_t p; int line_width; int xoff, yoff; int ymax; u_short m; int i, j; line_width = scp->sc->adp->va_line_width; xoff = (x - scp->xoff*8)%8; yoff = y - rounddown(y, line_width); ymax = imin(y + 16, scp->ypixel); outw(GDCIDX, 0x0805); /* read mode 1, write mode 0 */ outw(GDCIDX, 0x0001); /* set/reset enable */ outw(GDCIDX, 0x0002); /* color compare */ outw(GDCIDX, 0x0007); /* color don't care */ outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, 0x0803); /* data rotate/function select (and) */ p = scp->sc->adp->va_window + line_width*y + x/8; if (x < scp->xpixel - 8) { for (i = y, j = 0; i < ymax; ++i, ++j) { m = ~(mouse_and_mask[j] >> xoff); #if defined(__i386__) || defined(__amd64__) *(u_char *)p &= m >> 8; *(u_char *)(p + 1) &= m; #else writeb(p, readb(p) & (m >> 8)); writeb(p + 1, readb(p + 1) & m); #endif p += line_width; } } else { xoff += 8; for (i = y, j = 0; i < ymax; ++i, ++j) { m = ~(mouse_and_mask[j] >> xoff); #if defined(__i386__) || defined(__amd64__) *(u_char *)p &= m; #else writeb(p, readb(p) & m); #endif p += line_width; } } outw(GDCIDX, 0x1003); /* data rotate/function select (or) */ p = scp->sc->adp->va_window + line_width*y + x/8; if (x < scp->xpixel - 8) { for (i = y, j = 0;
i < ymax; ++i, ++j) { m = mouse_or_mask[j] >> xoff; #if defined(__i386__) || defined(__amd64__) *(u_char *)p &= m >> 8; *(u_char *)(p + 1) &= m; #else writeb(p, readb(p) & (m >> 8)); writeb(p + 1, readb(p + 1) & m); #endif p += line_width; } } else { for (i = y, j = 0; i < ymax; ++i, ++j) { m = mouse_or_mask[j] >> xoff; #if defined(__i386__) || defined(__amd64__) *(u_char *)p &= m; #else writeb(p, readb(p) & m); #endif p += line_width; } } outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ } static void remove_pxlmouse_planar(scr_stat *scp, int x, int y) { vm_offset_t p; int col, row; int pos; int line_width; int ymax; int i; /* erase the mouse cursor image */ col = x/8 - scp->xoff; row = y/scp->font_size - scp->yoff; pos = row*scp->xsize + col; i = (col < scp->xsize - 1) ? 2 : 1; (*scp->rndr->draw)(scp, pos, i, FALSE); if (row < scp->ysize - 1) (*scp->rndr->draw)(scp, pos + scp->xsize, i, FALSE); /* paint border if necessary */ line_width = scp->sc->adp->va_line_width; outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, (scp->border << 8) | 0x00); /* set/reset */ if (row == scp->ysize - 1) { i = (scp->ysize + scp->yoff)*scp->font_size; ymax = imin(i + scp->font_size, scp->ypixel); p = scp->sc->adp->va_window + i*line_width + scp->xoff + col; if (col < scp->xsize - 1) { for (; i < ymax; ++i) { writeb(p, 0); writeb(p + 1, 0); p += line_width; } } else { for (; i < ymax; ++i) { writeb(p, 0); p += line_width; } } } if ((col == scp->xsize - 1) && (scp->xoff > 0)) { i = (row + scp->yoff)*scp->font_size; ymax = imin(i + scp->font_size*2, scp->ypixel); p = scp->sc->adp->va_window + i*line_width + scp->xoff + scp->xsize; for (; i < ymax; ++i) { writeb(p, 0); p += line_width; } } outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ } static void vga_pxlmouse_direct(scr_stat *scp, int x, int y, int on) { vm_offset_t p; int line_width, pixel_size; int xend, yend; static int x_old = 0, xend_old = 0; static int y_old = 0, yend_old = 0; int i, j; uint32_t *u32; uint16_t *u16; uint8_t *u8; int bpp; if (!on) return; bpp = scp->sc->adp->va_info.vi_depth; if ((bpp == 16) && (scp->sc->adp->va_info.vi_pixel_fsizes[1] == 5)) bpp = 15; line_width = scp->sc->adp->va_line_width; pixel_size = scp->sc->adp->va_info.vi_pixel_size; xend = imin(x + 16, scp->xpixel); yend = imin(y + 16, scp->ypixel); p = scp->sc->adp->va_window + y_old * line_width + x_old * pixel_size; for (i = 0; i < (yend_old - y_old); i++) { for (j = (xend_old - x_old - 1); j >= 0; j--) { switch (bpp) { case 32: u32 = (uint32_t*)(p + j * pixel_size); writel(u32, mouse_buf32[i * 16 + j]); break; case 16: /* FALLTHROUGH */ case 15: u16 = (uint16_t*)(p + j * pixel_size); writew(u16, mouse_buf16[i * 16 + j]); break; case 8: u8 = (uint8_t*)(p + j * pixel_size); writeb(u8, mouse_buf8[i * 16 + j]); break; } } p += line_width; } p = scp->sc->adp->va_window + y * line_width + x * pixel_size; for (i = 0; i < (yend - y); i++) { for (j = (xend - x - 1); j >= 0; j--) { switch (bpp) { case 32: u32 = (uint32_t*)(p + j * pixel_size); mouse_buf32[i * 16 + j] = *u32; if (mouse_or_mask[i] & (1 << (15 - j))) writel(u32, vga_palette32[15]); else if (mouse_and_mask[i] & (1 << (15 - j))) writel(u32, 0); break; case 16: u16 = (uint16_t*)(p + j * pixel_size); mouse_buf16[i * 16 + j] = *u16; if (mouse_or_mask[i] & (1 <<
(15 - j))) writew(u16, vga_palette16[15]); else if (mouse_and_mask[i] & (1 << (15 - j))) writew(u16, 0); break; case 15: u16 = (uint16_t*)(p + j * pixel_size); mouse_buf16[i * 16 + j] = *u16; if (mouse_or_mask[i] & (1 << (15 - j))) writew(u16, vga_palette15[15]); else if (mouse_and_mask[i] & (1 << (15 - j))) writew(u16, 0); break; case 8: u8 = (uint8_t*)(p + j * pixel_size); mouse_buf8[i * 16 + j] = *u8; if (mouse_or_mask[i] & (1 << (15 - j))) writeb(u8, 15); else if (mouse_and_mask[i] & (1 << (15 - j))) writeb(u8, 0); break; } } p += line_width; } x_old = x; y_old = y; xend_old = xend; yend_old = yend; } static void vga_pxlmouse_planar(scr_stat *scp, int x, int y, int on) { if (on) draw_pxlmouse_planar(scp, x, y); else remove_pxlmouse_planar(scp, x, y); } #endif /* SC_NO_CUTPASTE */ #endif /* SC_PIXEL_MODE */ #ifndef SC_NO_MODE_CHANGE /* graphics mode renderer */ static void vga_grborder(scr_stat *scp, int color) { vidd_set_border(scp->sc->adp, color); } #endif Index: stable/11/sys/fs/nfsclient/nfs_clrpcops.c =================================================================== --- stable/11/sys/fs/nfsclient/nfs_clrpcops.c (revision 330445) +++ stable/11/sys/fs/nfsclient/nfs_clrpcops.c (revision 330446) @@ -1,6703 +1,6703 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); /* * Rpc op calls, generally called from the vnode op calls or through the * buffer cache, for NFS v2, 3 and 4. * These do not normally make any changes to vnode arguments or use * structures that might change between the VFS variants. The returned * arguments are all at the end, after the NFSPROC_T *p one.
*/ #ifndef APPLEKEXT #include "opt_inet6.h" #include <fs/nfs/nfsport.h> #include <sys/sysctl.h> SYSCTL_DECL(_vfs_nfs); static int nfsignore_eexist = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW, &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink"); /* * Global variables */ extern int nfs_numnfscbd; extern struct timeval nfsboottime; extern u_int32_t newnfs_false, newnfs_true; extern nfstype nfsv34_type[9]; extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; int nfscl_enablecallb = 0; short nfsv4_cbport = NFSV4_CBPORT; int nfstest_openallsetattr = 0; #endif /* !APPLEKEXT */ #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfscl_getsameserver() can return one of three values: * NFSDSP_USETHISSESSION - Use this session for the DS. * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new * session. * NFSDSP_NOTFOUND - No matching server was found. */ enum nfsclds_state { NFSDSP_USETHISSESSION = 0, NFSDSP_SEQTHISSESSION = 1, NFSDSP_NOTFOUND = 2, }; static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, struct nfscllockowner *, u_int64_t, u_int64_t, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, struct ucred *, NFSPROC_T *); static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *, struct nfsclds **, NFSPROC_T *); static void nfscl_initsessionslots(struct nfsclsession *); static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *, NFSPROC_T *); static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *, NFSPROC_T *); static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, int, struct ucred *, NFSPROC_T *); static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, struct nfsclds *, struct nfsclds **); static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, struct ucred *, NFSPROC_T *); static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int); static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *, int *, struct nfsclflayouthead *);
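/*
 * An illustrative sketch (not code from this file) of how a caller
 * dispatches on the three nfsclds_state values above; "dsp" and "tdsp"
 * are placeholder names for the real variables in nfsrpc_fillsa():
 *
 *	switch (nfscl_getsameserver(nmp, dsp, &tdsp)) {
 *	case NFSDSP_USETHISSESSION:
 *		(reuse tdsp's existing session for the DS)
 *		break;
 *	case NFSDSP_SEQTHISSESSION:
 *		(derive the new session from tdsp's nfsclds_sequence)
 *		break;
 *	case NFSDSP_NOTFOUND:
 *		(no match; create a connection and session from scratch)
 *		break;
 *	}
 */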
static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, nfsv4stateid_t *, int, int, int *, struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *); static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *, int, int, int *, struct nfsclflayouthead *, int *); static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *, int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **, struct nfsclflayouthead *, int, int *, struct ucred *, NFSPROC_T *); /* * nfs null call from vfs. */ APPLESTATIC int nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p) { int error; struct nfsrv_descript nfsd, *nd = &nfsd; NFSCL_REQSTART(nd, NFSPROC_NULL, vp); error = nfscl_request(nd, vp, p, cred, NULL); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs access rpc op. * For nfs version 3 and 4, use the access rpc to check accessibility. If file * modes are changed on the server, accesses might still fail later. */ APPLESTATIC int nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp) { int error; u_int32_t mode, rmode; if (acmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vnode_vtype(vp) == VDIR) { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE); if (acmode & VEXEC) mode |= NFSACCESS_LOOKUP; } else { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (acmode & VEXEC) mode |= NFSACCESS_EXECUTE; } /* * Now, just call nfsrpc_accessrpc() to do the actual RPC. */ error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode, NULL); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if (!error && (rmode & mode) != mode) error = EACCES; return (error); } /* * The actual rpc, separated out for Darwin. */ APPLESTATIC int nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep, void *stuff) { u_int32_t *tl; u_int32_t supported, rmode; int error; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *attrflagp = 0; supported = mode; NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(mode); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. 
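 * For NFSv4 the request then goes out as the compound
 * Putfh; Access; Getattr (the Putfh is prepended by
 * NFSCL_REQSTART() above), so the access bits and fresh
 * attributes come back in a single round trip.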
*/ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); supported = fxdr_unsigned(u_int32_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } rmode = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); /* * It's not obvious what should be done about * unsupported access modes. For now, be paranoid * and clear the unsupported ones. */ rmode &= supported; *rmodep = rmode; } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs open rpc */ APPLESTATIC int nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int32_t mode, clidrev; int ret, newone, error, expireret = 0, retrycnt; /* * For NFSv4, Open Ops are only done on Regular Files. */ if (vnode_vtype(vp) != VREG) return (0); mode = 0; if (amode & FREAD) mode |= NFSV4OPEN_ACCESSREAD; if (amode & FWRITE) mode |= NFSV4OPEN_ACCESSWRITE; nfhp = np->n_fhp; retrycnt = 0; #ifdef notdef { char name[100]; int namel; namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99; bcopy(NFS4NODENAME(np->n_v4), name, namel); name[namel] = '\0'; printf("rpcopen p=0x%x name=%s",p->p_pid,name); if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]); else printf(" fhl=0\n"); } #endif do { dp = NULL; error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1, cred, p, NULL, &op, &newone, &ret, 1); if (error) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (ret == NFSCLOPEN_DOOPEN) { if (np->n_v4 != NULL) { /* * For the first attempt, try and get a layout, if * pNFS is enabled for the mount. */ if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0) error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, 0, 0x0, cred, p, 0, 0); else error = nfsrpc_getopenlayout(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, cred, p); if (dp != NULL) { #ifdef APPLE OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag); #else NFSLOCKNODE(np); np->n_flag &= ~NDELEGMOD; /* * Invalidate the attribute cache, so that * attributes that pre-date the issue of a * delegation are not cached, since the * cached attributes will remain valid while * the delegation is held. */ NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); #endif (void) nfscl_deleg(nmp->nm_mountp, op->nfso_own->nfsow_clp, nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp); } } else { error = EIO; } newnfs_copyincred(cred, &op->nfso_cred); } else if (ret == NFSCLOPEN_SETCRED) /* * This is a new local open on a delegation. It needs * to have credentials so that an open can be done * against the server during recovery. 
*/ newnfs_copyincred(cred, &op->nfso_cred); /* * nfso_opencnt is the count of how many VOP_OPEN()s have * been done on this Open successfully and a VOP_CLOSE() * is expected for each of these. * If error is non-zero, don't increment it, since the Open * hasn't succeeded yet. */ if (!error) op->nfso_opencnt++; nfscl_openrelease(nmp, op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * the actual open rpc */ APPLESTATIC int nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **dpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p, int syscred, int recursed) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *dp, *ndp = NULL; struct nfsvattr nfsva; u_int32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby; struct nfsclsession *tsep; dp = *dpp; *dpp = NULL; nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); if (reclaim) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(delegtype); } else { if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; } else { *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); } (void) nfsm_strtom(nd, name, namelen); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); (void) nfsrv_putattrbit(nd, &attrbits); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSM_DISSECT(tl, 
u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); MALLOC(ndp, struct nfscldeleg *, sizeof (struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { ndp->nfsdl_flags = NFSCLDL_READ; } if (ret) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; } if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open"); } while (ret == NFSERR_DELAY); error = ret; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. 
*/ if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) && (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL && ndp == NULL && !recursed) { do { ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, &ndp, 0, 0x0, cred, p, syscred, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open2"); } while (ret == NFSERR_DELAY); if (ret) { if (ndp != NULL) { FREE((caddr_t)ndp, M_NFSCLDELEG); ndp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) *dpp = ndp; else if (ndp != NULL) FREE((caddr_t)ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * open downgrade rpc */ APPLESTATIC int nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Close operation. */ APPLESTATIC int nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p) { struct nfsclclient *clp; int error; if (vnode_vtype(vp) != VREG) return (0); if (doclose) error = nfscl_doclose(vp, &clp, p); else error = nfscl_getclose(vp, &clp); if (error) return (error); nfscl_clientrelease(clp); return (0); } /* * Close the open. */ APPLESTATIC void nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct ucred *tcred; u_int64_t off = 0, len = 0; u_int32_t type = NFSV4LOCKT_READ; int error, do_unlock, trycnt; tcred = newnfs_getcred(); newnfs_copycred(&op->nfso_cred, tcred); /* * (Theoretically this could be done in the same * compound as the close, but having multiple * sequenced Ops in the same compound might be * too scary for some servers.) */ if (op->nfso_posixlock) { off = 0; len = NFS64BITSSET; type = NFSV4LOCKT_READ; } /* * Since this function is only called from VOP_INACTIVE(), no * other thread will be manipulating this Open. As such, the * lock lists are not being changed by other threads, so it should * be safe to do this without locking. 
*/ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { do_unlock = 1; LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { if (op->nfso_posixlock == 0) { off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; if (lop->nfslo_type == F_WRLCK) type = NFSV4LOCKT_WRITE; else type = NFSV4LOCKT_READ; } if (do_unlock) { trycnt = 0; do { error = nfsrpc_locku(nd, nmp, lp, off, len, type, tcred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_close"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0 && trycnt++ < 5); if (op->nfso_posixlock) do_unlock = 0; } nfscl_freelock(lop, 0); } /* * Do a ReleaseLockOwner. * The lock owner name nfsl_owner may be used by other opens for * other files but the lock_owner4 name that nfsrpc_rellockown() * puts on the wire has the file handle for this file appended * to it, so it can be done now. */ (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, tcred, p); } /* * There could be other Opens for different files on the same * OpenOwner, so locking is required. */ NFSLOCKCLSTATE(); nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR); NFSUNLOCKCLSTATE(); do { error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfs_close"); } while (error == NFSERR_GRACE); NFSLOCKCLSTATE(); nfscl_lockunlock(&op->nfso_own->nfsow_rwlock); LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) nfscl_freelockowner(lp, 0); nfscl_freeopen(op, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * The actual Close RPC. */ APPLESTATIC int nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, op->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Open Confirm RPC. */ APPLESTATIC int nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int error; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4N(nmp)) return (0); /* No confirmation for NFSv4.1. 
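 * (NFSv4.1 did away with the Open Confirm operation; a 4.1 server
 * must not set NFSV4OPEN_RESULTCONFIRM in an Open reply, so the
 * RPC below is simply skipped.)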
*/ nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl = txdr_unsigned(op->nfso_own->nfsow_seqid); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs() * when a mount has just occurred and when the server replies NFSERR_EXPIRED. */ APPLESTATIC int nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9]; u_short port; int error, isinet6 = 0, callblen; nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; struct nfsclds *dsp; struct in6_addr a6; struct nfsclsession *tsep; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); clp->nfsc_rev = rev++; if (NFSHASNFSV4N(nmp)) { /* * Either there was no previous session or the * previous session has failed, so... * do an ExchangeID followed by the CreateSession. */ error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, &nmp->nm_sockreq, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); /* * The old sessions cannot be safely free'd * here, since they may still be used by * in-progress RPCs. */ tsep = NULL; if (TAILQ_FIRST(&nmp->nm_sess) != NULL) tsep = NFSMNT_MDSSESSION(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); /* * Wake up RPCs waiting for a slot on the * old session. These will then fail with * NFSERR_BADSESSION and be retried with the * new session by nfsv4_setsequence(). * Also wakeup() processes waiting for the * new session. */ if (tsep != NULL) wakeup(&tsep->nfsess_slots); wakeup(&nmp->nm_sess); NFSUNLOCKMNT(nmp); } else nfscl_freenfsclds(dsp); NFSCL_DEBUG(1, "aft createsess=%d\n", error); if (error == 0 && reclaim == 0) { error = nfsrpc_reclaimcomplete(nmp, cred, p); NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); if (error == NFSERR_COMPLETEALREADY || error == NFSERR_NOTSUPP) /* Ignore this error. */ error = 0; } return (error); } /* * Allocate a single session structure for NFSv4.0, because some of * the fields are used by NFSv4.0 although it doesn't do a session. 
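 * (In particular, the nfsess_clientid field of the embedded session
 * is filled in from the SetClientID reply below and is then carried
 * in later NFSv4.0 requests such as Open.)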
*/ dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); NFSLOCKMNT(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); tsep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* * set up the callback address */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) cp = nfscl_getmyip(nmp, &a6, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); cp2 = (u_int8_t *)&port; #ifdef INET6 if ((callblen > 0 && strchr(nfsv4_callbackaddr, ':')) || isinet6) { char ip6buf[INET6_ADDRSTRLEN], *ip6add; (void) nfsm_strtom(nd, "tcp6", 4); if (callblen == 0) { ip6_sprintf(ip6buf, (struct in6_addr *)cp); ip6add = ip6buf; } else { ip6add = nfsv4_callbackaddr; } snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", ip6add, cp2[0], cp2[1]); } else #endif { (void) nfsm_strtom(nd, "tcp", 3); if (callblen == 0) snprintf(addr, INET6_ADDRSTRLEN + 9, "%d.%d.%d.%d.%d.%d", cp[0], cp[1], cp[2], cp[3], cp2[0], cp2[1]); else snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", nfsv4_callbackaddr, cp2[0], cp2[1]); } (void) nfsm_strtom(nd, addr, strlen(addr)); } else { (void) nfsm_strtom(nd, "tcp", 3); (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); tsep->nfsess_clientid.lval[0] = *tl++; tsep->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; /* * and confirm it. */ nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = tsep->nfsess_clientid.lval[0]; *tl++ = tsep->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, nmp->nm_fhsize, NULL, NULL); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred); if (error) goto nfsmout; clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; } } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call. 
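 * (For NFSv2/3 this is a plain Getattr RPC; for NFSv4 it is a
 * Putfh; Getattr compound, with the attribute bitmap built by
 * NFSGETATTR_ATTRBIT() below.)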
*/ APPLESTATIC int nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsm_loadattr(nd, nap); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call with non-vnode arguments. */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_NFSV4) != 0) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, NULL, NULL); else error = nfsm_loadattr(nd, nap); } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs setattr operation. */ APPLESTATIC int nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { int error, expireret = 0, openerr, retrycnt; u_int32_t clidrev = 0, mode; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsfh *nfhp; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; retrycnt = 0; do { lckp = NULL; openerr = 1; if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { /* * No Open stateid, so try and open the file * now.
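 * (The Open is only needed to get a usable stateid for the
 * Setattr; when it succeeds, the matching nfsrpc_close() call
 * below releases it again.)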
*/ if (mode == NFSV4OPEN_ACCESSWRITE) openerr = nfsrpc_open(vp, FWRITE, cred, p); else openerr = nfsrpc_open(vp, FREAD, cred, p); if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p, rnap, attrflagp, stuff); else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (!openerr) (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } static int nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); vap->va_type = vnode_vtype(vp); nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff); if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error) error = nfscl_postop_attr(nd, rnap, attrflagp, stuff); mbuf_freem(nd->nd_mrep); if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs lookup rpc */ APPLESTATIC int nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; int error = 0, lookupp = 0; *attrflagp = 0; *dattrflagp = 0; if (vnode_vtype(dvp) != VDIR) return (ENOTDIR); nmp = VFSTONFS(vnode_mount(dvp)); if (len > NFS_MAXNAMLEN) return (ENAMETOOLONG); if (NFSHASNFSV4(nmp) && len == 1 && name[0] == '.') { /* * Just return the current dir's fh. 
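 * (A Lookup of "." is not a valid NFSv4 operation, so it is
 * satisfied locally from the cached file handle; ".." is
 * handled via the Lookupp operation just below.)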
*/ np = VTONFS(dvp); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; return (0); } if (NFSHASNFSV4(nmp) && len == 2 && name[0] == '.' && name[1] == '.') { lookupp = 1; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp); } else { NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp); (void) nfsm_strtom(nd, name, len); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * When an NFSv4 Lookupp returns ENOENT, it means that * the lookup is at the root of an fs, so return this dir. */ if (nd->nd_repstat == NFSERR_NOENT && lookupp) { np = VTONFS(dvp); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; mbuf_freem(nd->nd_mrep); return (0); } if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } goto nfsmout; } if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error != 0) goto nfsmout; *dattrflagp = 1; /* Skip over the Lookup and GetFH operation status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); } error = nfsm_getfh(nd, nfhpp); if (error) goto nfsmout; error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); nfsmout: mbuf_freem(nd->nd_mrep); if (!error && nd->nd_repstat) error = nd->nd_repstat; return (error); } /* * Do a readlink rpc. */ APPLESTATIC int nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np = VTONFS(vp); nfsattrbit_t attrbits; int error, len, cangetattr = 1; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READLINK, vp); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_STRSIZ(len, NFS_MAXPATHLEN); /* * This seems weird to me, but must have been added to * FreeBSD for some reason. The only thing I can think of * is that there was/is some server that replies with * more link data than it should? */ if (len == NFS_MAXPATHLEN) { NFSLOCKNODE(np); if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) { len = np->n_size; cangetattr = 0; } NFSUNLOCKNODE(np); } error = nfsm_mbufuio(nd, uiop, len); if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Read operation. 
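 * (This is the retry wrapper: it fetches an appropriate stateid,
 * calls nfsrpc_readrpc() to do the actual READ and loops on the
 * usual recoverable NFSv4 errors, much like nfsrpc_setattr()
 * above.)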
*/ APPLESTATIC int nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { int error, expireret = 0, retrycnt; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { nfhp = np->n_fhp; newcred = NFSNEWCRED(cred); } retrycnt = 0; do { lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_OPENMODE) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual read RPC. */ static int nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; int error = 0, len, retlen, tsiz, eof = 0; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; int rsize; off_t tmp_off; *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } rsize = nmp->nm_rsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; while (tsiz > 0) { *attrflagp = 0; len = (tsiz > rsize) ? rsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_READ, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (nd->nd_flag & ND_NFSV2) { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } else { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } /* * Since I can't do a Getattr for NFSv4 for Write, there * doesn't seem any point in doing one here, either. * (See the comment in nfsrpc_writerpc() for more info.) 
*/ error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); error = nfsm_mbufuio(nd, uiop, retlen); if (error) goto nfsmout; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= retlen; if (!(nd->nd_flag & ND_NFSV2)) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } return (0); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * nfs write operation * When called_from_strategy != 0, it should return EIO for an error that * indicates recovery is in progress, so that the buffer will be left * dirty and be written back to the server later. If it loops around, * the recovery thread could get stuck waiting for the buffer and recovery * will then deadlock. */ APPLESTATIC int nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff, int called_from_strategy) { int error, expireret = 0, retrycnt, nostateid; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; *must_commit = 0; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { newcred = NFSNEWCRED(cred); nfhp = np->n_fhp; } retrycnt = 0; do { lckp = NULL; nostateid = 0; if (NFSHASNFSV4(nmp)) { (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; NFSCL_DEBUG(1, "stateid0 in write\n"); } } /* * If there is no stateid for NFSv4, it means this is an * extraneous write after close. Basically a poorly * implemented buffer cache. Just don't do the write. 
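 * (nfscl_getstateid() hands back an all-zeros stateid when no
 * Open remains for the vnode; the nostateid test below catches
 * that case and quietly discards the write rather than sending
 * it with a bogus stateid.)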
*/ if (nostateid) error = 0; else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual write RPC. */ static int nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC; int wccflag = 0, wsize; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; off_t tmp_off; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } wsize = nmp->nm_wsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; /* NFSv2 sometimes does a write with */ nd->nd_repstat = 0; /* uio_resid == 0, so the while is not done */ while (tsiz > 0) { *attrflagp = 0; len = (tsiz > wsize) ? wsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_WRITE, vp); if (nd->nd_flag & ND_NFSV4) { nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* * Not sure why someone changed this, since the * RFC clearly states that "beginoffset" and * "totalcount" are ignored, but it wouldn't * surprise me if there's a busted server out there. */ /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiombuf(nd, uiop, len); /* * Although it is tempting to do a normal Getattr Op in the * NFSv4 compound, the result can be a nearly hung client * system if the Getattr asks for Owner and/or OwnerGroup. 
* It occurs when the client can't map either the Owner or * Owner_group name in the Getattr reply to a uid/gid. When * there is a cache miss, the kernel does an upcall to the * nfsuserd. Then, it can try to read the local /etc/passwd * or /etc/group file. It can then block in getnewbuf(), * waiting for dirty writes to be pushed to the NFS server. * The only reason this doesn't result in a complete * deadlock is that the upcall times out and allows * the write to complete. However, progress is so slow * that it might just as well be deadlocked. * As such, we get the rest of the attributes, but not * Owner or Owner_group. * nb: nfscl_loadattrcache() needs to be told that these * partial attributes from a write rpc are being * passed in, via an argument flag. */ if (nd->nd_flag & ND_NFSV4) { NFSWRITEGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * In case the rpc gets retried, roll * the uio fields changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY((caddr_t)tl, (caddr_t)&nmp->nm_verf[0], NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } if (nd->nd_flag & ND_NFSV4) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = NFS_LATTR_NOSHRINK; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4)); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= len; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev.
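 */

/*
 * An illustrative sketch of that v2 kludge (a hypothetical helper, not
 * part of this file): the S_IFMT bits from <sys/stat.h> carry the file
 * type in the create mode word, while nfscl_fillsattr() called with
 * NFSSATTR_SIZERDEV below puts rdev in the size field.
 */
static u_int32_t
nfsv2_mknod_mode(enum vtype vtyp, u_int32_t mode)
{

	return ((mode & 07777) | (vtyp == VCHR ? S_IFCHR : S_IFBLK));
}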
APPLESTATIC int nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap, u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp); if (nd->nd_flag & ND_NFSV4) { if (vtyp == VBLK || vtyp == VCHR) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = vtonfsv34_type(vtyp); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if ((nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!error && nd->nd_repstat) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs file create call * Mostly just call the appropriate routine. (I separated out v4, so that * error recovery wouldn't be as difficult.)
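 * The v4 code below loops on the transient NFSv4 errors (and retries
 * NFSERR_EXPIRED/NFSERR_BADSTATEID a bounded number of times); a sketch
 * of the unconditional-retry predicate (hypothetical helper, not part
 * of this file):
 *
 *	static int
 *	nfscl_retryerr(int error)
 *	{
 *
 *		return (error == NFSERR_GRACE ||
 *		    error == NFSERR_STALECLIENTID ||
 *		    error == NFSERR_STALEDONTRECOVER ||
 *		    error == NFSERR_DELAY || error == NFSERR_BADSESSION);
 *	}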
*/ APPLESTATIC int nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { int error = 0, newone, expireret = 0, retrycnt, unlocked; struct nfsclowner *owp; struct nfscldeleg *dp; struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp)); u_int32_t clidrev; if (NFSHASNFSV4(nmp)) { retrycnt = 0; do { dp = NULL; error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone, NULL, 1); if (error) return (error); if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || retrycnt > 0) error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); else error = nfsrpc_getcreatelayout(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); /* * There is no need to invalidate cached attributes here, * since new post-delegation issue attributes are always * returned by nfsrpc_createv4() and these will update the * attribute cache. */ if (dp != NULL) (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(nmp, owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; } else { error = nfsrpc_createv23(dvp, name, namelen, vap, cverf, fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff); } return (error); } /* * The create rpc for v2 and 3. 
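 * For an O_EXCL create, cverf must be unique across client reboots so
 * the server can distinguish a retried create from a new one; a minimal
 * sketch of one way to generate it (hypothetical helper, not part of
 * this file):
 *
 *	static void
 *	nfscl_mkcverf(nfsquad_t *cverf)
 *	{
 *		struct timespec ts;
 *
 *		nanotime(&ts);
 *		cverf->lval[0] = (u_int32_t)ts.tv_sec;
 *		cverf->lval[1] = (u_int32_t)ts.tv_nsec;
 *	}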
*/ static int nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } static int nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { u_int32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { if (NFSHASNFSV4N(nmp)) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { /* NFSv4.0 */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); (void) nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes. 
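 * The compound built here is thus: Open, Getfh, Getattr (new file),
 * then Putfh (directory) and a second Getattr, matching the reply
 * parsing below.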
*/ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); MALLOC(dp, struct nfscldeleg *, sizeof (struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. 
*/ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); if ((rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh, nfhp->nfh_len, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_create"); } while (ret == NFSERR_DELAY); error = ret; } /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if ((rflags & NFSV4OPEN_RESULTCONFIRM) && (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL) { do { ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op, name, namelen, &dp, 0, 0x0, cred, p, 0, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_crt2"); } while (ret == NFSERR_DELAY); if (ret) { if (dp != NULL) { FREE((caddr_t)dp, M_NFSCLDELEG); dp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) *dpp = dp; else if (dp != NULL) FREE((caddr_t)dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Nfs remove rpc */ APPLESTATIC int nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np; struct nfsmount *nmp; nfsv4stateid_t dstateid; int error, ret = 0, i; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); nmp = VFSTONFS(vnode_mount(dvp)); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_removedeleg(vp, p, &dstateid); if (ret == 1) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(dvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_REMOVE); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delegreturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } } error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs rename rpc.
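 * If delegations are held for the source and/or target file, they are
 * returned first in the same compound (Delegreturn ops ahead of the
 * Rename); should a Delegreturn fail, the rpc is redone without them
 * and the server will Recall instead.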
*/ APPLESTATIC int nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; nfsattrbit_t attrbits; nfsv4stateid_t fdstateid, tdstateid; int error = 0, ret = 0, gottd = 0, gotfd = 0, i; *fattrflagp = 0; *tattrflagp = 0; nmp = VFSTONFS(vnode_mount(fdvp)); if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp, &tdstateid, &gottd, p); if (gotfd && gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp); } else if (gotfd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp); } else if (gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp); } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(tvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_DELEGRETURN); } } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; } if (ret > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(fdvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SAVEFH); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_RENAME); } (void) nfsm_strtom(nd, fnameptr, fnamelen); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); (void) nfsm_strtom(nd, tnameptr, tnamelen); error = nfscl_request(nd, fdvp, p, cred, fstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delegreturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) { if (i == 0 && ret > 1) { /* * If the Delegreturn failed, try again * without it. The server will Recall, as * required. * If ret > 1, the first iteration of this * loop is the second DelegReturn result.
*/ mbuf_freem(nd->nd_mrep); goto tryagain; } else { nd->nd_flag |= ND_NOMOREDATA; } } } } /* Now, the first wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, fstuff); /* and the second wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } if (!error) error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp, NULL, tstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs hard link create rpc */ APPLESTATIC int nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_LINK, vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); } (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh, VTONFS(dvp)->n_fhp->nfh_len, 0); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LINK); } (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, vp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, dstuff); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* * First, parse out the PutFH and Getattr result. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (!(*(tl + 1))) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; /* * Get the pre-op attributes. 
*/ error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs symbolic link create rpc */ APPLESTATIC int nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int slen, error = 0; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); slen = strlen(target); if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFLNK); (void) nfsm_strtom(nd, target, slen); } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_strtom(nd, target, slen); if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if ((nd->nd_flag & ND_NFSV3) && !error) { if (!nd->nd_repstat) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. 
*/ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs make dir rpc */ APPLESTATIC int nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; struct nfsfh *fhp; struct nfsmount *nmp; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); fhp = VTONFS(dvp)->n_fhp; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); } (void) nfsm_strtom(nd, name, namelen); nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat && !error) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (!error) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) { /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } } if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs remove directory call */ APPLESTATIC int nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * Readdir rpc. 
* Always returns with either uio_resid unchanged, if you are at the * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks * filled in. * I felt this would allow caching of directory blocks more easily * than returning a partially filled block. * Directory offset cookies: * Oh my, what to do with them... * I can think of three ways to deal with them: * 1 - have the layer above these RPCs maintain a map between logical * directory byte offsets and the NFS directory offset cookies * 2 - pass the opaque directory offset cookies up into userland * and let the libc functions deal with them, via the system call * 3 - return them to userland in the "struct dirent", so future versions * of libc can use them and do whatever is necessary to make things work * above these rpc calls, in the meantime * For now, I do #3 by "hiding" the directory offset cookies after the * d_name field in struct dirent. This is space inside d_reclen that * will be ignored by anything that doesn't know about them. * The directory offset cookies are filled in as the last 8 bytes of * each directory entry, after d_name. Someday, the userland libc * functions may be able to use these. In the meantime, it satisfies * OpenBSD's requirements for cookies being returned. * It expects the directory offset cookie for the read to be in uio_offset * and returns the one for the next entry after this directory block in * there, as well. */ APPLESTATIC int nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; nfsquad_t cookie, ncookie; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp); struct nfsvattr nfsva; struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0; long dotfileid, dotdotfileid = 0; u_int32_t fakefileno = 0xffffffff, rderr; char *cp; nfsattrbit_t attrbits, dattrbits; u_int32_t *tl2 = NULL; size_t tresid; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * There is no point in reading a lot more than uio_resid, however * adding one additional DIRBLKSIZ makes sense. Since uio_resid * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this * will never make readsize > nm_readdirsize. */ readsize = nmp->nm_readdirsize; if (readsize > uio_uio_resid(uiop)) readsize = uio_uio_resid(uiop) + DIRBLKSIZ; *attrflagp = 0; if (eofp) *eofp = 0; tresid = uio_uio_resid(uiop); cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; nd->nd_mrep = NULL; /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { reqsize = 6 * NFSX_UNSIGNED; NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
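 * They are generated only at the start of the directory
 * (uio_offset == 0) and both get offset cookies of 0.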
*/ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; dp->d_name[0] = '.'; dp->d_name[1] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_name[2] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR); } else { reqsize = 5 * NFSX_UNSIGNED; } /* * Loop around doing readdir rpc's of size readsize. * The stopping criteria is EOF or buffer full. 
*/ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIR, vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.lval[1]; *tl = txdr_unsigned(readsize); } else { NFSM_BUILD(tl, u_int32_t *, reqsize); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } if (nd->nd_flag & ND_NFSV4) { *tl++ = txdr_unsigned(readsize); *tl = txdr_unsigned(readsize); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } else { *tl = txdr_unsigned(readsize); } } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!(nd->nd_flag & ND_NFSV2)) { if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; NFSUNLOCKNODE(dnp); } } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; len = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_unsigned(long, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = NFSM_RNDUP(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); tlen -= len; *cp = '\0'; /* null terminate */ cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } else if (nd->nd_flag & ND_NFSV3) { 
NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); ncookie.lval[0] = 0; ncookie.lval[1] = *tl++; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else { if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } dp->d_type = vtonfs_dtype(nfsva.na_type); } } else { dp->d_fileno = nfsva.na_fileid; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; } more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp) { if (tresid == ((size_t)(uio_uio_resid(uiop)))) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) { dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #ifndef APPLE /* * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir(). * (Also used for NFS V4 when mount flag set.) * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.) 
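 * As in nfsrpc_readdir(), the directory offset cookie of each entry is
 * stored in the last 8 bytes (NFSX_HYPER) of its record; a sketch of
 * locating it (hypothetical helper, not part of this file):
 *
 *	static u_int32_t *
 *	nfscl_dircookiep(struct dirent *dp)
 *	{
 *
 *		return ((u_int32_t *)((char *)dp + dp->d_reclen -
 *		    NFSX_HYPER));
 *	}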
*/ APPLESTATIC int nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; vnode_t newvp = NULLVP; struct nfsrv_descript nfsd, *nd = &nfsd; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp), *np; struct nfsvattr nfsva; struct nfsfh *nfhp; nfsquad_t cookie, ncookie; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0; int isdotdot = 0, unlocknewvp = 0; long dotfileid, dotdotfileid = 0, fileno = 0; char *cp; nfsattrbit_t attrbits, dattrbits; size_t tresid; u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr; struct timespec dctime; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); timespecclear(&dctime); *attrflagp = 0; if (eofp != NULL) *eofp = 0; ndp->ni_dvp = vp; nd->nd_mrep = NULL; cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; tresid = uio_uio_resid(uiop); /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dctime = nfsva.na_ctime; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. 
*/ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; dp->d_name[0] = '.'; dp->d_name[1] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_name[2] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSREADDIRPLUS_ATTRBIT(&attrbits); if (gotmnton) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); } /* * Loop around doing readdir rpc's of size nm_readdirsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp); NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_readdirsize); if (nd->nd_flag & ND_NFSV4) { (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0) dctime = nap->na_ctime; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; NFSUNLOCKNODE(dnp); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (nd->nd_flag & ND_NFSV4) { ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { fileno = fxdr_unsigned(long, *++tl); tl++; } len = fxdr_unsigned(int, *tl); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = NFSM_RNDUP(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, 
-(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); cnp->cn_nameptr = uio_iov_base(uiop); cnp->cn_namelen = len; NFSCNHASHZERO(cnp); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; if (len == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') isdotdot = 1; else isdotdot = 0; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } nfhp = NULL; if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; attrflag = fxdr_unsigned(int, *tl); if (attrflag) { error = nfsm_loadattr(nd, &nfsva); if (error) goto nfsmout; } NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED); if (*tl) { error = nfsm_getfh(nd, &nfhp); if (error) goto nfsmout; } if (!attrflag && nfhp != NULL) { FREE((caddr_t)nfhp, M_NFSFH); nfhp = NULL; } } else { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } } else { dp->d_fileno = fileno; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; if (nfhp != NULL) { if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len, dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) { VREF(vp); newvp = vp; unlocknewvp = 0; FREE((caddr_t)nfhp, M_NFSFH); np = dnp; } else if (isdotdot != 0) { /* * Skip doing a nfscl_nget() call for "..". * There's a race between acquiring the nfs * node here and lookups that look for the * directory being read (in the parent). * It would try to get a lock on ".." here, * owning the lock on the directory being * read. Lookup will hold the lock on ".." * and try to acquire the lock on the * directory being read. * If the directory is unlocked/relocked, * then there is a LOR with the buf lock, since * vp is relocked while the buf lock is held. */ free(nfhp, M_NFSFH); } else { error = nfscl_nget(vnode_mount(vp), vp, nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE); if (!error) { newvp = NFSTOV(np); unlocknewvp = 1; } } nfhp = NULL; if (newvp != NULLVP) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 0); if (error) { if (unlocknewvp) vput(newvp); else vrele(newvp); goto nfsmout; } dp->d_type = vtonfs_dtype(np->n_vattr.na_type); ndp->ni_vp = newvp; NFSCNHASH(cnp, HASHINIT); if (cnp->cn_namelen <= NCHNAMLEN && (newvp->v_type != VDIR || dctime.tv_sec != 0)) { cache_enter_time(ndp->ni_dvp, ndp->ni_vp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ?
NULL : &dctime); } if (unlocknewvp) vput(newvp); else vrele(newvp); newvp = NULLVP; } } } else if (nfhp != NULL) { FREE((caddr_t)nfhp, M_NFSFH); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp != NULL) { if (tresid == uio_uio_resid(uiop)) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #endif /* !APPLE */ /* * Nfs commit rpc */ APPLESTATIC int nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKMNT(nmp); if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); nd->nd_repstat = NFSERR_STALEWRITEVERF; } NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } nfsmout: if (!error && nd->nd_repstat) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * NFS byte range lock rpc. * (Mostly just calls one of the three lower level RPC routines.) 
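 * POSIX byte-range semantics apply: l_len == 0 means "to EOF", which is
 * mapped to the all-ones NFS length; a sketch (hypothetical helper, not
 * part of this file):
 *
 *	static u_int64_t
 *	nfscl_flocklen(struct flock *fl)
 *	{
 *
 *		return (fl->l_len == 0 ? NFS64BITSSET :
 *		    (u_int64_t)fl->l_len);
 *	}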
*/ APPLESTATIC int nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; struct nfsfh *nfhp; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int64_t off, len; off_t start, end; u_int32_t clidrev = 0; int error = 0, newone = 0, expireret = 0, retrycnt, donelocally; int callcnt, dorpc; /* * Convert the flock structure into a start and end and do POSIX * bounds checking. */ switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ start = fl->l_start; off = fl->l_start; break; case SEEK_END: start = size + fl->l_start; off = size + fl->l_start; break; default: return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len != 0) { end = start + fl->l_len - 1; if (end < start) return (EINVAL); } len = fl->l_len; if (len == 0) len = NFS64BITSSET; retrycnt = 0; do { nd->nd_repstat = 0; if (op == F_GETLK) { error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, p, id, flags); } else if (error == -1) { error = 0; } nfscl_clientrelease(clp); } else if (op == F_UNLCK && fl->l_type == F_UNLCK) { /* * We must loop around for all lockowner cases. */ callcnt = 0; error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ if (lp == NULL) { if (callcnt == 0) nfscl_clientrelease(clp); else nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; /* * If the server doesn't support Posix lock semantics, * only allow locks on the entire file, since it won't * handle overlapping byte ranges. * There might still be a problem when a lock * upgrade/downgrade (read<->write) occurs, since the * server "might" expect an unlock first? */ if (dorpc && (lp->nfsl_open->nfso_posixlock || (off == 0 && len == NFS64BITSSET))) { /* * Since the lock records will go away, we must * wait for grace and delay here. 
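 * (NFSERR_GRACE means the server is in its recovery grace period and
 * NFSERR_DELAY is a generic back-off request; both are transient, so
 * the Locku is simply retried after a short nap.)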
*/ do { error = nfsrpc_locku(nd, nmp, lp, off, len, NFSV4LOCKT_READ, cred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_advlock"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0); } callcnt++; } while (error == 0 && nd->nd_repstat == 0); nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; nfhp = VTONFS(vp)->n_fhp; if (!lp->nfsl_open->nfso_posixlock && (off != 0 || len != NFS64BITSSET)) { error = EINVAL; } else { error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, lp, newone, reclaim, off, len, fl->l_type, cred, p, 0); } if (!error) error = nd->nd_repstat; nfscl_lockrelease(lp, error, newone); } else { error = EINVAL; } if (!error) error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * The lower level routine for the LockT case. */ APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { u_int32_t *tl; int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], np->n_fhp->nfh_len); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); if (nd->nd_repstat == 0) { fl->l_type = F_UNLCK; } else if (nd->nd_repstat == NFSERR_DENIED) { nd->nd_repstat = 0; fl->l_whence = SEEK_SET; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); fl->l_start = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (len == NFS64BITSSET) fl->l_len = 0; else fl->l_len = len; type = fxdr_unsigned(int, *tl++); if (type == NFSV4LOCKT_WRITE) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; /* * XXX For now, I have no idea what to do with the * conflicting lock_owner, so I'll just set the pid == 0 * and skip over the lock_owner. 
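 * (The conflicting owner is XDR'd as a 64 bit clientid followed by an
 * opaque owner string, so the code below steps over the clientid with
 * tl += 2, reads the opaque's length and nfsm_advance()s over
 * NFSM_RNDUP(size) bytes of padded owner data.)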
*/ fl->l_pid = (pid_t)0; tl += 2; size = fxdr_unsigned(int, *tl); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALECLIENTID) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Lower level function that performs the LockU RPC. */ static int nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfscllockowner *lp, u_int64_t off, u_int64_t len, u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * The actual Lock RPC. 
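 * A sketch of the arguments built below: when newone is set, the
 * request carries new_lock_owner == TRUE followed by the open_owner's
 * seqid, the open stateid, the lock seqid, the clientid and a
 * lock_owner made up of the NFSV4CL_LOCKNAMELEN byte owner string
 * with the file handle appended; otherwise FALSE followed by the
 * existing lock stateid and lock seqid.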
*/ APPLESTATIC int nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); *tl++ = txdr_unsigned(reclaim); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (newone) { *tl = newnfs_true; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); size = fxdr_unsigned(int, *(tl + 7)); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs statfs rpc * (always called with the vp for the mount point) */ APPLESTATIC int nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl = NULL; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. 
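 * (NFSv4 has no Fsstat operation; the space and file counts are
 * simply attributes, so nfsv4_loadattr() below fills in *sbp and *fsp
 * from the Getattr reply and only NFSv2/3 take the NFSPROC_FSSTAT
 * path.)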
*/ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSSTATFS_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) { nmp->nm_fsid[0] = nap->na_filesid[0]; nmp->nm_fsid[1] = nap->na_filesid[1]; NFSSETHASSETFSID(nmp); *attrflagp = 1; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; } else { NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (nd->nd_repstat) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATFS(nd->nd_flag & ND_NFSV3)); } if (NFSHASNFSV3(nmp)) { sbp->sf_tbytes = fxdr_hyper(tl); tl += 2; sbp->sf_fbytes = fxdr_hyper(tl); tl += 2; sbp->sf_abytes = fxdr_hyper(tl); tl += 2; sbp->sf_tfiles = fxdr_hyper(tl); tl += 2; sbp->sf_ffiles = fxdr_hyper(tl); tl += 2; sbp->sf_afiles = fxdr_hyper(tl); tl += 2; sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl); } else if (NFSHASNFSV4(nmp) == 0) { sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs pathconf rpc */ APPLESTATIC int nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; u_int32_t *tl; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. 
*/ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSPATHCONF_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) *attrflagp = 1; } else { error = nd->nd_repstat; } } else { NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF); pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++); pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl++); pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl++); pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs version 3 fsinfo rpc call */ APPLESTATIC int nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO); fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_maxfilesize = fxdr_hyper(tl); tl += 2; fxdr_nfsv3time(tl, &fsp->fs_timedelta); tl += 2; fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Renew RPC. */ APPLESTATIC int nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; struct nfssockreq *nrp; struct nfsclsession *tsep; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); if (dsp == NULL) nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL); else nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, &dsp->nfsclds_sess); if (!NFSHASNFSV4N(nmp)) { /* NFSv4.1 just uses a Sequence Op and not a Renew. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; } nrp = NULL; if (dsp != NULL) nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; if (dsp == NULL) error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); else error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Releaselockowner RPC. 
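 * (ReleaseLockOwner lets an NFSv4.0 server discard the state it keeps
 * for a lock_owner that will no longer be used; NFSv4.1 has no such
 * operation, so, as coded below, a FreeStateID of the lock stateid is
 * done instead when NFSHASNFSV4N() is true.)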
*/ APPLESTATIC int nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; u_int32_t *tl; int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; if (NFSHASNFSV4N(nmp)) { /* For NFSv4.1, do a FreeStateID. */ nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, NULL); nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); } else { nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Compound to get the mount pt FH. */ APPLESTATIC int nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2; int error, cnt, len, setnil; u_int32_t *opcntp; nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL); cp = dirpath; cnt = 0; do { setnil = 0; while (*cp == '/') cp++; cp2 = cp; while (*cp2 != '\0' && *cp2 != '/') cp2++; if (*cp2 == '/') { setnil = 1; *cp2 = '\0'; } if (cp2 != cp) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LOOKUP); nfsm_strtom(nd, cp, strlen(cp)); cnt++; } if (setnil) *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); if (NFSHASNFSV4N(nmp)) /* Has a Sequence Op done by nfscl_reqstart(). */ *opcntp = txdr_unsigned(3 + cnt); else *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); tl += (2 + 2 * cnt); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len); if (nd->nd_repstat == 0) nmp->nm_fhsize = len; } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Delegreturn RPC. */ APPLESTATIC int nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p, int syscred) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getacl call. 
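 * (NFSv4 has no separate Getacl operation; as coded below, the ACL is
 * fetched as the NFSATTRBIT_ACL attribute in a Getattr and parsed
 * into *aclp by nfsv4_loadattr().)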
*/ APPLESTATIC int nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_GETACL, vp); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs setacl call. */ APPLESTATIC int nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff); return (error); } /* * nfs setacl call. */ static int nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_SETACL, vp); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); /* Don't care about the pre/postop attributes */ mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } /* * Do the NFSv4.1 Exchange ID. 
*/ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsclds *dsp; struct timespec verstime; int error, len; *dspp = NULL; nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(exchflags); *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* Set the implementation id4 */ *tl = txdr_unsigned(1); (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void) nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, (int)nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); len = fxdr_unsigned(int, *(tl + 7)); if (len < 0 || len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; dsp->nfsclds_servownlen = len; dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; dsp->nfsclds_sess.nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); v41flags = fxdr_unsigned(uint32_t, *tl); if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 && NFSHASPNFSOPT(nmp)) { NFSCL_DEBUG(1, "set PNFS\n"); NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_PNFS; NFSUNLOCKMNT(nmp); dsp->nfsclds_flags |= NFSCLDS_MDS; } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); if (nd->nd_repstat == 0) { mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); nfscl_initsessionslots(&dsp->nfsclds_sess); *dspp = dsp; } else free(dsp, M_NFSCLDS); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Create Session. */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; /* Make sure nm_rsize, nm_wsize is set. */ if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0) nmp->nm_wsize = NFS_MAXBSIZE; nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; *tl++ = txdr_unsigned(sequenceid); crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0) crflags |= NFSV4CRSESS_CONNBACKCHAN; *tl = txdr_unsigned(crflags); /* Fill in fore channel attributes. 
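 * The seven words built below are the XDR channel attributes: header
 * pad size, max request size, max reply size, max response size
 * cached, max operations, max slots and the rdma ird count. The
 * request/reply maxima are nm_wsize/nm_rsize plus NFS_MAXXDR slop;
 * if the server replies with smaller maxima, the loops that parse the
 * reply halve nm_wsize/nm_rsize until they fit.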
*/ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(10000); /* Max request size */ *tl++ = txdr_unsigned(10000); /* Max response size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(4); /* Max operations */ *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ /* Allow AUTH_SYS callbacks as uid, gid == 0. */ *tl++ = txdr_unsigned(1); /* Auth_sys only */ *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ *tl++ = 0; /* Null machine name */ *tl++ = 0; /* Uid == 0 */ *tl++ = 0; /* Gid == 0 */ *tl = 0; /* No additional gids */ nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); crflags = fxdr_unsigned(uint32_t, *tl); if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_SESSPERSIST; NFSUNLOCKMNT(nmp); } /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Skip the header pad size. */ /* Make sure nm_wsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_wsize + NFS_MAXXDR) { if (nmp->nm_wsize > 8096) nmp->nm_wsize /= 2; else break; } /* Make sure nm_rsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_rsize + NFS_MAXXDR) { if (nmp->nm_rsize > 8096) nmp->nm_rsize /= 2; else break; } sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt > 0) NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); /* and the back channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Session. 
*/ int nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); tsep = nfsmnt_mdssession(nmp); bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Client. */ int nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutGet. */ int nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL); nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp, layoutlen, 0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Get Device Info. */ int nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, NFSPROC_T *p) { uint32_t cnt, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct sockaddr_storage ss; struct nfsclds *dsp = NULL, **dspp; struct nfscldevinfo *ndi; int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt; uint8_t stripeindex; *ndip = NULL; ndi = NULL; nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(100000); if (notifybitsp != NULL && *notifybitsp != 0) { *tl = txdr_unsigned(1); /* One word of bits. 
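 * (An XDR bitmap is a word count followed by that many 32 bit mask
 * words; at most one word, carrying *notifybitsp, is sent here.)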
*/ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*notifybitsp); } else *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); if (layouttype != fxdr_unsigned(int, *tl++)) printf("EEK! devinfo layout type not same!\n"); stripecnt = fxdr_unsigned(int, *++tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); if (stripecnt < 1 || stripecnt > 4096) { printf("NFS devinfo stripecnt %d: out of range\n", stripecnt); error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); if (addrcnt < 1 || addrcnt > 128) { printf("NFS devinfo addrcnt %d: out of range\n", addrcnt); error = NFSERR_BADXDR; goto nfsmout; } /* * Now we know how many stripe indices and addresses, so * we can allocate the structure the correct size. */ i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + 1; NFSCL_DEBUG(4, "stripeindices=%d\n", i); ndi = malloc(sizeof(*ndi) + (addrcnt + i) * sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_stripecnt = stripecnt; ndi->nfsdi_addrcnt = addrcnt; /* Fill in the stripe indices. */ for (i = 0; i < stripecnt; i++) { stripeindex = fxdr_unsigned(uint8_t, *tl++); NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); if (stripeindex >= addrcnt) { printf("NFS devinfo stripeindex %d: too big\n", (int)stripeindex); error = NFSERR_BADXDR; goto nfsmout; } nfsfldi_setstripeindex(ndi, i, stripeindex); } /* Now, dissect the server address(es). */ safilled = 0; for (i = 0; i < addrcnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(uint32_t, *tl); if (cnt == 0) { printf("NFS devinfo 0 len addrlist\n"); error = NFSERR_BADXDR; goto nfsmout; } dspp = nfsfldi_addr(ndi, i); pos = arc4random() % cnt; /* Choose one. */ safilled = 0; for (j = 0; j < cnt; j++) { error = nfsv4_getipaddr(nd, &ss, &isudp); if (error != 0 && error != EPERM) { error = NFSERR_BADXDR; goto nfsmout; } if (error == 0 && isudp == 0) { /* * The algorithm is: * - use "pos" entry if it is of the * same af_family or none of them * is of the same af_family * else * - use the first one of the same * af_family. */ if ((safilled == 0 && ss.ss_family == nmp->nm_nam->sa_family) || (j == pos && (safilled == 0 || ss.ss_family == nmp->nm_nam->sa_family)) || (safilled == 1 && ss.ss_family == nmp->nm_nam->sa_family)) { error = nfsrpc_fillsa(nmp, &ss, &dsp, p); if (error == 0) { *dspp = dsp; if (ss.ss_family == nmp->nm_nam->sa_family) safilled = 2; else safilled = 1; } } } } if (safilled == 0) break; } /* And the notify bits. */ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (safilled != 0) { bitcnt = fxdr_unsigned(int, *tl); if (bitcnt > 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (notifybitsp != NULL) *notifybitsp = fxdr_unsigned(uint32_t, *tl); } *ndip = ndi; } else error = EPERM; } if (nd->nd_repstat != 0) error = nd->nd_repstat; nfsmout: if (error != 0 && ndi != NULL) nfscl_freedevinfo(ndi); mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutCommit. 
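 * Note the clamping of lastbyte below (hypothetical numbers): for
 * off == 0 and len == 65536, a lastbyte of 70000 is clamped to 65535,
 * so the last write offset sent to the server always lies within
 * [off, off + len - 1].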
*/ int nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp, int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error, outcnt, i; uint8_t *cp; nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl++ = newnfs_true; if (lastbyte < off) lastbyte = off; else if (lastbyte >= (off + len)) lastbyte = off + len - 1; txdr_hyper(lastbyte, tl); tl += 2; *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl = txdr_unsigned(layoutupdatecnt); if (layoutupdatecnt > 0) { KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES, ("Must be nil for Files Layout")); outcnt = NFSM_RNDUP(layoutupdatecnt); NFSM_BUILD(cp, uint8_t *, outcnt); NFSBCOPY(layp, cp, layoutupdatecnt); cp += layoutupdatecnt; for (i = 0; i < (outcnt - layoutupdatecnt); i++) *cp++ = 0x0; } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutReturn. */ int nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset, uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error, outcnt, i; uint8_t *cp; nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layoutreturn); if (layoutreturn == NFSLAYOUTRETURN_FILE) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid); *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl = txdr_unsigned(layoutcnt); if (layoutcnt > 0) { outcnt = NFSM_RNDUP(layoutcnt); NFSM_BUILD(cp, uint8_t *, outcnt); NFSBCOPY(layp, cp, layoutcnt); cp += layoutcnt; for (i = 0; i < (outcnt - layoutcnt); i++) *cp++ = 0x0; } } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl; } } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Acquire a layout and devinfo, if possible. The caller must have acquired * a reference count on the nfsclclient structure before calling this. 
* Return the layout in lypp with a reference count on it, if successful. */ static int nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error = 0, islocked, layoutlen, recalled, retonclose; nfsv4stateid_t stateid; struct nfsclsession *tsep; *lypp = NULL; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. */ lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, off, &flp, &recalled); islocked = 0; if (lyp == NULL || flp == NULL) { if (recalled != 0) return (EIO); LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) { stateid.seqid = 0; stateid.other[0] = stateidp->other[0]; stateid.other[1] = stateidp->other[1]; stateid.other[2] = stateidp->other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX, (uint64_t)0, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } else { islocked = 1; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, off, UINT64_MAX, (uint64_t)0, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp, &flh, error, NULL, cred, p); if (error == 0) *lypp = lyp; else if (islocked != 0) nfscl_rellayout(lyp, 1); } else *lypp = lyp; return (error); } /* * Do a TCP connection plus exchange id and create session. * If successful, a "struct nfsclds" is linked into the list for the * mount point and a pointer to it is returned. */ static int nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp, struct nfsclds **dspp, NFSPROC_T *p) { struct sockaddr_in *msad, *sad, *ssd; struct sockaddr_in6 *msad6, *sad6, *ssd6; struct nfsclclient *clp; struct nfssockreq *nrp; struct nfsclds *dsp, *tdsp; int error; enum nfsclds_state retv; uint32_t sequenceid; KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("nfsrpc_fillsa: NULL nr_cred")); NFSLOCKCLSTATE(); clp = nmp->nm_clp; NFSUNLOCKCLSTATE(); if (clp == NULL) return (EPERM); if (ssp->ss_family == AF_INET) { ssd = (struct sockaddr_in *)ssp; NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad != NULL && msad->sin_family == AF_INET && ssd->sin_addr.s_addr == msad->sin_addr.s_addr && ssd->sin_port == msad->sin_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); NFSCL_DEBUG(4, "fnd same addr\n"); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad = (struct sockaddr_in *) tdsp->nfsclds_sockp->nr_nam; else msad = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. 
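 * A new connection must be set up for the address. Whether it is
 * really a new server or just another (trunked) address of one
 * already known is decided afterwards by nfscl_getsameserver(), which
 * compares the server owner strings returned by ExchangeID.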
*/ sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO); sad->sin_len = sizeof(*sad); sad->sin_family = AF_INET; sad->sin_port = ssd->sin_port; sad->sin_addr.s_addr = ssd->sin_addr.s_addr; nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad; } else if (ssp->ss_family == AF_INET6) { ssd6 = (struct sockaddr_in6 *)ssp; NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad6 != NULL && msad6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr, &msad6->sin6_addr) && ssd6->sin6_port == msad6->sin6_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad6 = (struct sockaddr_in6 *) tdsp->nfsclds_sockp->nr_nam; else msad6 = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO); sad6->sin6_len = sizeof(*sad6); sad6->sin6_family = AF_INET6; sad6->sin6_port = ssd6->sin6_port; NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr, sizeof(struct in6_addr)); nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad6; } else return (EPERM); nrp->nr_sotype = SOCK_STREAM; mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF); nrp->nr_prog = NFS_PROG; nrp->nr_vers = NFS_VER4; /* * Use the credentials that were used for the mount, which are * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc. * Ref. counting the credentials with crhold() is probably not * necessary, since nm_sockreq.nr_cred won't be crfree()'d until * unmount, but I did it anyhow. */ nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); error = newnfs_connect(nmp, nrp, NULL, p, 0); NFSCL_DEBUG(3, "DS connect=%d\n", error); /* Now, do the exchangeid and create session. */ if (error == 0) { error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); if (error != 0) newnfs_disconnect(nrp); } if (error == 0) { dsp->nfsclds_sockp = nrp; NFSLOCKMNT(nmp); retv = nfscl_getsameserver(nmp, dsp, &tdsp); NFSCL_DEBUG(3, "getsame ret=%d\n", retv); if (retv == NFSDSP_USETHISSESSION) { NFSUNLOCKMNT(nmp); /* * If there is already a session for this server, * use it. */ (void)newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); *dspp = tdsp; return (0); } if (retv == NFSDSP_SEQTHISSESSION) sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid; else sequenceid = dsp->nfsclds_sess.nfsess_sequenceid; NFSUNLOCKMNT(nmp); error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, nrp, sequenceid, 0, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS createsess=%d\n", error); } else { NFSFREECRED(nrp->nr_cred); NFSFREEMUTEX(&nrp->nr_mtx); free(nrp->nr_nam, M_SONAME); free(nrp, M_NFSSOCKREQ); } if (error == 0) { NFSCL_DEBUG(3, "add DS session\n"); /* * Put it at the end of the list. That way the list * is ordered by when the entry was added. This matters * since the one done first is the one that should be * used for sequencing any subsequent create sessions.
*/ NFSLOCKMNT(nmp); TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list); NFSUNLOCKMNT(nmp); *dspp = dsp; } else if (dsp != NULL) { newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); } return (error); } /* * Do the NFSv4.1 Reclaim Complete. */ int nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Initialize the slot tables for a session. */ static void nfscl_initsessionslots(struct nfsclsession *sep) { int i; for (i = 0; i < NFSV4_CBSLOTS; i++) { if (sep->nfsess_cbslots[i].nfssl_reply != NULL) m_freem(sep->nfsess_cbslots[i].nfssl_reply); NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); } for (i = 0; i < 64; i++) sep->nfsess_slotseq[i] = 0; sep->nfsess_slots = 0; } /* * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). */ int nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfscllayout *layp; struct nfscldevinfo *dip; struct nfsclflayout *rflp; nfsv4stateid_t stateid; struct ucred *newcred; uint64_t lastbyte, len, off, oresid, xfer; int eof, error, iolaymode, recalled; void *lckp; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0) return (EIO); /* Now, get a reference cnt on the clientid for this mount. */ if (nfscl_getref(nmp) == 0) return (EIO); /* Find an appropriate stateid. */ newcred = NFSNEWCRED(cred); error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, rwaccess, 1, newcred, p, &stateid, &lckp); if (error != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (error); } /* Search for a layout for this file. */ off = uiop->uio_offset; layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, off, &rflp, &recalled); if (layp == NULL || rflp == NULL) { if (recalled != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (EIO); } if (layp != NULL) { nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); layp = NULL; } /* Try and get a Layout, if it is supported. */ if (rwaccess == NFSV4OPEN_ACCESSWRITE || (np->n_flag & NWRITEOPENED) != 0) iolaymode = NFSLAYOUTIOMODE_RW; else iolaymode = NFSLAYOUTIOMODE_READ; error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, NULL, &stateid, off, &layp, newcred, p); if (error != 0) { NFSLOCKNODE(np); np->n_flag |= NNOLAYOUT; NFSUNLOCKNODE(np); if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); if (layp != NULL) nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } } /* * Loop around finding a layout that works for the first part of * this I/O operation, and then call the function that actually * does the RPC. 
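 * For example (hypothetical sizes): with uio_resid == 1M and a layout
 * segment covering [0, 256K), the first pass clips xfer to 256K and
 * does the DS I/O for it, then loops to find a segment covering
 * offset 256K, and so on until the resid is exhausted, eof is seen or
 * an error occurs.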
*/ eof = 0; len = (uint64_t)uiop->uio_resid; while (len > 0 && error == 0 && eof == 0) { off = uiop->uio_offset; error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); if (error == 0) { oresid = xfer = (uint64_t)uiop->uio_resid; if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) xfer = rflp->nfsfl_end - rflp->nfsfl_off; dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev, rflp->nfsfl_devp); if (dip != NULL) { error = nfscl_doflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, docommit, newcred, p); nfscl_reldevinfo(dip); lastbyte = off + xfer - 1; if (error == 0) { NFSLOCKCLSTATE(); if (lastbyte > layp->nfsly_lastbyte) layp->nfsly_lastbyte = lastbyte; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_OPENMODE && rwaccess == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } } else error = EIO; if (error == 0) len -= (oresid - (uint64_t)uiop->uio_resid); } } if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } /* * Find a file layout that will handle the first bytes of the requested
- * range and return the information from it needed to to the I/O operation.
+ * range and return the information from it needed to the I/O operation.
 */ int nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp) { struct nfsclflayout *flp, *nflp, *rflp; uint32_t rw; rflp = NULL; rw = rwaccess; /* For reading, do the Read list first and then the Write list. */ do { if (rw == NFSV4OPEN_ACCESSREAD) flp = LIST_FIRST(&lyp->nfsly_flayread); else flp = LIST_FIRST(&lyp->nfsly_flayrw); while (flp != NULL) { nflp = LIST_NEXT(flp, nfsfl_list); if (flp->nfsfl_off > off) break; if (flp->nfsfl_end > off && (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) rflp = flp; flp = nflp; } if (rw == NFSV4OPEN_ACCESSREAD) rw = NFSV4OPEN_ACCESSWRITE; else rw = 0; } while (rw != 0); if (rflp != NULL) { /* This one covers the most bytes starting at off. */ *retflpp = rflp; return (0); } return (EIO); } /* * Do I/O using an NFSv4.1 file layout. */ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; int commit_thru_mds, error, stripe_index, stripe_pos; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; np = VTONFS(vp); rel_off = off - flp->nfsfl_patoff; stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % dp->nfsdi_stripecnt; transfer = stripe_unit_size - (rel_off % stripe_unit_size); error = 0; /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { /* Dense layout. */ if (stripe_pos >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_pos]; io_off = (rel_off / (stripe_unit_size * dp->nfsdi_stripecnt)) * stripe_unit_size + rel_off % stripe_unit_size; } else { /* Sparse layout.
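 * In the sparse case the DS file is addressed with the real file
 * offset (io_off is set to off below). Contrast the dense math just
 * above with a worked example (hypothetical sizes): stripe_unit_size
 * == 64K, nfsdi_stripecnt == 4, nfsfl_stripe1 == 0 and rel_off ==
 * 300K give stripe_pos = (300K / 64K) % 4 = 0 and io_off = (300K /
 * 256K) * 64K + 300K % 64K = 108K, since DS 0 stores its every fourth
 * stripe unit densely packed.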
*/ if (flp->nfsfl_fhcnt > 1) { if (stripe_index >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_index]; } else if (flp->nfsfl_fhcnt == 1) fhp = flp->nfsfl_fh[0]; else fhp = np->n_fhp; io_off = off; } if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) { commit_thru_mds = 1; if (docommit != 0) error = EIO; } else { commit_thru_mds = 0; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); } if (docommit != 0) { if (error == 0) error = nfsrpc_commitds(vp, io_off, xfer, *dspp, fhp, cred, p); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, io_off, xfer, fhp, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } if (error == 0) { transfer = stripe_unit_size; stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; len -= xfer; off += xfer; } } return (error); } /* * The actual read RPC done to a DS. */ static int nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; int error, retlen; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; nd->nd_mrep = NULL; nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(io_off, tl); *(tl + 2) = txdr_unsigned(len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error != 0) return (error); if (nd->nd_repstat != 0) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *tl); NFSM_STRSIZ(retlen, len); error = nfsm_mbufuio(nd, uiop, retlen); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * The actual write RPC done to a DS. */ static int nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int error, rlen, commit, committed = NFSWRITE_FILESYNC; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); nd->nd_mrep = NULL; nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(io_off, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); nfsm_uiombuf(nd, uiop, len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. 
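 * (Two recovery details follow: if the RPC failed, the uio fields
 * advanced by nfsm_uiombuf() are rolled back so a retry re-sends the
 * same data, and on a short write (rlen < len) the uio is backed up
 * by the shortfall so only the unwritten tail gets re-sent.)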
*/ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error != 0) return (error); if (nd->nd_repstat != 0) { /* * In case the rpc gets retried, roll * the uio fields changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); error = nd->nd_repstat; } else { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; if (commit_thru_mds != 0) { NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } else { NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * Free up the nfsclds structure. */ void nfscl_freenfsclds(struct nfsclds *dsp) { int i; if (dsp == NULL) return; if (dsp->nfsclds_sockp != NULL) { NFSFREECRED(dsp->nfsclds_sockp->nr_cred); NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); free(dsp->nfsclds_sockp->nr_nam, M_SONAME); free(dsp->nfsclds_sockp, M_NFSSOCKREQ); } NFSFREEMUTEX(&dsp->nfsclds_mtx); NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); for (i = 0; i < NFSV4_CBSLOTS; i++) { if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) m_freem( dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); } free(dsp, M_NFSCLDS); } static enum nfsclds_state nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, struct nfsclds **retdspp) { struct nfsclds *dsp, *cur_dsp; /* * Search the list of nfsclds structures for one with the same * server. */ cur_dsp = NULL; TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && dsp->nfsclds_servownlen != 0 && !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, dsp->nfsclds_servownlen) && dsp->nfsclds_sess.nfsess_defunct == 0) { NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", TAILQ_FIRST(&nmp->nm_sess), dsp, dsp->nfsclds_flags); /* Server major id matches. */ if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { *retdspp = dsp; return (NFSDSP_USETHISSESSION); } /* * Note the first match, so it can be used for * sequencing new sessions. */ if (cur_dsp == NULL) cur_dsp = dsp; } } if (cur_dsp != NULL) { *retdspp = cur_dsp; return (NFSDSP_SEQTHISSESSION); } return (NFSDSP_NOTFOUND); } /* * NFS commit rpc to a NFSv4.1 DS.
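 * As for writes through the MDS, the verifier in the Commit reply
 * below is compared against the one saved from the Writes to this DS;
 * a mismatch means the DS has rebooted and may have lost uncommitted
 * data, so NFSERR_STALEWRITEVERF makes the caller write it again.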
*/ static int nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfssockreq *nrp; int error; nd->nd_mrep = NULL; nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKDS(dsp); if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); error = NFSERR_STALEWRITEVERF; } NFSUNLOCKDS(dsp); } nfsmout: if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Set up the XDR arguments for the LayoutGet operation. */ static void nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layoutlen, int usecurstateid) { uint32_t *tl; NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); *tl++ = newnfs_false; /* Don't signal availability. */ *tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES); *tl++ = txdr_unsigned(iomode); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; txdr_hyper(minlen, tl); tl += 2; if (usecurstateid != 0) { /* Special stateid for Current stateid. */ *tl++ = txdr_unsigned(1); *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = txdr_unsigned(stateidp->seqid); NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; } *tl = txdr_unsigned(layoutlen); } /* * Parse the reply for a successful LayoutGet operation. */ static int nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp) { uint32_t *tl; struct nfsclflayout *flp, *prevflp, *tflp; int cnt, error, gotiomode, fhcnt, nfhlen, i, j; uint64_t retlen; struct nfsfh *nfhp; uint8_t *cp; error = 0; flp = NULL; gotiomode = -1; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); if (*tl++ != 0) *retonclosep = 1; else *retonclosep = 0; stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, (int)stateidp->seqid); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl++; cnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); if (cnt <= 0 || cnt > 10000) { /* Don't accept more than 10000 layouts in reply. */ error = NFSERR_BADXDR; goto nfsmout; } for (i = 0; i < cnt; i++) { /* Dissect all the way to the file handle cnt. */ NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER + 6 * NFSX_UNSIGNED + NFSX_V4DEVICEID); fhcnt = fxdr_unsigned(int, *(tl + 11 + NFSX_V4DEVICEID / NFSX_UNSIGNED)); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 0 || fhcnt > 100) { /* Don't accept more than 100 file handles. 
*/ error = NFSERR_BADXDR; goto nfsmout; } if (fhcnt > 1) flp = malloc(sizeof(*flp) + (fhcnt - 1) * sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK); else flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = 0; flp->nfsfl_fhcnt = 0; flp->nfsfl_devp = NULL; flp->nfsfl_off = fxdr_hyper(tl); tl += 2; retlen = fxdr_hyper(tl); tl += 2; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = fxdr_unsigned(int, *tl++); if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; if (fxdr_unsigned(int, *tl++) != NFSLAYOUT_NFSV4_1_FILES) { printf("NFSv4.1: got non-files layout\n"); error = NFSERR_BADXDR; goto nfsmout; } NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; if (fxdr_unsigned(int, *tl) != fhcnt) { printf("EEK! bad fhcnt\n"); error = NFSERR_BADXDR; goto nfsmout; } for (j = 0; j < fhcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_fh[j] = nfhp; flp->nfsfl_fhcnt++; nfhp->nfh_len = nfhlen; NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); } if (flp->nfsfl_iomode == gotiomode) { /* Keep the list in increasing offset order. */ tflp = LIST_FIRST(flhp); prevflp = NULL; while (tflp != NULL && tflp->nfsfl_off < flp->nfsfl_off) { prevflp = tflp; tflp = LIST_NEXT(tflp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(flhp, flp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, flp, nfsfl_list); } else { printf("nfscl_layoutget(): got wrong iomode\n"); nfscl_freeflayout(flp); } flp = NULL; } nfsmout: if (error != 0 && flp != NULL) nfscl_freeflayout(flp); return (error); } /* * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(), * so that it does both an Open and a Layoutget. */ static int nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error, islocked, layoutlen, recalled, retonclose, usecurstateid; int laystat; nfsv4stateid_t stateid; struct nfsclsession *tsep; error = 0; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. 
*/ lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp, &recalled); NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp); if (lyp == NULL) islocked = 0; else if (flp != NULL) islocked = 1; else islocked = 2; if ((lyp == NULL || flp == NULL) && recalled == 0) { LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) usecurstateid = 1; else { usecurstateid = 0; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; } error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, &stateid, usecurstateid, layoutlen, &retonclose, &flh, &laystat, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n", laystat, error); laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen, &stateid, retonclose, NULL, &lyp, &flh, laystat, &islocked, cred, p); } else error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0); if (islocked == 2) nfscl_rellayout(lyp, 1); else if (islocked == 1) nfscl_rellayout(lyp, 0); return (error); } /* * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS * enabled, only for the CLAIM_NULL case. All other NFSv4 Opens are * handled by nfsrpc_openrpc(). * For the case where op == NULL, dvp is the directory. When op != NULL, it * can be NULL. */ static int nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, nfsv4stateid_t *stateidp, int usecurstateid, int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp, int *laystatp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *ndp = NULL; struct nfsvattr nfsva; struct nfsclsession *tsep; uint32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby, iomode; *dpp = NULL; *laystatp = ENXIO; nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); nfsm_strtom(nd, name, namelen); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LAYOUTGET); if ((mode & NFSV4OPEN_ACCESSWRITE) != 0) iomode = NFSLAYOUTIOMODE_RW; else iomode = NFSLAYOUTIOMODE_READ; nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp, layoutlen, usecurstateid); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat != 0) *laystatp = nd->nd_repstat; 
if ((nd->nd_flag & ND_NOMOREDATA) == 0) { /* ND_NOMOREDATA will be set if the Open operation failed. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); ndp = malloc(sizeof(struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else ndp->nfsdl_flags = NFSCLDL_READ; if (ret != 0) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error != 0) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); /* If the 2nd element == NFS_OK, the Getattr succeeded. */ if (*++tl == 0) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error != 0) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; *dpp = ndp; ndp = NULL; } /* * At this point, the Open has succeeded, so set * nd_repstat = NFS_OK. If the Layoutget failed, * this function just won't return a layout. */ if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *++tl); if (*laystatp == 0) { error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; } } else nd->nd_repstat = 0; /* Return 0 for Open. */ } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: free(ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Similar to nfsrpc_createv4(), but also does the LayoutGet operation. * Used only for mounts with pNFS enabled.
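 * As a rough guide to the reply parsing below, the compound built by
 * this function is approximately: PutFH(dir), Open(create), SaveFH,
 * GetFH, Getattr, PutFH(dir), Getattr, RestoreFH, LayoutGet.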
*/ static int nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp, int usecurstateid, int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp, int *laystatp) { uint32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; struct nfsclsession *tsep; nfsattrbit_t attrbits; nfsv4stateid_t stateid; uint32_t rflags; struct nfsmount *nmp; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *laystatp = ENXIO; *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if ((fmode & O_EXCL) != 0) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes, plus save the FH. */ NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_SAVEFH); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. 
*/ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH); *tl = txdr_unsigned(NFSV4OP_LAYOUTGET); nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp, layoutlen, usecurstateid); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error != 0) return (error); NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat, error); if (nd->nd_repstat != 0) *laystatp = nd->nd_repstat; NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if ((nd->nd_flag & ND_NOMOREDATA) == 0) { NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret != 0) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error != 0) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } /* Now, we should have the status for the SaveFH. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl == 0) { NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n"); /* * Now, process the GetFH and Getattr for the newly * created file. nfscl_mtofh() will set * ND_NOMOREDATA if these weren't successful. */ error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error); if (error != 0) goto nfsmout; } else nd->nd_flag |= ND_NOMOREDATA; /* Now we have the PutFH and Getattr for the directory. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; else { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } } if ((nd->nd_flag & ND_NOMOREDATA) == 0) { /* Load the directory attributes. 
*/ error = nfsm_loadattr(nd, dnap); NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error); if (error != 0) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp != 0) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error != 0) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; /* Now, handle the RestoreFH and LayoutGet. */ if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *(tl + 3)); if (*laystatp == 0) { error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; } NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n", error); } else nd->nd_repstat = 0; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error); if (error == 0) *dpp = dp; else free(dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Similar to nfsrpc_getopenlayout(), except that it is used for the Create case. */ static int nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { struct nfscllayout *lyp; struct nfsclflayouthead flh; struct nfsfh *nfhp; struct nfsclsession *tsep; struct nfsmount *nmp; nfsv4stateid_t stateid; int error, layoutlen, retonclose, laystat; error = 0; nmp = VFSTONFS(dvp->v_mount); LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode, owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, unlockedp, &stateid, 1, layoutlen, &retonclose, &flh, &laystat); NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n", laystat, error); lyp = NULL; if (laystat == 0) { nfhp = *nfhpp; laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh, laystat, NULL, cred, p); } else laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid, retonclose, NULL, &lyp, &flh, laystat, NULL, cred, p); if (laystat == 0) nfscl_rellayout(lyp, 0); return (error); } /* * Process the results of a layoutget() operation. */ static int nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp, int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit, struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p) { struct nfsclflayout *tflp; struct nfscldevinfo *dip; if (laystat == NFSERR_UNKNLAYOUTTYPE) { /* Disable PNFS.
*/ NFSCL_DEBUG(1, "disable PNFS\n"); NFSLOCKMNT(nmp); nmp->nm_state &= ~NFSSTA_PNFS; NFSUNLOCKMNT(nmp); } if (laystat == 0) { NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n"); LIST_FOREACH(tflp, flhp, nfsfl_list) { laystat = nfscl_adddevinfo(nmp, NULL, tflp); NFSCL_DEBUG(4, "aft adddev=%d\n", laystat); if (laystat != 0) { laystat = nfsrpc_getdeviceinfo(nmp, tflp->nfsfl_dev, NFSLAYOUT_NFSV4_1_FILES, notifybit, &dip, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n", laystat); if (laystat != 0) break; laystat = nfscl_adddevinfo(nmp, dip, tflp); if (laystat != 0) printf("getlayout: cannot add\n"); } } } if (laystat == 0) { /* * nfscl_layout() always returns with the nfsly_lock * set to a refcnt (shared lock). * Passing in dvp is sufficient, since it is only used to * get the fsid for the file system. */ laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp, retonclose, flhp, lypp, cred, p); NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n", laystat); if (laystat == 0 && islockedp != NULL) *islockedp = 1; } return (laystat); } Index: stable/11/sys/i386/i386/support.s =================================================================== --- stable/11/sys/i386/i386/support.s (revision 330445) +++ stable/11/sys/i386/i386/support.s (revision 330446) @@ -1,828 +1,828 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include "assym.s" #define IDXSHIFT 10 .text /* * bcopy family * void bzero(void *buf, u_int len) */ ENTRY(bzero) pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret END(bzero) ENTRY(sse2_pagezero) pushl %ebx movl 8(%esp),%ecx movl %ecx,%eax addl $4096,%eax xor %ebx,%ebx 1: movnti %ebx,(%ecx) addl $4,%ecx cmpl %ecx,%eax jne 1b sfence popl %ebx ret END(sse2_pagezero) ENTRY(i686_pagezero) pushl %edi pushl %ebx movl 12(%esp),%edi movl $1024,%ecx ALIGN_TEXT 1: xorl %eax,%eax repe scasl jnz 2f popl %ebx popl %edi ret ALIGN_TEXT 2: incl %ecx subl $4,%edi movl %ecx,%edx cmpl $16,%ecx jge 3f movl %edi,%ebx andl $0x3f,%ebx shrl %ebx shrl %ebx movl $16,%ecx subl %ebx,%ecx 3: subl %ecx,%edx rep stosl movl %edx,%ecx testl %edx,%edx jnz 1b popl %ebx popl %edi ret END(i686_pagezero) /* fillw(pat, base, cnt) */ ENTRY(fillw) pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx rep stosw popl %edi ret END(fillw) ENTRY(bcopyb) pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi decl %edi decl %esi std rep movsb popl %edi popl %esi cld ret END(bcopyb) /* * bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) pushl %ebp movl %esp,%ebp pushl %esi pushl %edi movl 8(%ebp),%esi movl 12(%ebp),%edi movl 16(%ebp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 16(%ebp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi popl %ebp ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi decl %edi decl %esi andl $3,%ecx /* any fractional bytes? */ std rep movsb movl 16(%ebp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld popl %ebp ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %esi popl %edi ret END(memcpy) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. 
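 *
 * Sketched in C-like pseudocode (the fault recovery really happens in
 * the assembly below, via the trap handler):
 *
 *	curpcb->pcb_onfault = fault_handler;
 *	... touch the user address; a protection fault resumes at
 *	    fault_handler instead of retrying ...
 *	curpcb->pcb_onfault = NULL;	(success, return 0)
 *    fault_handler:
 *	curpcb->pcb_onfault = NULL;	(failure, return EFAULT)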
*/ /* * copyout(from_kernel, to_user, len) - MP SAFE */ ENTRY(copyout) movl PCPU(CURPCB),%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx testl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. This check is essential * because it prevents usermode from writing into the kernel. We do * not verify anywhere else that the user did not specify a rogue * address. */ /* * First, prevent address wrapping. */ movl %edi,%eax addl %ebx,%eax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ cmpl $VM_MAXUSER_ADDRESS,%eax ja copyout_fault /* bcopy(%esi, %edi, %ebx) */ movl %ebx,%ecx shrl $2,%ecx rep movsl movb %bl,%cl andb $3,%cl rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyout) ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * copyin(from_user, to_kernel, len) - MP SAFE */ ENTRY(copyin) movl PCPU(CURPCB),%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ /* * make sure address is valid */ movl %esi,%edx addl %ecx,%edx jc copyin_fault cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault movb %cl,%al shrl $2,%ecx /* copy longword-wise */ rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ rep movsb popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyin) ALIGN_TEXT copyin_fault: popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * casueword. Compare and set user word. Returns -1 on fault, * 0 on non-faulting access. The current value is in *oldp. */ ALTENTRY(casueword32) ENTRY(casueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* dst */ movl 8(%esp),%eax /* old */ movl 16(%esp),%ecx /* new */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault #ifdef SMP lock #endif cmpxchgl %ecx,(%edx) /* Compare and set. */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 12(%esp),%edx /* oldp */ movl %eax,(%edx) xorl %eax,%eax ret END(casueword32) END(casueword) /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ ALTENTRY(fueword32) ENTRY(fueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* from */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) movl 8(%esp),%edx movl %eax,(%edx) xorl %eax,%eax ret END(fueword32) END(fueword) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. 
*/ ALTENTRY(suswintr) ENTRY(fuswintr) movl $-1,%eax ret END(suswintr) END(fuswintr) ENTRY(fuword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja fusufault movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fuword16) ENTRY(fubyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja fusufault movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fubyte) ALIGN_TEXT fusufault: movl PCPU(CURPCB),%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. * All these functions are MPSAFE. */ ALTENTRY(suword32) ENTRY(suword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ ja fusufault movl 8(%esp),%eax movl %eax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret END(suword32) END(suword) ENTRY(suword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ ja fusufault movw 8(%esp),%ax movw %ax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(suword16) ENTRY(subyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ ja fusufault movb 8(%esp),%al movb %al,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * - * copy a string from from to to, stop when a 0 character is reached. + * copy a string from 'from' to 'to', stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
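 *
 * A rough C equivalent of the loop below (with the EFAULT/bounds
 * handling done via pcb_onfault omitted):
 *
 *	for (i = 0; i < maxlen; i++) {
 *		to[i] = from[i];
 *		if (from[i] == '\0') {
 *			if (lencopied != NULL)
 *				*lencopied = i + 1;
 *			return (0);
 *		}
 *	}
 *	return (ENAMETOOLONG);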
*/ ENTRY(copyinstr) pushl %esi pushl %edi movl PCPU(CURPCB),%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ movl $VM_MAXUSER_ADDRESS,%eax /* make sure 'from' is within bounds */ subl %esi,%eax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpl %edx,%eax jae 1f movl %eax,%edx movl %eax,20(%esp) 1: incl %edx 2: decl %edx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp cpystrflt_x 3: /* edx is zero - return ENAMETOOLONG or EFAULT */ cmpl $VM_MAXUSER_ADDRESS,%esi jae cpystrflt 4: movl $ENAMETOOLONG,%eax jmp cpystrflt_x cpystrflt: movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 1f movl %ecx,(%edx) 1: popl %edi popl %esi ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret END(copystr) ENTRY(bcmp) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%edx movl %edx,%ecx shrl $2,%ecx repe cmpsl jne 1f movl %edx,%ecx andl $3,%ecx repe cmpsb 1: setne %al movsbl %al,%eax popl %esi popl %edi ret END(bcmp) /* * Handling of special 386 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%gs movl %eax,%ss movl $KPSEL,%eax movl %eax,%fs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax movl $KCSEL,4(%esp) MEXITCOUNT lret END(lgdt) /* ssdtosd(*ssdp,*sdp) */ ENTRY(ssdtosd) pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret END(ssdtosd) /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax movl %eax,%dr7 /* disable all breakpoints first */ movl %eax,%dr0 movl %eax,%dr1 movl %eax,%dr2 movl %eax,%dr3 movl %eax,%dr6 ret END(reset_dbregs) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl 
%edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx rdmsr movl 8(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx movl 8(%esp),%eax movl 12(%esp),%edx wrmsr xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret ENTRY(handle_ibrs_entry) ret END(handle_ibrs_entry) ENTRY(handle_ibrs_exit) ret END(handle_ibrs_exit) Index: stable/11/sys/kern/vfs_subr.c =================================================================== --- stable/11/sys/kern/vfs_subr.c (revision 330445) +++ stable/11/sys/kern/vfs_subr.c (revision 330446) @@ -1,5404 +1,5404 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void v_init_counters(struct vnode *); static void v_incr_usecount(struct vnode *); static void v_incr_usecount_locked(struct vnode *); static void v_incr_devcount(struct vnode *); static void v_decr_devcount(struct vnode *); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased in vdropl() for VI_DOOMED vnode. */ static unsigned long numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); static counter_u64_t vnodes_created; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, "Number of vnodes created by getnewvnode"); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of vnodes that are ready for recycling. */ static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* * "Free" vnode target. Free vnodes are rarely completely free, but are * just ones that are cheap to recycle. Usually they are for files which * have been stat'd but not read; these usually have inode and namecache * data attached to them. This target is the preferred minimum size of a * sub-cache consisting mostly of such files. The system balances the size * of this sub-cache with its complement to try to prevent either from * thrashing while the other is relatively inactive. The targets express * a preference for the best balance. * * "Above" this target there are 2 further targets (watermarks) related * to recyling of free vnodes. In the best-operating case, the cache is * exactly full, the free list has size between vlowat and vhiwat above the * free target, and recycling from it and normal use maintains this state. * Sometimes the free list is below vlowat or even empty, but this state * is even better for immediate use provided the cache is not full. * Otherwise, vnlru_proc() runs to reclaim enough vnodes (usually non-free * ones) to reach one of these states. The watermarks are currently hard- * coded as 4% and 9% of the available space higher. These and the default * of 25% for wantfreevnodes are too large if the memory size is large. 
* E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim * whenever vnlru_proc() becomes active. */ static u_long wantfreevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Target for minimum number of \"free\" vnodes"); static u_long freevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "Number of \"free\" vnodes"); static counter_u64_t recycles_count; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, "Number of vnodes recycled to meet vnode cache targets"); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. */ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "Number of calls to reassignbuf"); static counter_u64_t free_owe_inact; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, free_owe_inact, CTLFLAG_RD, &free_owe_inact, "Number of times free vnodes kept on active list due to VFS " "owing inactivation"); /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_free_list * numvnodes * freevnodes */ static struct mtx vnode_free_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; static uma_zone_t buf_trie_zone; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, metadata written to block * devices is delayed only about half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third of the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed.
* Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* Target for maximum number of vnodes. */ int desiredvnodes; static int gapvnodes; /* gap between wanted and desired */ static int vhiwat; /* enough extras after expansion */ static int vlowat; /* minimal extras before expansion */ static int vstir; /* nonzero to stir non-free vnodes */ static volatile int vsmalltrigger = 8; /* pref to keep if > this many pages */ static int sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS) { int error, old_desiredvnodes; old_desiredvnodes = desiredvnodes; if ((error = sysctl_handle_int(oidp, arg1, arg2, req)) != 0) return (error); if (old_desiredvnodes != desiredvnodes) { wantfreevnodes = desiredvnodes / 4; /* XXX locking seems to be incomplete. */ vfs_hash_changesize(desiredvnodes); cache_changesize(desiredvnodes); } return (0); } SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &desiredvnodes, 0, sysctl_update_desiredvnodes, "I", "Target for maximum number of vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ static int vnsz2log; /* * Support for the bufobj clean & dirty pctrie. */ static void * buf_trie_alloc(struct pctrie *ptree) { return uma_zalloc(buf_trie_zone, M_NOWAIT); } static void buf_trie_free(struct pctrie *ptree, void *node) { uma_zfree(buf_trie_zone, node); } PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); /* * Initialize the vnode management data structures. 
* * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size - * grows, the ratio of the memory size in KB to to vnodes approaches 64:1. + * grows, the ratio of the memory size in KB to vnodes approaches 64:1. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512 * 1024 * 1024 / 64) /* 8M */ #endif /* * Initialize a vnode as it first enters the zone. */ static int vnode_init(void *mem, int size, int flags) { struct vnode *vp; struct bufobj *bo; vp = mem; bzero(vp, size); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems opt-in. */ lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE); /* * Initialize bufobj. */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; rw_init(BO_LOCKPTR(bo), "bufobj interlock"); bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Initialize rangelocks. */ rangelock_init(&vp->v_rl); return (0); } /* * Free a vnode when it is cleared from the zone. */ static void vnode_fini(void *mem, int size) { struct vnode *vp; struct bufobj *bo; vp = mem; rangelock_destroy(&vp->v_rl); lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); bo = &vp->v_bufobj; rw_destroy(BO_LOCKPTR(bo)); } /* * Provide the size of NFS nclnode and NFS fh for calculation of the * vnode memory consumption. The size is specified directly to * eliminate dependency on NFS-private header. * * Other filesystems may use bigger or smaller (like UFS and ZFS) * private inode data, but the NFS-based estimation is ample enough. * Still, we care about differences in the size between 64- and 32-bit * platforms. * * Namecache structure size is heuristically * sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1. */ #ifdef _LP64 #define NFS_NCLNODE_SZ (528 + 64) #define NC_SZ 148 #else #define NFS_NCLNODE_SZ (360 + 32) #define NC_SZ 92 #endif static void vntblinit(void *dummy __unused) { u_int i; int physvnodes, virtvnodes; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to the physical * memory size is 1:16 until desiredvnodes exceeds 98,304. * Thereafter, the * marginal ratio of desiredvnodes to the physical memory size is * 1:64. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * must not exceed 1/10th of the kernel's heap size. 
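 * As a worked example (assuming 4KB pages, so that pgtok() yields KB):
 * with 4GB of physical memory, pgtok(v_page_count) is about 4,194,304,
 * giving physvnodes of roughly maxproc + 4194304/64 + 3 * 1572864/64,
 * i.e. about maxproc + 65,536 + 73,728, before the virtvnodes and
 * MAXVNODES_MAX limits are applied.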
*/ physvnodes = maxproc + pgtok(vm_cnt.v_page_count) / 64 + 3 * min(98304 * 16, pgtok(vm_cnt.v_page_count)) / 64; virtvnodes = vm_kmem_size / (10 * (sizeof(struct vm_object) + sizeof(struct vnode) + NC_SZ * ncsizefactor + NFS_NCLNODE_SZ)); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %d -> %d\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_free_list); mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Preallocate enough nodes to support one-per buf so that * we can not fail an insert. reassignbuf() callers can not * tolerate the insertion failure. */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); uma_prealloc(buf_trie_zone, nbuf); vnodes_created = counter_u64_alloc(M_WAITOK); recycles_count = counter_u64_alloc(M_WAITOK); free_owe_inact = counter_u64_alloc(M_WAITOK); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); for (i = 1; i <= sizeof(struct vnode); i <<= 1) vnsz2log++; vnsz2log--; } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. * * vfs_busy() is a custom lock, it can block the caller. * vfs_busy() only sleeps if the unmount is active on the mount point. * For a mountpoint mp, vfs_busy-enforced lock is before lock of any * vnode belonging to mp. * * Lookup uses vfs_busy() to traverse mount points. * root fs var fs * / vnode lock A / vnode lock (/var) D * /var vnode lock B /log vnode lock(/var/log) E * vfs_busy lock C vfs_busy lock F * * Within each file system, the lock order is C->A->B and F->D->E. * * When traversing across mounts, the system follows that lock order: * * C->A->B * | * +->F->D->E * * The lookup() process for namei("/var") illustrates the process: * VOP_LOOKUP() obtains B while A is held * vfs_busy() obtains a shared lock on F while A and B are held * vput() releases lock on B * vput() releases lock on A * VFS_ROOT() obtains lock on D while shared lock on F is held * vfs_unbusy() releases shared lock on F * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A. * Attempt to lock A (instead of vp_crossmp) while D is held would * violate the global order, causing deadlocks. * * dounmount() locks B while F is drained. */ int vfs_busy(struct mount *mp, int flags) { MPASS((flags & ~MBF_MASK) == 0); CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags); MNT_ILOCK(mp); MNT_REF(mp); /* * If mount point is currently being unmounted, sleep until the * mount point fate is decided. If thread doing the unmounting fails, * it will clear MNTK_UNMOUNT flag before waking us up, indicating * that this mount point has survived the unmount attempt and vfs_busy * should retry. 
Otherwise the unmounter thread will set MNTK_REFEXPIRE * flag in addition to MNTK_UNMOUNT, indicating that the mount point is * about to be really destroyed. vfs_busy needs to release its * reference on the mount point in this case and return with ENOENT, * telling the caller that the mount it tried to busy is no longer * valid. */ while (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) { MNT_REL(mp); MNT_IUNLOCK(mp); CTR1(KTR_VFS, "%s: failed busying before sleeping", __func__); return (ENOENT); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_kern_flag |= MNTK_MWAIT; msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0); if (flags & MBF_MNTLSTLOCK) mtx_lock(&mountlist_mtx); MNT_ILOCK(mp); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_lockref++; MNT_IUNLOCK(mp); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REL(mp); KASSERT(mp->mnt_lockref > 0, ("negative mnt_lockref")); mp->mnt_lockref--; if (mp->mnt_lockref == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) { MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT); CTR1(KTR_VFS, "%s: waking up waiters", __func__); mp->mnt_kern_flag &= ~MNTK_DRAINING; wakeup(&mp->mnt_lockref); } MNT_IUNLOCK(mp); } /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); return ((struct mount *) 0); } /* * Lookup a mount point by filesystem identifier, busying it before * returning. * * To avoid congestion on mountlist_mtx, implement a simple direct-mapped * cache for popular filesystem identifiers. The cache is lockless, using * the fact that struct mount's are never freed. In the worst case we may * get a pointer to an unmounted or even a different filesystem, so we * have to check what we got, and go the slow way if so. */ struct mount * vfs_busyfs(fsid_t *fsid) { #define FSID_CACHE_SIZE 256 typedef struct mount * volatile vmp_t; static vmp_t cache[FSID_CACHE_SIZE]; struct mount *mp; int error; uint32_t hash; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); hash = fsid->val[0] ^ fsid->val[1]; hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1); mp = cache[hash]; if (mp == NULL || mp->mnt_stat.f_fsid.val[0] != fsid->val[0] || mp->mnt_stat.f_fsid.val[1] != fsid->val[1]) goto slow; if (vfs_busy(mp, 0) != 0) { cache[hash] = NULL; goto slow; } if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); else vfs_unbusy(mp); slow: mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { cache[hash] = NULL; mtx_unlock(&mountlist_mtx); return (NULL); } cache[hash] = mp; return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options.
*/ int vfs_suser(struct mount *mp, struct thread *td) { int error; /* * If the thread is jailed, but this is not a jail-friendly file * system, deny immediately. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_JAIL) && jailed(td->td_ucred)) return (EPERM); /* * If the file system was mounted outside the jail of the calling * thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) to make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_USEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, &timestamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to us, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp.
*/ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL. */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * The buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ static int vlrureclaim(struct mount *mp, int reclaim_nc_src, int trigger) { struct vnode *vp; int count, done, target; done = 0; vn_start_write(NULL, &mp, V_WAIT); MNT_ILOCK(mp); count = mp->mnt_nvnodelistsize; target = count * (int64_t)gapvnodes / imax(desiredvnodes, 1); target = target / 10 + 1; while (count != 0 && done < target) { vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp == NULL) break; /* * XXX LRU is completely broken for non-free vnodes. First * by calling here in mountpoint order, then by moving * unselected vnodes to the end here, and most grossly by * removing the vlruvp() function that was supposed to * maintain the order. (This function was born broken * since syncer problems prevented it from doing anything.) The * order is closer to LRC (C = Created). * * LRU reclaiming of vnodes seems to have last worked in * FreeBSD-3 where LRU wasn't mentioned under any spelling. * Then there was no hold count, and inactive vnodes were * simply put on the free list in LRU order. The separate * lists also break LRU. We prefer to reclaim from the * free list for technical reasons. This tends to thrash * the free list to keep very unrecently used held vnodes. * The problem is mitigated by keeping the free list large. */ TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); --count; if (!VI_TRYLOCK(vp)) goto next_iter; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. * Also skip free vnodes. We are trying to make space * to expand the free list, not reduce it.
*/ if (vp->v_usecount || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || ((vp->v_iflag & VI_FREE) != 0) || (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } MNT_IUNLOCK(mp); vholdl(vp); if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) { vdrop(vp); goto next_iter_mntunlocked; } VI_LOCK(vp); /* * v_usecount may have been bumped after VOP_LOCK() dropped * the vnode interlock and before it was locked again. * * It is not necessary to recheck VI_DOOMED because it can * only be set by another thread that holds both the vnode * lock and vnode interlock. If another thread has the * vnode lock before we get to VOP_LOCK() and obtains the * vnode interlock after VOP_LOCK() drops the vnode * interlock, the other thread will be unable to drop the * vnode lock before our VOP_LOCK() call fails. */ if (vp->v_usecount || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || (vp->v_iflag & VI_FREE) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp, LK_INTERLOCK); vdrop(vp); goto next_iter_mntunlocked; } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); counter_u64_add(recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp, 0); vdropl(vp); done++; next_iter_mntunlocked: if (!should_yield()) goto relock_mnt; goto yield; next_iter: if (!should_yield()) continue; MNT_IUNLOCK(mp); yield: kern_yield(PRI_USER); relock_mnt: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); vn_finished_write(mp); return done; } static int max_vnlru_free = 10000; /* limit on vnode free requests per call */ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free, 0, "limit on vnode free requests per call to the vnlru_free routine"); /* * Attempt to reduce the free list by the requested amount. */ static void vnlru_free_locked(int count, struct vfsops *mnt_op) { struct vnode *vp; struct mount *mp; mtx_assert(&vnode_free_list_mtx, MA_OWNED); if (count > max_vnlru_free) count = max_vnlru_free; for (; count > 0; count--) { vp = TAILQ_FIRST(&vnode_free_list); /* * The list can be modified while the free_list_mtx * has been dropped and vp could be NULL here. */ if (!vp) break; VNASSERT(vp->v_op != NULL, vp, ("vnlru_free: vnode already reclaimed.")); KASSERT((vp->v_iflag & VI_FREE) != 0, ("Removing vnode not on freelist")); KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Mangling active vnode")); TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); /* * Don't recycle if our vnode is from a different type * of mount point. Note that mp is type-safe, the * check does not reach an unmapped address even if the * vnode is reclaimed. * Don't recycle if we can't get the interlock without * blocking. */ if ((mnt_op != NULL && (mp = vp->v_mount) != NULL && mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); continue; } VNASSERT((vp->v_iflag & VI_FREE) != 0 && vp->v_holdcnt == 0, vp, ("vp inconsistent on freelist")); /* * The clear of VI_FREE prevents activation of the * vnode. There is no sense in putting the vnode on * the mount point active list, only to remove it * later during recycling. Inline the relevant part * of vholdl(), to avoid triggering assertions or * activating. */ freevnodes--; vp->v_iflag &= ~VI_FREE; refcount_acquire(&vp->v_holdcnt); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); vtryrecycle(vp); /* * If the recycle succeeded, this vdrop will actually free * the vnode.
If not, it will simply place it back on * the free list. */ vdrop(vp); mtx_lock(&vnode_free_list_mtx); } } void vnlru_free(int count, struct vfsops *mnt_op) { mtx_lock(&vnode_free_list_mtx); vnlru_free_locked(count, mnt_op); mtx_unlock(&vnode_free_list_mtx); } /* XXX some names and initialization are bad for limits and watermarks. */ static int vspace(void) { int space; gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ vlowat = vhiwat / 2; if (numvnodes > desiredvnodes) return (0); space = desiredvnodes - numvnodes; if (freevnodes > wantfreevnodes) space += freevnodes - wantfreevnodes; return (space); } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrureclaim() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; static void vnlru_proc(void) { struct mount *mp, *nmp; unsigned long ofreevnodes, onumvnodes; int done, force, reclaim_nc_src, trigger, usevnodes; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, SHUTDOWN_PRI_FIRST); force = 0; for (;;) { kproc_suspend_check(vnlruproc); mtx_lock(&vnode_free_list_mtx); /* * If numvnodes is too large (due to desiredvnodes being * adjusted using its sysctl, or emergency growth), first * try to reduce it by discarding from the free list. */ if (numvnodes > desiredvnodes && freevnodes > 0) vnlru_free_locked(ulmin(numvnodes - desiredvnodes, freevnodes), NULL); /* * Sleep if the vnode cache is in a good state. This is * when it is not over-full and has space for about a 4% * or 9% expansion (by growing its size or inexcessively * reducing its free list). Otherwise, try to reclaim * space for a 10% expansion. */ if (vstir && force == 0) { force = 1; vstir = 0; } if (vspace() >= vlowat && force == 0) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_free_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } mtx_unlock(&vnode_free_list_mtx); done = 0; ofreevnodes = freevnodes; onumvnodes = numvnodes; /* * Calculate parameters for recycling. These are the same * throughout the loop to give some semblance of fairness. * The trigger point is to avoid recycling vnodes with lots * of resident pages. We aren't trying to free memory; we * are trying to recycle or at least free vnodes. */ if (numvnodes <= desiredvnodes) usevnodes = numvnodes - freevnodes; else usevnodes = numvnodes; if (usevnodes <= 0) usevnodes = 1; /* * The trigger value is chosen to give a conservatively * large value to ensure that it alone doesn't prevent * making progress. The value can easily be so large that * it is effectively infinite in some congested and * misconfigured cases, and this is necessary. Normally * it is about 8 to 100 (pages), which is quite large.
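* * A worked example with illustrative numbers: on a machine with vm_cnt.v_page_count == 1048576 and usevnodes == 65536, the computation below yields trigger == 32 resident pages per vnode.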
*/ trigger = vm_cnt.v_page_count * 2 / usevnodes; if (force < 2) trigger = vsmalltrigger; reclaim_nc_src = force >= 3; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } done += vlrureclaim(mp, reclaim_nc_src, trigger); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes) uma_reclaim(); if (done == 0) { if (force == 0 || force == 1) { force = 2; continue; } if (force == 2) { force = 3; continue; } force = 0; vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); } else kern_yield(PRI_USER); /* * After becoming active to expand above low water, keep * active until above high water. */ force = vspace() < vhiwat; } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp); /* * Routines having to do with the management of the vnode table. */ /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). */ static int vtryrecycle(struct vnode *vp) { struct mount *vnmp; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may be found and locked via some other list, if so we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { CTR2(KTR_VFS, "%s: impossible to recycle, vp %p lock is already held", __func__, vp); return (EWOULDBLOCK); } /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp, 0); CTR2(KTR_VFS, "%s: impossible to recycle, cannot start the write for %p", __func__, vp); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if ((vp->v_iflag & VI_DOOMED) == 0) { counter_u64_add(recycles_count, 1); vgonel(vp); } VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); return (0); } static void vcheckspace(void) { if (vspace() < vlowat && vnlruproc_sig == 0) { vnlruproc_sig = 1; wakeup(vnlruproc); } } /* * Wait if necessary for space for a new vnode. */ static int getnewvnode_wait(int suspended) { mtx_assert(&vnode_free_list_mtx, MA_OWNED); if (numvnodes >= desiredvnodes) { if (suspended) { /* * The file system is being suspended. We cannot * risk a deadlock here, so allow allocation of * another vnode even if this would give too many. */ return (0); } if (vnlruproc_sig == 0) { vnlruproc_sig = 1; /* avoid unnecessary wakeups */ wakeup(vnlruproc); } msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, "vlruwk", hz); } /* Post-adjust like the pre-adjust in getnewvnode(). */ if (numvnodes + 1 > desiredvnodes && freevnodes > 1) vnlru_free_locked(1, NULL); return (numvnodes >= desiredvnodes ? ENFILE : 0); } /* * This hack is fragile, and probably not needed any more now that the * watermark handling works.
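* * A minimal usage sketch (illustrative; the tag and vop vector are hypothetical): * * getnewvnode_reserve(1); * ... allocate structures and acquire locks that must not be held across a sleep in getnewvnode() ... * error = getnewvnode("foofs", mp, &foofs_vnodeops, &vp); * ... * getnewvnode_drop_reserve();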
*/ void getnewvnode_reserve(u_int count) { struct thread *td; /* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */ /* XXX no longer so quick, but this part is not racy. */ mtx_lock(&vnode_free_list_mtx); if (numvnodes + count > desiredvnodes && freevnodes > wantfreevnodes) vnlru_free_locked(ulmin(numvnodes + count - desiredvnodes, freevnodes - wantfreevnodes), NULL); mtx_unlock(&vnode_free_list_mtx); td = curthread; /* First try to be quick and racy. */ if (atomic_fetchadd_long(&numvnodes, count) + count <= desiredvnodes) { td->td_vp_reserv += count; vcheckspace(); /* XXX no longer so quick, but more racy */ return; } else atomic_subtract_long(&numvnodes, count); mtx_lock(&vnode_free_list_mtx); while (count > 0) { if (getnewvnode_wait(0) == 0) { count--; td->td_vp_reserv++; atomic_add_long(&numvnodes, 1); } } vcheckspace(); mtx_unlock(&vnode_free_list_mtx); } /* * This hack is fragile, especially if desiredvnodes or wantvnodes are * misconfigured or changed significantly. Reducing desiredvnodes below * the reserved amount should cause bizarre behaviour like reducing it * below the number of active vnodes -- the system will try to reduce * numvnodes to match, but should fail, so the subtraction below should * not overflow. */ void getnewvnode_drop_reserve(void) { struct thread *td; td = curthread; atomic_subtract_long(&numvnodes, td->td_vp_reserv); td->td_vp_reserv = 0; } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp; struct thread *td; struct lock_object *lo; static int cyclecount; int error; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); vp = NULL; td = curthread; if (td->td_vp_reserv > 0) { td->td_vp_reserv -= 1; goto alloc; } mtx_lock(&vnode_free_list_mtx); if (numvnodes < desiredvnodes) cyclecount = 0; else if (cyclecount++ >= freevnodes) { cyclecount = 0; vstir = 1; } /* * Grow the vnode cache if it will not be above its target max * after growing. Otherwise, if the free list is nonempty, try * to reclaim 1 item from it before growing the cache (possibly * above its target max if the reclamation failed or is delayed). * Otherwise, wait for some space. In all cases, schedule * vnlru_proc() if we are getting short of space. The watermarks * should be chosen so that we never wait or even reclaim from * the free list to below its target minimum. */ if (numvnodes + 1 <= desiredvnodes) ; else if (freevnodes > 0) vnlru_free_locked(1, NULL); else { error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (error != 0) { mtx_unlock(&vnode_free_list_mtx); return (error); } #endif } vcheckspace(); atomic_add_long(&numvnodes, 1); mtx_unlock(&vnode_free_list_mtx); alloc: counter_u64_add(vnodes_created, 1); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK); /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem * name when they are allocated, e.g., "zfs", "ufs", "nfs", etc. * * Locks live in a witness group keyed on their name. Thus, * when a lock is renamed, it must also move from the witness * group of its old name to the witness group of its new name. * * The change only needs to be made when the vnode moves * from one filesystem type to another. We ensure that each * filesystem uses a single static name pointer for its tag so * that we can compare pointers rather than doing a strcmp().
*/ lo = &vp->v_vnlock->lock_object; if (lo->lo_name != tag) { lo->lo_name = tag; WITNESS_DESTROY(lo); WITNESS_INIT(lo, tag); } /* * By default, don't allow shared locks unless filesystems opt-in. */ vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE; /* * Finalize various vnode identity bits. */ KASSERT(vp->v_object == NULL, ("stale v_object %p", vp)); KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp)); KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp)); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; v_init_counters(vp); vp->v_bufobj.bo_ops = &buf_ops_bio; #ifdef DIAGNOSTIC if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode(9), tag %s\n", tag); #endif #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); #endif if (mp != NULL) { vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } /* * For the filesystems which do not use vfs_hash_insert(), * still initialize v_hash to have vfs_hash_index() useful. * E.g., nullfs uses vfs_hash_index() on the lower vnode for * its own hashing. */ vp->v_hash = (uintptr_t)vp >> vnsz2log; *vpp = vp; return (0); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; int active; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); VI_LOCK(vp); KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize, ("Active vnode list size %d > Vnode list size %d", mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize)); active = vp->v_iflag & VI_ACTIVE; vp->v_iflag &= ~VI_ACTIVE; if (active) { mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize--; mtx_unlock(&vnode_free_list_mtx); } vp->v_mount = NULL; VI_UNLOCK(vp); VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp"); /* * We acquire the vnode interlock early to ensure that the * vnode cannot be recycled by another process releasing a * holdcnt on it before we get it on both the vnode list * and the active vnode list. The mount mutex protects only * manipulation of the vnode list and the vnode freelist * mutex protects only manipulation of the active vnode list. * Hence the need to hold the vnode interlock throughout. 
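* * (Illustratively: were the interlock dropped between the two list insertions below, another thread releasing its hold count could move the vnode to the free list while it is present on only one of the two lists.)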
*/ MNT_ILOCK(mp); VI_LOCK(vp); if (((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) && (vp->v_vflag & VV_FORCEINSMQ) == 0) { VI_UNLOCK(vp); MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Activating already active vnode")); vp->v_iflag |= VI_ACTIVE; mtx_lock(&vnode_free_list_mtx); TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0 && !(flags & V_CLEANONLY)) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); if ((flags & V_VMIO) == 0) { BO_UNLOCK(bo); if (bo->bo_object != NULL) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_pip_wait(bo->bo_object, "bovlbx"); VM_OBJECT_WUNLOCK(bo->bo_object); } BO_LOCK(bo); } } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); VM_OBJECT_WUNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO | V_ALLOWCLEAN)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0 && bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: flush dirty failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); if (vp->v_object != NULL && vp->v_object->handle != vp) return (0); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. 
* */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_WLOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) { continue; } if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp == NULL) break; nbp = gbincore(bo, lblkno); if (nbp == NULL || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags) break; /* nbp invalid */ } return (retval); } int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn) { struct buf *bp; int error; daddr_t lblkno; ASSERT_BO_LOCKED(bo); for (lblkno = startn;;) { again: bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno); if (bp == NULL || bp->b_lblkno >= endn || bp->b_lblkno < startn) break; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0); if (error != 0) { BO_RLOCK(bo); if (error == ENOLCK) goto again; return (error); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); lblkno = bp->b_lblkno + 1; if ((bp->b_flags & B_MANAGED) == 0) bremfree(bp); bp->b_flags |= B_RELBUF; /* * In the VMIO case, use the B_NOREUSE flag to hint that the * pages backing each buffer in the range are unlikely to be * reused. Dirty buffers will have the hint applied once * they've been written. */ if (bp->b_vp->v_object != NULL) bp->b_flags |= B_NOREUSE; brelse(bp); BO_RLOCK(bo); } return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize) { struct buf *bp, *nbp; int anyfreed; int trunclbn; struct bufobj *bo; CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__, vp, cred, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. 
*/ trunclbn = howmany(length, blksize); ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: bo = &vp->v_bufobj; BO_LOCK(bo); anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { BO_UNLOCK(bo); goto restart; } } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { BO_UNLOCK(bo); goto restart; } } } if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { goto restart; } VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } static void buf_vlist_remove(struct buf *bp) { struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_WLOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct bufv *bv; struct buf *n; int error; ASSERT_BO_WLOCKED(bo); KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0, ("dead bo %p", bo)); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; /* * Keep the list ordered. Optimize empty list insertion. Assume * we tend to grow at the tail so lookup_le should usually be cheaper * than _ge. */ if (bv->bv_cnt == 0 || bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); else TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); if (error) panic("buf_vlist_add: Preallocated nodes insufficient."); bv->bv_cnt++; } /* * Look up a buffer using the buffer tries. 
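* * A minimal lookup sketch (illustrative): * * BO_RLOCK(bo); * bp = gbincore(bo, lblkno); * BO_RUNLOCK(bo); * * The clean trie is searched first, then the dirty trie; a NULL result means the logical block is not cached on either list.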
*/ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_WLOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_WLOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = (*bo)->__bo_vnode; /* XXX */ if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. 
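* * The worklist is a wheel of buckets indexed by syncer_delayno; vn_syncer_add_to_worklist() places an entry at slot = (syncer_delayno + delay) & syncer_mask. For example (illustrative numbers), on a 32-bucket wheel (syncer_mask == 31) with syncer_delayno == 10 and delay == 30, the bufobj lands in bucket (10 + 30) & 31 == 8, i.e. it comes due again after 30 passes of the wheel.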
*/ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } static int first_printf = 1; /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int error; last_work_seen = 0; syncer_final_iter = 0; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining... "); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } if (first_printf == 0) { /* * Drop the sync mutex, because some watchdog * drivers need to sleep while patting the watchdog */ mtx_unlock(&sync_mtx); wdog_kern_pat(WD_LASTVAL); mtx_lock(&sync_mtx); } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity.
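* * For example (illustrative), after speedup_syncer() has raised rushjob to 4, the next four passes through this loop take the rushjob branch above, skip the cv_timedwait() below, and drain four seconds' worth of buckets back to back.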
*/ if (syncer_state != SYNCER_RUNNING || time_uptime == starttime) { thread_lock(td); sched_prio(td, PPAUSE); thread_unlock(td); } if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run through its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } void syncer_suspend(void) { syncer_shutdown(updateproc, 0); } void syncer_resume(void) { mtx_lock(&sync_mtx); first_printf = 1; syncer_state = SYNCER_RUNNING; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_resume(updateproc); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } /* * A temporary hack until refcount_* APIs are sorted out.
*/ static __inline int vfs_refcount_acquire_if_not_zero(volatile u_int *count) { u_int old; old = *count; for (;;) { if (old == 0) return (0); if (atomic_fcmpset_int(count, &old, old + 1)) return (1); } } static __inline int vfs_refcount_release_if_not_last(volatile u_int *count) { u_int old; old = *count; for (;;) { if (old == 1) return (0); if (atomic_fcmpset_int(count, &old, old - 1)) return (1); } } static void v_init_counters(struct vnode *vp) { VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0, vp, ("%s called for an initialized vnode", __FUNCTION__)); ASSERT_VI_UNLOCKED(vp, __FUNCTION__); refcount_init(&vp->v_holdcnt, 1); refcount_init(&vp->v_usecount, 1); } static void v_incr_usecount_locked(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); if ((vp->v_iflag & VI_OWEINACT) != 0) { VNASSERT(vp->v_usecount == 0, vp, ("vnode with usecount and VI_OWEINACT set")); vp->v_iflag &= ~VI_OWEINACT; } refcount_acquire(&vp->v_usecount); v_incr_devcount(vp); } /* * Increment the use and hold counts on the vnode, taking care to reference * the driver's usecount if this is a chardev. The _vhold() will remove * the vnode from the free list if it is presently free. */ static void v_incr_usecount(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_type != VCHR && vfs_refcount_acquire_if_not_zero(&vp->v_usecount)) { VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); } else { VI_LOCK(vp); v_incr_usecount_locked(vp); VI_UNLOCK(vp); } } /* * Increment si_usecount of the associated device, if any. */ static void v_incr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement si_usecount of the associated device, if any. */ static void v_decr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VI_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. * * Notes on lockless counter manipulation: * _vhold, vputx and other routines make various decisions based * on either holdcnt or usecount being 0. As long as either counter * is not transitioning 0->1 nor 1->0, the manipulation can be done * with atomic operations. Otherwise the interlock is taken covering * both the atomic and additional actions. 
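* * For example, a 2->3 bump of v_usecount can be done with vfs_refcount_acquire_if_not_zero() alone, whereas a 0->1 transition must take the interlock so that clearing VI_OWEINACT (and, for the hold count, leaving the free list) happens atomically with the increment.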
*/ int vget(struct vnode *vp, int flags, struct thread *td) { int error, oweinact; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vget: invalid lock operation")); if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); if ((flags & LK_VNHELD) != 0) VNASSERT((vp->v_holdcnt > 0), vp, ("vget: LK_VNHELD passed but vnode not held")); CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); if ((flags & LK_VNHELD) == 0) _vhold(vp, (flags & LK_INTERLOCK) != 0); if ((error = vn_lock(vp, flags)) != 0) { vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); /* * We don't guarantee that any particular close will * trigger inactive processing so just make a best effort * here at preventing a reference to a removed file. If * we don't succeed no harm is done. * * Upgrade our holdcnt to a usecount. */ if (vp->v_type == VCHR || !vfs_refcount_acquire_if_not_zero(&vp->v_usecount)) { VI_LOCK(vp); if ((vp->v_iflag & VI_OWEINACT) == 0) { oweinact = 0; } else { oweinact = 1; vp->v_iflag &= ~VI_OWEINACT; } refcount_acquire(&vp->v_usecount); v_incr_devcount(vp); if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE && (flags & LK_NOWAIT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } return (0); } /* * Increase the reference count of a vnode. */ void vref(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); _vhold(vp, false); v_incr_usecount(vp); } void vrefl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); _vhold(vp, true); v_incr_usecount_locked(vp); } void vrefact(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (__predict_false(vp->v_type == VCHR)) { VNASSERT(vp->v_holdcnt > 0 && vp->v_usecount > 0, vp, ("%s: wrong ref counts", __func__)); vref(vp); return; } #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, 1); VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__)); old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old > 0, vp, ("%s: wrong use count", __func__)); #else refcount_acquire(&vp->v_holdcnt); refcount_acquire(&vp->v_usecount); #endif } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism is used to * stop other processes from gaining references to the vnode. This may be the * case if the caller holds the only reference. This is also useful when stale * data is acceptable as race conditions may be accounted for by some other * means. */ int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } #define VPUTX_VRELE 1 #define VPUTX_VPUT 2 #define VPUTX_VUNREF 3 /* * Decrement the use and hold counts for a vnode. * * See an explanation near vget() as to why atomic operation is safe. */ static void vputx(struct vnode *vp, int func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) ASSERT_VOP_LOCKED(vp, "vunref"); else if (func == VPUTX_VPUT) ASSERT_VOP_LOCKED(vp, "vput"); else KASSERT(func == VPUTX_VRELE, ("vputx: wrong func")); ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_type != VCHR && vfs_refcount_release_if_not_last(&vp->v_usecount)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); vdrop(vp); return; } VI_LOCK(vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. 
*/ if (!refcount_release(&vp->v_usecount) || (vp->v_iflag & VI_DOINGINACT)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); v_decr_devcount(vp); vdropl(vp); return; } v_decr_devcount(vp); error = 0; if (vp->v_usecount != 0) { vn_printf(vp, "vputx: usecount not zero for vnode "); panic("vputx: usecount not zero"); } CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp); /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; switch (func) { case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); break; case VPUTX_VPUT: if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT); VI_LOCK(vp); } break; case VPUTX_VUNREF: if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK); VI_LOCK(vp); } break; } VNASSERT(vp->v_usecount == 0 || (vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); if (error == 0) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp, curthread); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp, 0); } vdropl(vp); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This gives the same effect as * unlock+vrele(), but takes less time and avoids releasing and * re-acquiring the lock (as vrele() acquires the lock internally). */ void vput(struct vnode *vp) { vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock. */ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } /* * Increase the hold count and activate if this is the first reference. */ void _vhold(struct vnode *vp, bool locked) { struct mount *mp; if (locked) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (!locked) { if (vfs_refcount_acquire_if_not_zero(&vp->v_holdcnt)) { VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("_vhold: vnode with holdcnt is free")); return; } VI_LOCK(vp); } if ((vp->v_iflag & VI_FREE) == 0) { refcount_acquire(&vp->v_holdcnt); if (!locked) VI_UNLOCK(vp); return; } VNASSERT(vp->v_holdcnt == 0, vp, ("%s: wrong hold count", __func__)); VNASSERT(vp->v_op != NULL, vp, ("%s: vnode already reclaimed.", __func__)); /* * Remove a vnode from the free list, mark it as in use, * and put it on the active list. */ mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); freevnodes--; vp->v_iflag &= ~VI_FREE; KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Activating already active vnode")); vp->v_iflag |= VI_ACTIVE; mp = vp->v_mount; TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); refcount_acquire(&vp->v_holdcnt); if (!locked) VI_UNLOCK(vp); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we place it on the free list unless it has been vgone'd * (marked VI_DOOMED) in which case we will free it. * * Because the vnode vm object keeps a hold reference on the vnode if * there is at least one resident non-cached page, the vnode cannot * leave the active list without the page cleanup done.
*/ void _vdrop(struct vnode *vp, bool locked) { struct bufobj *bo; struct mount *mp; int active; if (locked) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if ((int)vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); if (!locked) { if (vfs_refcount_release_if_not_last(&vp->v_holdcnt)) return; VI_LOCK(vp); } if (refcount_release(&vp->v_holdcnt) == 0) { VI_UNLOCK(vp); return; } if ((vp->v_iflag & VI_DOOMED) == 0) { /* * Mark a vnode as free: remove it from its active list * and put it up for recycling on the freelist. */ VNASSERT(vp->v_op != NULL, vp, ("vdropl: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free")); VNASSERT(vp->v_holdcnt == 0, vp, ("vdropl: freeing when we shouldn't")); active = vp->v_iflag & VI_ACTIVE; if ((vp->v_iflag & VI_OWEINACT) == 0) { vp->v_iflag &= ~VI_ACTIVE; mp = vp->v_mount; mtx_lock(&vnode_free_list_mtx); if (active) { TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize--; } TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); freevnodes++; vp->v_iflag |= VI_FREE; mtx_unlock(&vnode_free_list_mtx); } else { counter_u64_add(free_owe_inact, 1); } VI_UNLOCK(vp); return; } /* * The vnode has been marked for destruction, so free it. * * The vnode will be returned to the zone where it will * normally remain until it is needed for another vnode. We * need to cleanup (or verify that the cleanup has already * been done) any residual data left from its current use * so as not to contaminate the freshly allocated vnode. */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); atomic_subtract_long(&numvnodes, 1); bo = &vp->v_bufobj; VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("cleaned vnode still on the free list.")); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, ("clean blk trie not empty")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, ("dirty blk trie not empty")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp, ("Dangling rangelock waiters")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) { destroy_vpollinfo(vp->v_pollinfo); vp->v_pollinfo = NULL; } #ifdef INVARIANTS /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif bzero(&vp->v_un, sizeof(vp->v_un)); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; vp->v_iflag = 0; vp->v_vflag = 0; bo->bo_flag = 0; uma_zfree(vnode_zone, vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. 
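* * An example scenario (illustrative): vput() on a shared-locked vnode fails its LK_UPGRADE | LK_NOWAIT attempt in vputx(), leaves VI_OWEINACT set and returns; a later vget(vp, LK_EXCLUSIVE, td) observes the flag and performs the deferred vinactive() call.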
*/ void vinactive(struct vnode *vp, struct thread *td) { struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); /* * Before moving off the active list, we must be sure that any * modified pages are converted into the vnode's dirty * buffers, since these will no longer be checked once the * vnode is on the inactive list. * * The write-out of the dirty pages is asynchronous. At the * point that VOP_INACTIVE() is called, there could still be * pending I/O and dirty pages in the object. */ if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && (obj->flags & OBJ_MIGHTBEDIRTY) != 0) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, 0); VM_OBJECT_WUNLOCK(obj); } VOP_INACTIVE(vp, td); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones; * an error is returned if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush() * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { vholdl(vp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } /* * Skip over any vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp, 0); vdrop(vp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing.
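* * For example (illustrative), a forced unmount typically reaches this code as vflush(mp, 0, FORCECLOSE, td) so that even active vnodes are detached, while a read-write to read-only downgrade typically passes WRITECLOSE so that only the vnodes described above are flushed.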
*/ if (flags & WRITECLOSE) { if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); if (error != 0) { VOP_UNLOCK(vp, 0); vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp, 0); vdropl(vp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vn_printf(vp, "vflush: busy vnode "); #endif } VOP_UNLOCK(vp, 0); vdropl(vp); } if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp, 0); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp) { int recycled; ASSERT_VOP_ELOCKED(vp, "vrecycle"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; VI_LOCK(vp); if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } VI_UNLOCK(vp); return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } static void notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* * Notify upper mounts about reclaimed or unlinked vnode. */ void vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); if (TAILQ_EMPTY(&mp->mnt_uppers)) goto unlock; MNT_IUNLOCK(mp); mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); mmp->mnt_op = &vgonel_vfsops; mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_VGONE_UPPER; for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { ump = TAILQ_NEXT(ump, mnt_upper_link); continue; } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: VFS_RECLAIM_LOWERVP(ump, vp); break; case VFS_NOTIFY_UPPER_UNLINK: VFS_UNLINK_LOWERVP(ump, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } free(mmp, M_TEMP); mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; wakeup(&mp->mnt_uppers); } unlock: MNT_IUNLOCK(mp); } /* * vgone, with the vp interlock held. 
*/ static void vgonel(struct vnode *vp) { struct thread *td; int oweinact; int active; struct mount *mp; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } if (vp->v_type == VSOCK) vfs_unp_reclaim(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) { while (vinvalbuf(vp, 0, 0, 0) != 0) ; } BO_LOCK(&vp->v_bufobj); KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) && vp->v_bufobj.bo_dirty.bv_cnt == 0 && TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) && vp->v_bufobj.bo_clean.bv_cnt == 0, ("vp %p bufobj not invalidated", vp)); /* * For VMIO bufobj, BO_DEAD is set in vm_object_terminate() * after the object's page queue is flushed. */ if (vp->v_bufobj.bo_object == NULL) vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); vp->v_lockf = NULL; /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_tag = "none"; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Same as above, but using the struct cdev *as argument */ int count_dev(struct cdev *dev) { int count; dev_lock(); count = dev->si_usecount; dev_unlock(); return(count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) 
{ va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d", vp->v_usecount, vp->v_writecount, vp->v_holdcnt); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); break; case VCHR: printf(" rdev %p\n", vp->v_rdev); break; case VSOCK: printf(" socket %p\n", vp->v_socket); break; case VFIFO: printf(" fifoinfo %p\n", vp->v_fifoinfo); break; default: printf("\n"); break; } buf[0] = '\0'; buf[1] = '\0'; if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_ETERNALDEV) strlcat(buf, "|VV_ETERNALDEV", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_TEXT) strlcat(buf, "|VV_TEXT", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); if (vp->v_vflag & VV_FORCEINSMQ) strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV | VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_DOOMED) strlcat(buf, "|VI_DOOMED", sizeof(buf)); if (vp->v_iflag & VI_FREE) strlcat(buf, "|VI_FREE", sizeof(buf)); if (vp->v_iflag & VI_ACTIVE) strlcat(buf, "|VI_ACTIVE", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_MOUNT | VI_DOOMED | VI_FREE | VI_ACTIVE | VI_DOINGINACT | VI_OWEINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d " "cleanbuf %d dirtybuf %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count, vp->v_bufobj.bo_clean.bv_cnt, vp->v_bufobj.bo_dirty.bv_cnt); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vn_printf(vp, "vnode "); } } } /* * Show details about the given vnode. 
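* Usage from the DDB prompt, e.g. (the address is illustrative): show vnode 0xfffff80012345678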
*/ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; uint64_t mflags; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. */ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount \n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; mflags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (mflags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ mflags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_SUJ); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%016jx", mflags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_NOINSMNTQ); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); MNT_KERN_FLAG(MNTK_VGONE_UPPER); MNT_KERN_FLAG(MNTK_VGONE_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " 
"ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, (uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d\n", mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_activevnodelistsize = %d\n", mp->mnt_activevnodelistsize); db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_lockref = %d\n", mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf("\n\nList of active vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) { if (vp->v_type != VMARKER) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } db_printf("\n\nList of inactive vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static int vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp.vfc_vfsops = NULL; xvfsp.vfc_next = NULL; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #ifdef COMPAT_FREEBSD32 struct xvfsconf32 { uint32_t vfc_vfsops; char vfc_name[MFSNAMELEN]; int32_t vfc_typenum; int32_t vfc_refcount; int32_t vfc_flags; uint32_t vfc_next; }; static int vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf32 xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #endif /* * Top level filesystem related information gathering. 
*/ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; int error; error = 0; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) error = vfsconf2x32(req, vfsp); else #endif error = vfsconf2x(req, vfsp); if (error) break; } vfsconf_sunlock(); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; log(LOG_WARNING, "userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_typenum == name[2]) break; } vfsconf_sunlock(); if (vfsp == NULL) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) return (vfsconf2x32(req, vfsp)); else #endif return (vfsconf2x(req, vfsp)); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error != 0) { vfsconf_sunlock(); return (error); } } vfsconf_sunlock(); return (0); } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. 
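* The unlocked read only sizes the transfer buffer; the KINFO_VNODESLOP entries of slack absorb vnodes created while the copy is in progress.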
*/ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif static void unmount_or_warn(struct mount *mp) { int error; error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp, *tmp; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); /* * Since this only runs when rebooting, it is not interlocked. */ TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); /* * Forcibly unmounting "/dev" before "/" would prevent clean * unmount of the latter. */ if (mp == rootdevmp) continue; unmount_or_warn(mp); } if (rootdevmp != NULL) unmount_or_warn(rootdevmp); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { obj = vp->v_object; if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 && (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) { if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, curthread)) { if (vp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(vp); continue; } obj = vp->v_object; if (obj != NULL) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); VM_OBJECT_WUNLOCK(obj); } vput(vp); } } else VI_UNLOCK(vp); } } static void destroy_vpollinfo_free(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); } /* * Initialize per-vnode helper structure to hold poll-related state. 
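* The structure is allocated without the vnode interlock held, so two racing callers may both allocate; the interlock is retaken below and the loser frees its copy via destroy_vpollinfo_free().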
*/ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo_free(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp, 0); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx.
*/ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error, save; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ if (vfs_busy(mp, MBF_NOWAIT) != 0) return (0); if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } save = curthread_pflags_set(TDP_SYNCIO); vfs_msync(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); curthread_pflags_restore(save); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no longer referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. */ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; if (vp->v_type != VCHR) { error = ENOTBLK; goto out; } error = 0; dev_lock(); if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); out: if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner.
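* Worked example (illustrative): for file_mode 0644 the owner branch grants VADMIN | VREAD | VWRITE | VAPPEND, a group member is granted VREAD, and everyone else VREAD, so a VWRITE request by a non-owner falls through to the privilege check below.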
*/ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP, 0)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC, 0)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ, 0)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE, 0)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN, 0)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to suppress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (panicstr != NULL || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. 
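* These knobs are exported under the debug sysctl tree; for example, setting debug.vfs_badlock_ddb to 0 with sysctl(8) reports violations without entering the debugger.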
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); int vfs_badlock_vnode = 1; /* Print vnode details on lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode, 0, "Print vnode details on lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_vnode) vn_printf(vp, "vnode "); if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { int locked; if (!IGNORE_LOCK(vp)) { locked = VOP_ISLOCKED(vp); if (locked == 0 || locked == LK_EXCLOTHER) vfs_badlock("is not locked but should be", str, vp); } } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLOTHER) vfs_badlock("is not exclusive locked by another thread", str, vp); } void assert_vop_slocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_SHARED) vfs_badlock("is not locked shared but should be", str, vp); } #endif /* 0 */ #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. 
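* VOP_RENAME() expects the target directory, and the target vnode when one exists, to be locked on entry; hold references are taken below so none of the vnodes involved can be recycled while the VOP runs.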
*/ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } #ifdef DEBUG_VFS_LOCKS void vop_strategy_pre(void *ap) { struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. */ if ((bp->b_flags & B_CLUSTER) != 0) return; if (panicstr == NULL && !BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } } void vop_lock_pre(void *ap) { struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_lock_post(void *ap, int rc) { struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_unlock_pre(void *ap) { struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_LOCKED(a->a_vp, "VOP_UNLOCK"); ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); } void vop_unlock_post(void *ap, int rc) { struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); } #endif void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_deleteextattr_post(void *ap, int rc) { struct vop_deleteextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_reclaim_post(void *ap, int rc) { struct vop_reclaim_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_REVOKE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; long hint; if (!rc) { hint = NOTE_WRITE; if (a->a_fdvp == a->a_tdvp) { if (a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } else { hint |= NOTE_EXTEND; if (a->a_fvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); if (a->a_fvp->v_type == VDIR && a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint &= ~NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_setextattr_post(void *ap, int rc) { 
struct vop_setextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); } void vop_close_post(void *ap, int rc) { struct vop_close_args *a = ap; if (!rc && (a->a_cred != NOCRED || /* filter out revokes */ (a->a_vp->v_iflag & VI_DOOMED) == 0)) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); } } void vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } void vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? */ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? 
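* The returned value packs the seconds of uptime into the upper 32 bits and the top 32 bits of the binary fraction into the lower half, so it never decreases for the life of a boot.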
*/ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp, 0); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; vhold(vp); knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); vdrop(vp); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0; VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. 
*/ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; *ap->a_ncookies += 1; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. */ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. * * This interface replaces MNT_VNODE_FOREACH. */ MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); struct vnode * __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; if (should_yield()) kern_yield(PRI_USER); MNT_ILOCK(mp); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); for (vp = TAILQ_NEXT(*mvp, v_nmntvnodes); vp != NULL; vp = TAILQ_NEXT(vp, v_nmntvnodes)) { /* Allow a racy peek at VI_DOOMED to save a lock acquisition. 
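* The unlocked test is only an optimization: VI_DOOMED is re-checked under the vnode interlock below before the vnode is handed back to the caller.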
*/ if (vp->v_type == VMARKER || (vp->v_iflag & VI_DOOMED) != 0) continue; VI_LOCK(vp); if ((vp->v_iflag & VI_DOOMED) != 0) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { __mnt_vnode_markerfree_all(mvp, mp); /* MNT_IUNLOCK(mp); -- done in above function */ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } struct vnode * __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); MNT_REF(mp); (*mvp)->v_mount = mp; (*mvp)->v_type = VMARKER; TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { /* Allow a racy peek at VI_DOOMED to save a lock acquisition. */ if (vp->v_type == VMARKER || (vp->v_iflag & VI_DOOMED) != 0) continue; VI_LOCK(vp); if ((vp->v_iflag & VI_DOOMED) != 0) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) { MNT_IUNLOCK(mp); return; } mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; } /* * These are helper functions for filesystems to traverse their * active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h */ static void mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) { KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; } static struct vnode * mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp, *nvp; mtx_assert(&vnode_free_list_mtx, MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); restart: vp = TAILQ_NEXT(*mvp, v_actfreelist); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); while (vp != NULL) { if (vp->v_type == VMARKER) { vp = TAILQ_NEXT(vp, v_actfreelist); continue; } if (!VI_TRYLOCK(vp)) { if (mp_ncpus == 1 || should_yield()) { TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); pause("vnacti", 1); mtx_lock(&vnode_free_list_mtx); goto restart; } continue; } KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp)); KASSERT(vp->v_mount == mp || vp->v_mount == NULL, ("alien vnode on the active list %p %p", vp, mp)); if (vp->v_mount == mp && (vp->v_iflag & VI_DOOMED) == 0) break; nvp = TAILQ_NEXT(vp, v_actfreelist); VI_UNLOCK(vp); vp = nvp; } /* Check if we are done */ if (vp == NULL) { mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); ASSERT_VI_LOCKED(vp, "active iter"); KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp)); return (vp); } struct vnode * __mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { if (should_yield()) kern_yield(PRI_USER); mtx_lock(&vnode_free_list_mtx); return (mnt_vnode_next_active(mvp, mp)); } struct vnode * __mnt_vnode_first_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = malloc(sizeof(struct vnode), 
M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); (*mvp)->v_type = VMARKER; (*mvp)->v_mount = mp; mtx_lock(&vnode_free_list_mtx); vp = TAILQ_FIRST(&mp->mnt_activevnodelist); if (vp == NULL) { mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); return (NULL); } TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); return (mnt_vnode_next_active(mvp, mp)); } void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); } Index: stable/11/sys/mips/conf/BERI_SOCKIT =================================================================== --- stable/11/sys/mips/conf/BERI_SOCKIT (revision 330445) +++ stable/11/sys/mips/conf/BERI_SOCKIT (revision 330446) @@ -1,26 +1,26 @@ # # BERI_SOCKIT -- Kernel for the SRI/Cambridge "BERI" (Bluespec Extensible # RISC Implementation) FPGA soft core, as configured in its Terasic SoCKit # reference configuration. This kernel configuration must be further -# specialized to to include a root filesystem specification. +# specialized to include a root filesystem specification. # # $FreeBSD$ # include "BERI_TEMPLATE" ident BERI_SOCKIT options ROOTDEVNAME=\"ufs:vtbd0\" device altera_pio device altera_jtag_uart device virtio device virtio_blk device vtnet device virtio_mmio options FDT options FDT_DTB_STATIC makeoptions FDT_DTS_FILE=beripad-sockit.dts Index: stable/11/sys/net/altq/altq_hfsc.h =================================================================== --- stable/11/sys/net/altq/altq_hfsc.h (revision 330445) +++ stable/11/sys/net/altq/altq_hfsc.h (revision 330446) @@ -1,319 +1,319 @@ /*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance.
* * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_HFSC_H_ #define _ALTQ_ALTQ_HFSC_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif struct service_curve { u_int m1; /* slope of the first segment in bits/sec */ u_int d; /* the x-projection of the first segment in msec */ u_int m2; /* slope of the second segment in bits/sec */ }; /* special class handles */ #define HFSC_NULLCLASS_HANDLE 0 #define HFSC_MAX_CLASSES 64 /* hfsc class flags */ #define HFCF_RED 0x0001 /* use RED */ #define HFCF_ECN 0x0002 /* use RED/ECN */ #define HFCF_RIO 0x0004 /* use RIO */ #define HFCF_CODEL 0x0008 /* use CoDel */ #define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define HFCF_DEFAULTCLASS 0x1000 /* default class */ /* service curve types */ #define HFSC_REALTIMESC 1 #define HFSC_LINKSHARINGSC 2 #define HFSC_UPPERLIMITSC 4 #define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC) struct hfsc_classstats { u_int class_id; u_int32_t class_handle; struct service_curve rsc; struct service_curve fsc; struct service_curve usc; /* upper limit service curve */ u_int64_t total; /* total work in bytes */ u_int64_t cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t d; /* deadline */ u_int64_t e; /* eligible time */ u_int64_t vt; /* virtual time */ u_int64_t f; /* fit time for upper-limit */ /* info helpful for debugging */ u_int64_t initvt; /* init virtual time */ u_int64_t vtoff; /* cl_vt_ipoff */ u_int64_t cvtmax; /* cl_maxvt */ u_int64_t myf; /* cl_myf */ u_int64_t cfmin; /* cl_mincf */ u_int64_t cvtmin; /* cl_mincvt */ u_int64_t myfadj; /* cl_myfadj */ u_int64_t vtadj; /* cl_vtadj */ u_int64_t cur_time; u_int32_t machclk_freq; u_int qlength; u_int qlimit; struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; u_int vtperiod; /* vt period sequence no */ u_int parentperiod; /* parent's vt period seqno */ int nactive; /* number of active children */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; struct codel_stats codel; }; #ifdef ALTQ3_COMPAT struct hfsc_interface { char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ }; struct hfsc_attach { struct hfsc_interface iface; u_int bandwidth; /* link bandwidth in bits/sec */ }; struct hfsc_add_class { struct hfsc_interface iface; u_int32_t parent_handle; struct service_curve service_curve; int qlimit; int flags; u_int32_t class_handle; /* return value */ }; struct hfsc_delete_class { struct hfsc_interface iface; u_int32_t class_handle; }; struct hfsc_modify_class { struct hfsc_interface iface; u_int32_t class_handle; struct service_curve service_curve; int sctype; }; struct hfsc_add_filter { struct hfsc_interface iface; u_int32_t class_handle; struct flow_filter filter; u_long filter_handle; /* return value */ }; struct hfsc_delete_filter { struct hfsc_interface iface; u_long filter_handle; }; struct hfsc_class_stats { struct hfsc_interface iface; int nskip; /* skip # of classes */ int nclasses; /* # of class stats (WR) */ u_int64_t cur_time; /* current time */ u_int32_t machclk_freq; /* machine clock frequency */ u_int hif_classes; /* # of classes in the tree */ u_int hif_packets; /* # of packets in the tree */ struct hfsc_classstats *stats; /* pointer to stats array */ }; #define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach) #define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface) #define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface) #define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface) #define 
HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface) #define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class) #define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class) #define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class) #define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter) #define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter) #define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats) #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL /* * kernel internal service curve representation * coordinates are given by 64 bit unsigned integers. * x-axis: unit is clock count. for the intel x86 architecture, * the raw Pentium TSC (Timestamp Counter) value is used. * virtual time is also calculated in this time scale. * y-axis: unit is byte. * * the service curve parameters are converted to the internal * representation. * the slope values are scaled to avoid overflow. * the inverse slope values as well as the y-projection of the 1st - * segment are kept in order to to avoid 64-bit divide operations + * segment are kept in order to avoid 64-bit divide operations * that are expensive on 32-bit architectures. * * note: Intel Pentium TSC never wraps around in several thousands of years. * x-axis doesn't wrap around for 1089 years with 1GHz clock. * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth. */ /* kernel internal representation of a service curve */ struct internal_sc { u_int64_t sm1; /* scaled slope of the 1st segment */ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ u_int64_t dx; /* the x-projection of the 1st segment */ u_int64_t dy; /* the y-projection of the 1st segment */ u_int64_t sm2; /* scaled slope of the 2nd segment */ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ }; /* runtime service curve */ struct runtime_sc { u_int64_t x; /* current starting position on x-axis */ u_int64_t y; /* current starting position on y-axis */ u_int64_t sm1; /* scaled slope of the 1st segment */ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ u_int64_t dx; /* the x-projection of the 1st segment */ u_int64_t dy; /* the y-projection of the 1st segment */ u_int64_t sm2; /* scaled slope of the 2nd segment */ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ }; struct hfsc_class { u_int cl_id; /* class id (just for debug) */ u_int32_t cl_handle; /* class handle */ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */ int cl_flags; /* misc flags */ struct hfsc_class *cl_parent; /* parent class */ struct hfsc_class *cl_siblings; /* sibling classes */ struct hfsc_class *cl_children; /* child classes */ class_queue_t *cl_q; /* class queue structure */ union { struct red *cl_red; /* RED state */ struct codel *cl_codel; /* CoDel state */ } cl_aqm; #define cl_red cl_aqm.cl_red #define cl_codel cl_aqm.cl_codel struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ u_int64_t cl_total; /* total work in bytes */ u_int64_t cl_cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t cl_d; /* deadline */ u_int64_t cl_e; /* eligible time */ u_int64_t cl_vt; /* virtual time */ u_int64_t cl_f; /* time when this class will fit for link-sharing, max(myf, cfmin) */ u_int64_t cl_myf; /* my fit-time (as calculated from this class's own upperlimit curve) */ u_int64_t cl_myfadj; /* my fit-time adjustment (to cancel history dependence) */ u_int64_t cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u_int64_t cl_cvtmin; /* minimal virtual time among the children fit for
link-sharing (monotonic within a period) */ u_int64_t cl_vtadj; /* intra-period cumulative vt adjustment */ u_int64_t cl_vtoff; /* inter-period cumulative vt offset */ u_int64_t cl_cvtmax; /* max child's vt in the last period */ u_int64_t cl_initvt; /* init virtual time (for debugging) */ struct internal_sc *cl_rsc; /* internal real-time service curve */ struct internal_sc *cl_fsc; /* internal fair service curve */ struct internal_sc *cl_usc; /* internal upperlimit service curve */ struct runtime_sc cl_deadline; /* deadline curve */ struct runtime_sc cl_eligible; /* eligible curve */ struct runtime_sc cl_virtual; /* virtual curve */ struct runtime_sc cl_ulimit; /* upperlimit curve */ u_int cl_vtperiod; /* vt period sequence no */ u_int cl_parentperiod; /* parent's vt period seqno */ int cl_nactive; /* number of active children */ TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */ TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */ TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */ struct { struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; } cl_stats; }; /* * hfsc interface state */ struct hfsc_if { struct hfsc_if *hif_next; /* interface state list */ struct ifaltq *hif_ifq; /* backpointer to ifaltq */ struct hfsc_class *hif_rootclass; /* root class */ struct hfsc_class *hif_defaultclass; /* default class */ struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES]; struct hfsc_class *hif_pollcache; /* cache for poll operation */ u_int hif_classes; /* # of classes in the tree */ u_int hif_packets; /* # of packets in the tree */ u_int hif_classid; /* class id sequence number */ TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier hif_classifier; #endif }; #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_HFSC_H_ */ Index: stable/11/sys/net/bpf.c =================================================================== --- stable/11/sys/net/bpf.c (revision 330445) +++ stable/11/sys/net/bpf.c (revision 330446) @@ -1,3042 +1,3042 @@ /*- * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpf.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_netgraph.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #ifdef BPF_JITTER #include #endif #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); struct bpf_if { #define bif_next bif_ext.bif_next #define bif_dlist bif_ext.bif_dlist struct bpf_if_ext bif_ext; /* public members */ u_int bif_dlt; /* link layer type */ u_int bif_hdrlen; /* length of link header */ struct ifnet *bif_ifp; /* corresponding interface */ struct rwlock bif_lock; /* interface lock */ LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */ int bif_flags; /* Interface flags */ struct bpf_if **bif_bpf; /* Pointer to pointer to us */ }; CTASSERT(offsetof(struct bpf_if, bif_ext) == 0); #if defined(DEV_BPF) || defined(NETGRAPH_BPF) #define PRINET 26 /* interruptible */ #define SIZEOF_BPF_HDR(type) \ (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen)) #ifdef COMPAT_FREEBSD32 #include #include #define BPF_ALIGNMENT32 sizeof(int32_t) #define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32) #ifndef BURN_BRIDGES /* * 32-bit version of structure prepended to each packet. We use this header * instead of the standard one for 32-bit streams. We mark the a stream as * 32-bit the first time we see a 32-bit compat ioctl request. */ struct bpf_hdr32 { struct timeval32 bh_tstamp; /* time stamp */ uint32_t bh_caplen; /* length of captured portion */ uint32_t bh_datalen; /* original length of packet */ uint16_t bh_hdrlen; /* length of bpf header (this struct plus alignment padding) */ }; #endif struct bpf_program32 { u_int bf_len; uint32_t bf_insns; }; struct bpf_dltlist32 { u_int bfl_len; u_int bfl_list; }; #define BIOCSETF32 _IOW('B', 103, struct bpf_program32) #define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32) #define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32) #define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32) #define BIOCSETWF32 _IOW('B', 123, struct bpf_program32) #define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32) #endif /* * bpf_iflist is a list of BPF interface structures, each corresponding to a * specific DLT. The same network interface might have several BPF interface * structures registered by different layers in the stack (i.e., 802.11 * frames, ethernet frames, etc). 
*/ static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist; static struct mtx bpf_mtx; /* bpf global lock */ static int bpf_bpfd_cnt; static void bpf_attachd(struct bpf_d *, struct bpf_if *); static void bpf_detachd(struct bpf_d *); static void bpf_detachd_locked(struct bpf_d *); static void bpf_freed(struct bpf_d *); static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, struct sockaddr *, int *, struct bpf_d *); static int bpf_setif(struct bpf_d *, struct ifreq *); static void bpf_timed_out(void *); static __inline void bpf_wakeup(struct bpf_d *); static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), struct bintime *); static void reset_d(struct bpf_d *); static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); static int bpf_setdlt(struct bpf_d *, u_int); static void filt_bpfdetach(struct knote *); static int filt_bpfread(struct knote *, long); static void bpf_drvinit(void *); static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl"); int bpf_maxinsns = BPF_MAXINSNS; SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, &bpf_maxinsns, 0, "Maximum bpf program instructions"); static int bpf_zerocopy_enable = 0; SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, bpf_stats_sysctl, "bpf statistics portal"); static VNET_DEFINE(int, bpf_optimize_writers) = 0; #define V_bpf_optimize_writers VNET(bpf_optimize_writers) SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0, "Do not send packets until BPF program is set"); static d_open_t bpfopen; static d_read_t bpfread; static d_write_t bpfwrite; static d_ioctl_t bpfioctl; static d_poll_t bpfpoll; static d_kqfilter_t bpfkqfilter; static struct cdevsw bpf_cdevsw = { .d_version = D_VERSION, .d_open = bpfopen, .d_read = bpfread, .d_write = bpfwrite, .d_ioctl = bpfioctl, .d_poll = bpfpoll, .d_name = "bpf", .d_kqfilter = bpfkqfilter, }; static struct filterops bpfread_filtops = { .f_isfd = 1, .f_detach = filt_bpfdetach, .f_event = filt_bpfread, }; eventhandler_tag bpf_ifdetach_cookie = NULL; /* * LOCKING MODEL USED BY BPF: * Locks: * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal, * some global counters and every bpf_if reference. * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters. * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields * used by bpf_mtap code. * * Lock order: * * Global lock, interface lock, descriptor lock * * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2] * working model. In many places (like bpf_detachd) we start with BPF descriptor * (and we need to at least rlock it to get reliable interface pointer). This * gives us potential LOR. As a result, we use global lock to protect from bpf_if * change in every such place. * * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and * 3) descriptor main wlock. * Reading bd_bif can be protected by any of these locks, typically global lock. * * Changing read/write BPF filter is protected by the same three locks, * the same applies for reading. * * Sleeping in global lock is not allowed due to bpfdetach() using it. 
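 *
 * As an illustrative sketch (a composite, not a quote of any single
 * function below), a path that needs all three locks must take them in
 * this order:
 *
 *	BPF_LOCK();		(1: global)
 *	BPFIF_WLOCK(bp);	(2: interface)
 *	BPFD_LOCK(d);		(3: descriptor)
 *	...
 *	BPFD_UNLOCK(d);
 *	BPFIF_WUNLOCK(bp);
 *	BPF_UNLOCK();
 *
 * bpf_attachd() and bpf_setf(), for example, follow this pattern.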
*/ /* * Wrapper functions for various buffering methods. If the set of buffer * modes expands, we will probably want to introduce a switch data structure * similar to protosw, etc. */ static void bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return (bpf_buffer_append_bytes(d, buf, offset, src, len)); case BPF_BUFMODE_ZBUF: d->bd_zcopy++; return (bpf_zerocopy_append_bytes(d, buf, offset, src, len)); default: panic("bpf_buf_append_bytes"); } } static void bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return (bpf_buffer_append_mbuf(d, buf, offset, src, len)); case BPF_BUFMODE_ZBUF: d->bd_zcopy++; return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len)); default: panic("bpf_buf_append_mbuf"); } } /* * This function gets called when the free buffer is re-assigned. */ static void bpf_buf_reclaimed(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return; case BPF_BUFMODE_ZBUF: bpf_zerocopy_buf_reclaimed(d); return; default: panic("bpf_buf_reclaimed"); } } /* * If the buffer mechanism has a way to decide that a held buffer can be made * free, then it is exposed via the bpf_canfreebuf() interface. (1) is * returned if the buffer can be discarded, (0) is returned if it cannot. */ static int bpf_canfreebuf(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: return (bpf_zerocopy_canfreebuf(d)); } return (0); } /* * Allow the buffer model to indicate that the current store buffer is * immutable, regardless of the appearance of space. Return (1) if the * buffer is writable, and (0) if not. */ static int bpf_canwritebuf(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: return (bpf_zerocopy_canwritebuf(d)); } return (1); } /* * Notify buffer model that an attempt to write to the store buffer has * resulted in a dropped packet, in which case the buffer may be considered * full. */ static void bpf_buffull(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: bpf_zerocopy_buffull(d); break; } } /* * Notify the buffer model that a buffer has moved into the hold position.
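 * Kernel memory buffers need no notification here, so only the zero-copy
 * model hooks this event (see the dispatch below): presumably so the
 * shared-memory buffer header can be updated and userspace can observe
 * that the buffer is complete.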
*/ void bpf_bufheld(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); switch (d->bd_bufmode) { case BPF_BUFMODE_ZBUF: bpf_zerocopy_bufheld(d); break; } } static void bpf_free(struct bpf_d *d) { switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: return (bpf_buffer_free(d)); case BPF_BUFMODE_ZBUF: return (bpf_zerocopy_free(d)); default: panic("bpf_buf_free"); } } static int bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) { if (d->bd_bufmode != BPF_BUFMODE_BUFFER) return (EOPNOTSUPP); return (bpf_buffer_uiomove(d, buf, len, uio)); } static int bpf_ioctl_sblen(struct bpf_d *d, u_int *i) { if (d->bd_bufmode != BPF_BUFMODE_BUFFER) return (EOPNOTSUPP); return (bpf_buffer_ioctl_sblen(d, i)); } static int bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) { if (d->bd_bufmode != BPF_BUFMODE_ZBUF) return (EOPNOTSUPP); return (bpf_zerocopy_ioctl_getzmax(td, d, i)); } static int bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) { if (d->bd_bufmode != BPF_BUFMODE_ZBUF) return (EOPNOTSUPP); return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); } static int bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) { if (d->bd_bufmode != BPF_BUFMODE_ZBUF) return (EOPNOTSUPP); return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); } /* * General BPF functions. */ static int bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) { const struct ieee80211_bpf_params *p; struct ether_header *eh; struct mbuf *m; int error; int len; int hlen; int slen; /* * Build a sockaddr based on the data link layer type. * We do this at this level because the ethernet header * is copied directly into the data field of the sockaddr. * In the case of SLIP, there is no header and the packet * is forwarded as is. * Also, we are careful to leave room at the front of the mbuf * for the link level header. */ switch (linktype) { case DLT_SLIP: sockp->sa_family = AF_INET; hlen = 0; break; case DLT_EN10MB: sockp->sa_family = AF_UNSPEC; /* XXX Would MAXLINKHDR be better? */ hlen = ETHER_HDR_LEN; break; case DLT_FDDI: sockp->sa_family = AF_IMPLINK; hlen = 0; break; case DLT_RAW: sockp->sa_family = AF_UNSPEC; hlen = 0; break; case DLT_NULL: /* * null interface types require a 4 byte pseudo header which * corresponds to the address family of the packet. */ sockp->sa_family = AF_UNSPEC; hlen = 4; break; case DLT_ATM_RFC1483: /* * en atm driver requires 4-byte atm pseudo header. * though it isn't standard, vpi:vci needs to be * specified anyway. 
*/ sockp->sa_family = AF_UNSPEC; hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ break; case DLT_PPP: sockp->sa_family = AF_UNSPEC; hlen = 4; /* This should match PPP_HDRLEN */ break; case DLT_IEEE802_11: /* IEEE 802.11 wireless */ sockp->sa_family = AF_IEEE80211; hlen = 0; break; case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ sockp->sa_family = AF_IEEE80211; sockp->sa_len = 12; /* XXX != 0 */ hlen = sizeof(struct ieee80211_bpf_params); break; default: return (EIO); } len = uio->uio_resid; if (len < hlen || len - hlen > ifp->if_mtu) return (EMSGSIZE); m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR); if (m == NULL) return (EIO); m->m_pkthdr.len = m->m_len = len; *mp = m; error = uiomove(mtod(m, u_char *), len, uio); if (error) goto bad; slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); if (slen == 0) { error = EPERM; goto bad; } /* Check for multicast destination */ switch (linktype) { case DLT_EN10MB: eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; } if (d->bd_hdrcmplt == 0) { memcpy(eh->ether_shost, IF_LLADDR(ifp), sizeof(eh->ether_shost)); } break; } /* * Make room for link header, and copy it to sockaddr */ if (hlen != 0) { if (sockp->sa_family == AF_IEEE80211) { /* * Collect true length from the parameter header * NB: sockp is known to be zero'd so if we do a * short copy unspecified parameters will be * zero. * NB: packet may not be aligned after stripping * bpf params * XXX check ibp_vers */ p = mtod(m, const struct ieee80211_bpf_params *); hlen = p->ibp_len; if (hlen > sizeof(sockp->sa_data)) { error = EINVAL; goto bad; } } bcopy(mtod(m, const void *), sockp->sa_data, hlen); } *hdrlen = hlen; return (0); bad: m_freem(m); return (error); } /* * Attach file to the bpf interface, i.e. make d listen on bp. */ static void bpf_attachd(struct bpf_d *d, struct bpf_if *bp) { int op_w; BPF_LOCK_ASSERT(); /* * Save sysctl value to protect from sysctl change * between reads */ op_w = V_bpf_optimize_writers || d->bd_writer; if (d->bd_bif != NULL) bpf_detachd_locked(d); /* * Point d at bp, and add d to the interface's list. * Since there are many applications using BPF for * sending raw packets only (dhcpd, cdpd are good examples) * we can delay adding d to the list of active listeners until * some filter is configured. */ BPFIF_WLOCK(bp); BPFD_LOCK(d); d->bd_bif = bp; if (op_w != 0) { /* Add to writers-only list */ LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next); /* * We decrement bd_writer on every filter set operation. * First BIOCSETF is done by pcap_open_live() to set up * snap length. After that the application usually sets its own filter */ d->bd_writer = 2; } else LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); BPFD_UNLOCK(d); BPFIF_WUNLOCK(bp); bpf_bpfd_cnt++; CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list", __func__, d->bd_pid, d->bd_writer ? "writer" : "active"); if (op_w == 0) EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1); } /* * Check if we need to upgrade our descriptor @d from write-only mode. */ static int bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen) { int is_snap, need_upgrade; /* * Check if we've already upgraded or new filter is empty. */ if (d->bd_writer == 0 || fcode == NULL) return (0); need_upgrade = 0; /* * Check if cmd looks like snaplen setting from * pcap_bpf.c:pcap_open_live().
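 * (For reference, that initial program is a single accept-all
 * instruction, e.g. BPF_STMT(BPF_RET | BPF_K, snaplen), which is
 * exactly what the flen == 1 / BPF_RET|BPF_K test below matches.)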
* Note we're not checking .k value here: - * while pcap_open_live() definitely sets to to non-zero value, + * while pcap_open_live() definitely sets to non-zero value, * we'd prefer to treat k=0 (deny ALL) case the same way: e.g. * do not consider upgrading immediately */ if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K)) is_snap = 1; else is_snap = 0; if (is_snap == 0) { /* * We're setting first filter and it doesn't look like * setting snaplen. We're probably using bpf directly. * Upgrade immediately. */ need_upgrade = 1; } else { /* * Do not require upgrade by first BIOCSETF * (used to set snaplen) by pcap_open_live(). */ if (--d->bd_writer == 0) { /* * First snaplen filter has already * been set. This is probably catch-all * filter */ need_upgrade = 1; } } CTR5(KTR_NET, "%s: filter function set by pid %d, " "bd_writer counter %d, snap %d upgrade %d", __func__, d->bd_pid, d->bd_writer, is_snap, need_upgrade); return (need_upgrade); } /* * Add d to the list of active bp filters. * Requires bpf_attachd() to be called before. */ static void bpf_upgraded(struct bpf_d *d) { struct bpf_if *bp; BPF_LOCK_ASSERT(); bp = d->bd_bif; /* * Filter can be set several times without specifying interface. * Mark d as reader and exit. */ if (bp == NULL) { BPFD_LOCK(d); d->bd_writer = 0; BPFD_UNLOCK(d); return; } BPFIF_WLOCK(bp); BPFD_LOCK(d); /* Remove from writers-only list */ LIST_REMOVE(d, bd_next); LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); /* Mark d as reader */ d->bd_writer = 0; BPFD_UNLOCK(d); BPFIF_WUNLOCK(bp); CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid); EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1); } /* * Detach a file from its interface. */ static void bpf_detachd(struct bpf_d *d) { BPF_LOCK(); bpf_detachd_locked(d); BPF_UNLOCK(); } static void bpf_detachd_locked(struct bpf_d *d) { int error; struct bpf_if *bp; struct ifnet *ifp; CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid); BPF_LOCK_ASSERT(); /* Check if descriptor is attached */ if ((bp = d->bd_bif) == NULL) return; BPFIF_WLOCK(bp); BPFD_LOCK(d); /* Save bd_writer value */ error = d->bd_writer; /* * Remove d from the interface's descriptor list. */ LIST_REMOVE(d, bd_next); ifp = bp->bif_ifp; d->bd_bif = NULL; BPFD_UNLOCK(d); BPFIF_WUNLOCK(bp); bpf_bpfd_cnt--; /* Call event handler iff d is attached */ if (error == 0) EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0); /* * Check if this descriptor had requested promiscuous mode. * If so, turn it off. */ if (d->bd_promisc) { d->bd_promisc = 0; CURVNET_SET(ifp->if_vnet); error = ifpromisc(ifp, 0); CURVNET_RESTORE(); if (error != 0 && error != ENXIO) { /* * ENXIO can happen if a pccard is unplugged * Something is really wrong if we were able to put * the driver into promiscuous mode, but can't * take it out. */ if_printf(bp->bif_ifp, "bpf_detach: ifpromisc failed (%d)\n", error); } } } /* * Close the descriptor by detaching it from its interface, * deallocating its buffers, and marking it free. */ static void bpf_dtor(void *data) { struct bpf_d *d = data; BPFD_LOCK(d); if (d->bd_state == BPF_WAITING) callout_stop(&d->bd_callout); d->bd_state = BPF_IDLE; BPFD_UNLOCK(d); funsetown(&d->bd_sigio); bpf_detachd(d); #ifdef MAC mac_bpfdesc_destroy(d); #endif /* MAC */ seldrain(&d->bd_sel); knlist_destroy(&d->bd_sel.si_note); callout_drain(&d->bd_callout); bpf_freed(d); free(d, M_BPF); } /* * Open ethernet device. Returns ENXIO for illegal minor device number, * EBUSY if file is open by another process. 
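 *
 * A minimal consumer of this device (hypothetical userland sketch;
 * "em0" and the lack of error handling are assumptions, not part of
 * this file):
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr = { .ifr_name = "em0" };
 *	u_int blen;
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);	(must be exactly blen; see bpfread())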
*/ /* ARGSUSED */ static int bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) { struct bpf_d *d; int error; d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO); error = devfs_set_cdevpriv(d, bpf_dtor); if (error != 0) { free(d, M_BPF); return (error); } /* * For historical reasons, perform a one-time initialization call to * the buffer routines, even though we're not yet committed to a * particular buffer method. */ bpf_buffer_init(d); if ((flags & FREAD) == 0) d->bd_writer = 2; d->bd_hbuf_in_use = 0; d->bd_bufmode = BPF_BUFMODE_BUFFER; d->bd_sig = SIGIO; d->bd_direction = BPF_D_INOUT; BPF_PID_REFRESH(d, td); #ifdef MAC mac_bpfdesc_init(d); mac_bpfdesc_create(td->td_ucred, d); #endif mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF); callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); return (0); } /* * bpfread - read next chunk of packets from buffers */ static int bpfread(struct cdev *dev, struct uio *uio, int ioflag) { struct bpf_d *d; int error; int non_block; int timed_out; error = devfs_get_cdevpriv((void **)&d); if (error != 0) return (error); /* * Restrict application to use a buffer the same size as * kernel buffers. */ if (uio->uio_resid != d->bd_bufsize) return (EINVAL); non_block = ((ioflag & O_NONBLOCK) != 0); BPFD_LOCK(d); BPF_PID_REFRESH_CUR(d); if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { BPFD_UNLOCK(d); return (EOPNOTSUPP); } if (d->bd_state == BPF_WAITING) callout_stop(&d->bd_callout); timed_out = (d->bd_state == BPF_TIMED_OUT); d->bd_state = BPF_IDLE; while (d->bd_hbuf_in_use) { error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET|PCATCH, "bd_hbuf", 0); if (error != 0) { BPFD_UNLOCK(d); return (error); } } /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets * have arrived to fill the store buffer. */ while (d->bd_hbuf == NULL) { if (d->bd_slen != 0) { /* * A packet(s) either arrived since the previous * read or arrived while we were asleep. */ if (d->bd_immediate || non_block || timed_out) { /* * Rotate the buffers and return what's here * if we are in immediate mode, non-blocking * flag is set, or this descriptor timed out. */ ROTATE_BUFFERS(d); break; } } /* * No data is available, check to see if the bpf device * is still pointed at a real interface. If not, return * ENXIO so that the userland process knows to rebind * it before using it again. */ if (d->bd_bif == NULL) { BPFD_UNLOCK(d); return (ENXIO); } if (non_block) { BPFD_UNLOCK(d); return (EWOULDBLOCK); } error = msleep(d, &d->bd_lock, PRINET|PCATCH, "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { BPFD_UNLOCK(d); return (error); } if (error == EWOULDBLOCK) { /* * On a timeout, return what's in the buffer, * which may be nothing. If there is something * in the store buffer, we can rotate the buffers. */ if (d->bd_hbuf) /* * We filled up the buffer in between * getting the timeout and arriving * here, so we don't need to rotate. */ break; if (d->bd_slen == 0) { BPFD_UNLOCK(d); return (0); } ROTATE_BUFFERS(d); break; } } /* * At this point, we know we have something in the hold slot. */ d->bd_hbuf_in_use = 1; BPFD_UNLOCK(d); /* * Move data from hold buffer into user space. * We know the entire buffer is transferred since * we checked above that the read buffer is bpf_bufsize bytes. * * We do not have to worry about simultaneous reads because * we waited for sole access to the hold buffer above.
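 *
 * (Userland then walks the records it got back with the usual stride --
 * hypothetical sketch:
 *	struct bpf_hdr *hp = (struct bpf_hdr *)p;
 *	p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
 * -- which matches the alignment catchpacket() applies below.)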
*/ error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); BPFD_LOCK(d); KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf")); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; bpf_buf_reclaimed(d); d->bd_hbuf_in_use = 0; wakeup(&d->bd_hbuf_in_use); BPFD_UNLOCK(d); return (error); } /* * If there are processes sleeping on this descriptor, wake them up. */ static __inline void bpf_wakeup(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); if (d->bd_state == BPF_WAITING) { callout_stop(&d->bd_callout); d->bd_state = BPF_IDLE; } wakeup(d); if (d->bd_async && d->bd_sig && d->bd_sigio) pgsigio(&d->bd_sigio, d->bd_sig, 0); selwakeuppri(&d->bd_sel, PRINET); KNOTE_LOCKED(&d->bd_sel.si_note, 0); } static void bpf_timed_out(void *arg) { struct bpf_d *d = (struct bpf_d *)arg; BPFD_LOCK_ASSERT(d); if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout)) return; if (d->bd_state == BPF_WAITING) { d->bd_state = BPF_TIMED_OUT; if (d->bd_slen != 0) bpf_wakeup(d); } } static int bpf_ready(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); if (!bpf_canfreebuf(d) && d->bd_hlen != 0) return (1); if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0) return (1); return (0); } static int bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) { struct bpf_d *d; struct ifnet *ifp; struct mbuf *m, *mc; struct sockaddr dst; struct route ro; int error, hlen; error = devfs_get_cdevpriv((void **)&d); if (error != 0) return (error); BPF_PID_REFRESH_CUR(d); d->bd_wcount++; /* XXX: locking required */ if (d->bd_bif == NULL) { d->bd_wdcount++; return (ENXIO); } ifp = d->bd_bif->bif_ifp; if ((ifp->if_flags & IFF_UP) == 0) { d->bd_wdcount++; return (ENETDOWN); } if (uio->uio_resid == 0) { d->bd_wdcount++; return (0); } bzero(&dst, sizeof(dst)); m = NULL; hlen = 0; /* XXX: bpf_movein() can sleep */ error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, &m, &dst, &hlen, d); if (error) { d->bd_wdcount++; return (error); } d->bd_wfcount++; if (d->bd_hdrcmplt) dst.sa_family = pseudo_AF_HDRCMPLT; if (d->bd_feedback) { mc = m_dup(m, M_NOWAIT); if (mc != NULL) mc->m_pkthdr.rcvif = ifp; /* Set M_PROMISC for outgoing packets to be discarded. */ if (d->bd_direction == BPF_D_INOUT) m->m_flags |= M_PROMISC; } else mc = NULL; m->m_pkthdr.len -= hlen; m->m_len -= hlen; m->m_data += hlen; /* XXX */ CURVNET_SET(ifp->if_vnet); #ifdef MAC BPFD_LOCK(d); mac_bpfdesc_create_mbuf(d, m); if (mc != NULL) mac_bpfdesc_create_mbuf(d, mc); BPFD_UNLOCK(d); #endif bzero(&ro, sizeof(ro)); if (hlen != 0) { ro.ro_prepend = (u_char *)&dst.sa_data; ro.ro_plen = hlen; ro.ro_flags = RT_HAS_HEADER; } error = (*ifp->if_output)(ifp, m, &dst, &ro); if (error) d->bd_wdcount++; if (mc != NULL) { if (error == 0) (*ifp->if_input)(ifp, mc); else m_freem(mc); } CURVNET_RESTORE(); return (error); } /* * Reset a descriptor by flushing its packet buffer and clearing the receive * and drop counts. This is doable for kernel-only buffers, but with * zero-copy buffers, we can't write to (or rotate) buffers that are * currently owned by userspace. It would be nice if we could encapsulate * this logic in the buffer code rather than here. */ static void reset_d(struct bpf_d *d) { BPFD_LOCK_ASSERT(d); while (d->bd_hbuf_in_use) mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, "bd_hbuf", 0); if ((d->bd_hbuf != NULL) && (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) { /* Free the hold buffer. 
*/ d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; bpf_buf_reclaimed(d); } if (bpf_canwritebuf(d)) d->bd_slen = 0; d->bd_rcount = 0; d->bd_dcount = 0; d->bd_fcount = 0; d->bd_wcount = 0; d->bd_wfcount = 0; d->bd_wdcount = 0; d->bd_zcopy = 0; } /* * FIONREAD Check for read packet available. * BIOCGBLEN Get buffer len [for read()]. * BIOCSETF Set read filter. * BIOCSETFNR Set read filter without resetting descriptor. * BIOCSETWF Set write filter. * BIOCFLUSH Flush read packet buffer. * BIOCPROMISC Put interface into promiscuous mode. * BIOCGDLT Get link layer type. * BIOCGETIF Get interface name. * BIOCSETIF Set interface. * BIOCSRTIMEOUT Set read timeout. * BIOCGRTIMEOUT Get read timeout. * BIOCGSTATS Get packet stats. * BIOCIMMEDIATE Set immediate mode. * BIOCVERSION Get filter language version. * BIOCGHDRCMPLT Get "header already complete" flag * BIOCSHDRCMPLT Set "header already complete" flag * BIOCGDIRECTION Get packet direction flag * BIOCSDIRECTION Set packet direction flag * BIOCGTSTAMP Get time stamp format and resolution. * BIOCSTSTAMP Set time stamp format and resolution. * BIOCLOCK Set "locked" flag * BIOCFEEDBACK Set packet feedback mode. * BIOCSETZBUF Set current zero-copy buffer locations. * BIOCGETZMAX Get maximum zero-copy buffer size. * BIOCROTZBUF Force rotation of zero-copy buffer * BIOCSETBUFMODE Set buffer mode. * BIOCGETBUFMODE Get current buffer mode. */ /* ARGSUSED */ static int bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct bpf_d *d; int error; error = devfs_get_cdevpriv((void **)&d); if (error != 0) return (error); /* * Refresh PID associated with this descriptor. */ BPFD_LOCK(d); BPF_PID_REFRESH(d, td); if (d->bd_state == BPF_WAITING) callout_stop(&d->bd_callout); d->bd_state = BPF_IDLE; BPFD_UNLOCK(d); if (d->bd_locked == 1) { switch (cmd) { case BIOCGBLEN: case BIOCFLUSH: case BIOCGDLT: case BIOCGDLTLIST: #ifdef COMPAT_FREEBSD32 case BIOCGDLTLIST32: #endif case BIOCGETIF: case BIOCGRTIMEOUT: #if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCGRTIMEOUT32: #endif case BIOCGSTATS: case BIOCVERSION: case BIOCGRSIG: case BIOCGHDRCMPLT: case BIOCSTSTAMP: case BIOCFEEDBACK: case FIONREAD: case BIOCLOCK: case BIOCSRTIMEOUT: #if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCSRTIMEOUT32: #endif case BIOCIMMEDIATE: case TIOCGPGRP: case BIOCROTZBUF: break; default: return (EPERM); } } #ifdef COMPAT_FREEBSD32 /* * If we see a 32-bit compat ioctl, mark the stream as 32-bit so * that it will get 32-bit packet headers. */ switch (cmd) { case BIOCSETF32: case BIOCSETFNR32: case BIOCSETWF32: case BIOCGDLTLIST32: case BIOCGRTIMEOUT32: case BIOCSRTIMEOUT32: BPFD_LOCK(d); d->bd_compat32 = 1; BPFD_UNLOCK(d); } #endif CURVNET_SET(TD_TO_VNET(td)); switch (cmd) { default: error = EINVAL; break; /* * Check for read packet available. */ case FIONREAD: { int n; BPFD_LOCK(d); n = d->bd_slen; while (d->bd_hbuf_in_use) mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, "bd_hbuf", 0); if (d->bd_hbuf) n += d->bd_hlen; BPFD_UNLOCK(d); *(int *)addr = n; break; } /* * Get buffer len [for read()]. */ case BIOCGBLEN: BPFD_LOCK(d); *(u_int *)addr = d->bd_bufsize; BPFD_UNLOCK(d); break; /* * Set buffer length. */ case BIOCSBLEN: error = bpf_ioctl_sblen(d, (u_int *)addr); break; /* * Set link layer read filter. 
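 * The caller hands us a struct bpf_program. For example (hypothetical
 * userland sketch), an accept-everything filter is the one-instruction
 * program:
 *	struct bpf_insn insns[] = { BPF_STMT(BPF_RET | BPF_K, (u_int)-1) };
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);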
*/ case BIOCSETF: case BIOCSETFNR: case BIOCSETWF: #ifdef COMPAT_FREEBSD32 case BIOCSETF32: case BIOCSETFNR32: case BIOCSETWF32: #endif error = bpf_setf(d, (struct bpf_program *)addr, cmd); break; /* * Flush read packet buffer. */ case BIOCFLUSH: BPFD_LOCK(d); reset_d(d); BPFD_UNLOCK(d); break; /* * Put interface into promiscuous mode. */ case BIOCPROMISC: if (d->bd_bif == NULL) { /* * No interface attached yet. */ error = EINVAL; break; } if (d->bd_promisc == 0) { error = ifpromisc(d->bd_bif->bif_ifp, 1); if (error == 0) d->bd_promisc = 1; } break; /* * Get current data link type. */ case BIOCGDLT: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else *(u_int *)addr = d->bd_bif->bif_dlt; BPF_UNLOCK(); break; /* * Get a list of supported data link types. */ #ifdef COMPAT_FREEBSD32 case BIOCGDLTLIST32: { struct bpf_dltlist32 *list32; struct bpf_dltlist dltlist; list32 = (struct bpf_dltlist32 *)addr; dltlist.bfl_len = list32->bfl_len; dltlist.bfl_list = PTRIN(list32->bfl_list); BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else { error = bpf_getdltlist(d, &dltlist); if (error == 0) list32->bfl_len = dltlist.bfl_len; } BPF_UNLOCK(); break; } #endif case BIOCGDLTLIST: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); BPF_UNLOCK(); break; /* * Set data link type. */ case BIOCSDLT: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else error = bpf_setdlt(d, *(u_int *)addr); BPF_UNLOCK(); break; /* * Get interface name. */ case BIOCGETIF: BPF_LOCK(); if (d->bd_bif == NULL) error = EINVAL; else { struct ifnet *const ifp = d->bd_bif->bif_ifp; struct ifreq *const ifr = (struct ifreq *)addr; strlcpy(ifr->ifr_name, ifp->if_xname, sizeof(ifr->ifr_name)); } BPF_UNLOCK(); break; /* * Set interface. */ case BIOCSETIF: { int alloc_buf, size; /* * Behavior here depends on the buffering model. If * we're using kernel memory buffers, then we can * allocate them here. If we're using zero-copy, * then the user process must have registered buffers * by the time we get here. */ alloc_buf = 0; BPFD_LOCK(d); if (d->bd_bufmode == BPF_BUFMODE_BUFFER && d->bd_sbuf == NULL) alloc_buf = 1; BPFD_UNLOCK(d); if (alloc_buf) { size = d->bd_bufsize; error = bpf_buffer_ioctl_sblen(d, &size); if (error != 0) break; } BPF_LOCK(); error = bpf_setif(d, (struct ifreq *)addr); BPF_UNLOCK(); break; } /* * Set read timeout. */ case BIOCSRTIMEOUT: #if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCSRTIMEOUT32: #endif { struct timeval *tv = (struct timeval *)addr; #if defined(COMPAT_FREEBSD32) && !defined(__mips__) struct timeval32 *tv32; struct timeval tv64; if (cmd == BIOCSRTIMEOUT32) { tv32 = (struct timeval32 *)addr; tv = &tv64; tv->tv_sec = tv32->tv_sec; tv->tv_usec = tv32->tv_usec; } else #endif tv = (struct timeval *)addr; /* * Subtract 1 tick from tvtohz() since this isn't * a one-shot timer. */ if ((error = itimerfix(tv)) == 0) d->bd_rtout = tvtohz(tv) - 1; break; } /* * Get read timeout. 
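 * (The stored tick count converts back as in the code below: with
 * hz = 1000 and tick = 1000000 / hz, a bd_rtout of 2500 ticks reads
 * back as tv_sec = 2, tv_usec = 500000.)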
*/ case BIOCGRTIMEOUT: #if defined(COMPAT_FREEBSD32) && !defined(__mips__) case BIOCGRTIMEOUT32: #endif { struct timeval *tv; #if defined(COMPAT_FREEBSD32) && !defined(__mips__) struct timeval32 *tv32; struct timeval tv64; if (cmd == BIOCGRTIMEOUT32) tv = &tv64; else #endif tv = (struct timeval *)addr; tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; #if defined(COMPAT_FREEBSD32) && !defined(__mips__) if (cmd == BIOCGRTIMEOUT32) { tv32 = (struct timeval32 *)addr; tv32->tv_sec = tv->tv_sec; tv32->tv_usec = tv->tv_usec; } #endif break; } /* * Get packet stats. */ case BIOCGSTATS: { struct bpf_stat *bs = (struct bpf_stat *)addr; /* XXXCSJP overflow */ bs->bs_recv = d->bd_rcount; bs->bs_drop = d->bd_dcount; break; } /* * Set immediate mode. */ case BIOCIMMEDIATE: BPFD_LOCK(d); d->bd_immediate = *(u_int *)addr; BPFD_UNLOCK(d); break; case BIOCVERSION: { struct bpf_version *bv = (struct bpf_version *)addr; bv->bv_major = BPF_MAJOR_VERSION; bv->bv_minor = BPF_MINOR_VERSION; break; } /* * Get "header already complete" flag */ case BIOCGHDRCMPLT: BPFD_LOCK(d); *(u_int *)addr = d->bd_hdrcmplt; BPFD_UNLOCK(d); break; /* * Set "header already complete" flag */ case BIOCSHDRCMPLT: BPFD_LOCK(d); d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; BPFD_UNLOCK(d); break; /* * Get packet direction flag */ case BIOCGDIRECTION: BPFD_LOCK(d); *(u_int *)addr = d->bd_direction; BPFD_UNLOCK(d); break; /* * Set packet direction flag */ case BIOCSDIRECTION: { u_int direction; direction = *(u_int *)addr; switch (direction) { case BPF_D_IN: case BPF_D_INOUT: case BPF_D_OUT: BPFD_LOCK(d); d->bd_direction = direction; BPFD_UNLOCK(d); break; default: error = EINVAL; } } break; /* * Get packet timestamp format and resolution. */ case BIOCGTSTAMP: BPFD_LOCK(d); *(u_int *)addr = d->bd_tstamp; BPFD_UNLOCK(d); break; /* * Set packet timestamp format and resolution. */ case BIOCSTSTAMP: { u_int func; func = *(u_int *)addr; if (BPF_T_VALID(func)) d->bd_tstamp = func; else error = EINVAL; } break; case BIOCFEEDBACK: BPFD_LOCK(d); d->bd_feedback = *(u_int *)addr; BPFD_UNLOCK(d); break; case BIOCLOCK: BPFD_LOCK(d); d->bd_locked = 1; BPFD_UNLOCK(d); break; case FIONBIO: /* Non-blocking I/O */ break; case FIOASYNC: /* Send signal on receive packets */ BPFD_LOCK(d); d->bd_async = *(int *)addr; BPFD_UNLOCK(d); break; case FIOSETOWN: /* * XXX: Add some sort of locking here? * fsetown() can sleep. */ error = fsetown(*(int *)addr, &d->bd_sigio); break; case FIOGETOWN: BPFD_LOCK(d); *(int *)addr = fgetown(&d->bd_sigio); BPFD_UNLOCK(d); break; /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: error = fsetown(-(*(int *)addr), &d->bd_sigio); break; /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: *(int *)addr = -fgetown(&d->bd_sigio); break; case BIOCSRSIG: /* Set receive signal */ { u_int sig; sig = *(u_int *)addr; if (sig >= NSIG) error = EINVAL; else { BPFD_LOCK(d); d->bd_sig = sig; BPFD_UNLOCK(d); } break; } case BIOCGRSIG: BPFD_LOCK(d); *(u_int *)addr = d->bd_sig; BPFD_UNLOCK(d); break; case BIOCGETBUFMODE: BPFD_LOCK(d); *(u_int *)addr = d->bd_bufmode; BPFD_UNLOCK(d); break; case BIOCSETBUFMODE: /* * Allow the buffering mode to be changed as long as we * haven't yet committed to a particular mode. Our * definition of commitment, for now, is whether or not a * buffer has been allocated or an interface attached, since * that's the point where things get tricky. 
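 *
 * (Hence the hypothetical zero-copy setup order from userland:
 * BIOCSETBUFMODE to BPF_BUFMODE_ZBUF first, then BIOCSETZBUF to
 * register the shared buffers, and only then BIOCSETIF -- once a
 * buffer exists or an interface is attached, the EBUSY below is final.)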
*/ switch (*(u_int *)addr) { case BPF_BUFMODE_BUFFER: break; case BPF_BUFMODE_ZBUF: if (bpf_zerocopy_enable) break; /* FALLTHROUGH */ default: CURVNET_RESTORE(); return (EINVAL); } BPFD_LOCK(d); if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || d->bd_fbuf != NULL || d->bd_bif != NULL) { BPFD_UNLOCK(d); CURVNET_RESTORE(); return (EBUSY); } d->bd_bufmode = *(u_int *)addr; BPFD_UNLOCK(d); break; case BIOCGETZMAX: error = bpf_ioctl_getzmax(td, d, (size_t *)addr); break; case BIOCSETZBUF: error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr); break; case BIOCROTZBUF: error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr); break; } CURVNET_RESTORE(); return (error); } /* * Set d's packet filter program to fp. If this file already has a filter, * free it and replace it. Returns EINVAL for bogus requests. * * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls * since reading d->bd_bif can't be protected by d or interface lock due to * lock order. * * Additionally, we have to acquire the interface write lock because bpf_mtap() uses * the interface read lock to read all filters. * */ static int bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) { #ifdef COMPAT_FREEBSD32 struct bpf_program fp_swab; struct bpf_program32 *fp32; #endif struct bpf_insn *fcode, *old; #ifdef BPF_JITTER bpf_jit_filter *jfunc, *ofunc; #endif size_t size; u_int flen; int need_upgrade; #ifdef COMPAT_FREEBSD32 switch (cmd) { case BIOCSETF32: case BIOCSETWF32: case BIOCSETFNR32: fp32 = (struct bpf_program32 *)fp; fp_swab.bf_len = fp32->bf_len; fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns; fp = &fp_swab; switch (cmd) { case BIOCSETF32: cmd = BIOCSETF; break; case BIOCSETWF32: cmd = BIOCSETWF; break; } break; } #endif fcode = NULL; #ifdef BPF_JITTER jfunc = ofunc = NULL; #endif need_upgrade = 0; /* * Check new filter validity before acquiring any locks. * Allocate memory for new filter, if needed. */ flen = fp->bf_len; if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0)) return (EINVAL); size = flen * sizeof(*fp->bf_insns); if (size > 0) { /* We're setting up new filter. Copy and check actual data. */ fcode = malloc(size, M_BPF, M_WAITOK); if (copyin(fp->bf_insns, fcode, size) != 0 || !bpf_validate(fcode, flen)) { free(fcode, M_BPF); return (EINVAL); } #ifdef BPF_JITTER /* Filter is copied inside fcode and is perfectly valid. */ jfunc = bpf_jitter(fcode, flen); #endif } BPF_LOCK(); /* * Set up new filter. * Protect filter change by interface lock. * Additionally, we are protected by global lock here. */ if (d->bd_bif != NULL) BPFIF_WLOCK(d->bd_bif); BPFD_LOCK(d); if (cmd == BIOCSETWF) { old = d->bd_wfilter; d->bd_wfilter = fcode; } else { old = d->bd_rfilter; d->bd_rfilter = fcode; #ifdef BPF_JITTER ofunc = d->bd_bfilter; d->bd_bfilter = jfunc; #endif if (cmd == BIOCSETF) reset_d(d); need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen); } BPFD_UNLOCK(d); if (d->bd_bif != NULL) BPFIF_WUNLOCK(d->bd_bif); if (old != NULL) free(old, M_BPF); #ifdef BPF_JITTER if (ofunc != NULL) bpf_destroy_jit_filter(ofunc); #endif /* Move d to active readers list. */ if (need_upgrade != 0) bpf_upgraded(d); BPF_UNLOCK(); return (0); } /* * Detach a file from its current interface (if attached at all) and attach * to the interface indicated by the name stored in ifr. * Return an errno or 0.
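 * (ENXIO if the name does not resolve to an interface with BPF attached
 * or that interface is being detached; EINVAL if no buffer has been
 * allocated yet.)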
*/ static int bpf_setif(struct bpf_d *d, struct ifreq *ifr) { struct bpf_if *bp; struct ifnet *theywant; BPF_LOCK_ASSERT(); theywant = ifunit(ifr->ifr_name); if (theywant == NULL || theywant->if_bpf == NULL) return (ENXIO); bp = theywant->if_bpf; /* Check if interface is not being detached from BPF */ BPFIF_RLOCK(bp); if (bp->bif_flags & BPFIF_FLAG_DYING) { BPFIF_RUNLOCK(bp); return (ENXIO); } BPFIF_RUNLOCK(bp); /* * At this point, we expect the buffer is already allocated. If not, * return an error. */ switch (d->bd_bufmode) { case BPF_BUFMODE_BUFFER: case BPF_BUFMODE_ZBUF: if (d->bd_sbuf == NULL) return (EINVAL); break; default: panic("bpf_setif: bufmode %d", d->bd_bufmode); } if (bp != d->bd_bif) bpf_attachd(d, bp); BPFD_LOCK(d); reset_d(d); BPFD_UNLOCK(d); return (0); } /* * Support for select() and poll() system calls * * Return true iff the specific operation will not block indefinitely. * Otherwise, return false but make a note that a selwakeup() must be done. */ static int bpfpoll(struct cdev *dev, int events, struct thread *td) { struct bpf_d *d; int revents; if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL) return (events & (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); /* * Refresh PID associated with this descriptor. */ revents = events & (POLLOUT | POLLWRNORM); BPFD_LOCK(d); BPF_PID_REFRESH(d, td); if (events & (POLLIN | POLLRDNORM)) { if (bpf_ready(d)) revents |= events & (POLLIN | POLLRDNORM); else { selrecord(td, &d->bd_sel); /* Start the read timeout if necessary. */ if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { callout_reset(&d->bd_callout, d->bd_rtout, bpf_timed_out, d); d->bd_state = BPF_WAITING; } } } BPFD_UNLOCK(d); return (revents); } /* * Support for kevent() system call. Register EVFILT_READ filters and * reject all others. */ int bpfkqfilter(struct cdev *dev, struct knote *kn) { struct bpf_d *d; if (devfs_get_cdevpriv((void **)&d) != 0 || kn->kn_filter != EVFILT_READ) return (1); /* * Refresh PID associated with this descriptor. */ BPFD_LOCK(d); BPF_PID_REFRESH_CUR(d); kn->kn_fop = &bpfread_filtops; kn->kn_hook = d; knlist_add(&d->bd_sel.si_note, kn, 1); BPFD_UNLOCK(d); return (0); } static void filt_bpfdetach(struct knote *kn) { struct bpf_d *d = (struct bpf_d *)kn->kn_hook; knlist_remove(&d->bd_sel.si_note, kn, 0); } static int filt_bpfread(struct knote *kn, long hint) { struct bpf_d *d = (struct bpf_d *)kn->kn_hook; int ready; BPFD_LOCK_ASSERT(d); ready = bpf_ready(d); if (ready) { kn->kn_data = d->bd_slen; /* * Ignore the hold buffer if it is being copied to user space. 
*/ if (!d->bd_hbuf_in_use && d->bd_hbuf) kn->kn_data += d->bd_hlen; } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { callout_reset(&d->bd_callout, d->bd_rtout, bpf_timed_out, d); d->bd_state = BPF_WAITING; } return (ready); } #define BPF_TSTAMP_NONE 0 #define BPF_TSTAMP_FAST 1 #define BPF_TSTAMP_NORMAL 2 #define BPF_TSTAMP_EXTERN 3 static int bpf_ts_quality(int tstype) { if (tstype == BPF_T_NONE) return (BPF_TSTAMP_NONE); if ((tstype & BPF_T_FAST) != 0) return (BPF_TSTAMP_FAST); return (BPF_TSTAMP_NORMAL); } static int bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m) { struct m_tag *tag; int quality; quality = bpf_ts_quality(tstype); if (quality == BPF_TSTAMP_NONE) return (quality); if (m != NULL) { tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL); if (tag != NULL) { *bt = *(struct bintime *)(tag + 1); return (BPF_TSTAMP_EXTERN); } } if (quality == BPF_TSTAMP_NORMAL) binuptime(bt); else getbinuptime(bt); return (quality); } /* * Incoming linkage from device drivers. Process the packet pkt, of length * pktlen, which is stored in a contiguous buffer. The packet is parsed * by each process' filter, and if accepted, stashed into the corresponding * buffer. */ void bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) { struct bintime bt; struct bpf_d *d; #ifdef BPF_JITTER bpf_jit_filter *bf; #endif u_int slen; int gottime; gottime = BPF_TSTAMP_NONE; BPFIF_RLOCK(bp); LIST_FOREACH(d, &bp->bif_dlist, bd_next) { /* * We are not using any locks for d here because: * 1) any filter change is protected by interface * write lock * 2) destroying/detaching d is protected by interface * write lock, too */ /* XXX: Do not protect counter for the sake of performance. */ ++d->bd_rcount; /* * NB: We don't call BPF_CHECK_DIRECTION() here since there is no * way for the caller to indicate to us whether this packet * is inbound or outbound. In the bpf_mtap() routines, we use * the interface pointers on the mbuf to figure it out. */ #ifdef BPF_JITTER bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; if (bf != NULL) slen = (*(bf->func))(pkt, pktlen, pktlen); else #endif slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); if (slen != 0) { /* * Filter matches. Let's acquire the write lock. */ BPFD_LOCK(d); d->bd_fcount++; if (gottime < bpf_ts_quality(d->bd_tstamp)) gottime = bpf_gettime(&bt, d->bd_tstamp, NULL); #ifdef MAC if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) #endif catchpacket(d, pkt, pktlen, slen, bpf_append_bytes, &bt); BPFD_UNLOCK(d); } } BPFIF_RUNLOCK(bp); } #define BPF_CHECK_DIRECTION(d, r, i) \ (((d)->bd_direction == BPF_D_IN && (r) != (i)) || \ ((d)->bd_direction == BPF_D_OUT && (r) == (i))) /* * Incoming linkage from device drivers, when packet is in an mbuf chain. * Locking model is explained in bpf_tap(). */ void bpf_mtap(struct bpf_if *bp, struct mbuf *m) { struct bintime bt; struct bpf_d *d; #ifdef BPF_JITTER bpf_jit_filter *bf; #endif u_int pktlen, slen; int gottime; /* Skip outgoing duplicate packets. */ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { m->m_flags &= ~M_PROMISC; return; } pktlen = m_length(m, NULL); gottime = BPF_TSTAMP_NONE; BPFIF_RLOCK(bp); LIST_FOREACH(d, &bp->bif_dlist, bd_next) { if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) continue; ++d->bd_rcount; #ifdef BPF_JITTER bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; /* XXX We cannot handle multiple mbufs.
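 * The interpreter call below passes a zero buflen, which bpf_filter()
 * takes to mean "the packet pointer is an mbuf chain, not a flat
 * buffer"; the JIT code has no such mode, so it is only used when the
 * chain is a single contiguous mbuf.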
*/ if (bf != NULL && m->m_next == NULL) slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen); else #endif slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); if (slen != 0) { BPFD_LOCK(d); d->bd_fcount++; if (gottime < bpf_ts_quality(d->bd_tstamp)) gottime = bpf_gettime(&bt, d->bd_tstamp, m); #ifdef MAC if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) #endif catchpacket(d, (u_char *)m, pktlen, slen, bpf_append_mbuf, &bt); BPFD_UNLOCK(d); } } BPFIF_RUNLOCK(bp); } /* * Incoming linkage from device drivers, when packet is in * an mbuf chain and to be prepended by a contiguous header. */ void bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) { struct bintime bt; struct mbuf mb; struct bpf_d *d; u_int pktlen, slen; int gottime; /* Skip outgoing duplicate packets. */ if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { m->m_flags &= ~M_PROMISC; return; } pktlen = m_length(m, NULL); /* * Craft on-stack mbuf suitable for passing to bpf_filter. * Note that we cut corners here; we only setup what's * absolutely needed--this mbuf should never go anywhere else. */ mb.m_next = m; mb.m_data = data; mb.m_len = dlen; pktlen += dlen; gottime = BPF_TSTAMP_NONE; BPFIF_RLOCK(bp); LIST_FOREACH(d, &bp->bif_dlist, bd_next) { if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) continue; ++d->bd_rcount; slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); if (slen != 0) { BPFD_LOCK(d); d->bd_fcount++; if (gottime < bpf_ts_quality(d->bd_tstamp)) gottime = bpf_gettime(&bt, d->bd_tstamp, m); #ifdef MAC if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) #endif catchpacket(d, (u_char *)&mb, pktlen, slen, bpf_append_mbuf, &bt); BPFD_UNLOCK(d); } } BPFIF_RUNLOCK(bp); } #undef BPF_CHECK_DIRECTION #undef BPF_TSTAMP_NONE #undef BPF_TSTAMP_FAST #undef BPF_TSTAMP_NORMAL #undef BPF_TSTAMP_EXTERN static int bpf_hdrlen(struct bpf_d *d) { int hdrlen; hdrlen = d->bd_bif->bif_hdrlen; #ifndef BURN_BRIDGES if (d->bd_tstamp == BPF_T_NONE || BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME) #ifdef COMPAT_FREEBSD32 if (d->bd_compat32) hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32); else #endif hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr); else #endif hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr); #ifdef COMPAT_FREEBSD32 if (d->bd_compat32) hdrlen = BPF_WORDALIGN32(hdrlen); else #endif hdrlen = BPF_WORDALIGN(hdrlen); return (hdrlen - d->bd_bif->bif_hdrlen); } static void bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) { struct bintime bt2, boottimebin; struct timeval tsm; struct timespec tsn; if ((tstype & BPF_T_MONOTONIC) == 0) { bt2 = *bt; getboottimebin(&boottimebin); bintime_add(&bt2, &boottimebin); bt = &bt2; } switch (BPF_T_FORMAT(tstype)) { case BPF_T_MICROTIME: bintime2timeval(bt, &tsm); ts->bt_sec = tsm.tv_sec; ts->bt_frac = tsm.tv_usec; break; case BPF_T_NANOTIME: bintime2timespec(bt, &tsn); ts->bt_sec = tsn.tv_sec; ts->bt_frac = tsn.tv_nsec; break; case BPF_T_BINTIME: ts->bt_sec = bt->sec; ts->bt_frac = bt->frac; break; } } /* * Move the packet data from interface memory (pkt) into the * store buffer. "cpfn" is the routine called to do the actual data * transfer. bcopy is passed in to copy contiguous chunks, while * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, * pkt is really an mbuf. 
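 *
 * Each record stored here is laid out as
 *	[bpf_xhdr or bpf_hdr][caplen bytes of data][pad to BPF_WORDALIGN]
 * which is the stride userspace uses to walk the buffer (see bpfread()).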
*/ static void catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), struct bintime *bt) { struct bpf_xhdr hdr; #ifndef BURN_BRIDGES struct bpf_hdr hdr_old; #ifdef COMPAT_FREEBSD32 struct bpf_hdr32 hdr32_old; #endif #endif int caplen, curlen, hdrlen, totlen; int do_wakeup = 0; int do_timestamp; int tstype; BPFD_LOCK_ASSERT(d); /* * Detect whether user space has released a buffer back to us, and if * so, move it from being a hold buffer to a free buffer. This may * not be the best place to do it (for example, we might only want to * run this check if we need the space), but for now it's a reliable * spot to do it. */ if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; bpf_buf_reclaimed(d); } /* * Figure out how many bytes to move. If the packet is * greater or equal to the snapshot length, transfer that * much. Otherwise, transfer the whole packet (unless * we hit the buffer size limit). */ hdrlen = bpf_hdrlen(d); totlen = hdrlen + min(snaplen, pktlen); if (totlen > d->bd_bufsize) totlen = d->bd_bufsize; /* * Round up the end of the previous packet to the next longword. * * Drop the packet if there's no room and no hope of room * If the packet would overflow the storage buffer or the storage * buffer is considered immutable by the buffer model, try to rotate * the buffer and wakeup pending processes. */ #ifdef COMPAT_FREEBSD32 if (d->bd_compat32) curlen = BPF_WORDALIGN32(d->bd_slen); else #endif curlen = BPF_WORDALIGN(d->bd_slen); if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { if (d->bd_fbuf == NULL) { /* * There's no room in the store buffer, and no * prospect of room, so drop the packet. Notify the * buffer model. */ bpf_buffull(d); ++d->bd_dcount; return; } KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use")); ROTATE_BUFFERS(d); do_wakeup = 1; curlen = 0; } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) /* * Immediate mode is set, or the read timeout has already * expired during a select call. A packet arrived, so the * reader should be woken up. */ do_wakeup = 1; caplen = totlen - hdrlen; tstype = d->bd_tstamp; do_timestamp = tstype != BPF_T_NONE; #ifndef BURN_BRIDGES if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) { struct bpf_ts ts; if (do_timestamp) bpf_bintime2ts(bt, &ts, tstype); #ifdef COMPAT_FREEBSD32 if (d->bd_compat32) { bzero(&hdr32_old, sizeof(hdr32_old)); if (do_timestamp) { hdr32_old.bh_tstamp.tv_sec = ts.bt_sec; hdr32_old.bh_tstamp.tv_usec = ts.bt_frac; } hdr32_old.bh_datalen = pktlen; hdr32_old.bh_hdrlen = hdrlen; hdr32_old.bh_caplen = caplen; bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old, sizeof(hdr32_old)); goto copy; } #endif bzero(&hdr_old, sizeof(hdr_old)); if (do_timestamp) { hdr_old.bh_tstamp.tv_sec = ts.bt_sec; hdr_old.bh_tstamp.tv_usec = ts.bt_frac; } hdr_old.bh_datalen = pktlen; hdr_old.bh_hdrlen = hdrlen; hdr_old.bh_caplen = caplen; bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old, sizeof(hdr_old)); goto copy; } #endif /* * Append the bpf header. Note we append the actual header size, but * move forward the length of the header plus padding. */ bzero(&hdr, sizeof(hdr)); if (do_timestamp) bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype); hdr.bh_datalen = pktlen; hdr.bh_hdrlen = hdrlen; hdr.bh_caplen = caplen; bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); /* * Copy the packet data into the store buffer and update its length. 
*/ #ifndef BURN_BRIDGES copy: #endif (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen); d->bd_slen = curlen + totlen; if (do_wakeup) bpf_wakeup(d); } /* * Free buffers currently in use by a descriptor. * Called on close. */ static void bpf_freed(struct bpf_d *d) { /* * We don't need to lock out interrupts since this descriptor has * been detached from its interface and it hasn't yet been marked * free. */ bpf_free(d); if (d->bd_rfilter != NULL) { free((caddr_t)d->bd_rfilter, M_BPF); #ifdef BPF_JITTER if (d->bd_bfilter != NULL) bpf_destroy_jit_filter(d->bd_bfilter); #endif } if (d->bd_wfilter != NULL) free((caddr_t)d->bd_wfilter, M_BPF); mtx_destroy(&d->bd_lock); } /* * Attach an interface to bpf. dlt is the link layer type; hdrlen is the * fixed size of the link header (variable length headers not yet supported). */ void bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) { bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); } /* * Attach an interface to bpf. ifp is a pointer to the structure * defining the interface to be attached, dlt is the link layer type, * and hdrlen is the fixed size of the link header (variable length * headers are not yet supported). */ void bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) { struct bpf_if *bp; bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO); if (bp == NULL) panic("bpfattach"); LIST_INIT(&bp->bif_dlist); LIST_INIT(&bp->bif_wlist); bp->bif_ifp = ifp; bp->bif_dlt = dlt; rw_init(&bp->bif_lock, "bpf interface lock"); KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized")); bp->bif_bpf = driverp; *driverp = bp; BPF_LOCK(); LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); BPF_UNLOCK(); bp->bif_hdrlen = hdrlen; if (bootverbose && IS_DEFAULT_VNET(curvnet)) if_printf(ifp, "bpf attached\n"); } #ifdef VIMAGE /* * When moving interfaces between vnet instances we need a way to * query the dlt and hdrlen before detach so we can re-attach the if_bpf * after the vmove. We unfortunately have no device driver infrastructure * to query the interface for these values after creation/attach, thus * add this as a workaround. */ int bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen) { if (bp == NULL) return (ENXIO); if (bif_dlt == NULL && bif_hdrlen == NULL) return (0); if (bif_dlt != NULL) *bif_dlt = bp->bif_dlt; if (bif_hdrlen != NULL) *bif_hdrlen = bp->bif_hdrlen; return (0); } #endif /* * Detach bpf from an interface. This involves detaching each descriptor * associated with the interface. Notify each descriptor as it's detached * so that any sleepers wake up and get ENXIO. */ void bpfdetach(struct ifnet *ifp) { struct bpf_if *bp, *bp_temp; struct bpf_d *d; int ndetached; ndetached = 0; BPF_LOCK(); /* Find all bpf_if struct's which reference ifp and detach them. */ LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) { if (ifp != bp->bif_ifp) continue; LIST_REMOVE(bp, bif_next); /* Add to to-be-freed list */ LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next); ndetached++; /* * Delay freeing bp till interface is detached * and all routes through this interface are removed. * Mark bp as detached to restrict new consumers.
*/ BPFIF_WLOCK(bp); bp->bif_flags |= BPFIF_FLAG_DYING; *bp->bif_bpf = NULL; BPFIF_WUNLOCK(bp); CTR4(KTR_NET, "%s: scheduling free for encap %d (%p) for if %p", __func__, bp->bif_dlt, bp, ifp); /* Free common descriptors */ while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { bpf_detachd_locked(d); BPFD_LOCK(d); bpf_wakeup(d); BPFD_UNLOCK(d); } /* Free writer-only descriptors */ while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) { bpf_detachd_locked(d); BPFD_LOCK(d); bpf_wakeup(d); BPFD_UNLOCK(d); } } BPF_UNLOCK(); #ifdef INVARIANTS if (ndetached == 0) printf("bpfdetach: %s was not attached\n", ifp->if_xname); #endif } /* * Interface departure handler. * Note that a departure event does not guarantee the interface is going down. * Interface renaming is currently done via departure/arrival event set. * * The departure handler is called after all routes pointing to * the given interface are removed and the interface is in the down state, * so no packets can be sent or received. We assume it is now safe * to free data allocated by BPF. */ static void bpf_ifdetach(void *arg __unused, struct ifnet *ifp) { struct bpf_if *bp, *bp_temp; int nmatched = 0; /* Ignore ifnet renaming. */ if (ifp->if_flags & IFF_RENAMING) return; BPF_LOCK(); /* * Find matching entries in free list. * Nothing should be found if bpfdetach() was not called. */ LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) { if (ifp != bp->bif_ifp) continue; CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p", __func__, bp, ifp); LIST_REMOVE(bp, bif_next); rw_destroy(&bp->bif_lock); free(bp, M_BPF); nmatched++; } BPF_UNLOCK(); } /* * Get a list of available data link types of the interface. */ static int bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) { struct ifnet *ifp; struct bpf_if *bp; u_int *lst; int error, n, n1; BPF_LOCK_ASSERT(); ifp = d->bd_bif->bif_ifp; again: n1 = 0; LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp == ifp) n1++; } if (bfl->bfl_list == NULL) { bfl->bfl_len = n1; return (0); } if (n1 > bfl->bfl_len) return (ENOMEM); BPF_UNLOCK(); lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); n = 0; BPF_LOCK(); LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp != ifp) continue; if (n >= n1) { free(lst, M_TEMP); goto again; } lst[n] = bp->bif_dlt; n++; } BPF_UNLOCK(); error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); free(lst, M_TEMP); BPF_LOCK(); bfl->bfl_len = n; return (error); } /* * Set the data link type of a BPF instance. */ static int bpf_setdlt(struct bpf_d *d, u_int dlt) { int error, opromisc; struct ifnet *ifp; struct bpf_if *bp; BPF_LOCK_ASSERT(); if (d->bd_bif->bif_dlt == dlt) return (0); ifp = d->bd_bif->bif_ifp; LIST_FOREACH(bp, &bpf_iflist, bif_next) { if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) break; } if (bp != NULL) { opromisc = d->bd_promisc; bpf_attachd(d, bp); BPFD_LOCK(d); reset_d(d); BPFD_UNLOCK(d); if (opromisc) { error = ifpromisc(bp->bif_ifp, 1); if (error) if_printf(bp->bif_ifp, "bpf_setdlt: ifpromisc failed (%d)\n", error); else d->bd_promisc = 1; } } return (bp == NULL ?
EINVAL : 0); } static void bpf_drvinit(void *unused) { struct cdev *dev; mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF); LIST_INIT(&bpf_iflist); LIST_INIT(&bpf_freelist); dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); /* For compatibility */ make_dev_alias(dev, "bpf0"); /* Register interface departure handler */ bpf_ifdetach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, bpf_ifdetach, NULL, EVENTHANDLER_PRI_ANY); } /* * Zero out the various packet counters associated with all of the bpf * descriptors. At some point, we will probably want to get a bit more * granular and allow the user to specify descriptors to be zeroed. */ static void bpf_zero_counters(void) { struct bpf_if *bp; struct bpf_d *bd; BPF_LOCK(); LIST_FOREACH(bp, &bpf_iflist, bif_next) { BPFIF_RLOCK(bp); LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { BPFD_LOCK(bd); bd->bd_rcount = 0; bd->bd_dcount = 0; bd->bd_fcount = 0; bd->bd_wcount = 0; bd->bd_wfcount = 0; bd->bd_zcopy = 0; BPFD_UNLOCK(bd); } BPFIF_RUNLOCK(bp); } BPF_UNLOCK(); } /* * Fill filter statistics */ static void bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) { bzero(d, sizeof(*d)); BPFD_LOCK_ASSERT(bd); d->bd_structsize = sizeof(*d); /* XXX: reading should be protected by global lock */ d->bd_immediate = bd->bd_immediate; d->bd_promisc = bd->bd_promisc; d->bd_hdrcmplt = bd->bd_hdrcmplt; d->bd_direction = bd->bd_direction; d->bd_feedback = bd->bd_feedback; d->bd_async = bd->bd_async; d->bd_rcount = bd->bd_rcount; d->bd_dcount = bd->bd_dcount; d->bd_fcount = bd->bd_fcount; d->bd_sig = bd->bd_sig; d->bd_slen = bd->bd_slen; d->bd_hlen = bd->bd_hlen; d->bd_bufsize = bd->bd_bufsize; d->bd_pid = bd->bd_pid; strlcpy(d->bd_ifname, bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); d->bd_locked = bd->bd_locked; d->bd_wcount = bd->bd_wcount; d->bd_wdcount = bd->bd_wdcount; d->bd_wfcount = bd->bd_wfcount; d->bd_zcopy = bd->bd_zcopy; d->bd_bufmode = bd->bd_bufmode; } /* * Handle `netstat -B' stats request */ static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) { static const struct xbpf_d zerostats; struct xbpf_d *xbdbuf, *xbd, tempstats; int index, error; struct bpf_if *bp; struct bpf_d *bd; /* * XXX This is not technically correct. It is possible for non * privileged users to open bpf devices. It would make sense * if the users who opened the devices were able to retrieve * the statistics for them, too. */ error = priv_check(req->td, PRIV_NET_BPF); if (error) return (error); /* * Check to see if the user is requesting that the counters be * zeroed out. Explicitly check that the supplied data is zeroed, * as we aren't allowing the user to set the counters currently. 
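* * Hedged userland sketch (illustration only; assumes this handler is reachable as the net.bpf.stats sysctl): struct xbpf_d zero; memset(&zero, 0, sizeof(zero)); sysctlbyname("net.bpf.stats", NULL, NULL, &zero, sizeof(zero)); writing exactly one all-zero record selects the zeroing path below.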
*/ if (req->newptr != NULL) { if (req->newlen != sizeof(tempstats)) return (EINVAL); memset(&tempstats, 0, sizeof(tempstats)); error = SYSCTL_IN(req, &tempstats, sizeof(tempstats)); if (error) return (error); if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0) return (EINVAL); bpf_zero_counters(); return (0); } if (req->oldptr == NULL) return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); if (bpf_bpfd_cnt == 0) return (SYSCTL_OUT(req, 0, 0)); xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); BPF_LOCK(); if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { BPF_UNLOCK(); free(xbdbuf, M_BPF); return (ENOMEM); } index = 0; LIST_FOREACH(bp, &bpf_iflist, bif_next) { BPFIF_RLOCK(bp); /* Send writers-only first */ LIST_FOREACH(bd, &bp->bif_wlist, bd_next) { xbd = &xbdbuf[index++]; BPFD_LOCK(bd); bpfstats_fill_xbpf(xbd, bd); BPFD_UNLOCK(bd); } LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { xbd = &xbdbuf[index++]; BPFD_LOCK(bd); bpfstats_fill_xbpf(xbd, bd); BPFD_UNLOCK(bd); } BPFIF_RUNLOCK(bp); } BPF_UNLOCK(); error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); free(xbdbuf, M_BPF); return (error); } SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); #else /* !DEV_BPF && !NETGRAPH_BPF */ /* * NOP stubs to allow bpf-using drivers to load and function. * * A 'better' implementation would allow the core bpf functionality * to be loaded at runtime. */ static struct bpf_if bp_null; void bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) { } void bpf_mtap(struct bpf_if *bp, struct mbuf *m) { } void bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) { } void bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) { bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); } void bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) { *driverp = &bp_null; } void bpfdetach(struct ifnet *ifp) { } u_int bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) { return -1; /* "no filter" behaviour */ } int bpf_validate(const struct bpf_insn *f, int len) { return 0; /* false */ } #endif /* !DEV_BPF && !NETGRAPH_BPF */ #ifdef DDB static void bpf_show_bpf_if(struct bpf_if *bpf_if) { if (bpf_if == NULL) return; db_printf("%p:\n", bpf_if); #define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e); /* bif_ext.bif_next */ /* bif_ext.bif_dlist */ BPF_DB_PRINTF("%#x", bif_dlt); BPF_DB_PRINTF("%u", bif_hdrlen); BPF_DB_PRINTF("%p", bif_ifp); /* bif_lock */ /* bif_wlist */ BPF_DB_PRINTF("%#x", bif_flags); } DB_SHOW_COMMAND(bpf_if, db_show_bpf_if) { if (!have_addr) { db_printf("usage: show bpf_if \n"); return; } bpf_show_bpf_if((struct bpf_if *)addr); } #endif Index: stable/11/sys/net80211/ieee80211_ht.c =================================================================== --- stable/11/sys/net80211/ieee80211_ht.c (revision 330445) +++ stable/11/sys/net80211/ieee80211_ht.c (revision 330446) @@ -1,2992 +1,2992 @@ /*- * Copyright (c) 2007-2008 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #ifdef __FreeBSD__ __FBSDID("$FreeBSD$"); #endif /* * IEEE 802.11n protocol support. */ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include /* define here, used throughout file */ #define MS(_v, _f) (((_v) & _f) >> _f##_S) #define SM(_v, _f) (((_v) << _f##_S) & _f) const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = { { 13, 14, 27, 30 }, /* MCS 0 */ { 26, 29, 54, 60 }, /* MCS 1 */ { 39, 43, 81, 90 }, /* MCS 2 */ { 52, 58, 108, 120 }, /* MCS 3 */ { 78, 87, 162, 180 }, /* MCS 4 */ { 104, 116, 216, 240 }, /* MCS 5 */ { 117, 130, 243, 270 }, /* MCS 6 */ { 130, 144, 270, 300 }, /* MCS 7 */ { 26, 29, 54, 60 }, /* MCS 8 */ { 52, 58, 108, 120 }, /* MCS 9 */ { 78, 87, 162, 180 }, /* MCS 10 */ { 104, 116, 216, 240 }, /* MCS 11 */ { 156, 173, 324, 360 }, /* MCS 12 */ { 208, 231, 432, 480 }, /* MCS 13 */ { 234, 260, 486, 540 }, /* MCS 14 */ { 260, 289, 540, 600 }, /* MCS 15 */ { 39, 43, 81, 90 }, /* MCS 16 */ { 78, 87, 162, 180 }, /* MCS 17 */ { 117, 130, 243, 270 }, /* MCS 18 */ { 156, 173, 324, 360 }, /* MCS 19 */ { 234, 260, 486, 540 }, /* MCS 20 */ { 312, 347, 648, 720 }, /* MCS 21 */ { 351, 390, 729, 810 }, /* MCS 22 */ { 390, 433, 810, 900 }, /* MCS 23 */ { 52, 58, 108, 120 }, /* MCS 24 */ { 104, 116, 216, 240 }, /* MCS 25 */ { 156, 173, 324, 360 }, /* MCS 26 */ { 208, 231, 432, 480 }, /* MCS 27 */ { 312, 347, 648, 720 }, /* MCS 28 */ { 416, 462, 864, 960 }, /* MCS 29 */ { 468, 520, 972, 1080 }, /* MCS 30 */ { 520, 578, 1080, 1200 }, /* MCS 31 */ { 0, 0, 12, 13 }, /* MCS 32 */ { 78, 87, 162, 180 }, /* MCS 33 */ { 104, 116, 216, 240 }, /* MCS 34 */ { 130, 144, 270, 300 }, /* MCS 35 */ { 117, 130, 243, 270 }, /* MCS 36 */ { 156, 173, 324, 360 }, /* MCS 37 */ { 195, 217, 405, 450 }, /* MCS 38 */ { 104, 116, 216, 240 }, /* MCS 39 */ { 130, 144, 270, 300 }, /* MCS 40 */ { 130, 144, 270, 300 }, /* MCS 41 */ { 156, 173, 324, 360 }, /* MCS 42 */ { 182, 202, 378, 420 }, /* MCS 43 */ { 182, 202, 378, 420 }, /* MCS 44 */ { 208, 231, 432, 480 }, /* MCS 45 */ { 156, 173, 324, 360 }, /* MCS 46 */ { 195, 217, 405, 450 }, /* MCS 47 */ { 195, 217, 405, 450 }, /* MCS 48 */ { 234, 260, 486, 540 }, /* MCS 49 */ { 273, 303, 567, 630 }, /* MCS 50 */ { 273, 303, 567, 630 }, /* MCS 51 */ { 312, 347, 648, 720 }, /* MCS 52 */ { 130, 144, 270, 300 }, /* MCS 53 */ { 156, 173, 324, 360 }, /* MCS 54 */ { 182, 202, 378, 420 }, /* MCS 55 */ { 156, 173, 324, 360 }, /* MCS 56 */ { 182, 202, 378, 420 }, /* MCS 57 */ { 208, 231, 432, 480 }, /* MCS 58 */ { 234, 260, 486, 540 }, /* MCS 59 */ { 208, 231, 432, 480 }, /* MCS 60 */ { 234, 260, 486, 540 }, /* MCS 61 */ { 260, 289, 540, 600 }, /* MCS 62 */ { 260, 289, 540, 600 }, /* MCS 63 */ { 286, 318, 594, 660 }, /* MCS 64 */ { 
195, 217, 405, 450 }, /* MCS 65 */ { 234, 260, 486, 540 }, /* MCS 66 */ { 273, 303, 567, 630 }, /* MCS 67 */ { 234, 260, 486, 540 }, /* MCS 68 */ { 273, 303, 567, 630 }, /* MCS 69 */ { 312, 347, 648, 720 }, /* MCS 70 */ { 351, 390, 729, 810 }, /* MCS 71 */ { 312, 347, 648, 720 }, /* MCS 72 */ { 351, 390, 729, 810 }, /* MCS 73 */ { 390, 433, 810, 900 }, /* MCS 74 */ { 390, 433, 810, 900 }, /* MCS 75 */ { 429, 477, 891, 990 }, /* MCS 76 */ }; static int ieee80211_ampdu_age = -1; /* threshold for ampdu reorder q (ms) */ SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I", "AMPDU max reorder age (ms)"); static int ieee80211_recv_bar_ena = 1; SYSCTL_INT(_net_wlan, OID_AUTO, recv_bar, CTLFLAG_RW, &ieee80211_recv_bar_ena, 0, "BAR frame processing (ena/dis)"); static int ieee80211_addba_timeout = -1;/* timeout for ADDBA response */ SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I", "ADDBA request timeout (ms)"); static int ieee80211_addba_backoff = -1;/* backoff after max ADDBA requests */ SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I", "ADDBA request backoff (ms)"); static int ieee80211_addba_maxtries = 3;/* max ADDBA requests before backoff */ SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLFLAG_RW, &ieee80211_addba_maxtries, 0, "max ADDBA requests sent before backoff"); static int ieee80211_bar_timeout = -1; /* timeout waiting for BAR response */ static int ieee80211_bar_maxtries = 50;/* max BAR requests before DELBA */ static ieee80211_recv_action_func ht_recv_action_ba_addba_request; static ieee80211_recv_action_func ht_recv_action_ba_addba_response; static ieee80211_recv_action_func ht_recv_action_ba_delba; static ieee80211_recv_action_func ht_recv_action_ht_mimopwrsave; static ieee80211_recv_action_func ht_recv_action_ht_txchwidth; static ieee80211_send_action_func ht_send_action_ba_addba; static ieee80211_send_action_func ht_send_action_ba_delba; static ieee80211_send_action_func ht_send_action_ht_txchwidth; static void ieee80211_ht_init(void) { /* * Setup HT parameters that depend on the clock frequency. */ ieee80211_ampdu_age = msecs_to_ticks(500); ieee80211_addba_timeout = msecs_to_ticks(250); ieee80211_addba_backoff = msecs_to_ticks(10*1000); ieee80211_bar_timeout = msecs_to_ticks(250); /* * Register action frame handlers.
*/ ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, ht_recv_action_ba_addba_request); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, ht_recv_action_ba_addba_response); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_DELBA, ht_recv_action_ba_delba); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_HT, IEEE80211_ACTION_HT_MIMOPWRSAVE, ht_recv_action_ht_mimopwrsave); ieee80211_recv_action_register(IEEE80211_ACTION_CAT_HT, IEEE80211_ACTION_HT_TXCHWIDTH, ht_recv_action_ht_txchwidth); ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, ht_send_action_ba_addba); ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, ht_send_action_ba_addba); ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_DELBA, ht_send_action_ba_delba); ieee80211_send_action_register(IEEE80211_ACTION_CAT_HT, IEEE80211_ACTION_HT_TXCHWIDTH, ht_send_action_ht_txchwidth); } SYSINIT(wlan_ht, SI_SUB_DRIVERS, SI_ORDER_FIRST, ieee80211_ht_init, NULL); static int ieee80211_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); static int ieee80211_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout); static int ieee80211_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int code, int baparamset, int batimeout); static void ieee80211_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); static void null_addba_response_timeout(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); static void ieee80211_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status); static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap); static void bar_stop_timer(struct ieee80211_tx_ampdu *tap); static int ampdu_rx_start(struct ieee80211_node *, struct ieee80211_rx_ampdu *, int baparamset, int batimeout, int baseqctl); static void ampdu_rx_stop(struct ieee80211_node *, struct ieee80211_rx_ampdu *); void ieee80211_ht_attach(struct ieee80211com *ic) { /* setup default aggregation policy */ ic->ic_recv_action = ieee80211_recv_action; ic->ic_send_action = ieee80211_send_action; ic->ic_ampdu_enable = ieee80211_ampdu_enable; ic->ic_addba_request = ieee80211_addba_request; ic->ic_addba_response = ieee80211_addba_response; ic->ic_addba_response_timeout = null_addba_response_timeout; ic->ic_addba_stop = ieee80211_addba_stop; ic->ic_bar_response = ieee80211_bar_response; ic->ic_ampdu_rx_start = ampdu_rx_start; ic->ic_ampdu_rx_stop = ampdu_rx_stop; ic->ic_htprotmode = IEEE80211_PROT_RTSCTS; ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_PURE; } void ieee80211_ht_detach(struct ieee80211com *ic) { } void ieee80211_ht_vattach(struct ieee80211vap *vap) { /* driver can override defaults */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_8K; vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_NA; vap->iv_ampdu_limit = vap->iv_ampdu_rxmax; vap->iv_amsdu_limit = vap->iv_htcaps & IEEE80211_HTCAP_MAXAMSDU; /* tx aggregation traffic thresholds */ vap->iv_ampdu_mintraffic[WME_AC_BK] = 128; vap->iv_ampdu_mintraffic[WME_AC_BE] = 64; vap->iv_ampdu_mintraffic[WME_AC_VO] = 32; vap->iv_ampdu_mintraffic[WME_AC_VI] = 32; if (vap->iv_htcaps & IEEE80211_HTC_HT) { /* * Device is HT capable; enable all HT-related * facilities by default. * XXX these choices may be too aggressive. 
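* * (Added operational note: an overly aggressive default can usually be reverted per interface at runtime, e.g. "ifconfig wlan0 -ampdu" or "ifconfig wlan0 -shortgi"; see ifconfig(8).)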
*/ vap->iv_flags_ht |= IEEE80211_FHT_HT | IEEE80211_FHT_HTCOMPAT ; if (vap->iv_htcaps & IEEE80211_HTCAP_SHORTGI20) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI20; /* XXX infer from channel list? */ if (vap->iv_htcaps & IEEE80211_HTCAP_CHWIDTH40) { vap->iv_flags_ht |= IEEE80211_FHT_USEHT40; if (vap->iv_htcaps & IEEE80211_HTCAP_SHORTGI40) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI40; } /* enable RIFS if capable */ if (vap->iv_htcaps & IEEE80211_HTC_RIFS) vap->iv_flags_ht |= IEEE80211_FHT_RIFS; /* NB: A-MPDU and A-MSDU rx are mandated, these are tx only */ vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_RX; if (vap->iv_htcaps & IEEE80211_HTC_AMPDU) vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_TX; vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_RX; if (vap->iv_htcaps & IEEE80211_HTC_AMSDU) vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_TX; if (vap->iv_htcaps & IEEE80211_HTCAP_TXSTBC) vap->iv_flags_ht |= IEEE80211_FHT_STBC_TX; if (vap->iv_htcaps & IEEE80211_HTCAP_RXSTBC) vap->iv_flags_ht |= IEEE80211_FHT_STBC_RX; } /* NB: disable default legacy WDS, too many issues right now */ if (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY) vap->iv_flags_ht &= ~IEEE80211_FHT_HT; } void ieee80211_ht_vdetach(struct ieee80211vap *vap) { } static int ht_getrate(struct ieee80211com *ic, int index, enum ieee80211_phymode mode, int ratetype) { int mword, rate; mword = ieee80211_rate2media(ic, index | IEEE80211_RATE_MCS, mode); if (IFM_SUBTYPE(mword) != IFM_IEEE80211_MCS) return (0); switch (ratetype) { case 0: rate = ieee80211_htrates[index].ht20_rate_800ns; break; case 1: rate = ieee80211_htrates[index].ht20_rate_400ns; break; case 2: rate = ieee80211_htrates[index].ht40_rate_800ns; break; default: rate = ieee80211_htrates[index].ht40_rate_400ns; break; } return (rate); } static struct printranges { int minmcs; int maxmcs; int txstream; int ratetype; int htcapflags; } ranges[] = { { 0, 7, 1, 0, 0 }, { 8, 15, 2, 0, 0 }, { 16, 23, 3, 0, 0 }, { 24, 31, 4, 0, 0 }, { 32, 0, 1, 2, IEEE80211_HTC_TXMCS32 }, { 33, 38, 2, 0, IEEE80211_HTC_TXUNEQUAL }, { 39, 52, 3, 0, IEEE80211_HTC_TXUNEQUAL }, { 53, 76, 4, 0, IEEE80211_HTC_TXUNEQUAL }, { 0, 0, 0, 0, 0 }, }; static void ht_rateprint(struct ieee80211com *ic, enum ieee80211_phymode mode, int ratetype) { int minrate, maxrate; struct printranges *range; for (range = ranges; range->txstream != 0; range++) { if (ic->ic_txstream < range->txstream) continue; if (range->htcapflags && (ic->ic_htcaps & range->htcapflags) == 0) continue; if (ratetype < range->ratetype) continue; minrate = ht_getrate(ic, range->minmcs, mode, ratetype); maxrate = ht_getrate(ic, range->maxmcs, mode, ratetype); if (range->maxmcs) { ic_printf(ic, "MCS %d-%d: %d%sMbps - %d%sMbps\n", range->minmcs, range->maxmcs, minrate/2, ((minrate & 0x1) != 0 ? ".5" : ""), maxrate/2, ((maxrate & 0x1) != 0 ? ".5" : "")); } else { ic_printf(ic, "MCS %d: %d%sMbps\n", range->minmcs, minrate/2, ((minrate & 0x1) != 0 ? 
".5" : "")); } } } static void ht_announce(struct ieee80211com *ic, enum ieee80211_phymode mode) { const char *modestr = ieee80211_phymode_name[mode]; ic_printf(ic, "%s MCS 20MHz\n", modestr); ht_rateprint(ic, mode, 0); if (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20) { ic_printf(ic, "%s MCS 20MHz SGI\n", modestr); ht_rateprint(ic, mode, 1); } if (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) { ic_printf(ic, "%s MCS 40MHz:\n", modestr); ht_rateprint(ic, mode, 2); } if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40)) { ic_printf(ic, "%s MCS 40MHz SGI:\n", modestr); ht_rateprint(ic, mode, 3); } } void ieee80211_ht_announce(struct ieee80211com *ic) { if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA) || isset(ic->ic_modecaps, IEEE80211_MODE_11NG)) ic_printf(ic, "%dT%dR\n", ic->ic_txstream, ic->ic_rxstream); if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA)) ht_announce(ic, IEEE80211_MODE_11NA); if (isset(ic->ic_modecaps, IEEE80211_MODE_11NG)) ht_announce(ic, IEEE80211_MODE_11NG); } static struct ieee80211_htrateset htrateset; const struct ieee80211_htrateset * ieee80211_get_suphtrates(struct ieee80211com *ic, const struct ieee80211_channel *c) { #define ADDRATE(x) do { \ htrateset.rs_rates[htrateset.rs_nrates] = x; \ htrateset.rs_nrates++; \ } while (0) int i; memset(&htrateset, 0, sizeof(struct ieee80211_htrateset)); for (i = 0; i < ic->ic_txstream * 8; i++) ADDRATE(i); if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTC_TXMCS32)) ADDRATE(32); if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) { if (ic->ic_txstream >= 2) { for (i = 33; i <= 38; i++) ADDRATE(i); } if (ic->ic_txstream >= 3) { for (i = 39; i <= 52; i++) ADDRATE(i); } if (ic->ic_txstream == 4) { for (i = 53; i <= 76; i++) ADDRATE(i); } } return &htrateset; #undef ADDRATE } /* * Receive processing. */ /* * Decap the encapsulated A-MSDU frames and dispatch all but * the last for delivery. The last frame is returned for * delivery via the normal path. */ struct mbuf * ieee80211_decap_amsdu(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211vap *vap = ni->ni_vap; int framelen; struct mbuf *n; /* discard 802.3 header inserted by ieee80211_decap */ m_adj(m, sizeof(struct ether_header)); vap->iv_stats.is_amsdu_decap++; for (;;) { /* * Decap the first frame, bust it apart from the * remainder and deliver. We leave the last frame * delivery to the caller (for consistency with other * code paths, could also do it here). */ m = ieee80211_decap1(m, &framelen); if (m == NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, "a-msdu", "%s", "decap failed"); vap->iv_stats.is_amsdu_tooshort++; return NULL; } if (m->m_pkthdr.len == framelen) break; n = m_split(m, framelen, M_NOWAIT); if (n == NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, "a-msdu", "%s", "unable to split encapsulated frames"); vap->iv_stats.is_amsdu_split++; m_freem(m); /* NB: must reclaim */ return NULL; } vap->iv_deliver_data(vap, ni, m); /* * Remove frame contents; each intermediate frame * is required to be aligned to a 4-byte boundary. */ m = n; m_adj(m, roundup2(framelen, 4) - framelen); /* padding */ } return m; /* last delivered by caller */ } /* * Purge all frames in the A-MPDU re-order queue. 
*/ static void ampdu_rx_purge(struct ieee80211_rx_ampdu *rap) { struct mbuf *m; int i; for (i = 0; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m != NULL) { rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; m_freem(m); if (--rap->rxa_qframes == 0) break; } } KASSERT(rap->rxa_qbytes == 0 && rap->rxa_qframes == 0, ("lost %u data, %u frames on ampdu rx q", rap->rxa_qbytes, rap->rxa_qframes)); } /* * Start A-MPDU rx/re-order processing for the specified TID. */ static int ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap, int baparamset, int batimeout, int baseqctl) { int bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); if (rap->rxa_flags & IEEE80211_AGGR_RUNNING) { /* * AMPDU previously setup and not terminated with a DELBA, * flush the reorder q's in case anything remains. */ ampdu_rx_purge(rap); } memset(rap, 0, sizeof(*rap)); rap->rxa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); rap->rxa_start = MS(baseqctl, IEEE80211_BASEQ_START); rap->rxa_flags |= IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND; return 0; } /* * Public function; manually setup the RX ampdu state. */ int ieee80211_ampdu_rx_start_ext(struct ieee80211_node *ni, int tid, int seq, int baw) { struct ieee80211_rx_ampdu *rap; /* XXX TODO: sanity check tid, seq, baw */ rap = &ni->ni_rx_ampdu[tid]; if (rap->rxa_flags & IEEE80211_AGGR_RUNNING) { /* * AMPDU previously setup and not terminated with a DELBA, * flush the reorder q's in case anything remains. */ ampdu_rx_purge(rap); } memset(rap, 0, sizeof(*rap)); rap->rxa_wnd = (baw == 0) ? IEEE80211_AGGR_BAWMAX : min(baw, IEEE80211_AGGR_BAWMAX); rap->rxa_start = seq; rap->rxa_flags |= IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: tid=%d, start=%d, wnd=%d, flags=0x%08x\n", __func__, tid, seq, rap->rxa_wnd, rap->rxa_flags); return 0; } /* * Stop A-MPDU rx processing for the specified TID. */ static void ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { ampdu_rx_purge(rap); rap->rxa_flags &= ~(IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND); } /* * Dispatch a frame from the A-MPDU reorder queue. The * frame is fed back into ieee80211_input marked with an * M_AMPDU_MPDU flag so it doesn't come back to us (it also * permits ieee80211_input to optimize re-processing). */ static __inline void ampdu_dispatch(struct ieee80211_node *ni, struct mbuf *m) { m->m_flags |= M_AMPDU_MPDU; /* bypass normal processing */ /* NB: rssi and noise are ignored w/ M_AMPDU_MPDU set */ (void) ieee80211_input(ni, m, 0, 0); } /* * Dispatch as many frames as possible from the re-order queue. * Frames will always be "at the front"; we process all frames * up to the first empty slot in the window. On completion we * cleanup state if there are still pending frames in the current * BA window. We assume the frame at slot 0 is already handled * by the caller; we always start at slot 1. */ static void ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct mbuf *m; int i; /* flush run of frames */ for (i = 1; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m == NULL) break; rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; ampdu_dispatch(ni, m); } /* * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window.
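* * Hedged worked example (added for illustration): if slots 1 and 2 were dispatched above so the loop stopped at i == 3, a frame still queued at slot 5 is moved by the loop below to slot 5 - 3 == 2, matching the window start advancing by i sequence numbers. */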
*/ if (rap->rxa_qframes != 0) { int n = rap->rxa_qframes, j; for (j = i+1; j < rap->rxa_wnd; j++) { if (rap->rxa_m[j] != NULL) { rap->rxa_m[j-i] = rap->rxa_m[j]; rap->rxa_m[j] = NULL; if (--n == 0) break; } } KASSERT(n == 0, ("lost %d frames", n)); vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; } /* * Adjust the start of the BA window to * reflect the frames just dispatched. */ rap->rxa_start = IEEE80211_SEQ_ADD(rap->rxa_start, i); vap->iv_stats.is_ampdu_rx_oor += i; } /* * Dispatch all frames in the A-MPDU re-order queue. */ static void ampdu_rx_flush(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { struct ieee80211vap *vap = ni->ni_vap; struct mbuf *m; int i; for (i = 0; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m == NULL) continue; rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; vap->iv_stats.is_ampdu_rx_oor++; ampdu_dispatch(ni, m); if (rap->rxa_qframes == 0) break; } } /* * Dispatch all frames in the A-MPDU re-order queue * preceding the specified sequence number. This logic * handles window moves due to a received MSDU or BAR. */ static void ampdu_rx_flush_upto(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap, ieee80211_seq winstart) { struct ieee80211vap *vap = ni->ni_vap; struct mbuf *m; ieee80211_seq seqno; int i; /* * Flush any complete MSDU's with a sequence number lower * than winstart. Gaps may exist. Note that we may actually * dispatch frames past winstart if a run continues; this is * an optimization that avoids having to do a separate pass * to dispatch frames after moving the BA window start. */ seqno = rap->rxa_start; for (i = 0; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m != NULL) { rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; vap->iv_stats.is_ampdu_rx_oor++; ampdu_dispatch(ni, m); } else { if (!IEEE80211_SEQ_BA_BEFORE(seqno, winstart)) break; } seqno = IEEE80211_SEQ_INC(seqno); } /* * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. */ if (rap->rxa_qframes != 0) { int n = rap->rxa_qframes, j; /* NB: this loop assumes i > 0 and/or rxa_m[0] is NULL */ KASSERT(rap->rxa_m[0] == NULL, ("%s: BA window slot 0 occupied", __func__)); for (j = i+1; j < rap->rxa_wnd; j++) { if (rap->rxa_m[j] != NULL) { rap->rxa_m[j-i] = rap->rxa_m[j]; rap->rxa_m[j] = NULL; if (--n == 0) break; } } KASSERT(n == 0, ("%s: lost %d frames, qframes %d off %d " "BA win <%d:%d> winstart %d", __func__, n, rap->rxa_qframes, i, rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), winstart)); vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; } /* * Move the start of the BA window; we use the * sequence number of the last MSDU that was * passed up the stack+1 or winstart if stopped on * a gap in the reorder buffer. */ rap->rxa_start = seqno; } /* * Process a received QoS data frame for an HT station. Handle * A-MPDU reordering: if this frame is received out of order * and falls within the BA window hold onto it. Otherwise if * this frame completes a run, flush any pending frames. We * return 1 if the frame is consumed. A 0 is returned if * the frame should be processed normally by the caller. 
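* * Hedged caller sketch (the real call site is in the rx input path; shown only for illustration): * * if ((m->m_flags & M_AMPDU) && * ieee80211_ampdu_reorder(ni, m) != 0) * return; (frame was consumed or freed by the reorder code) */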
int ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m) { #define IEEE80211_FC0_QOSDATA \ (IEEE80211_FC0_TYPE_DATA|IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_VERSION_0) #define PROCESS 0 /* caller should process frame */ #define CONSUMED 1 /* frame consumed, caller does nothing */ struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_qosframe *wh; struct ieee80211_rx_ampdu *rap; ieee80211_seq rxseq; uint8_t tid; int off; KASSERT((m->m_flags & (M_AMPDU | M_AMPDU_MPDU)) == M_AMPDU, ("!a-mpdu or already re-ordered, flags 0x%x", m->m_flags)); KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT sta")); /* NB: m_len known to be sufficient */ wh = mtod(m, struct ieee80211_qosframe *); if (wh->i_fc[0] != IEEE80211_FC0_QOSDATA) { /* * Not QoS data, shouldn't get here but just * return it to the caller for processing. */ return PROCESS; } if (IEEE80211_IS_DSTODS(wh)) tid = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0]; else tid = wh->i_qos[0]; tid &= IEEE80211_QOS_TID; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { /* * No ADDBA request yet, don't touch. */ return PROCESS; } rxseq = le16toh(*(uint16_t *)wh->i_seq); if ((rxseq & IEEE80211_SEQ_FRAG_MASK) != 0) { /* * Fragments are not allowed; toss. */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "A-MPDU", "fragment, rxseq 0x%x tid %u%s", rxseq, tid, wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : ""); vap->iv_stats.is_ampdu_rx_drop++; IEEE80211_NODE_STAT(ni, rx_drop); m_freem(m); return CONSUMED; } rxseq >>= IEEE80211_SEQ_SEQ_SHIFT; rap->rxa_nframes++; again: if (rxseq == rap->rxa_start) { /* * First frame in window. */ if (rap->rxa_qframes != 0) { /* * Dispatch as many packets as we can. */ KASSERT(rap->rxa_m[0] == NULL, ("unexpected dup")); ampdu_dispatch(ni, m); ampdu_rx_dispatch(rap, ni); return CONSUMED; } else { /* * In order; advance window and notify * caller to dispatch directly. */ rap->rxa_start = IEEE80211_SEQ_INC(rxseq); return PROCESS; } } /* * Frame is out of order; store if in the BA window. */ /* calculate offset in BA window */ off = IEEE80211_SEQ_SUB(rxseq, rap->rxa_start); if (off < rap->rxa_wnd) { /* * Common case (hopefully): in the BA window. * Sec 9.10.7.6.2 a) (p.137) */ /* * Check for frames sitting too long in the reorder queue. * This should only ever happen if frames are missing and the * sender does not otherwise notify us (e.g. with a * BAR to move the window). Typically this happens because * of vendor bugs that cause the sequence number to jump. * When this happens we get a gap in the reorder queue that * leaves frames sitting on the queue until they get pushed * out due to window moves. When the vendor does not send a * BAR, this move only happens due to explicit packet sends. * * NB: we only track the time of the oldest frame in the * reorder q; this means that if we flush we might push * frames that are still "new"; if this happens then subsequent * frames will result in BA window moves which cost something * but are still better than a big throughput dip. */ if (rap->rxa_qframes != 0) { /* XXX honor batimeout? */ if (ticks - rap->rxa_age > ieee80211_ampdu_age) { /* * Too long since we received the first * frame; flush the reorder buffer. */ if (rap->rxa_qframes != 0) { vap->iv_stats.is_ampdu_rx_age += rap->rxa_qframes; ampdu_rx_flush(ni, rap); } rap->rxa_start = IEEE80211_SEQ_INC(rxseq); return PROCESS; } } else { /* * First frame, start aging timer.
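* (Added illustration: with the default ieee80211_ampdu_age of 500 ms set in ieee80211_ht_init(), a gap in the sequence space holds queued frames for at most roughly 500 ms before the age check above flushes them.)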
*/ rap->rxa_age = ticks; } /* save packet */ if (rap->rxa_m[off] == NULL) { rap->rxa_m[off] = m; rap->rxa_qframes++; rap->rxa_qbytes += m->m_pkthdr.len; vap->iv_stats.is_ampdu_rx_reorder++; } else { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "a-mpdu duplicate", "seqno %u tid %u BA win <%u:%u>", rxseq, tid, rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1)); vap->iv_stats.is_rx_dup++; IEEE80211_NODE_STAT(ni, rx_dup); m_freem(m); } return CONSUMED; } if (off < IEEE80211_SEQ_BA_RANGE) { /* * Outside the BA window, but within range; * flush the reorder q and move the window. * Sec 9.10.7.6.2 b) (p.138) */ IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "move BA win <%u:%u> (%u frames) rxseq %u tid %u", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid); vap->iv_stats.is_ampdu_rx_move++; /* * The spec says to flush frames up to but not including: * WinStart_B = rxseq - rap->rxa_wnd + 1 * Then insert the frame or notify the caller to process * it immediately. We can safely do this by just starting * over again because we know the frame will now be within * the BA window. */ /* NB: rxa_wnd known to be >0 */ ampdu_rx_flush_upto(ni, rap, IEEE80211_SEQ_SUB(rxseq, rap->rxa_wnd-1)); goto again; } else { /* * Outside the BA window and out of range; toss. * Sec 9.10.7.6.2 c) (p.138) */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "MPDU", "BA win <%u:%u> (%u frames) rxseq %u tid %u%s", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid, wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : ""); vap->iv_stats.is_ampdu_rx_drop++; IEEE80211_NODE_STAT(ni, rx_drop); m_freem(m); return CONSUMED; } #undef CONSUMED #undef PROCESS #undef IEEE80211_FC0_QOSDATA } /* * Process a BAR ctl frame. Dispatch all frames up to * the sequence number of the frame. If this frame is * out of range it's discarded. */ void ieee80211_recv_bar(struct ieee80211_node *ni, struct mbuf *m0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_frame_bar *wh; struct ieee80211_rx_ampdu *rap; ieee80211_seq rxseq; int tid, off; if (!ieee80211_recv_bar_ena) { #if 0 IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_11N, ni->ni_macaddr, "BAR", "%s", "processing disabled"); #endif vap->iv_stats.is_ampdu_bar_bad++; return; } wh = mtod(m0, struct ieee80211_frame_bar *); /* XXX check basic BAR */ tid = MS(le16toh(wh->i_ctl), IEEE80211_BAR_TID); rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { /* * No ADDBA request yet, don't touch. */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "BAR", "no BA stream, tid %u", tid); vap->iv_stats.is_ampdu_bar_bad++; return; } vap->iv_stats.is_ampdu_bar_rx++; rxseq = le16toh(wh->i_seq) >> IEEE80211_SEQ_SEQ_SHIFT; if (rxseq == rap->rxa_start) return; /* calculate offset in BA window */ off = IEEE80211_SEQ_SUB(rxseq, rap->rxa_start); if (off < IEEE80211_SEQ_BA_RANGE) { /* * Flush the reorder q up to rxseq and move the window. 
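* * Hedged worked example (added): with rxa_start = 100, rxa_wnd = 64 and a BAR carrying rxseq = 130, anything queued for sequence numbers 100..129 is flushed and the window start becomes 130; the explicit assignment below handles the case where rxseq lies at or beyond the current window end.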
* Sec 9.10.7.6.3 a) (p.138) */ IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "BAR moves BA win <%u:%u> (%u frames) rxseq %u tid %u", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid); vap->iv_stats.is_ampdu_bar_move++; ampdu_rx_flush_upto(ni, rap, rxseq); if (off >= rap->rxa_wnd) { /* * BAR specifies a window start to the right of BA * window; we must move it explicitly since * ampdu_rx_flush_upto will not. */ rap->rxa_start = rxseq; } } else { /* * Out of range; toss. * Sec 9.10.7.6.3 b) (p.138) */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr, "BAR", "BA win <%u:%u> (%u frames) rxseq %u tid %u%s", rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), rap->rxa_qframes, rxseq, tid, wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : ""); vap->iv_stats.is_ampdu_bar_oow++; IEEE80211_NODE_STAT(ni, rx_drop); } } /* * Setup HT-specific state in a node. Called only * when HT use is negotiated so we don't do extra * work for temporary and/or legacy sta's. */ void ieee80211_ht_node_init(struct ieee80211_node *ni) { struct ieee80211_tx_ampdu *tap; int tid; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: called (%p)", __func__, ni); if (ni->ni_flags & IEEE80211_NODE_HT) { /* * Clean AMPDU state on re-associate. This handles the case * where a station leaves w/o notifying us and then returns * before node is reaped for inactivity. */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: calling cleanup (%p)", __func__, ni); ieee80211_ht_node_cleanup(ni); } for (tid = 0; tid < WME_NUM_TID; tid++) { tap = &ni->ni_tx_ampdu[tid]; tap->txa_tid = tid; tap->txa_ni = ni; ieee80211_txampdu_init_pps(tap); /* NB: further initialization deferred */ } ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU; } /* * Cleanup HT-specific state in a node. Called only * when HT use has been marked. */ void ieee80211_ht_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; int i; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: called (%p)", __func__, ni); KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT node")); /* XXX optimize this */ for (i = 0; i < WME_NUM_TID; i++) { struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[i]; if (tap->txa_flags & IEEE80211_AGGR_SETUP) ampdu_tx_stop(tap); } for (i = 0; i < WME_NUM_TID; i++) ic->ic_ampdu_rx_stop(ni, &ni->ni_rx_ampdu[i]); ni->ni_htcap = 0; ni->ni_flags &= ~IEEE80211_NODE_HT_ALL; } /* * Age out HT resources for a station. */ void ieee80211_ht_node_age(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; uint8_t tid; KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT sta")); for (tid = 0; tid < WME_NUM_TID; tid++) { struct ieee80211_rx_ampdu *rap; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) continue; if (rap->rxa_qframes == 0) continue; /* * Check for frames sitting too long in the reorder queue. * See above for more details on what's happening here. */ /* XXX honor batimeout? */ if (ticks - rap->rxa_age > ieee80211_ampdu_age) { /* * Too long since we received the first * frame; flush the reorder buffer. */ vap->iv_stats.is_ampdu_rx_age += rap->rxa_qframes; ampdu_rx_flush(ni, rap); } } } static struct ieee80211_channel * findhtchan(struct ieee80211com *ic, struct ieee80211_channel *c, int htflags) { return ieee80211_find_channel(ic, c->ic_freq, (c->ic_flags &~ IEEE80211_CHAN_HT) | htflags); } /* * Adjust a channel to be HT/non-HT according to the vap's configuration. 
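* * Hedged usage sketch (illustration only): a typical caller promotes or demotes its current channel roughly as c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan, vap->iv_flags_ht); passing the vap's FHT flags as the flags argument. */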
struct ieee80211_channel * ieee80211_ht_adjust_channel(struct ieee80211com *ic, struct ieee80211_channel *chan, int flags) { struct ieee80211_channel *c; if (flags & IEEE80211_FHT_HT) { /* promote to HT if possible */ if (flags & IEEE80211_FHT_USEHT40) { if (!IEEE80211_IS_CHAN_HT40(chan)) { /* NB: arbitrarily pick ht40+ over ht40- */ c = findhtchan(ic, chan, IEEE80211_CHAN_HT40U); if (c == NULL) c = findhtchan(ic, chan, IEEE80211_CHAN_HT40D); if (c == NULL) c = findhtchan(ic, chan, IEEE80211_CHAN_HT20); if (c != NULL) chan = c; } } else if (!IEEE80211_IS_CHAN_HT20(chan)) { c = findhtchan(ic, chan, IEEE80211_CHAN_HT20); if (c != NULL) chan = c; } } else if (IEEE80211_IS_CHAN_HT(chan)) { /* demote to legacy, HT use is disabled */ c = ieee80211_find_channel(ic, chan->ic_freq, chan->ic_flags &~ IEEE80211_CHAN_HT); if (c != NULL) chan = c; } return chan; } /* * Setup HT-specific state for a legacy WDS peer. */ void ieee80211_ht_wds_init(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_tx_ampdu *tap; int tid; KASSERT(vap->iv_flags_ht & IEEE80211_FHT_HT, ("no HT requested")); /* XXX check scan cache in case peer has an ap and we have info */ /* * If setup with a legacy channel, locate an HT channel. * Otherwise, if the inherited channel (from a companion * AP) is suitable, use it so we use the same location * for the extension channel. */ ni->ni_chan = ieee80211_ht_adjust_channel(ni->ni_ic, ni->ni_chan, ieee80211_htchanflags(ni->ni_chan)); ni->ni_htcap = 0; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI20; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { ni->ni_htcap |= IEEE80211_HTCAP_CHWIDTH40; ni->ni_chw = 40; if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan)) ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan)) ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_BELOW; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI40; } else { ni->ni_chw = 20; ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_NONE; } ni->ni_htctlchan = ni->ni_chan->ic_ieee; if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) ni->ni_flags |= IEEE80211_NODE_RIFS; /* XXX does it make sense to enable SMPS? */ ni->ni_htopmode = 0; /* XXX need protection state */ ni->ni_htstbc = 0; /* XXX need info */ for (tid = 0; tid < WME_NUM_TID; tid++) { tap = &ni->ni_tx_ampdu[tid]; tap->txa_tid = tid; ieee80211_txampdu_init_pps(tap); } /* NB: AMPDU tx/rx governed by IEEE80211_FHT_AMPDU_{TX,RX} */ ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU; } /* * Notify hostap vaps of a change in the HTINFO ie. */ static void htinfo_notify(struct ieee80211com *ic) { struct ieee80211vap *vap; int first = 1; IEEE80211_LOCK_ASSERT(ic); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { if (vap->iv_opmode != IEEE80211_M_HOSTAP) continue; if (vap->iv_state != IEEE80211_S_RUN || !IEEE80211_IS_CHAN_HT(vap->iv_bss->ni_chan)) continue; if (first) { IEEE80211_NOTE(vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, vap->iv_bss, "HT bss occupancy change: %d sta, %d ht, " "%d ht40%s, HT protmode now 0x%x" , ic->ic_sta_assoc , ic->ic_ht_sta_assoc , ic->ic_ht40_sta_assoc , (ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR) ? ", non-HT sta present" : "" , ic->ic_curhtprotmode); first = 0; } ieee80211_beacon_notify(vap, IEEE80211_BEACON_HTINFO); } } /* * Calculate HT protection mode from current * state and handle updates.
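* * Summary of the precedence implemented below (added restatement of the code for clarity): MIXED when some associated station is not HT capable; PROTOPT when an overlapping non-HT bss has been seen (IEEE80211_FHT_NONHT_PR); HT20PR when the bss channel is HT40 but some HT stations are 20 MHz only; PURE otherwise. */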
static void htinfo_update(struct ieee80211com *ic) { uint8_t protmode; if (ic->ic_sta_assoc != ic->ic_ht_sta_assoc) { protmode = IEEE80211_HTINFO_OPMODE_MIXED | IEEE80211_HTINFO_NONHT_PRESENT; } else if (ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR) { protmode = IEEE80211_HTINFO_OPMODE_PROTOPT | IEEE80211_HTINFO_NONHT_PRESENT; } else if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_HT40(ic->ic_bsschan) && ic->ic_sta_assoc != ic->ic_ht40_sta_assoc) { protmode = IEEE80211_HTINFO_OPMODE_HT20PR; } else { protmode = IEEE80211_HTINFO_OPMODE_PURE; } if (protmode != ic->ic_curhtprotmode) { ic->ic_curhtprotmode = protmode; htinfo_notify(ic); } } /* * Handle an HT station joining a BSS. */ void ieee80211_ht_node_join(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; IEEE80211_LOCK_ASSERT(ic); if (ni->ni_flags & IEEE80211_NODE_HT) { ic->ic_ht_sta_assoc++; if (ni->ni_chw == 40) ic->ic_ht40_sta_assoc++; } htinfo_update(ic); } /* * Handle an HT station leaving a BSS. */ void ieee80211_ht_node_leave(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; IEEE80211_LOCK_ASSERT(ic); if (ni->ni_flags & IEEE80211_NODE_HT) { ic->ic_ht_sta_assoc--; if (ni->ni_chw == 40) ic->ic_ht40_sta_assoc--; } htinfo_update(ic); } /* * Public version of htinfo_update; used for processing * beacon frames from overlapping bss. * * Caller can specify either IEEE80211_HTINFO_OPMODE_MIXED * (on receipt of a beacon that advertises MIXED) or * IEEE80211_HTINFO_OPMODE_PROTOPT (on receipt of a beacon * from an overlapping legacy bss). We treat MIXED with * a higher precedence than PROTOPT (i.e. we will not change * PROTOPT -> MIXED; only MIXED -> PROTOPT). This * corresponds to how we handle things in htinfo_update. */ void ieee80211_htprot_update(struct ieee80211com *ic, int protmode) { #define OPMODE(x) SM(x, IEEE80211_HTINFO_OPMODE) IEEE80211_LOCK(ic); /* track non-HT station presence */ KASSERT(protmode & IEEE80211_HTINFO_NONHT_PRESENT, ("protmode 0x%x", protmode)); ic->ic_flags_ht |= IEEE80211_FHT_NONHT_PR; ic->ic_lastnonht = ticks; if (protmode != ic->ic_curhtprotmode && (OPMODE(ic->ic_curhtprotmode) != IEEE80211_HTINFO_OPMODE_MIXED || OPMODE(protmode) == IEEE80211_HTINFO_OPMODE_PROTOPT)) { /* push beacon update */ ic->ic_curhtprotmode = protmode; htinfo_notify(ic); } IEEE80211_UNLOCK(ic); #undef OPMODE } /* * Time out presence of an overlapping bss with non-HT * stations. When operating in hostap mode we listen for * beacons from other stations and if we identify a non-HT * station is present we update the opmode field of the * HTINFO ie. To identify when all non-HT stations are * gone we time out this condition. */ void ieee80211_ht_timeout(struct ieee80211com *ic) { IEEE80211_LOCK_ASSERT(ic); if ((ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR) && ieee80211_time_after(ticks, ic->ic_lastnonht + IEEE80211_NONHT_PRESENT_AGE)) { #if 0 IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "%s", "time out non-HT STA present on channel"); #endif ic->ic_flags_ht &= ~IEEE80211_FHT_NONHT_PR; htinfo_update(ic); } } /* * Process an 802.11n HT capabilities ie. */ void ieee80211_parse_htcap(struct ieee80211_node *ni, const uint8_t *ie) { if (ie[0] == IEEE80211_ELEMID_VENDOR) { /* * Station used Vendor OUI ie to associate; * mark the node so when we respond we'll use * the Vendor OUI's and not the standard ie's.
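* * (Added note: the 4-byte skip below presumably steps over the vendor OUI/type prefix of the encapsulated element so that the standard structure offsets used afterwards line up.) */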
ni->ni_flags |= IEEE80211_NODE_HTCOMPAT; ie += 4; } else ni->ni_flags &= ~IEEE80211_NODE_HTCOMPAT; ni->ni_htcap = le16dec(ie + __offsetof(struct ieee80211_ie_htcap, hc_cap)); ni->ni_htparam = ie[__offsetof(struct ieee80211_ie_htcap, hc_param)]; } static void htinfo_parse(struct ieee80211_node *ni, const struct ieee80211_ie_htinfo *htinfo) { uint16_t w; ni->ni_htctlchan = htinfo->hi_ctrlchannel; ni->ni_ht2ndchan = SM(htinfo->hi_byte1, IEEE80211_HTINFO_2NDCHAN); w = le16dec(&htinfo->hi_byte2); ni->ni_htopmode = SM(w, IEEE80211_HTINFO_OPMODE); w = le16dec(&htinfo->hi_byte45); ni->ni_htstbc = SM(w, IEEE80211_HTINFO_BASIC_STBCMCS); } /* * Parse an 802.11n HT info ie and save useful information * to the node state. Note this does not effect any state * changes, such as a channel width change. */ void ieee80211_parse_htinfo(struct ieee80211_node *ni, const uint8_t *ie) { if (ie[0] == IEEE80211_ELEMID_VENDOR) ie += 4; htinfo_parse(ni, (const struct ieee80211_ie_htinfo *) ie); } /* * Handle 11n channel switch. Use the received HT ie's to * identify the right channel to use. If we cannot locate it * in the channel table then fall back to legacy operation. * Note that we use this information to identify the node's * channel only; the caller is responsible for ensuring any * required channel change is done (e.g. in sta mode when * parsing the contents of a beacon frame). */ static int htinfo_update_chw(struct ieee80211_node *ni, int htflags) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_channel *c; int chanflags; int ret = 0; chanflags = (ni->ni_chan->ic_flags &~ IEEE80211_CHAN_HT) | htflags; if (chanflags != ni->ni_chan->ic_flags) { /* XXX not right for ht40- */ c = ieee80211_find_channel(ic, ni->ni_chan->ic_freq, chanflags); if (c == NULL && (htflags & IEEE80211_CHAN_HT40)) { /* * No HT40 channel entry in our table; fall back * to HT20 operation. This should not happen. */ c = findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT20); #if 0 IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "no HT40 channel (freq %u), falling back to HT20", ni->ni_chan->ic_freq); #endif /* XXX stat */ } if (c != NULL && c != ni->ni_chan) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, "switch station to HT%d channel %u/0x%x", IEEE80211_IS_CHAN_HT40(c) ? 40 : 20, c->ic_freq, c->ic_flags); ni->ni_chan = c; ret = 1; } /* NB: caller responsible for forcing any channel change */ } /* update node's tx channel width */ ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan)? 40 : 20; return (ret); } /* * Update 11n MIMO PS state according to received htcap. */ static __inline int htcap_update_mimo_ps(struct ieee80211_node *ni) { uint16_t oflags = ni->ni_flags; switch (ni->ni_htcap & IEEE80211_HTCAP_SMPS) { case IEEE80211_HTCAP_SMPS_DYNAMIC: ni->ni_flags |= IEEE80211_NODE_MIMO_PS; ni->ni_flags |= IEEE80211_NODE_MIMO_RTS; break; case IEEE80211_HTCAP_SMPS_ENA: ni->ni_flags |= IEEE80211_NODE_MIMO_PS; ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; break; case IEEE80211_HTCAP_SMPS_OFF: default: /* disable on rx of reserved value */ ni->ni_flags &= ~IEEE80211_NODE_MIMO_PS; ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; break; } return (oflags ^ ni->ni_flags); } /* * Update short GI state according to received htcap * and local settings. */
*/ static __inline void htcap_update_shortgi(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; ni->ni_flags &= ~(IEEE80211_NODE_SGI20|IEEE80211_NODE_SGI40); if ((ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) && (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20)) ni->ni_flags |= IEEE80211_NODE_SGI20; if ((ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) && (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40)) ni->ni_flags |= IEEE80211_NODE_SGI40; } /* * Parse and update HT-related state extracted from * the HT cap and info ie's. */ int ieee80211_ht_updateparams(struct ieee80211_node *ni, const uint8_t *htcapie, const uint8_t *htinfoie) { struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htinfo *htinfo; int htflags; int ret = 0; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); if (htinfoie[0] == IEEE80211_ELEMID_VENDOR) htinfoie += 4; htinfo = (const struct ieee80211_ie_htinfo *) htinfoie; htinfo_parse(ni, htinfo); htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? IEEE80211_CHAN_HT20 : 0; /* NB: honor operating mode constraint */ if ((htinfo->hi_byte1 & IEEE80211_HTINFO_TXWIDTH_2040) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_ABOVE) htflags = IEEE80211_CHAN_HT40U; else if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_BELOW) htflags = IEEE80211_CHAN_HT40D; } if (htinfo_update_chw(ni, htflags)) ret = 1; if ((htinfo->hi_byte1 & IEEE80211_HTINFO_RIFSMODE_PERM) && (vap->iv_flags_ht & IEEE80211_FHT_RIFS)) ni->ni_flags |= IEEE80211_NODE_RIFS; else ni->ni_flags &= ~IEEE80211_NODE_RIFS; return (ret); } /* * Parse and update HT-related state extracted from the HT cap ie * for a station joining an HT BSS. */ void ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie) { struct ieee80211vap *vap = ni->ni_vap; int htflags; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); /* NB: honor operating mode constraint */ /* XXX 40 MHz intolerant */ htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? IEEE80211_CHAN_HT20 : 0; if ((ni->ni_htcap & IEEE80211_HTCAP_CHWIDTH40) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { if (IEEE80211_IS_CHAN_HT40U(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40U; else if (IEEE80211_IS_CHAN_HT40D(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40D; } (void) htinfo_update_chw(ni, htflags); } /* * Install received HT rate set by parsing the HT cap ie. 
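* * Hedged example of the stream-derived bounds computed below (added): with ic_txstream == 2, maxequalmcs is 2*8 - 1 == 15 (MCS 0-15) and, when IEEE80211_HTC_TXUNEQUAL is set, maxunequalmcs is 38, matching the MCS ranges of the ieee80211_htrates table above. */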
*/ int ieee80211_setup_htrates(struct ieee80211_node *ni, const uint8_t *ie, int flags) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htcap *htcap; struct ieee80211_htrateset *rs; int i, maxequalmcs, maxunequalmcs; maxequalmcs = ic->ic_txstream * 8 - 1; maxunequalmcs = 0; if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) { if (ic->ic_txstream >= 2) maxunequalmcs = 38; if (ic->ic_txstream >= 3) maxunequalmcs = 52; if (ic->ic_txstream >= 4) maxunequalmcs = 76; } rs = &ni->ni_htrates; memset(rs, 0, sizeof(*rs)); if (ie != NULL) { if (ie[0] == IEEE80211_ELEMID_VENDOR) ie += 4; htcap = (const struct ieee80211_ie_htcap *) ie; for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) { if (isclr(htcap->hc_mcsset, i)) continue; if (rs->rs_nrates == IEEE80211_HTRATE_MAXSIZE) { IEEE80211_NOTE(vap, IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni, "WARNING, HT rate set too large; only " "using %u rates", IEEE80211_HTRATE_MAXSIZE); vap->iv_stats.is_rx_rstoobig++; break; } if (i <= 31 && i > maxequalmcs) continue; if (i == 32 && (ic->ic_htcaps & IEEE80211_HTC_TXMCS32) == 0) continue; if (i > 32 && i > maxunequalmcs) continue; rs->rs_rates[rs->rs_nrates++] = i; } } return ieee80211_fix_rate(ni, (struct ieee80211_rateset *) rs, flags); } /* * Mark rates in a node's HT rate set as basic according * to the information in the supplied HT info ie. */ void ieee80211_setup_basic_htrates(struct ieee80211_node *ni, const uint8_t *ie) { const struct ieee80211_ie_htinfo *htinfo; struct ieee80211_htrateset *rs; int i, j; if (ie[0] == IEEE80211_ELEMID_VENDOR) ie += 4; htinfo = (const struct ieee80211_ie_htinfo *) ie; rs = &ni->ni_htrates; if (rs->rs_nrates == 0) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni, "%s", "WARNING, empty HT rate set"); return; } for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) { if (isclr(htinfo->hi_basicmcsset, i)) continue; for (j = 0; j < rs->rs_nrates; j++) if ((rs->rs_rates[j] & IEEE80211_RATE_VAL) == i) rs->rs_rates[j] |= IEEE80211_RATE_BASIC; } } static void ampdu_tx_setup(struct ieee80211_tx_ampdu *tap) { callout_init(&tap->txa_timer, 1); tap->txa_flags |= IEEE80211_AGGR_SETUP; tap->txa_lastsample = ticks; } static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap) { struct ieee80211_node *ni = tap->txa_ni; struct ieee80211com *ic = ni->ni_ic; IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); KASSERT(tap->txa_flags & IEEE80211_AGGR_SETUP, ("txa_flags 0x%x tid %d ac %d", tap->txa_flags, tap->txa_tid, TID_TO_WME_AC(tap->txa_tid))); /* * Stop BA stream if setup so driver has a chance * to reclaim any resources it might have allocated. */ ic->ic_addba_stop(ni, tap); /* * Stop any pending BAR transmit. */ bar_stop_timer(tap); /* * Reset packet estimate. */ ieee80211_txampdu_init_pps(tap); /* NB: clearing NAK means we may re-send ADDBA */ tap->txa_flags &= ~(IEEE80211_AGGR_SETUP | IEEE80211_AGGR_NAK); } /* * ADDBA response timeout. * * If software aggregation and per-TID queue management was done here, * that queue would be unpaused after the ADDBA timeout occurs. */ static void addba_timeout(void *arg) { struct ieee80211_tx_ampdu *tap = arg; struct ieee80211_node *ni = tap->txa_ni; struct ieee80211com *ic = ni->ni_ic; /* XXX ? */ tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND; tap->txa_attempts++; ic->ic_addba_response_timeout(ni, tap); } static void addba_start_timeout(struct ieee80211_tx_ampdu *tap) { /* XXX use CALLOUT_PENDING instead? 
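* (Added note: callout_reset(9) arms or re-arms the callout regardless of whether it is already pending, so the IEEE80211_AGGR_XCHGPEND flag set below is what addba_stop_timeout() consults before calling callout_stop().)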
*/ callout_reset(&tap->txa_timer, ieee80211_addba_timeout, addba_timeout, tap); tap->txa_flags |= IEEE80211_AGGR_XCHGPEND; tap->txa_nextrequest = ticks + ieee80211_addba_timeout; } static void addba_stop_timeout(struct ieee80211_tx_ampdu *tap) { /* XXX use CALLOUT_PENDING instead? */ if (tap->txa_flags & IEEE80211_AGGR_XCHGPEND) { callout_stop(&tap->txa_timer); tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND; } } static void null_addba_response_timeout(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { } /* * Default method for requesting A-MPDU tx aggregation. * We setup the specified state block and start a timer * to wait for an ADDBA response frame. */ static int ieee80211_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout) { int bufsiz; /* XXX locking */ tap->txa_token = dialogtoken; tap->txa_flags |= IEEE80211_AGGR_IMMEDIATE; bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); tap->txa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); addba_start_timeout(tap); return 1; } /* * Called by drivers that wish to request an ADDBA session be * setup. This brings it up and starts the request timer. */ int ieee80211_ampdu_tx_request_ext(struct ieee80211_node *ni, int tid) { struct ieee80211_tx_ampdu *tap; if (tid < 0 || tid > 15) return (0); tap = &ni->ni_tx_ampdu[tid]; /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_SETUP) == 0) { /* do deferred setup of state */ ampdu_tx_setup(tap); } /* XXX hack for not doing proper locking */ tap->txa_flags &= ~IEEE80211_AGGR_NAK; addba_start_timeout(tap); return (1); } /* * Called by drivers that have marked a session as active. */ int ieee80211_ampdu_tx_request_active_ext(struct ieee80211_node *ni, int tid, int status) { struct ieee80211_tx_ampdu *tap; if (tid < 0 || tid > 15) return (0); tap = &ni->ni_tx_ampdu[tid]; /* XXX locking */ addba_stop_timeout(tap); if (status == 1) { tap->txa_flags |= IEEE80211_AGGR_RUNNING; tap->txa_attempts = 0; } else { /* mark tid so we don't try again */ tap->txa_flags |= IEEE80211_AGGR_NAK; } return (1); } /* * Default method for processing an A-MPDU tx aggregation * response. We shutdown any pending timer and update the * state block according to the reply. */ static int ieee80211_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status, int baparamset, int batimeout) { int bufsiz, tid; /* XXX locking */ addba_stop_timeout(tap); if (status == IEEE80211_STATUS_SUCCESS) { bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); /* XXX override our request? */ tap->txa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); /* XXX AC/TID */ tid = MS(baparamset, IEEE80211_BAPS_TID); tap->txa_flags |= IEEE80211_AGGR_RUNNING; tap->txa_attempts = 0; } else { /* mark tid so we don't try again */ tap->txa_flags |= IEEE80211_AGGR_NAK; } return 1; } /* * Default method for stopping A-MPDU tx aggregation. * Any timer is cleared and we drain any pending frames. */ static void ieee80211_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { /* XXX locking */ addba_stop_timeout(tap); if (tap->txa_flags & IEEE80211_AGGR_RUNNING) { /* XXX clear aggregation queue */ tap->txa_flags &= ~IEEE80211_AGGR_RUNNING; } tap->txa_attempts = 0; } /* * Process a received action frame using the default aggregation * policy. We intercept ADDBA-related frames and use them to * update our aggregation state. All other frames are passed up * for processing by ieee80211_recv_action. 
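The window negotiation in ieee80211_addba_request() above is a simple clamp: a zero buffer size in the ADDBA parameter set means "no preference" and yields the maximum, anything else is capped at it. A sketch, assuming net80211's maximum of IEEE80211_AGGR_BAWMAX (64) frames:

#include <stdio.h>

#define AGGR_BAWMAX	64	/* mirrors IEEE80211_AGGR_BAWMAX */

static int
ba_window(int bufsiz)
{
	if (bufsiz == 0)
		return (AGGR_BAWMAX);
	return (bufsiz < AGGR_BAWMAX ? bufsiz : AGGR_BAWMAX);
}

int
main(void)
{
	/* -> 64 32 64 */
	printf("%d %d %d\n", ba_window(0), ba_window(32), ba_window(256));
	return (0);
}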
*/ static int ht_recv_action_ba_addba_request(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_rx_ampdu *rap; uint8_t dialogtoken; uint16_t baparamset, batimeout, baseqctl; uint16_t args[5]; int tid; dialogtoken = frm[2]; baparamset = le16dec(frm+3); batimeout = le16dec(frm+5); baseqctl = le16dec(frm+7); tid = MS(baparamset, IEEE80211_BAPS_TID); IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv ADDBA request: dialogtoken %u baparamset 0x%x " "(tid %d bufsiz %d) batimeout %d baseqctl %d:%d", dialogtoken, baparamset, tid, MS(baparamset, IEEE80211_BAPS_BUFSIZ), batimeout, MS(baseqctl, IEEE80211_BASEQ_START), MS(baseqctl, IEEE80211_BASEQ_FRAG)); rap = &ni->ni_rx_ampdu[tid]; /* Send ADDBA response */ args[0] = dialogtoken; /* * NB: We ack only if the sta associated with HT and * the ap is configured to do AMPDU rx (the latter * violates the 11n spec and is mostly for testing). */ if ((ni->ni_flags & IEEE80211_NODE_AMPDU_RX) && (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_RX)) { /* XXX handle ampdu_rx_start failure */ ic->ic_ampdu_rx_start(ni, rap, baparamset, batimeout, baseqctl); args[1] = IEEE80211_STATUS_SUCCESS; } else { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "reject ADDBA request: %s", ni->ni_flags & IEEE80211_NODE_AMPDU_RX ? "administratively disabled" : "not negotiated for station"); vap->iv_stats.is_addba_reject++; args[1] = IEEE80211_STATUS_UNSPECIFIED; } /* XXX honor rap flags? */ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE | SM(tid, IEEE80211_BAPS_TID) | SM(rap->rxa_wnd, IEEE80211_BAPS_BUFSIZ) ; args[3] = 0; args[4] = 0; ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, args); return 0; } static int ht_recv_action_ba_addba_response(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_tx_ampdu *tap; uint8_t dialogtoken, policy; uint16_t baparamset, batimeout, code; int tid, bufsiz; dialogtoken = frm[2]; code = le16dec(frm+3); baparamset = le16dec(frm+5); tid = MS(baparamset, IEEE80211_BAPS_TID); bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); policy = MS(baparamset, IEEE80211_BAPS_POLICY); batimeout = le16dec(frm+7); tap = &ni->ni_tx_ampdu[tid]; if ((tap->txa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "no pending ADDBA, tid %d dialogtoken %u " "code %d", tid, dialogtoken, code); vap->iv_stats.is_addba_norequest++; return 0; } if (dialogtoken != tap->txa_token) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "dialogtoken mismatch: waiting for %d, " "received %d, tid %d code %d", tap->txa_token, dialogtoken, tid, code); vap->iv_stats.is_addba_badtoken++; return 0; } /* NB: assumes IEEE80211_AGGR_IMMEDIATE is 1 */ if (policy != (tap->txa_flags & IEEE80211_AGGR_IMMEDIATE)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "policy mismatch: expecting %d, " "received %d, tid %d code %d", tap->txa_flags & IEEE80211_AGGR_IMMEDIATE, policy, tid, code); vap->iv_stats.is_addba_badpolicy++; return 0; } #if 0 /* XXX we take MIN in ieee80211_addba_response */ if (bufsiz > IEEE80211_AGGR_BAWMAX) { IEEE80211_DISCARD_MAC(vap,
IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "BA window too large: max %d, " "received %d, tid %d code %d", bufsiz, IEEE80211_AGGR_BAWMAX, tid, code); vap->iv_stats.is_addba_badbawinsize++; return 0; } #endif IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv ADDBA response: dialogtoken %u code %d " "baparamset 0x%x (tid %d bufsiz %d) batimeout %d", dialogtoken, code, baparamset, tid, bufsiz, batimeout); ic->ic_addba_response(ni, tap, code, baparamset, batimeout); return 0; } static int ht_recv_action_ba_delba(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_rx_ampdu *rap; struct ieee80211_tx_ampdu *tap; uint16_t baparamset, code; int tid; baparamset = le16dec(frm+2); code = le16dec(frm+4); tid = MS(baparamset, IEEE80211_DELBAPS_TID); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv DELBA: baparamset 0x%x (tid %d initiator %d) " "code %d", baparamset, tid, MS(baparamset, IEEE80211_DELBAPS_INIT), code); if ((baparamset & IEEE80211_DELBAPS_INIT) == 0) { tap = &ni->ni_tx_ampdu[tid]; ic->ic_addba_stop(ni, tap); } else { rap = &ni->ni_rx_ampdu[tid]; ic->ic_ampdu_rx_stop(ni, rap); } return 0; } static int ht_recv_action_ht_txchwidth(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { int chw; chw = (frm[2] == IEEE80211_A_HT_TXCHWIDTH_2040) ? 40 : 20; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT txchwidth, width %d%s", __func__, chw, ni->ni_chw != chw ? "*" : ""); if (chw != ni->ni_chw) { ni->ni_chw = chw; /* XXX notify on change */ } return 0; } static int ht_recv_action_ht_mimopwrsave(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { const struct ieee80211_action_ht_mimopowersave *mps = (const struct ieee80211_action_ht_mimopowersave *) frm; /* XXX check iv_htcaps */ if (mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA) ni->ni_flags |= IEEE80211_NODE_MIMO_PS; else ni->ni_flags &= ~IEEE80211_NODE_MIMO_PS; if (mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_MODE) ni->ni_flags |= IEEE80211_NODE_MIMO_RTS; else ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; /* XXX notify on change */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT MIMO PS (%s%s)", __func__, (ni->ni_flags & IEEE80211_NODE_MIMO_PS) ? "on" : "off", (ni->ni_flags & IEEE80211_NODE_MIMO_RTS) ? "+rts" : "" ); return 0; } /* * Transmit processing. */ /* * Check if A-MPDU should be requested/enabled for a stream. * We require a traffic rate above a per-AC threshold and we * also handle backoff from previous failed attempts. * * Drivers may override this method to bring in information * such as link state conditions in making the decision. */ static int ieee80211_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct ieee80211vap *vap = ni->ni_vap; if (tap->txa_avgpps < vap->iv_ampdu_mintraffic[TID_TO_WME_AC(tap->txa_tid)]) return 0; /* XXX check rssi? */ if (tap->txa_attempts >= ieee80211_addba_maxtries && ieee80211_time_after(ticks, tap->txa_nextrequest)) { /* * Don't retry too often; txa_nextrequest is set * to the minimum interval we'll retry after * ieee80211_addba_maxtries failed attempts are made. 
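The MS()/SM() helpers used throughout these handlers are conventional shift-and-mask accessors: a mask constant paired with a matching _S shift name. A self-contained sketch with an invented example field (EX_FIELD is not a net80211 constant):

#include <stdint.h>
#include <stdio.h>

#define EX_FIELD	0x003c	/* example 4-bit field */
#define EX_FIELD_S	2

#define MS(v, f)	(((v) & f) >> f##_S)	/* mask-and-shift out */
#define SM(v, f)	(((v) << f##_S) & f)	/* shift-and-mask in */

int
main(void)
{
	uint16_t w = SM(9, EX_FIELD);		/* pack 9 into the field */

	/* -> packed 0x0024 unpacked 9 */
	printf("packed 0x%04x unpacked %u\n", w, MS(w, EX_FIELD));
	return (0);
}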
*/ return 0; } IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "enable AMPDU on tid %d (%s), avgpps %d pkts %d attempt %d", tap->txa_tid, ieee80211_wme_acnames[TID_TO_WME_AC(tap->txa_tid)], tap->txa_avgpps, tap->txa_pkts, tap->txa_attempts); return 1; } /* * Request A-MPDU tx aggregation. Setup local state and * issue an ADDBA request. BA use will only happen after * the other end replies with ADDBA response. */ int ieee80211_ampdu_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct ieee80211com *ic = ni->ni_ic; uint16_t args[5]; int tid, dialogtoken; static int tokens = 0; /* XXX */ /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_SETUP) == 0) { /* do deferred setup of state */ ampdu_tx_setup(tap); } /* XXX hack for not doing proper locking */ tap->txa_flags &= ~IEEE80211_AGGR_NAK; dialogtoken = (tokens+1) % 63; /* XXX */ tid = tap->txa_tid; tap->txa_start = ni->ni_txseqs[tid]; args[0] = dialogtoken; args[1] = 0; /* NB: status code not used */ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE | SM(tid, IEEE80211_BAPS_TID) | SM(IEEE80211_AGGR_BAWMAX, IEEE80211_BAPS_BUFSIZ) ; args[3] = 0; /* batimeout */ /* NB: do first so there's no race against reply */ if (!ic->ic_addba_request(ni, tap, dialogtoken, args[2], args[3])) { /* unable to setup state, don't make request */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: could not setup BA stream for TID %d AC %d", __func__, tap->txa_tid, TID_TO_WME_AC(tap->txa_tid)); /* defer next try so we don't slam the driver with requests */ tap->txa_attempts = ieee80211_addba_maxtries; /* NB: check in case driver wants to override */ if (tap->txa_nextrequest <= ticks) tap->txa_nextrequest = ticks + ieee80211_addba_backoff; return 0; } tokens = dialogtoken; /* allocate token */ /* NB: after calling ic_addba_request so driver can set txa_start */ args[4] = SM(tap->txa_start, IEEE80211_BASEQ_START) | SM(0, IEEE80211_BASEQ_FRAG) ; return ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, args); } /* * Terminate an AMPDU tx stream. State is reclaimed * and the peer notified with a DelBA Action frame. 
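Note the dialog token allocator above simply increments modulo 63, so tokens cycle through 0..62 and eventually repeat; a sketch of the sequence:

#include <stdio.h>

int
main(void)
{
	int tokens = 61, dialogtoken, i;

	for (i = 0; i < 4; i++) {
		dialogtoken = (tokens + 1) % 63;
		printf("%d ", dialogtoken);	/* -> 62 0 1 2 */
		tokens = dialogtoken;		/* "allocate" the token */
	}
	printf("\n");
	return (0);
}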
*/ void ieee80211_ampdu_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int reason) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint16_t args[4]; /* XXX locking */ tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; if (IEEE80211_AMPDU_RUNNING(tap)) { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: stop BA stream for TID %d (reason: %d (%s))", __func__, tap->txa_tid, reason, ieee80211_reason_to_string(reason)); vap->iv_stats.is_ampdu_stop++; ic->ic_addba_stop(ni, tap); args[0] = tap->txa_tid; args[1] = IEEE80211_DELBAPS_INIT; args[2] = reason; /* XXX reason code */ ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_DELBA, args); } else { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: BA stream for TID %d not running " "(reason: %d (%s))", __func__, tap->txa_tid, reason, ieee80211_reason_to_string(reason)); vap->iv_stats.is_ampdu_stop_failed++; } } /* XXX */ static void bar_start_timer(struct ieee80211_tx_ampdu *tap); static void bar_timeout(void *arg) { struct ieee80211_tx_ampdu *tap = arg; struct ieee80211_node *ni = tap->txa_ni; KASSERT((tap->txa_flags & IEEE80211_AGGR_XCHGPEND) == 0, ("bar/addba collision, flags 0x%x", tap->txa_flags)); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: tid %u flags 0x%x attempts %d", __func__, tap->txa_tid, tap->txa_flags, tap->txa_attempts); /* guard against race with bar_tx_complete */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0) return; /* XXX ? */ if (tap->txa_attempts >= ieee80211_bar_maxtries) { struct ieee80211com *ic = ni->ni_ic; ni->ni_vap->iv_stats.is_ampdu_bar_tx_fail++; /* * If (at least) the last BAR TX timeout was due to * an ieee80211_send_bar() failures, then we need * to make sure we notify the driver that a BAR * TX did occur and fail. This gives the driver * a chance to undo any queue pause that may * have occurred. */ ic->ic_bar_response(ni, tap, 1); ieee80211_ampdu_stop(ni, tap, IEEE80211_REASON_TIMEOUT); } else { ni->ni_vap->iv_stats.is_ampdu_bar_tx_retry++; if (ieee80211_send_bar(ni, tap, tap->txa_seqpending) != 0) { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: failed to TX, starting timer\n", __func__); /* * If ieee80211_send_bar() fails here, the * timer may have stopped and/or the pending * flag may be clear. Because of this, * fake the BARPEND and reset the timer. * A retransmission attempt will then occur * during the next timeout. 
*/ /* XXX locking */ tap->txa_flags |= IEEE80211_AGGR_BARPEND; bar_start_timer(tap); } } } static void bar_start_timer(struct ieee80211_tx_ampdu *tap) { IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); callout_reset(&tap->txa_timer, ieee80211_bar_timeout, bar_timeout, tap); } static void bar_stop_timer(struct ieee80211_tx_ampdu *tap) { IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); callout_stop(&tap->txa_timer); } static void bar_tx_complete(struct ieee80211_node *ni, void *arg, int status) { struct ieee80211_tx_ampdu *tap = arg; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: tid %u flags 0x%x pending %d status %d", __func__, tap->txa_tid, tap->txa_flags, callout_pending(&tap->txa_timer), status); ni->ni_vap->iv_stats.is_ampdu_bar_tx++; /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) && callout_pending(&tap->txa_timer)) { struct ieee80211com *ic = ni->ni_ic; if (status == 0) /* ACK'd */ bar_stop_timer(tap); ic->ic_bar_response(ni, tap, status); /* NB: just let timer expire so we pace requests */ } } static void ieee80211_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status) { IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); if (status == 0) { /* got ACK */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "BAR moves BA win <%u:%u> (%u frames) txseq %u tid %u", tap->txa_start, IEEE80211_SEQ_ADD(tap->txa_start, tap->txa_wnd-1), tap->txa_qframes, tap->txa_seqpending, tap->txa_tid); /* NB: timer already stopped in bar_tx_complete */ tap->txa_start = tap->txa_seqpending; tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; } } /* * Transmit a BAR frame to the specified node. The * BAR contents are drawn from the supplied aggregation * state associated with the node. * * NB: we only handle immediate ACK w/ compressed bitmap. */ int ieee80211_send_bar(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, ieee80211_seq seq) { #define senderr(_x, _v) do { vap->iv_stats._v++; ret = _x; goto bad; } while (0) struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_frame_bar *bar; struct mbuf *m; uint16_t barctl, barseqctl; uint8_t *frm; int tid, ret; IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: called", __func__); if ((tap->txa_flags & IEEE80211_AGGR_RUNNING) == 0) { /* no ADDBA response, should not happen */ /* XXX stat+msg */ return EINVAL; } /* XXX locking */ bar_stop_timer(tap); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom, sizeof(*bar)); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); if (!ieee80211_add_callback(m, bar_tx_complete, tap)) { m_freem(m); senderr(ENOMEM, is_tx_nobuf); /* XXX */ /* NOTREACHED */ } bar = mtod(m, struct ieee80211_frame_bar *); bar->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_CTL | IEEE80211_FC0_SUBTYPE_BAR; bar->i_fc[1] = 0; IEEE80211_ADDR_COPY(bar->i_ra, ni->ni_macaddr); IEEE80211_ADDR_COPY(bar->i_ta, vap->iv_myaddr); tid = tap->txa_tid; barctl = (tap->txa_flags & IEEE80211_AGGR_IMMEDIATE ? 0 : IEEE80211_BAR_NOACK) | IEEE80211_BAR_COMP | SM(tid, IEEE80211_BAR_TID) ; barseqctl = SM(seq, IEEE80211_BAR_SEQ_START); /* NB: known to have proper alignment */ bar->i_ctl = htole16(barctl); bar->i_seq = htole16(barseqctl); m->m_pkthdr.len = m->m_len = sizeof(struct ieee80211_frame_bar); M_WME_SETAC(m, WME_AC_VO); IEEE80211_NODE_STAT(ni, tx_mgmt); /* XXX tx_ctl? 
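The BAR Control word built in ieee80211_send_bar() packs the no-ack policy bit, the compressed-bitmap bit, and the TID. A sketch using the 802.11 field positions (bit 0 ack policy, bit 2 compressed bitmap, TID in bits 12-15); the defines are illustrative rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

#define BAR_NOACK	0x0001	/* no-ack policy */
#define BAR_COMP	0x0004	/* compressed bitmap */
#define BAR_TID_S	12	/* TID shift */

int
main(void)
{
	int immediate = 1, tid = 5;
	uint16_t barctl;

	barctl = (immediate ? 0 : BAR_NOACK) | BAR_COMP |
	    (tid << BAR_TID_S);
	printf("barctl 0x%04x\n", barctl);	/* -> 0x5004 */
	return (0);
}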
*/ /* XXX locking */ /* init/bump attempts counter */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0) tap->txa_attempts = 1; else tap->txa_attempts++; tap->txa_seqpending = seq; tap->txa_flags |= IEEE80211_AGGR_BARPEND; IEEE80211_NOTE(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_11N, ni, "send BAR: tid %u ctl 0x%x start %u (attempt %d)", tid, barctl, seq, tap->txa_attempts); /* * ic_raw_xmit will free the node reference * regardless of queue/TX success or failure. */ IEEE80211_TX_LOCK(ic); ret = ieee80211_raw_output(vap, ni, m, NULL); IEEE80211_TX_UNLOCK(ic); if (ret != 0) { IEEE80211_NOTE(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_11N, ni, "send BAR: failed: (ret = %d)\n", ret); /* xmit failed, clear state flag */ tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; vap->iv_stats.is_ampdu_bar_tx_fail++; return ret; } /* XXX hack against tx complete happening before timer is started */ if (tap->txa_flags & IEEE80211_AGGR_BARPEND) bar_start_timer(tap); return 0; bad: IEEE80211_NOTE(tap->txa_ni->ni_vap, IEEE80211_MSG_11N, tap->txa_ni, "%s: bad! ret=%d", __func__, ret); vap->iv_stats.is_ampdu_bar_tx_fail++; ieee80211_free_node(ni); return ret; #undef senderr } static int ht_action_output(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211_bpf_params params; memset(&params, 0, sizeof(params)); params.ibp_pri = WME_AC_VO; params.ibp_rate0 = ni->ni_txparms->mgmtrate; /* NB: we know all frames are unicast */ params.ibp_try0 = ni->ni_txparms->maxretry; params.ibp_power = ni->ni_txpower; return ieee80211_mgmt_output(ni, m, IEEE80211_FC0_SUBTYPE_ACTION, &params); } #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) /* * Send an action management frame. The arguments are stuffed * into a frame without inspection; the caller is assumed to * prepare them carefully (e.g. based on the aggregation state). */ static int ht_send_action_ba_addba(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = arg0; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send ADDBA %s: dialogtoken %d status %d " "baparamset 0x%x (tid %d) batimeout 0x%x baseqctl 0x%x", (action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ?
"request" : "response", args[0], args[1], args[2], MS(args[2], IEEE80211_BAPS_TID), args[3], args[4]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; *frm++ = args[0]; /* dialog token */ if (action == IEEE80211_ACTION_BA_ADDBA_RESPONSE) ADDSHORT(frm, args[1]); /* status code */ ADDSHORT(frm, args[2]); /* baparamset */ ADDSHORT(frm, args[3]); /* batimeout */ if (action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ADDSHORT(frm, args[4]); /* baseqctl */ m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int ht_send_action_ba_delba(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = arg0; struct mbuf *m; uint16_t baparamset; uint8_t *frm; baparamset = SM(args[0], IEEE80211_DELBAPS_TID) | args[1] ; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send DELBA action: tid %d, initiator %d reason %d (%s)", args[0], args[1], args[2], ieee80211_reason_to_string(args[2])); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; ADDSHORT(frm, baparamset); ADDSHORT(frm, args[2]); /* reason code */ m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int ht_send_action_ht_txchwidth(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send HT txchwidth: width %d", IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? 40 : 20); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; *frm++ = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? IEEE80211_A_HT_TXCHWIDTH_2040 : IEEE80211_A_HT_TXCHWIDTH_20; m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } #undef ADDSHORT /* * Construct the MCS bit mask for inclusion in an HT capabilities * information element. 
*/ static void ieee80211_set_mcsset(struct ieee80211com *ic, uint8_t *frm) { int i; uint8_t txparams; KASSERT((ic->ic_rxstream > 0 && ic->ic_rxstream <= 4), ("ic_rxstream %d out of range", ic->ic_rxstream)); KASSERT((ic->ic_txstream > 0 && ic->ic_txstream <= 4), ("ic_txstream %d out of range", ic->ic_txstream)); for (i = 0; i < ic->ic_rxstream * 8; i++) setbit(frm, i); if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTC_RXMCS32)) setbit(frm, 32); if (ic->ic_htcaps & IEEE80211_HTC_RXUNEQUAL) { if (ic->ic_rxstream >= 2) { for (i = 33; i <= 38; i++) setbit(frm, i); } if (ic->ic_rxstream >= 3) { for (i = 39; i <= 52; i++) setbit(frm, i); } if (ic->ic_rxstream >= 4) { for (i = 53; i <= 76; i++) setbit(frm, i); } } if (ic->ic_rxstream != ic->ic_txstream) { txparams = 0x1; /* TX MCS set defined */ txparams |= 0x2; /* TX RX MCS not equal */ txparams |= (ic->ic_txstream - 1) << 2; /* num TX streams */ if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) txparams |= 0x16; /* TX unequal modulation sup */ } else txparams = 0; frm[12] = txparams; } /* * Add body of an HTCAP information element. */ static uint8_t * ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni) { #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint16_t caps, extcaps; int rxmax, density; /* HT capabilities */ caps = vap->iv_htcaps & 0xffff; /* * Note channel width depends on whether we are operating as * a sta or not. When operating as a sta we are generating * a request based on our desired configuration. Otherwise * we are operational and the channel attributes identify * how we've been setup (which might be different if a fixed * channel is specified). */ if (vap->iv_opmode == IEEE80211_M_STA) { /* override 20/40 use based on config */ if (vap->iv_flags_ht & IEEE80211_FHT_USEHT40) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; /* Start by using the advertised settings */ rxmax = MS(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU); density = MS(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY); IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N, "%s: advertised rxmax=%d, density=%d, vap rxmax=%d, density=%d\n", __func__, rxmax, density, vap->iv_ampdu_rxmax, vap->iv_ampdu_density); /* Cap at VAP rxmax */ if (rxmax > vap->iv_ampdu_rxmax) rxmax = vap->iv_ampdu_rxmax; /* * If the VAP ampdu density value is greater, use that. * * (Larger density value == larger minimum gap between A-MPDU * subframes.) */ if (vap->iv_ampdu_density > density) density = vap->iv_ampdu_density; /* * NB: Hardware might support HT40 on some but not all * channels. We can't determine this earlier because only * after association the channel is upgraded to HT based * on the negotiated capabilities. */ if (ni->ni_chan != IEEE80211_CHAN_ANYC && findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40U) == NULL && findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40D) == NULL) caps &= ~IEEE80211_HTCAP_CHWIDTH40; } else { /* override 20/40 use based on current channel */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; /* XXX TODO should it start by using advertised settings?
*/ rxmax = vap->iv_ampdu_rxmax; density = vap->iv_ampdu_density; } /* adjust short GI based on channel and config */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI20; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0 || (caps & IEEE80211_HTCAP_CHWIDTH40) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI40; /* adjust STBC based on receive capabilities */ if ((vap->iv_flags_ht & IEEE80211_FHT_STBC_RX) == 0) caps &= ~IEEE80211_HTCAP_RXSTBC; /* XXX TODO: adjust LDPC based on receive capabilities */ ADDSHORT(frm, caps); /* HT parameters */ *frm = SM(rxmax, IEEE80211_HTCAP_MAXRXAMPDU) | SM(density, IEEE80211_HTCAP_MPDUDENSITY) ; frm++; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset)); /* supported MCS set */ /* * XXX: For sta mode the rate set should be restricted based * on the AP's capabilities, but ni_htrates isn't setup when * we're called to form an AssocReq frame so for now we're * restricted to the device capabilities. */ ieee80211_set_mcsset(ni->ni_ic, frm); frm += __offsetof(struct ieee80211_ie_htcap, hc_extcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset); /* HT extended capabilities */ extcaps = vap->iv_htextcaps & 0xffff; ADDSHORT(frm, extcaps); frm += sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_txbf); return frm; #undef ADDSHORT } /* * Add 802.11n HT capabilities information element */ uint8_t * ieee80211_add_htcap(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_HTCAP; frm[1] = sizeof(struct ieee80211_ie_htcap) - 2; return ieee80211_add_htcap_body(frm + 2, ni); } /* * Add Broadcom OUI wrapped standard HTCAP ie; this is * used for compatibility w/ pre-draft implementations. */ uint8_t * ieee80211_add_htcap_vendor(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_VENDOR; frm[1] = 4 + sizeof(struct ieee80211_ie_htcap) - 2; frm[2] = (BCM_OUI >> 0) & 0xff; frm[3] = (BCM_OUI >> 8) & 0xff; frm[4] = (BCM_OUI >> 16) & 0xff; frm[5] = BCM_OUI_HTCAP; return ieee80211_add_htcap_body(frm + 6, ni); } /* * Construct the MCS bit mask of basic rates * for inclusion in an HT information element. */ static void ieee80211_set_basic_htrates(uint8_t *frm, const struct ieee80211_htrateset *rs) { int i; for (i = 0; i < rs->rs_nrates; i++) { int r = rs->rs_rates[i] & IEEE80211_RATE_VAL; if ((rs->rs_rates[i] & IEEE80211_RATE_BASIC) && r < IEEE80211_HTRATE_MAXSIZE) { /* NB: this assumes a particular implementation */ setbit(frm, r); } } } /* * Update the HTINFO ie for a beacon frame. 
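The HT parameters byte written above combines the 2-bit maximum RX A-MPDU exponent (frame limit 2^(13+exp)-1 octets) with the 3-bit MPDU density code. A sketch with illustrative mask values laid out the usual way:

#include <stdint.h>
#include <stdio.h>

#define MAXRXAMPDU	0x03	/* 2-bit exponent */
#define MPDUDENSITY	0x1c	/* 3-bit density code */
#define MPDUDENSITY_S	2

int
main(void)
{
	int rxmax = 3, density = 5;	/* 64k A-MPDU, 4us density */
	uint8_t parm;

	parm = (rxmax & MAXRXAMPDU) |
	    ((density << MPDUDENSITY_S) & MPDUDENSITY);
	/* -> ampdu parm 0x17, max len 65535 */
	printf("ampdu parm 0x%02x, max len %d\n",
	    parm, (1 << (13 + (parm & MAXRXAMPDU))) - 1);
	return (0);
}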
*/ void ieee80211_ht_update_beacon(struct ieee80211vap *vap, struct ieee80211_beacon_offsets *bo) { #define PROTMODE (IEEE80211_HTINFO_OPMODE|IEEE80211_HTINFO_NONHT_PRESENT) struct ieee80211_node *ni; const struct ieee80211_channel *bsschan; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_ie_htinfo *ht = (struct ieee80211_ie_htinfo *) bo->bo_htinfo; ni = ieee80211_ref_node(vap->iv_bss); bsschan = ni->ni_chan; /* XXX only update on channel change */ ht->hi_ctrlchannel = ieee80211_chan2ieee(ic, bsschan); if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) ht->hi_byte1 = IEEE80211_HTINFO_RIFSMODE_PERM; else ht->hi_byte1 = IEEE80211_HTINFO_RIFSMODE_PROH; if (IEEE80211_IS_CHAN_HT40U(bsschan)) ht->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(bsschan)) ht->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_BELOW; else ht->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_NONE; if (IEEE80211_IS_CHAN_HT40(bsschan)) ht->hi_byte1 |= IEEE80211_HTINFO_TXWIDTH_2040; /* protection mode */ ht->hi_byte2 = (ht->hi_byte2 &~ PROTMODE) | ic->ic_curhtprotmode; ieee80211_free_node(ni); /* XXX propagate to vendor ie's */ #undef PROTMODE } /* * Add body of an HTINFO information element. * * NB: We don't use struct ieee80211_ie_htinfo because we can * be called to fillin both a standard ie and a compat ie that * has a vendor OUI at the front. */ static uint8_t * ieee80211_add_htinfo_body(uint8_t *frm, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htinfo) - 2); /* primary/control channel center */ *frm++ = ieee80211_chan2ieee(ic, ni->ni_chan); if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) frm[0] = IEEE80211_HTINFO_RIFSMODE_PERM; else frm[0] = IEEE80211_HTINFO_RIFSMODE_PROH; if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_2NDCHAN_BELOW; else frm[0] |= IEEE80211_HTINFO_2NDCHAN_NONE; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_TXWIDTH_2040; frm[1] = ic->ic_curhtprotmode; frm += 5; /* basic MCS set */ ieee80211_set_basic_htrates(frm, &ni->ni_htrates); frm += sizeof(struct ieee80211_ie_htinfo) - __offsetof(struct ieee80211_ie_htinfo, hi_basicmcsset); return frm; } /* - * Add 802.11n HT information information element. + * Add 802.11n HT information element. */ uint8_t * ieee80211_add_htinfo(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_HTINFO; frm[1] = sizeof(struct ieee80211_ie_htinfo) - 2; return ieee80211_add_htinfo_body(frm + 2, ni); } /* * Add Broadcom OUI wrapped standard HTINFO ie; this is * used for compatibility w/ pre-draft implementations. */ uint8_t * ieee80211_add_htinfo_vendor(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_VENDOR; frm[1] = 4 + sizeof(struct ieee80211_ie_htinfo) - 2; frm[2] = (BCM_OUI >> 0) & 0xff; frm[3] = (BCM_OUI >> 8) & 0xff; frm[4] = (BCM_OUI >> 16) & 0xff; frm[5] = BCM_OUI_HTINFO; return ieee80211_add_htinfo_body(frm + 6, ni); } Index: stable/11/sys/net80211/ieee80211_scan_sta.c =================================================================== --- stable/11/sys/net80211/ieee80211_scan_sta.c (revision 330445) +++ stable/11/sys/net80211/ieee80211_scan_sta.c (revision 330446) @@ -1,1935 +1,1935 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 station scanning support. */ #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_TDMA #include #endif #ifdef IEEE80211_SUPPORT_MESH #include #endif #include #include /* * Parameters for managing cache entries: * * o a station with STA_FAILS_MAX failures is not considered * when picking a candidate * o a station that hasn't had an update in STA_PURGE_SCANS * (background) scans is discarded * o after STA_FAILS_AGE seconds we clear the failure count */ #define STA_FAILS_MAX 2 /* assoc failures before ignored */ #define STA_FAILS_AGE (2*60) /* time before clearing fails (secs) */ #define STA_PURGE_SCANS 2 /* age for purging entries (scans) */ /* XXX tunable */ #define STA_RSSI_MIN 8 /* min acceptable rssi */ #define STA_RSSI_MAX 40 /* max rssi for comparison */ struct sta_entry { struct ieee80211_scan_entry base; TAILQ_ENTRY(sta_entry) se_list; LIST_ENTRY(sta_entry) se_hash; uint8_t se_fails; /* failure to associate count */ uint8_t se_seen; /* seen during current scan */ uint8_t se_notseen; /* not seen in previous scans */ uint8_t se_flags; #define STA_DEMOTE11B 0x01 /* match w/ demoted 11b chan */ uint32_t se_avgrssi; /* LPF rssi state */ unsigned long se_lastupdate; /* time of last update */ unsigned long se_lastfail; /* time of last failure */ unsigned long se_lastassoc; /* time of last association */ u_int se_scangen; /* iterator scan gen# */ u_int se_countrygen; /* gen# of last cc notify */ }; #define STA_HASHSIZE 32 /* simple hash is enough for variation of macaddr */ #define STA_HASH(addr) \ (((const uint8_t *)(addr))[IEEE80211_ADDR_LEN - 1] % STA_HASHSIZE) #define MAX_IEEE_CHAN 256 /* max acceptable IEEE chan # */ CTASSERT(MAX_IEEE_CHAN >= 256); struct sta_table { ieee80211_scan_table_lock_t st_lock; /* on scan table */ TAILQ_HEAD(, sta_entry) st_entry; /* all entries */ LIST_HEAD(, sta_entry) st_hash[STA_HASHSIZE]; ieee80211_scan_iter_lock_t st_scanlock; /* on st_scaniter */ u_int st_scaniter; /* gen# for iterator */ u_int st_scangen; /* scan generation # */ int st_newscan; /* ap-related state */ int st_maxrssi[MAX_IEEE_CHAN]; }; static void sta_flush_table(struct sta_table *); /* * match_bss returns a bitmask 
describing if an entry is suitable * for use. If non-zero the entry was deemed not suitable and it's - * contents explains why. The following flags are or'd to to this + * contents explains why. The following flags are or'd to this * mask and can be used to figure out why the entry was rejected. */ #define MATCH_CHANNEL 0x00001 /* channel mismatch */ #define MATCH_CAPINFO 0x00002 /* capabilities mismatch, e.g. no ess */ #define MATCH_PRIVACY 0x00004 /* privacy mismatch */ #define MATCH_RATE 0x00008 /* rate set mismatch */ #define MATCH_SSID 0x00010 /* ssid mismatch */ #define MATCH_BSSID 0x00020 /* bssid mismatch */ #define MATCH_FAILS 0x00040 /* too many failed auth attempts */ #define MATCH_NOTSEEN 0x00080 /* not seen in recent scans */ #define MATCH_RSSI 0x00100 /* rssi deemed too low to use */ #define MATCH_CC 0x00200 /* country code mismatch */ #ifdef IEEE80211_SUPPORT_TDMA #define MATCH_TDMA_NOIE 0x00400 /* no TDMA ie */ #define MATCH_TDMA_NOTMASTER 0x00800 /* not TDMA master */ #define MATCH_TDMA_NOSLOT 0x01000 /* all TDMA slots occupied */ #define MATCH_TDMA_LOCAL 0x02000 /* local address */ #define MATCH_TDMA_VERSION 0x04000 /* protocol version mismatch */ #endif #define MATCH_MESH_NOID 0x10000 /* no MESHID ie */ #define MATCH_MESHID 0x20000 /* meshid mismatch */ static int match_bss(struct ieee80211vap *, const struct ieee80211_scan_state *, struct sta_entry *, int); static void adhoc_age(struct ieee80211_scan_state *); static __inline int isocmp(const uint8_t cc1[], const uint8_t cc2[]) { return (cc1[0] == cc2[0] && cc1[1] == cc2[1]); } /* number of references from net80211 layer */ static int nrefs = 0; /* * Module glue. */ IEEE80211_SCANNER_MODULE(sta, 1); /* * Attach prior to any scanning work. */ static int sta_attach(struct ieee80211_scan_state *ss) { struct sta_table *st; st = (struct sta_table *) IEEE80211_MALLOC(sizeof(struct sta_table), M_80211_SCAN, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (st == NULL) return 0; IEEE80211_SCAN_TABLE_LOCK_INIT(st, "scantable"); IEEE80211_SCAN_ITER_LOCK_INIT(st, "scangen"); TAILQ_INIT(&st->st_entry); ss->ss_priv = st; nrefs++; /* NB: we assume caller locking */ return 1; } /* * Cleanup any private state. */ static int sta_detach(struct ieee80211_scan_state *ss) { struct sta_table *st = ss->ss_priv; if (st != NULL) { sta_flush_table(st); IEEE80211_SCAN_TABLE_LOCK_DESTROY(st); IEEE80211_SCAN_ITER_LOCK_DESTROY(st); IEEE80211_FREE(st, M_80211_SCAN); KASSERT(nrefs > 0, ("imbalanced attach/detach")); nrefs--; /* NB: we assume caller locking */ } return 1; } /* * Flush all per-scan state. */ static int sta_flush(struct ieee80211_scan_state *ss) { struct sta_table *st = ss->ss_priv; IEEE80211_SCAN_TABLE_LOCK(st); sta_flush_table(st); IEEE80211_SCAN_TABLE_UNLOCK(st); ss->ss_last = 0; return 0; } /* * Flush all entries in the scan cache. */ static void sta_flush_table(struct sta_table *st) { struct sta_entry *se, *next; TAILQ_FOREACH_SAFE(se, &st->st_entry, se_list, next) { TAILQ_REMOVE(&st->st_entry, se, se_list); LIST_REMOVE(se, se_hash); ieee80211_ies_cleanup(&se->base.se_ies); IEEE80211_FREE(se, M_80211_SCAN); } memset(st->st_maxrssi, 0, sizeof(st->st_maxrssi)); } /* * Process a beacon or probe response frame; create an * entry in the scan cache or update any previous entry. 
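As the header comment notes, the scan table's STA_HASH() keys only on the final octet of the MAC address, which gives enough variation for a 32-bucket table. A standalone rendition:

#include <stdint.h>
#include <stdio.h>

#define STA_HASHSIZE	32

static int
sta_hash(const uint8_t addr[6])
{
	return (addr[5] % STA_HASHSIZE);	/* last octet only */
}

int
main(void)
{
	const uint8_t a[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x7f };

	printf("bucket %d\n", sta_hash(a));	/* 0x7f %% 32 -> 31 */
	return (0);
}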
*/ static int sta_add(struct ieee80211_scan_state *ss, struct ieee80211_channel *curchan, const struct ieee80211_scanparams *sp, const struct ieee80211_frame *wh, int subtype, int rssi, int noise) { #define ISPROBE(_st) ((_st) == IEEE80211_FC0_SUBTYPE_PROBE_RESP) #define PICK1ST(_ss) \ ((ss->ss_flags & (IEEE80211_SCAN_PICK1ST | IEEE80211_SCAN_GOTPICK)) == \ IEEE80211_SCAN_PICK1ST) struct sta_table *st = ss->ss_priv; const uint8_t *macaddr = wh->i_addr2; struct ieee80211vap *vap = ss->ss_vap; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *c; struct sta_entry *se; struct ieee80211_scan_entry *ise; int hash; hash = STA_HASH(macaddr); IEEE80211_SCAN_TABLE_LOCK(st); LIST_FOREACH(se, &st->st_hash[hash], se_hash) if (IEEE80211_ADDR_EQ(se->base.se_macaddr, macaddr)) goto found; se = (struct sta_entry *) IEEE80211_MALLOC(sizeof(struct sta_entry), M_80211_SCAN, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (se == NULL) { IEEE80211_SCAN_TABLE_UNLOCK(st); return 0; } se->se_scangen = st->st_scaniter-1; se->se_avgrssi = IEEE80211_RSSI_DUMMY_MARKER; IEEE80211_ADDR_COPY(se->base.se_macaddr, macaddr); TAILQ_INSERT_TAIL(&st->st_entry, se, se_list); LIST_INSERT_HEAD(&st->st_hash[hash], se, se_hash); found: ise = &se->base; /* XXX ap beaconing multiple ssid w/ same bssid */ if (sp->ssid[1] != 0 && (ISPROBE(subtype) || ise->se_ssid[1] == 0)) memcpy(ise->se_ssid, sp->ssid, 2+sp->ssid[1]); KASSERT(sp->rates[1] <= IEEE80211_RATE_MAXSIZE, ("rate set too large: %u", sp->rates[1])); memcpy(ise->se_rates, sp->rates, 2+sp->rates[1]); if (sp->xrates != NULL) { /* XXX validate xrates[1] */ KASSERT(sp->xrates[1] <= IEEE80211_RATE_MAXSIZE, ("xrate set too large: %u", sp->xrates[1])); memcpy(ise->se_xrates, sp->xrates, 2+sp->xrates[1]); } else ise->se_xrates[1] = 0; IEEE80211_ADDR_COPY(ise->se_bssid, wh->i_addr3); if ((sp->status & IEEE80211_BPARSE_OFFCHAN) == 0) { /* * Record rssi data using extended precision LPF filter. * * NB: use only on-channel data to insure we get a good * estimate of the signal we'll see when associated. */ IEEE80211_RSSI_LPF(se->se_avgrssi, rssi); ise->se_rssi = IEEE80211_RSSI_GET(se->se_avgrssi); ise->se_noise = noise; } memcpy(ise->se_tstamp.data, sp->tstamp, sizeof(ise->se_tstamp)); ise->se_intval = sp->bintval; ise->se_capinfo = sp->capinfo; #ifdef IEEE80211_SUPPORT_MESH if (sp->meshid != NULL && sp->meshid[1] != 0) memcpy(ise->se_meshid, sp->meshid, 2+sp->meshid[1]); #endif /* * Beware of overriding se_chan for frames seen * off-channel; this can cause us to attempt an * association on the wrong channel. */ if (sp->status & IEEE80211_BPARSE_OFFCHAN) { /* * Off-channel, locate the home/bss channel for the sta * using the value broadcast in the DSPARMS ie. We know * sp->chan has this value because it's used to calculate * IEEE80211_BPARSE_OFFCHAN. */ c = ieee80211_find_channel_byieee(ic, sp->chan, curchan->ic_flags); if (c != NULL) { ise->se_chan = c; } else if (ise->se_chan == NULL) { /* should not happen, pick something */ ise->se_chan = curchan; } } else ise->se_chan = curchan; if (IEEE80211_IS_CHAN_HT(ise->se_chan) && sp->htcap == NULL) { /* Demote legacy networks to a non-HT channel. 
*/ c = ieee80211_find_channel(ic, ise->se_chan->ic_freq, ise->se_chan->ic_flags & ~IEEE80211_CHAN_HT); KASSERT(c != NULL, ("no legacy channel %u", ise->se_chan->ic_ieee)); ise->se_chan = c; } ise->se_fhdwell = sp->fhdwell; ise->se_fhindex = sp->fhindex; ise->se_erp = sp->erp; ise->se_timoff = sp->timoff; if (sp->tim != NULL) { const struct ieee80211_tim_ie *tim = (const struct ieee80211_tim_ie *) sp->tim; ise->se_dtimperiod = tim->tim_period; } if (sp->country != NULL) { const struct ieee80211_country_ie *cie = (const struct ieee80211_country_ie *) sp->country; /* * If 11d is enabled and we're attempting to join a bss * that advertises it's country code then compare our * current settings to what we fetched from the country ie. * If our country code is unspecified or different then * dispatch an event to user space that identifies the * country code so our regdomain config can be changed. */ /* XXX only for STA mode? */ if ((IEEE80211_IS_CHAN_11D(ise->se_chan) || (vap->iv_flags_ext & IEEE80211_FEXT_DOTD)) && (ic->ic_regdomain.country == CTRY_DEFAULT || !isocmp(cie->cc, ic->ic_regdomain.isocc))) { /* only issue one notify event per scan */ if (se->se_countrygen != st->st_scangen) { ieee80211_notify_country(vap, ise->se_bssid, cie->cc); se->se_countrygen = st->st_scangen; } } ise->se_cc[0] = cie->cc[0]; ise->se_cc[1] = cie->cc[1]; } /* NB: no need to setup ie ptrs; they are not (currently) used */ (void) ieee80211_ies_init(&ise->se_ies, sp->ies, sp->ies_len); /* clear failure count after STA_FAIL_AGE passes */ if (se->se_fails && (ticks - se->se_lastfail) > STA_FAILS_AGE*hz) { se->se_fails = 0; IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_SCAN, macaddr, "%s: fails %u", __func__, se->se_fails); } se->se_lastupdate = ticks; /* update time */ se->se_seen = 1; se->se_notseen = 0; KASSERT(sizeof(sp->bchan) == 1, ("bchan size")); if (rssi > st->st_maxrssi[sp->bchan]) st->st_maxrssi[sp->bchan] = rssi; IEEE80211_SCAN_TABLE_UNLOCK(st); /* * If looking for a quick choice and nothing's * been found check here. */ if (PICK1ST(ss) && match_bss(vap, ss, se, IEEE80211_MSG_SCAN) == 0) ss->ss_flags |= IEEE80211_SCAN_GOTPICK; return 1; #undef PICK1ST #undef ISPROBE } /* * Check if a channel is excluded by user request. */ static int isexcluded(struct ieee80211vap *vap, const struct ieee80211_channel *c) { return (isclr(vap->iv_ic->ic_chan_active, c->ic_ieee) || (vap->iv_des_chan != IEEE80211_CHAN_ANYC && c->ic_freq != vap->iv_des_chan->ic_freq)); } static struct ieee80211_channel * find11gchannel(struct ieee80211com *ic, int i, int freq) { struct ieee80211_channel *c; int j; /* * The normal ordering in the channel list is b channel * immediately followed by g so optimize the search for * this. We'll still do a full search just in case. 
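sta_add() above smooths on-channel RSSI through an extended-precision low-pass filter (the IEEE80211_RSSI_LPF/IEEE80211_RSSI_GET pair). A generic sketch of that idea; the window length and fixed-point multiplier here are illustrative, not the kernel's coefficients:

#include <stdint.h>
#include <stdio.h>

#define LPF_LEN	10
#define EP_MUL	(1 << 7)	/* keep 7 fractional bits in the average */

static void
rssi_lpf(uint32_t *avg, int sample)
{
	if (*avg == 0)		/* dummy marker: seed with first sample */
		*avg = sample * EP_MUL;
	else
		*avg = (*avg * (LPF_LEN - 1) + sample * EP_MUL) / LPF_LEN;
}

int
main(void)
{
	uint32_t avg = 0;
	int samples[] = { 30, 34, 28, 31 }, i;

	for (i = 0; i < 4; i++)
		rssi_lpf(&avg, samples[i]);
	printf("avg rssi %u\n", avg / EP_MUL);	/* -> 30 */
	return (0);
}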
*/ for (j = i+1; j < ic->ic_nchans; j++) { c = &ic->ic_channels[j]; if (c->ic_freq == freq && IEEE80211_IS_CHAN_G(c)) return c; } for (j = 0; j < i; j++) { c = &ic->ic_channels[j]; if (c->ic_freq == freq && IEEE80211_IS_CHAN_G(c)) return c; } return NULL; } static const u_int chanflags[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = IEEE80211_CHAN_B, [IEEE80211_MODE_11A] = IEEE80211_CHAN_A, [IEEE80211_MODE_11B] = IEEE80211_CHAN_B, [IEEE80211_MODE_11G] = IEEE80211_CHAN_G, [IEEE80211_MODE_FH] = IEEE80211_CHAN_FHSS, /* check base channel */ [IEEE80211_MODE_TURBO_A] = IEEE80211_CHAN_A, [IEEE80211_MODE_TURBO_G] = IEEE80211_CHAN_G, [IEEE80211_MODE_STURBO_A] = IEEE80211_CHAN_ST, [IEEE80211_MODE_HALF] = IEEE80211_CHAN_HALF, [IEEE80211_MODE_QUARTER] = IEEE80211_CHAN_QUARTER, /* check legacy */ [IEEE80211_MODE_11NA] = IEEE80211_CHAN_A, [IEEE80211_MODE_11NG] = IEEE80211_CHAN_G, }; static void add_channels(struct ieee80211vap *vap, struct ieee80211_scan_state *ss, enum ieee80211_phymode mode, const uint16_t freq[], int nfreq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *c, *cg; u_int modeflags; int i; KASSERT(mode < nitems(chanflags), ("Unexpected mode %u", mode)); modeflags = chanflags[mode]; for (i = 0; i < nfreq; i++) { if (ss->ss_last >= IEEE80211_SCAN_MAX) break; c = ieee80211_find_channel(ic, freq[i], modeflags); if (c == NULL || isexcluded(vap, c)) continue; if (mode == IEEE80211_MODE_AUTO) { /* * XXX special-case 11b/g channels so we select * the g channel if both are present. */ if (IEEE80211_IS_CHAN_B(c) && (cg = find11gchannel(ic, i, c->ic_freq)) != NULL) c = cg; } ss->ss_chans[ss->ss_last++] = c; } } struct scanlist { uint16_t mode; uint16_t count; const uint16_t *list; }; static int checktable(const struct scanlist *scan, const struct ieee80211_channel *c) { int i; for (; scan->list != NULL; scan++) { for (i = 0; i < scan->count; i++) if (scan->list[i] == c->ic_freq) return 1; } return 0; } static int onscanlist(const struct ieee80211_scan_state *ss, const struct ieee80211_channel *c) { int i; for (i = 0; i < ss->ss_last; i++) if (ss->ss_chans[i] == c) return 1; return 0; } static void sweepchannels(struct ieee80211_scan_state *ss, struct ieee80211vap *vap, const struct scanlist table[]) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *c; int i; for (i = 0; i < ic->ic_nchans; i++) { if (ss->ss_last >= IEEE80211_SCAN_MAX) break; c = &ic->ic_channels[i]; /* * Ignore dynamic turbo channels; we scan them * in normal mode (i.e. not boosted). Likewise * for HT channels, they get scanned using * legacy rates. */ if (IEEE80211_IS_CHAN_DTURBO(c) || IEEE80211_IS_CHAN_HT(c)) continue; /* * If a desired mode was specified, scan only * channels that satisfy that constraint. */ if (vap->iv_des_mode != IEEE80211_MODE_AUTO && vap->iv_des_mode != ieee80211_chan2mode(c)) continue; /* * Skip channels excluded by user request. */ if (isexcluded(vap, c)) continue; /* * Add the channel unless it is listed in the * fixed scan order tables. This insures we * don't sweep back in channels we filtered out * above. */ if (checktable(table, c)) continue; /* Add channel to scanning list. */ ss->ss_chans[ss->ss_last++] = c; } /* * Explicitly add any desired channel if: * - not already on the scan list * - allowed by any desired mode constraint * - there is space in the scan list * This allows the channel to be used when the filtering * mechanisms would otherwise elide it (e.g HT, turbo). 
*/ c = vap->iv_des_chan; if (c != IEEE80211_CHAN_ANYC && !onscanlist(ss, c) && (vap->iv_des_mode == IEEE80211_MODE_AUTO || vap->iv_des_mode == ieee80211_chan2mode(c)) && ss->ss_last < IEEE80211_SCAN_MAX) ss->ss_chans[ss->ss_last++] = c; } static void makescanlist(struct ieee80211_scan_state *ss, struct ieee80211vap *vap, const struct scanlist table[]) { const struct scanlist *scan; enum ieee80211_phymode mode; ss->ss_last = 0; /* * Use the table of ordered channels to construct the list * of channels for scanning. Any channels in the ordered * list not in the master list will be discarded. */ for (scan = table; scan->list != NULL; scan++) { mode = scan->mode; if (vap->iv_des_mode != IEEE80211_MODE_AUTO) { /* * If a desired mode was specified, scan only * channels that satisfy that constraint. */ if (vap->iv_des_mode != mode) { /* * The scan table marks 2.4Ghz channels as b * so if the desired mode is 11g, then use * the 11b channel list but upgrade the mode. */ if (vap->iv_des_mode == IEEE80211_MODE_11G) { if (mode == IEEE80211_MODE_11G) /* Skip the G check */ continue; else if (mode == IEEE80211_MODE_11B) mode = IEEE80211_MODE_11G; /* upgrade */ } } } else { /* * This lets add_channels upgrade an 11b channel * to 11g if available. */ if (mode == IEEE80211_MODE_11B) mode = IEEE80211_MODE_AUTO; } #ifdef IEEE80211_F_XR /* XR does not operate on turbo channels */ if ((vap->iv_flags & IEEE80211_F_XR) && (mode == IEEE80211_MODE_TURBO_A || mode == IEEE80211_MODE_TURBO_G || mode == IEEE80211_MODE_STURBO_A)) continue; #endif /* * Add the list of the channels; any that are not * in the master channel list will be discarded. */ add_channels(vap, ss, mode, scan->list, scan->count); } /* * Add the channels from the ic that are not present * in the table. */ sweepchannels(ss, vap, table); } static const uint16_t rcl1[] = /* 8 FCC channel: 52, 56, 60, 64, 36, 40, 44, 48 */ { 5260, 5280, 5300, 5320, 5180, 5200, 5220, 5240 }; static const uint16_t rcl2[] = /* 4 MKK channels: 34, 38, 42, 46 */ { 5170, 5190, 5210, 5230 }; static const uint16_t rcl3[] = /* 2.4Ghz ch: 1,6,11,7,13 */ { 2412, 2437, 2462, 2442, 2472 }; static const uint16_t rcl4[] = /* 5 FCC channel: 149, 153, 161, 165 */ { 5745, 5765, 5785, 5805, 5825 }; static const uint16_t rcl7[] = /* 11 ETSI channel: 100,104,108,112,116,120,124,128,132,136,140 */ { 5500, 5520, 5540, 5560, 5580, 5600, 5620, 5640, 5660, 5680, 5700 }; static const uint16_t rcl8[] = /* 2.4Ghz ch: 2,3,4,5,8,9,10,12 */ { 2417, 2422, 2427, 2432, 2447, 2452, 2457, 2467 }; static const uint16_t rcl9[] = /* 2.4Ghz ch: 14 */ { 2484 }; static const uint16_t rcl10[] = /* Added Korean channels 2312-2372 */ { 2312, 2317, 2322, 2327, 2332, 2337, 2342, 2347, 2352, 2357, 2362, 2367, 2372 }; static const uint16_t rcl11[] = /* Added Japan channels in 4.9/5.0 spectrum */ { 5040, 5060, 5080, 4920, 4940, 4960, 4980 }; #ifdef ATH_TURBO_SCAN static const uint16_t rcl5[] = /* 3 static turbo channels */ { 5210, 5250, 5290 }; static const uint16_t rcl6[] = /* 2 static turbo channels */ { 5760, 5800 }; static const uint16_t rcl6x[] = /* 4 FCC3 turbo channels */ { 5540, 5580, 5620, 5660 }; static const uint16_t rcl12[] = /* 2.4Ghz Turbo channel 6 */ { 2437 }; static const uint16_t rcl13[] = /* dynamic Turbo channels */ { 5200, 5240, 5280, 5765, 5805 }; #endif /* ATH_TURBO_SCAN */ #define X(a) .count = sizeof(a)/sizeof(a[0]), .list = a static const struct scanlist staScanTable[] = { { IEEE80211_MODE_11B, X(rcl3) }, { IEEE80211_MODE_11A, X(rcl1) }, { IEEE80211_MODE_11A, X(rcl2) }, { IEEE80211_MODE_11B, 
X(rcl8) }, { IEEE80211_MODE_11B, X(rcl9) }, { IEEE80211_MODE_11A, X(rcl4) }, #ifdef ATH_TURBO_SCAN { IEEE80211_MODE_STURBO_A, X(rcl5) }, { IEEE80211_MODE_STURBO_A, X(rcl6) }, { IEEE80211_MODE_TURBO_A, X(rcl6x) }, { IEEE80211_MODE_TURBO_A, X(rcl13) }, #endif /* ATH_TURBO_SCAN */ { IEEE80211_MODE_11A, X(rcl7) }, { IEEE80211_MODE_11B, X(rcl10) }, { IEEE80211_MODE_11A, X(rcl11) }, #ifdef ATH_TURBO_SCAN { IEEE80211_MODE_TURBO_G, X(rcl12) }, #endif /* ATH_TURBO_SCAN */ { .list = NULL } }; /* * Start a station-mode scan by populating the channel list. */ static int sta_start(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; makescanlist(ss, vap, staScanTable); if (ss->ss_mindwell == 0) ss->ss_mindwell = msecs_to_ticks(20); /* 20ms */ if (ss->ss_maxdwell == 0) ss->ss_maxdwell = msecs_to_ticks(200); /* 200ms */ st->st_scangen++; st->st_newscan = 1; return 0; } /* * Restart a scan, typically a bg scan but can * also be a fg scan that came up empty. */ static int sta_restart(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; st->st_newscan = 1; return 0; } /* * Cancel an ongoing scan. */ static int sta_cancel(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { return 0; } /* * Demote any supplied 11g channel to 11b. There should * always be an 11b channel but we check anyway... */ static struct ieee80211_channel * demote11b(struct ieee80211vap *vap, struct ieee80211_channel *chan) { struct ieee80211_channel *c; if (IEEE80211_IS_CHAN_ANYG(chan) && vap->iv_des_mode == IEEE80211_MODE_AUTO) { c = ieee80211_find_channel(vap->iv_ic, chan->ic_freq, (chan->ic_flags &~ (IEEE80211_CHAN_PUREG | IEEE80211_CHAN_G)) | IEEE80211_CHAN_B); if (c != NULL) chan = c; } return chan; } static int maxrate(const struct ieee80211_scan_entry *se) { const struct ieee80211_ie_htcap *htcap = (const struct ieee80211_ie_htcap *) se->se_ies.htcap_ie; int rmax, r, i, txstream; uint16_t caps; uint8_t txparams; rmax = 0; if (htcap != NULL) { /* * HT station; inspect supported MCS and then adjust * rate by channel width. */ txparams = htcap->hc_mcsset[12]; if (txparams & 0x3) { /* * TX MCS parameters defined and not equal to RX, * extract the number of spatial streams and * map it to the highest MCS rate. */ txstream = ((txparams & 0xc) >> 2) + 1; i = txstream * 8 - 1; } else for (i = 31; i >= 0 && isclr(htcap->hc_mcsset, i); i--); if (i >= 0) { caps = le16dec(&htcap->hc_cap); if ((caps & IEEE80211_HTCAP_CHWIDTH40) && (caps & IEEE80211_HTCAP_SHORTGI40)) rmax = ieee80211_htrates[i].ht40_rate_400ns; else if (caps & IEEE80211_HTCAP_CHWIDTH40) rmax = ieee80211_htrates[i].ht40_rate_800ns; else if (caps & IEEE80211_HTCAP_SHORTGI20) rmax = ieee80211_htrates[i].ht20_rate_400ns; else rmax = ieee80211_htrates[i].ht20_rate_800ns; } } for (i = 0; i < se->se_rates[1]; i++) { r = se->se_rates[2+i] & IEEE80211_RATE_VAL; if (r > rmax) rmax = r; } for (i = 0; i < se->se_xrates[1]; i++) { r = se->se_xrates[2+i] & IEEE80211_RATE_VAL; if (r > rmax) rmax = r; } return rmax; } /* * Compare the capabilities of two entries and decide which is * more desirable (return >0 if a is considered better). Note * that we assume compatibility/usability has already been checked * so we don't need to (e.g. validate whether privacy is supported). * Used to select the best scan candidate for association in a BSS.
*/ static int sta_compare(const struct sta_entry *a, const struct sta_entry *b) { #define PREFER(_a,_b,_what) do { \ if (((_a) ^ (_b)) & (_what)) \ return ((_a) & (_what)) ? 1 : -1; \ } while (0) int maxa, maxb; int8_t rssia, rssib; int weight; /* privacy support */ PREFER(a->base.se_capinfo, b->base.se_capinfo, IEEE80211_CAPINFO_PRIVACY); /* compare count of previous failures */ weight = b->se_fails - a->se_fails; if (abs(weight) > 1) return weight; /* * Compare rssi. If the two are considered equivalent * then fallback to other criteria. We threshold the * comparisons to avoid selecting an ap purely by rssi * when both values may be good but one ap is otherwise * more desirable (e.g. an 11b-only ap with stronger * signal than an 11g ap). */ rssia = MIN(a->base.se_rssi, STA_RSSI_MAX); rssib = MIN(b->base.se_rssi, STA_RSSI_MAX); if (abs(rssib - rssia) < 5) { /* best/max rate preferred if signal level close enough XXX */ maxa = maxrate(&a->base); maxb = maxrate(&b->base); if (maxa != maxb) return maxa - maxb; /* XXX use freq for channel preference */ /* for now just prefer 5Ghz band to all other bands */ PREFER(IEEE80211_IS_CHAN_5GHZ(a->base.se_chan), IEEE80211_IS_CHAN_5GHZ(b->base.se_chan), 1); } /* all things being equal, use signal level */ return a->base.se_rssi - b->base.se_rssi; #undef PREFER } /* * Check rate set suitability and return the best supported rate. * XXX inspect MCS for HT */ static int check_rate(struct ieee80211vap *vap, const struct ieee80211_channel *chan, const struct ieee80211_scan_entry *se) { const struct ieee80211_rateset *srs; int i, j, nrs, r, okrate, badrate, fixedrate, ucastrate; const uint8_t *rs; okrate = badrate = 0; srs = ieee80211_get_suprates(vap->iv_ic, chan); nrs = se->se_rates[1]; rs = se->se_rates+2; /* XXX MCS */ ucastrate = vap->iv_txparms[ieee80211_chan2mode(chan)].ucastrate; fixedrate = IEEE80211_FIXED_RATE_NONE; again: for (i = 0; i < nrs; i++) { r = IEEE80211_RV(rs[i]); badrate = r; /* * Check any fixed rate is included. */ if (r == ucastrate) fixedrate = r; /* * Check against our supported rates. */ for (j = 0; j < srs->rs_nrates; j++) if (r == IEEE80211_RV(srs->rs_rates[j])) { if (r > okrate) /* NB: track max */ okrate = r; break; } if (j == srs->rs_nrates && (rs[i] & IEEE80211_RATE_BASIC)) { /* * Don't try joining a BSS, if we don't support * one of its basic rates. */ okrate = 0; goto back; } } if (rs == se->se_rates+2) { /* scan xrates too; sort of an algol68-style for loop */ nrs = se->se_xrates[1]; rs = se->se_xrates+2; goto again; } back: if (okrate == 0 || ucastrate != fixedrate) return badrate | IEEE80211_RATE_BASIC; else return IEEE80211_RV(okrate); } static __inline int match_id(const uint8_t *ie, const uint8_t *val, int len) { return (ie[1] == len && memcmp(ie+2, val, len) == 0); } static int match_ssid(const uint8_t *ie, int nssid, const struct ieee80211_scan_ssid ssids[]) { int i; for (i = 0; i < nssid; i++) { if (match_id(ie, ssids[i].ssid, ssids[i].len)) return 1; } return 0; } #ifdef IEEE80211_SUPPORT_TDMA static int tdma_isfull(const struct ieee80211_tdma_param *tdma) { int slot, slotcnt; slotcnt = tdma->tdma_slotcnt; for (slot = slotcnt-1; slot >= 0; slot--) if (isclr(tdma->tdma_inuse, slot)) return 0; return 1; } #endif /* IEEE80211_SUPPORT_TDMA */ /* * Test a scan candidate for suitability/compatibility. 
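The PREFER() macro in sta_compare() short-circuits the comparator as soon as exactly one of the two entries holds the preferred attribute; ties fall through to the next criterion. A standalone rendition (the 0x0010 capability bit is an illustrative value):

#include <stdio.h>

#define PREFER(_a, _b, _what) do {			\
	if (((_a) ^ (_b)) & (_what))			\
		return ((_a) & (_what)) ? 1 : -1;	\
} while (0)

static int
cmp_privacy(int capa, int capb)
{
	PREFER(capa, capb, 0x0010);	/* e.g. a privacy capability bit */
	return (0);			/* tie: fall through to next test */
}

int
main(void)
{
	/* -> 1 -1 0 */
	printf("%d %d %d\n", cmp_privacy(0x10, 0), cmp_privacy(0, 0x10),
	    cmp_privacy(0x10, 0x10));
	return (0);
}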
*/ static int match_bss(struct ieee80211vap *vap, const struct ieee80211_scan_state *ss, struct sta_entry *se0, int debug) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_entry *se = &se0->base; uint8_t rate; int fail; fail = 0; if (isclr(ic->ic_chan_active, ieee80211_chan2ieee(ic, se->se_chan))) fail |= MATCH_CHANNEL; /* * NB: normally the desired mode is used to construct * the channel list, but it's possible for the scan * cache to include entries for stations outside this * list so we check the desired mode here to weed them * out. */ if (vap->iv_des_mode != IEEE80211_MODE_AUTO && (se->se_chan->ic_flags & IEEE80211_CHAN_ALLTURBO) != chanflags[vap->iv_des_mode]) fail |= MATCH_CHANNEL; if (vap->iv_opmode == IEEE80211_M_IBSS) { if ((se->se_capinfo & IEEE80211_CAPINFO_IBSS) == 0) fail |= MATCH_CAPINFO; #ifdef IEEE80211_SUPPORT_TDMA } else if (vap->iv_opmode == IEEE80211_M_AHDEMO) { /* * Adhoc demo network setup shouldn't really be scanning * but just in case skip stations operating in IBSS or * BSS mode. */ if (se->se_capinfo & (IEEE80211_CAPINFO_IBSS|IEEE80211_CAPINFO_ESS)) fail |= MATCH_CAPINFO; /* * TDMA operation cannot coexist with a normal 802.11 network; * skip if IBSS or ESS capabilities are marked and require * the beacon have a TDMA ie present. */ if (vap->iv_caps & IEEE80211_C_TDMA) { const struct ieee80211_tdma_param *tdma = (const struct ieee80211_tdma_param *)se->se_ies.tdma_ie; const struct ieee80211_tdma_state *ts = vap->iv_tdma; if (tdma == NULL) fail |= MATCH_TDMA_NOIE; else if (tdma->tdma_version != ts->tdma_version) fail |= MATCH_TDMA_VERSION; else if (tdma->tdma_slot != 0) fail |= MATCH_TDMA_NOTMASTER; else if (tdma_isfull(tdma)) fail |= MATCH_TDMA_NOSLOT; #if 0 else if (ieee80211_local_address(se->se_macaddr)) fail |= MATCH_TDMA_LOCAL; #endif } #endif /* IEEE80211_SUPPORT_TDMA */ #ifdef IEEE80211_SUPPORT_MESH } else if (vap->iv_opmode == IEEE80211_M_MBSS) { const struct ieee80211_mesh_state *ms = vap->iv_mesh; /* * Mesh nodes have IBSS & ESS bits in capinfo turned off * and two special ie's that must be present. */ if (se->se_capinfo & (IEEE80211_CAPINFO_IBSS|IEEE80211_CAPINFO_ESS)) fail |= MATCH_CAPINFO; else if (se->se_meshid[0] != IEEE80211_ELEMID_MESHID) fail |= MATCH_MESH_NOID; else if (ms->ms_idlen != 0 && match_id(se->se_meshid, ms->ms_id, ms->ms_idlen)) fail |= MATCH_MESHID; #endif } else { if ((se->se_capinfo & IEEE80211_CAPINFO_ESS) == 0) fail |= MATCH_CAPINFO; /* * If 11d is enabled and we're attempting to join a bss * that advertises its country code then compare our * current settings to what we fetched from the country ie. * If our country code is unspecified or different then do * not attempt to join the bss. We should have already * dispatched an event to user space that identifies the * new country code so our regdomain config should match. */ if ((IEEE80211_IS_CHAN_11D(se->se_chan) || (vap->iv_flags_ext & IEEE80211_FEXT_DOTD)) && se->se_cc[0] != 0 && (ic->ic_regdomain.country == CTRY_DEFAULT || !isocmp(se->se_cc, ic->ic_regdomain.isocc))) fail |= MATCH_CC; } if (vap->iv_flags & IEEE80211_F_PRIVACY) { if ((se->se_capinfo & IEEE80211_CAPINFO_PRIVACY) == 0) fail |= MATCH_PRIVACY; } else { /* XXX does this mean privacy is supported or required? */ if (se->se_capinfo & IEEE80211_CAPINFO_PRIVACY) fail |= MATCH_PRIVACY; } se0->se_flags &= ~STA_DEMOTE11B; rate = check_rate(vap, se->se_chan, se); if (rate & IEEE80211_RATE_BASIC) { fail |= MATCH_RATE; /* * An 11b-only ap will give a rate mismatch if there is an * OFDM fixed tx rate for 11g.
Try downgrading the channel * in the scan list to 11b and retry the rate check. */ if (IEEE80211_IS_CHAN_ANYG(se->se_chan)) { rate = check_rate(vap, demote11b(vap, se->se_chan), se); if ((rate & IEEE80211_RATE_BASIC) == 0) { fail &= ~MATCH_RATE; se0->se_flags |= STA_DEMOTE11B; } } } else if (rate < 2*24) { /* * This is an 11b-only ap. Check the desired mode in * case that needs to be honored (mode 11g filters out * 11b-only ap's). Otherwise force any 11g channel used * in scanning to be demoted. * * NB: we cheat a bit here by looking at the max rate; * we could/should check the rates. */ if (!(vap->iv_des_mode == IEEE80211_MODE_AUTO || vap->iv_des_mode == IEEE80211_MODE_11B)) fail |= MATCH_RATE; else se0->se_flags |= STA_DEMOTE11B; } if (ss->ss_nssid != 0 && !match_ssid(se->se_ssid, ss->ss_nssid, ss->ss_ssid)) fail |= MATCH_SSID; if ((vap->iv_flags & IEEE80211_F_DESBSSID) && !IEEE80211_ADDR_EQ(vap->iv_des_bssid, se->se_bssid)) fail |= MATCH_BSSID; if (se0->se_fails >= STA_FAILS_MAX) fail |= MATCH_FAILS; if (se0->se_notseen >= STA_PURGE_SCANS) fail |= MATCH_NOTSEEN; if (se->se_rssi < STA_RSSI_MIN) fail |= MATCH_RSSI; #ifdef IEEE80211_DEBUG if (ieee80211_msg(vap, debug)) { printf(" %c %s", fail & MATCH_FAILS ? '=' : fail & MATCH_NOTSEEN ? '^' : fail & MATCH_CC ? '$' : #ifdef IEEE80211_SUPPORT_TDMA fail & MATCH_TDMA_NOIE ? '&' : fail & MATCH_TDMA_VERSION ? 'v' : fail & MATCH_TDMA_NOTMASTER ? 's' : fail & MATCH_TDMA_NOSLOT ? 'f' : fail & MATCH_TDMA_LOCAL ? 'l' : #endif fail & MATCH_MESH_NOID ? 'm' : fail ? '-' : '+', ether_sprintf(se->se_macaddr)); printf(" %s%c", ether_sprintf(se->se_bssid), fail & MATCH_BSSID ? '!' : ' '); printf(" %3d%c", ieee80211_chan2ieee(ic, se->se_chan), fail & MATCH_CHANNEL ? '!' : ' '); printf(" %+4d%c", se->se_rssi, fail & MATCH_RSSI ? '!' : ' '); printf(" %2dM%c", (rate & IEEE80211_RATE_VAL) / 2, fail & MATCH_RATE ? '!' : ' '); printf(" %4s%c", (se->se_capinfo & IEEE80211_CAPINFO_ESS) ? "ess" : (se->se_capinfo & IEEE80211_CAPINFO_IBSS) ? "ibss" : "", fail & MATCH_CAPINFO ? '!' : ' '); printf(" %3s%c ", (se->se_capinfo & IEEE80211_CAPINFO_PRIVACY) ? "wep" : "no", fail & MATCH_PRIVACY ? '!' : ' '); ieee80211_print_essid(se->se_ssid+2, se->se_ssid[1]); printf("%s\n", fail & (MATCH_SSID | MATCH_MESHID) ? "!" : ""); } #endif return fail; } static void sta_update_notseen(struct sta_table *st) { struct sta_entry *se; IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) { /* * If seen then reset and don't bump the count; * otherwise bump the ``not seen'' count. Note * that this ensures that stations for which we * see frames while not scanning, but not during * this scan, will not be penalized.
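* Entries whose ``not seen'' count reaches STA_PURGE_SCANS are subsequently rejected by match_bss() (MATCH_NOTSEEN) and are eventually reclaimed by adhoc_age().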
*/ if (se->se_seen) se->se_seen = 0; else se->se_notseen++; } IEEE80211_SCAN_TABLE_UNLOCK(st); } static void sta_dec_fails(struct sta_table *st) { struct sta_entry *se; IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) if (se->se_fails) se->se_fails--; IEEE80211_SCAN_TABLE_UNLOCK(st); } static struct sta_entry * select_bss(struct ieee80211_scan_state *ss, struct ieee80211vap *vap, int debug) { struct sta_table *st = ss->ss_priv; struct sta_entry *se, *selbs = NULL; IEEE80211_DPRINTF(vap, debug, " %s\n", "macaddr bssid chan rssi rate flag wep essid"); IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) { ieee80211_ies_expand(&se->base.se_ies); if (match_bss(vap, ss, se, debug) == 0) { if (selbs == NULL) selbs = se; else if (sta_compare(se, selbs) > 0) selbs = se; } } IEEE80211_SCAN_TABLE_UNLOCK(st); return selbs; } /* * Pick an ap or ibss network to join or find a channel * to use to start an ibss network. */ static int sta_pick_bss(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; struct sta_entry *selbs; struct ieee80211_channel *chan; KASSERT(vap->iv_opmode == IEEE80211_M_STA, ("wrong mode %u", vap->iv_opmode)); if (st->st_newscan) { sta_update_notseen(st); st->st_newscan = 0; } if (ss->ss_flags & IEEE80211_SCAN_NOPICK) { /* * Manual/background scan, don't select+join the * bss, just return. The scanning framework will * handle notification that this has completed. */ ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; return 1; } /* * Automatic sequencing; look for a candidate and * if found join the network. */ /* NB: unlocked read should be ok */ if (TAILQ_FIRST(&st->st_entry) == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: no scan candidate\n", __func__); if (ss->ss_flags & IEEE80211_SCAN_NOJOIN) return 0; notfound: /* * If nothing suitable was found decrement * the failure counts so entries will be * reconsidered the next time around. We * really want to do this only for sta's * where we've previously had some success. */ sta_dec_fails(st); st->st_newscan = 1; return 0; /* restart scan */ } selbs = select_bss(ss, vap, IEEE80211_MSG_SCAN); if (ss->ss_flags & IEEE80211_SCAN_NOJOIN) return (selbs != NULL); if (selbs == NULL) goto notfound; chan = selbs->base.se_chan; if (selbs->se_flags & STA_DEMOTE11B) chan = demote11b(vap, chan); if (!ieee80211_sta_join(vap, chan, &selbs->base)) goto notfound; return 1; /* terminate scan */ } /* * Lookup an entry in the scan cache. We assume we're * called from the bottom half or such that we don't need * to block the bottom half so that it's safe to return * a reference to an entry w/o holding the lock on the table. 
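* Lookups hash the MAC address (STA_HASH) into per-bucket lists, so the common case is a short walk of one bucket rather than of the whole entry list.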
*/ static struct sta_entry * sta_lookup(struct sta_table *st, const uint8_t macaddr[IEEE80211_ADDR_LEN]) { struct sta_entry *se; int hash = STA_HASH(macaddr); IEEE80211_SCAN_TABLE_LOCK(st); LIST_FOREACH(se, &st->st_hash[hash], se_hash) if (IEEE80211_ADDR_EQ(se->base.se_macaddr, macaddr)) break; IEEE80211_SCAN_TABLE_UNLOCK(st); return se; /* NB: unlocked */ } static void sta_roam_check(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = vap->iv_bss; struct sta_table *st = ss->ss_priv; enum ieee80211_phymode mode; struct sta_entry *se, *selbs; uint8_t roamRate, curRate, ucastRate; int8_t roamRssi, curRssi; se = sta_lookup(st, ni->ni_macaddr); if (se == NULL) { /* XXX something is wrong */ return; } mode = ieee80211_chan2mode(ic->ic_bsschan); roamRate = vap->iv_roamparms[mode].rate; roamRssi = vap->iv_roamparms[mode].rssi; ucastRate = vap->iv_txparms[mode].ucastrate; /* NB: the most up to date rssi is in the node, not the scan cache */ curRssi = ic->ic_node_getrssi(ni); if (ucastRate == IEEE80211_FIXED_RATE_NONE) { curRate = ni->ni_txrate; roamRate &= IEEE80211_RATE_VAL; IEEE80211_DPRINTF(vap, IEEE80211_MSG_ROAM, "%s: currssi %d currate %u roamrssi %d roamrate %u\n", __func__, curRssi, curRate, roamRssi, roamRate); } else { curRate = roamRate; /* NB: insure compare below fails */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_ROAM, "%s: currssi %d roamrssi %d\n", __func__, curRssi, roamRssi); } /* * Check if a new ap should be used and switch. * XXX deauth current ap */ if (curRate < roamRate || curRssi < roamRssi) { if (ieee80211_time_after(ticks, ic->ic_lastscan + vap->iv_scanvalid)) { /* * Scan cache contents are too old; force a scan now * if possible so we have current state to make a * decision with. We don't kick off a bg scan if * we're using dynamic turbo and boosted or if the * channel is busy. * XXX force immediate switch on scan complete */ if (!IEEE80211_IS_CHAN_DTURBO(ic->ic_curchan) && ieee80211_time_after(ticks, ic->ic_lastdata + vap->iv_bgscanidle)) ieee80211_bg_scan(vap, 0); return; } se->base.se_rssi = curRssi; selbs = select_bss(ss, vap, IEEE80211_MSG_ROAM); if (selbs != NULL && selbs != se) { struct ieee80211_channel *chan; IEEE80211_DPRINTF(vap, IEEE80211_MSG_ROAM | IEEE80211_MSG_DEBUG, "%s: ROAM: curRate %u, roamRate %u, " "curRssi %d, roamRssi %d\n", __func__, curRate, roamRate, curRssi, roamRssi); chan = selbs->base.se_chan; if (selbs->se_flags & STA_DEMOTE11B) chan = demote11b(vap, chan); (void) ieee80211_sta_join(vap, chan, &selbs->base); } } } /* * Age entries in the scan cache. * XXX also do roaming since it's convenient */ static void sta_age(struct ieee80211_scan_state *ss) { struct ieee80211vap *vap = ss->ss_vap; adhoc_age(ss); /* * If rate control is enabled check periodically to see if * we should roam from our current connection to one that * might be better. This only applies when we're operating * in sta mode and automatic roaming is set. * XXX defer if busy * XXX repeater station * XXX do when !bgscan? */ KASSERT(vap->iv_opmode == IEEE80211_M_STA, ("wrong mode %u", vap->iv_opmode)); if (vap->iv_roaming == IEEE80211_ROAMING_AUTO && (vap->iv_flags & IEEE80211_F_BGSCAN) && vap->iv_state >= IEEE80211_S_RUN) /* XXX vap is implicit */ sta_roam_check(ss, vap); } /* * Iterate over the entries in the scan cache, invoking * the callback function on each one. 
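* The table lock cannot be held across the callback, so each visited entry is stamped with the current iteration generation (se_scangen) and the walk restarts from the head after every callback; already-stamped entries are skipped on subsequent passes.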
*/ static void sta_iterate(struct ieee80211_scan_state *ss, ieee80211_scan_iter_func *f, void *arg) { struct sta_table *st = ss->ss_priv; struct sta_entry *se; u_int gen; IEEE80211_SCAN_ITER_LOCK(st); gen = st->st_scaniter++; restart: IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) { if (se->se_scangen != gen) { se->se_scangen = gen; /* update public state */ se->base.se_age = ticks - se->se_lastupdate; IEEE80211_SCAN_TABLE_UNLOCK(st); (*f)(arg, &se->base); goto restart; } } IEEE80211_SCAN_TABLE_UNLOCK(st); IEEE80211_SCAN_ITER_UNLOCK(st); } static void sta_assoc_fail(struct ieee80211_scan_state *ss, const uint8_t macaddr[IEEE80211_ADDR_LEN], int reason) { struct sta_table *st = ss->ss_priv; struct sta_entry *se; se = sta_lookup(st, macaddr); if (se != NULL) { se->se_fails++; se->se_lastfail = ticks; IEEE80211_NOTE_MAC(ss->ss_vap, IEEE80211_MSG_SCAN, macaddr, "%s: reason %u fails %u", __func__, reason, se->se_fails); } } static void sta_assoc_success(struct ieee80211_scan_state *ss, const uint8_t macaddr[IEEE80211_ADDR_LEN]) { struct sta_table *st = ss->ss_priv; struct sta_entry *se; se = sta_lookup(st, macaddr); if (se != NULL) { #if 0 se->se_fails = 0; IEEE80211_NOTE_MAC(ss->ss_vap, IEEE80211_MSG_SCAN, macaddr, "%s: fails %u", __func__, se->se_fails); #endif se->se_lastassoc = ticks; } } static const struct ieee80211_scanner sta_default = { .scan_name = "default", .scan_attach = sta_attach, .scan_detach = sta_detach, .scan_start = sta_start, .scan_restart = sta_restart, .scan_cancel = sta_cancel, .scan_end = sta_pick_bss, .scan_flush = sta_flush, .scan_add = sta_add, .scan_age = sta_age, .scan_iterate = sta_iterate, .scan_assoc_fail = sta_assoc_fail, .scan_assoc_success = sta_assoc_success, }; IEEE80211_SCANNER_ALG(sta, IEEE80211_M_STA, sta_default); /* * Adhoc mode-specific support. */ static const uint16_t adhocWorld[] = /* 36, 40, 44, 48 */ { 5180, 5200, 5220, 5240 }; static const uint16_t adhocFcc3[] = /* 36, 40, 44, 48 145, 149, 153, 157, 161, 165 */ { 5180, 5200, 5220, 5240, 5725, 5745, 5765, 5785, 5805, 5825 }; static const uint16_t adhocMkk[] = /* 34, 38, 42, 46 */ { 5170, 5190, 5210, 5230 }; static const uint16_t adhoc11b[] = /* 10, 11 */ { 2457, 2462 }; static const struct scanlist adhocScanTable[] = { { IEEE80211_MODE_11B, X(adhoc11b) }, { IEEE80211_MODE_11A, X(adhocWorld) }, { IEEE80211_MODE_11A, X(adhocFcc3) }, { IEEE80211_MODE_11B, X(adhocMkk) }, { .list = NULL } }; #undef X /* * Start an adhoc-mode scan by populating the channel list. */ static int adhoc_start(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; makescanlist(ss, vap, adhocScanTable); if (ss->ss_mindwell == 0) ss->ss_mindwell = msecs_to_ticks(200); /* 200ms */ if (ss->ss_maxdwell == 0) ss->ss_maxdwell = msecs_to_ticks(200); /* 200ms */ st->st_scangen++; st->st_newscan = 1; return 0; } /* * Select a channel to start an adhoc network on. * The channel list was populated with appropriate * channels so select one that looks least occupied. 
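* ``Least occupied'' means the channel whose strongest cached entry has the lowest rssi; channels with radar detected or on which adhoc operation is disallowed are skipped outright.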
*/ static struct ieee80211_channel * adhoc_pick_channel(struct ieee80211_scan_state *ss, int flags) { struct sta_table *st = ss->ss_priv; struct sta_entry *se; struct ieee80211_channel *c, *bestchan; int i, bestrssi, maxrssi; bestchan = NULL; bestrssi = -1; IEEE80211_SCAN_TABLE_LOCK(st); for (i = 0; i < ss->ss_last; i++) { c = ss->ss_chans[i]; /* never consider a channel with radar */ if (IEEE80211_IS_CHAN_RADAR(c)) continue; /* skip channels disallowed by regulatory settings */ if (IEEE80211_IS_CHAN_NOADHOC(c)) continue; /* check channel attributes for band compatibility */ if (flags != 0 && (c->ic_flags & flags) != flags) continue; maxrssi = 0; TAILQ_FOREACH(se, &st->st_entry, se_list) { if (se->base.se_chan != c) continue; if (se->base.se_rssi > maxrssi) maxrssi = se->base.se_rssi; } if (bestchan == NULL || maxrssi < bestrssi) bestchan = c; } IEEE80211_SCAN_TABLE_UNLOCK(st); return bestchan; } /* * Pick an ibss network to join or find a channel * to use to start an ibss network. */ static int adhoc_pick_bss(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; struct sta_entry *selbs; struct ieee80211_channel *chan; struct ieee80211com *ic = vap->iv_ic; KASSERT(vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_AHDEMO || vap->iv_opmode == IEEE80211_M_MBSS, ("wrong opmode %u", vap->iv_opmode)); if (st->st_newscan) { sta_update_notseen(st); st->st_newscan = 0; } if (ss->ss_flags & IEEE80211_SCAN_NOPICK) { /* * Manual/background scan, don't select+join the * bss, just return. The scanning framework will * handle notification that this has completed. */ ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; return 1; } /* * Automatic sequencing; look for a candidate and * if found join the network. */ /* NB: unlocked read should be ok */ if (TAILQ_FIRST(&st->st_entry) == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: no scan candidate\n", __func__); if (ss->ss_flags & IEEE80211_SCAN_NOJOIN) return 0; notfound: /* NB: never auto-start a tdma network for slot !0 */ #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_des_nssid && ((vap->iv_caps & IEEE80211_C_TDMA) == 0 || ieee80211_tdma_getslot(vap) == 0)) { #else if (vap->iv_des_nssid) { #endif /* * No existing adhoc network to join and we have * an ssid; start one up. If no channel was * specified, try to select a channel. */ if (vap->iv_des_chan == IEEE80211_CHAN_ANYC || IEEE80211_IS_CHAN_RADAR(vap->iv_des_chan)) { chan = adhoc_pick_channel(ss, 0); } else chan = vap->iv_des_chan; if (chan != NULL) { /* * Create a HT capable IBSS; the per-node * probe request/response will result in * "correct" rate control capabilities being * negotiated. */ chan = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht); ieee80211_create_ibss(vap, chan); return 1; } } /* * If nothing suitable was found decrement * the failure counts so entries will be * reconsidered the next time around. We * really want to do this only for sta's * where we've previously had some success. */ sta_dec_fails(st); st->st_newscan = 1; return 0; /* restart scan */ } selbs = select_bss(ss, vap, IEEE80211_MSG_SCAN); if (ss->ss_flags & IEEE80211_SCAN_NOJOIN) return (selbs != NULL); if (selbs == NULL) goto notfound; chan = selbs->base.se_chan; if (selbs->se_flags & STA_DEMOTE11B) chan = demote11b(vap, chan); /* * If HT is available, make it a possibility here. * The intent is to enable HT20/HT40 when joining a non-HT * IBSS node; we can then advertise HT IEs and speak HT * to any subsequent nodes that support it. 
*/ chan = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht); if (!ieee80211_sta_join(vap, chan, &selbs->base)) goto notfound; return 1; /* terminate scan */ } /* * Age entries in the scan cache. */ static void adhoc_age(struct ieee80211_scan_state *ss) { struct sta_table *st = ss->ss_priv; struct sta_entry *se, *next; IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH_SAFE(se, &st->st_entry, se_list, next) { if (se->se_notseen > STA_PURGE_SCANS) { TAILQ_REMOVE(&st->st_entry, se, se_list); LIST_REMOVE(se, se_hash); ieee80211_ies_cleanup(&se->base.se_ies); IEEE80211_FREE(se, M_80211_SCAN); } } IEEE80211_SCAN_TABLE_UNLOCK(st); } static const struct ieee80211_scanner adhoc_default = { .scan_name = "default", .scan_attach = sta_attach, .scan_detach = sta_detach, .scan_start = adhoc_start, .scan_restart = sta_restart, .scan_cancel = sta_cancel, .scan_end = adhoc_pick_bss, .scan_flush = sta_flush, .scan_pickchan = adhoc_pick_channel, .scan_add = sta_add, .scan_age = adhoc_age, .scan_iterate = sta_iterate, .scan_assoc_fail = sta_assoc_fail, .scan_assoc_success = sta_assoc_success, }; IEEE80211_SCANNER_ALG(ibss, IEEE80211_M_IBSS, adhoc_default); IEEE80211_SCANNER_ALG(ahdemo, IEEE80211_M_AHDEMO, adhoc_default); static int ap_start(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; makescanlist(ss, vap, staScanTable); if (ss->ss_mindwell == 0) ss->ss_mindwell = msecs_to_ticks(200); /* 200ms */ if (ss->ss_maxdwell == 0) ss->ss_maxdwell = msecs_to_ticks(200); /* 200ms */ st->st_scangen++; st->st_newscan = 1; return 0; } /* * Cancel an ongoing scan. */ static int ap_cancel(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { return 0; } /* * Pick a quiet channel to use for ap operation. */ static struct ieee80211_channel * ap_pick_channel(struct ieee80211_scan_state *ss, int flags) { struct sta_table *st = ss->ss_priv; struct ieee80211_channel *bestchan = NULL; int i; /* XXX select channel more intelligently, e.g. channel spread, power */ /* NB: use scan list order to preserve channel preference */ for (i = 0; i < ss->ss_last; i++) { struct ieee80211_channel *chan = ss->ss_chans[i]; /* * If the channel is unoccupied the max rssi * should be zero; just take it. Otherwise * track the channel with the lowest rssi and * use that when all channels appear occupied. */ if (IEEE80211_IS_CHAN_RADAR(chan)) continue; if (IEEE80211_IS_CHAN_NOHOSTAP(chan)) continue; /* check channel attributes for band compatibility */ if (flags != 0 && (chan->ic_flags & flags) != flags) continue; KASSERT(sizeof(chan->ic_ieee) == 1, ("ic_chan size")); /* XXX channel have interference */ if (st->st_maxrssi[chan->ic_ieee] == 0) { /* XXX use other considerations */ return chan; } if (bestchan == NULL || st->st_maxrssi[chan->ic_ieee] < st->st_maxrssi[bestchan->ic_ieee]) bestchan = chan; } return bestchan; } /* * Pick a quiet channel to use for ap operation. */ static int ap_end(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *bestchan; KASSERT(vap->iv_opmode == IEEE80211_M_HOSTAP, ("wrong opmode %u", vap->iv_opmode)); bestchan = ap_pick_channel(ss, 0); if (bestchan == NULL) { /* no suitable channel, should not happen */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: no suitable channel! (should not happen)\n", __func__); /* XXX print something? */ return 0; /* restart scan */ } /* * If this is a dynamic turbo channel, start with the unboosted one. 
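* The unboosted channel is looked up by frequency with the IEEE80211_CHAN_TURBO flag cleared; if that lookup fails the scan is simply restarted.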
*/ if (IEEE80211_IS_CHAN_TURBO(bestchan)) { bestchan = ieee80211_find_channel(ic, bestchan->ic_freq, bestchan->ic_flags & ~IEEE80211_CHAN_TURBO); if (bestchan == NULL) { /* should never happen ?? */ return 0; } } if (ss->ss_flags & (IEEE80211_SCAN_NOPICK | IEEE80211_SCAN_NOJOIN)) { /* * Manual/background scan, don't select+join the * bss, just return. The scanning framework will * handle notification that this has completed. */ ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; return 1; } ieee80211_create_ibss(vap, ieee80211_ht_adjust_channel(ic, bestchan, vap->iv_flags_ht)); return 1; } static const struct ieee80211_scanner ap_default = { .scan_name = "default", .scan_attach = sta_attach, .scan_detach = sta_detach, .scan_start = ap_start, .scan_restart = sta_restart, .scan_cancel = ap_cancel, .scan_end = ap_end, .scan_flush = sta_flush, .scan_pickchan = ap_pick_channel, .scan_add = sta_add, .scan_age = adhoc_age, .scan_iterate = sta_iterate, .scan_assoc_success = sta_assoc_success, .scan_assoc_fail = sta_assoc_fail, }; IEEE80211_SCANNER_ALG(ap, IEEE80211_M_HOSTAP, ap_default); #ifdef IEEE80211_SUPPORT_MESH /* * Pick an mbss network to join or find a channel * to use to start an mbss network. */ static int mesh_pick_bss(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct sta_table *st = ss->ss_priv; struct ieee80211_mesh_state *ms = vap->iv_mesh; struct sta_entry *selbs; struct ieee80211_channel *chan; KASSERT(vap->iv_opmode == IEEE80211_M_MBSS, ("wrong opmode %u", vap->iv_opmode)); if (st->st_newscan) { sta_update_notseen(st); st->st_newscan = 0; } if (ss->ss_flags & IEEE80211_SCAN_NOPICK) { /* * Manual/background scan, don't select+join the * bss, just return. The scanning framework will * handle notification that this has completed. */ ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; return 1; } /* * Automatic sequencing; look for a candidate and * if found join the network. */ /* NB: unlocked read should be ok */ if (TAILQ_FIRST(&st->st_entry) == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: no scan candidate\n", __func__); if (ss->ss_flags & IEEE80211_SCAN_NOJOIN) return 0; notfound: if (ms->ms_idlen != 0) { /* * No existing mbss network to join and we have * a meshid; start one up. If no channel was * specified, try to select a channel. */ if (vap->iv_des_chan == IEEE80211_CHAN_ANYC || IEEE80211_IS_CHAN_RADAR(vap->iv_des_chan)) { struct ieee80211com *ic = vap->iv_ic; chan = adhoc_pick_channel(ss, 0); if (chan != NULL) chan = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht); } else chan = vap->iv_des_chan; if (chan != NULL) { ieee80211_create_ibss(vap, chan); return 1; } } /* * If nothing suitable was found decrement * the failure counts so entries will be * reconsidered the next time around. We * really want to do this only for sta's * where we've previously had some success. 
*/ sta_dec_fails(st); st->st_newscan = 1; return 0; /* restart scan */ } selbs = select_bss(ss, vap, IEEE80211_MSG_SCAN); if (ss->ss_flags & IEEE80211_SCAN_NOJOIN) return (selbs != NULL); if (selbs == NULL) goto notfound; chan = selbs->base.se_chan; if (selbs->se_flags & STA_DEMOTE11B) chan = demote11b(vap, chan); if (!ieee80211_sta_join(vap, chan, &selbs->base)) goto notfound; return 1; /* terminate scan */ } static const struct ieee80211_scanner mesh_default = { .scan_name = "default", .scan_attach = sta_attach, .scan_detach = sta_detach, .scan_start = adhoc_start, .scan_restart = sta_restart, .scan_cancel = sta_cancel, .scan_end = mesh_pick_bss, .scan_flush = sta_flush, .scan_pickchan = adhoc_pick_channel, .scan_add = sta_add, .scan_age = adhoc_age, .scan_iterate = sta_iterate, .scan_assoc_fail = sta_assoc_fail, .scan_assoc_success = sta_assoc_success, }; IEEE80211_SCANNER_ALG(mesh, IEEE80211_M_MBSS, mesh_default); #endif /* IEEE80211_SUPPORT_MESH */ Index: stable/11/sys/powerpc/booke/locore.S =================================================================== --- stable/11/sys/powerpc/booke/locore.S (revision 330445) +++ stable/11/sys/powerpc/booke/locore.S (revision 330446) @@ -1,868 +1,868 @@ /*- * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.s" #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #define TMPSTACKSZ 16384 .text .globl btext btext: /* * This symbol is here for the benefit of kvm_mkdb, and is supposed to * mark the start of kernel text. */ .globl kernel_text kernel_text: /* * Startup entry. Note, this must be the first thing in the text segment! 
*/ .text .globl __start __start: /* * Assumptions on the boot loader: * - System memory starts from physical address 0 * - It's mapped by a single TLB1 entry * - TLB1 mapping is 1:1 pa to va * - Kernel is loaded at 64MB boundary * - All PID registers are set to the same value * - CPU is running in AS=0 * * Registers contents provided by the loader(8): * r1 : stack pointer * r3 : metadata pointer * * We rearrange the TLB1 layout as follows: * - Find TLB1 entry we started in * - Make sure it's protected, invalidate other entries * - Create temp entry in the second AS (make sure it's not TLB[1]) * - Switch to temp mapping * - Map 64MB of RAM in TLB1[1] * - Use AS=1, set EPN to KERNBASE and RPN to kernel load address - * - Switch to to TLB1[1] mapping + * - Switch to TLB1[1] mapping * - Invalidate temp mapping * * locore registers use: * r1 : stack pointer * r2 : trace pointer (AP only, for early diagnostics) * r3-r27 : scratch registers * r28 : temp TLB1 entry * r29 : initial TLB1 entry we started in * r30-r31 : arguments (metadata pointer) */ /* * Keep arguments in r30 & r31 for later use. */ mr %r30, %r3 mr %r31, %r4 /* * Initial cleanup */ li %r3, PSL_DE /* Keep debug exceptions for CodeWarrior. */ mtmsr %r3 isync /* * Initial HIDs configuration */ 1: mfpvr %r3 rlwinm %r3, %r3, 16, 16, 31 lis %r4, HID0_E500_DEFAULT_SET@h ori %r4, %r4, HID0_E500_DEFAULT_SET@l /* Check for e500mc and e5500 */ cmpli 0, 0, %r3, FSL_E500mc bne 2f lis %r4, HID0_E500MC_DEFAULT_SET@h ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l b 3f 2: cmpli 0, 0, %r3, FSL_E5500 bne 3f lis %r4, HID0_E5500_DEFAULT_SET@h ori %r4, %r4, HID0_E5500_DEFAULT_SET@l 3: mtspr SPR_HID0, %r4 isync /* * E500mc and E5500 do not have HID1 register, so skip HID1 setup on * this core. */ cmpli 0, 0, %r3, FSL_E500mc beq 1f cmpli 0, 0, %r3, FSL_E5500 beq 1f lis %r3, HID1_E500_DEFAULT_SET@h ori %r3, %r3, HID1_E500_DEFAULT_SET@l mtspr SPR_HID1, %r3 isync 1: /* Invalidate all entries in TLB0 */ li %r3, 0 bl tlb_inval_all cmpwi %r30, 0 beq done_mapping /* * Locate the TLB1 entry that maps this code */ bl 1f 1: mflr %r3 bl tlb1_find_current /* the entry found is returned in r29 */ bl tlb1_inval_all_but_current /* * Create temporary mapping in AS=1 and switch to it */ bl tlb1_temp_mapping_as1 mfmsr %r3 ori %r3, %r3, (PSL_IS | PSL_DS) bl 2f 2: mflr %r4 addi %r4, %r4, 20 mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* Switch context */ /* * Invalidate initial entry */ mr %r3, %r29 bl tlb1_inval_entry /* * Setup final mapping in TLB1[1] and switch to it */ /* Final kernel mapping, map in 64 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 10, 15 mtspr SPR_MAS0, %r3 isync li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ isync lis %r3, KERNBASE@h ori %r3, %r3, KERNBASE@l /* EPN = KERNBASE */ #ifdef SMP ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ #endif mtspr SPR_MAS2, %r3 isync /* Discover phys load address */ bl 3f 3: mflr %r4 /* Use current address */ rlwinm %r4, %r4, 0, 0, 5 /* 64MB alignment mask */ ori %r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r4 /* Set RPN and protection */ isync bl zero_mas7 bl zero_mas8 tlbwe isync msync /* Switch to the above TLB1[1] mapping */ bl 4f 4: mflr %r4 rlwinm %r4, %r4, 0, 8, 31 /* Current offset from kernel load address */ rlwinm %r3, %r3, 0, 0, 19 add %r4, %r4, %r3 /* Convert to kernel virtual address */ addi %r4, %r4, 36 li %r3, PSL_DE /* 
Note AS=0 */ mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* * Invalidate temp mapping */ mr %r3, %r28 bl tlb1_inval_entry done_mapping: /* * Setup a temporary stack */ bl 1f .long tmpstack-. 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 addi %r1, %r1, (TMPSTACKSZ - 16) /* * Relocate kernel */ bl 1f .long _DYNAMIC-. .long _GLOBAL_OFFSET_TABLE_-. 1: mflr %r5 lwz %r3,0(%r5) /* _DYNAMIC in %r3 */ add %r3,%r3,%r5 lwz %r4,4(%r5) /* GOT pointer */ add %r4,%r4,%r5 lwz %r4,4(%r4) /* got[0] is _DYNAMIC link addr */ subf %r4,%r4,%r3 /* subtract to calculate relocbase */ bl elf_reloc_self /* * Initialise exception vector offsets */ bl ivor_setup /* * Set up arguments and jump to system initialization code */ mr %r3, %r30 mr %r4, %r31 /* Prepare core */ bl booke_init /* Switch to thread0.td_kstack now */ mr %r1, %r3 li %r3, 0 stw %r3, 0(%r1) /* Machine independent part, does not return */ bl mi_startup /* NOT REACHED */ 5: b 5b #ifdef SMP /************************************************************************/ /* AP Boot page */ /************************************************************************/ .text .globl __boot_page .align 12 __boot_page: bl 1f .globl bp_trace bp_trace: .long 0 .globl bp_kernload bp_kernload: .long 0 /* * Initial configuration */ 1: mflr %r31 /* r31 holds the address of bp_trace */ /* Set HIDs */ mfpvr %r3 rlwinm %r3, %r3, 16, 16, 31 /* HID0 for E500 is default */ lis %r4, HID0_E500_DEFAULT_SET@h ori %r4, %r4, HID0_E500_DEFAULT_SET@l cmpli 0, 0, %r3, FSL_E500mc bne 2f lis %r4, HID0_E500MC_DEFAULT_SET@h ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l b 3f 2: cmpli 0, 0, %r3, FSL_E5500 bne 3f lis %r4, HID0_E5500_DEFAULT_SET@h ori %r4, %r4, HID0_E5500_DEFAULT_SET@l 3: mtspr SPR_HID0, %r4 isync /* Enable branch prediction */ li %r3, BUCSR_BPEN mtspr SPR_BUCSR, %r3 isync /* Invalidate all entries in TLB0 */ li %r3, 0 bl tlb_inval_all /* * Find TLB1 entry which is translating us now */ bl 2f 2: mflr %r3 bl tlb1_find_current /* the entry number found is in r29 */ bl tlb1_inval_all_but_current /* * Create temporary translation in AS=1 and switch to it */ bl tlb1_temp_mapping_as1 mfmsr %r3 ori %r3, %r3, (PSL_IS | PSL_DS) bl 3f 3: mflr %r4 addi %r4, %r4, 20 mtspr SPR_SRR0, %r4 mtspr SPR_SRR1, %r3 rfi /* Switch context */ /* * Invalidate initial entry */ mr %r3, %r29 bl tlb1_inval_entry /* * Setup final mapping in TLB1[1] and switch to it */ /* Final kernel mapping, map in 64 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 4, 15 mtspr SPR_MAS0, %r3 isync li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ isync lis %r3, KERNBASE@h ori %r3, %r3, KERNBASE@l /* EPN = KERNBASE */ ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ mtspr SPR_MAS2, %r3 isync /* Retrieve kernel load [physical] address from bp_kernload */ bl 4f .long bp_kernload .long __boot_page 4: mflr %r3 lwz %r4, 0(%r3) lwz %r5, 4(%r3) rlwinm %r3, %r3, 0, 0, 19 sub %r4, %r4, %r5 /* offset of bp_kernload within __boot_page */ lwzx %r3, %r4, %r3 /* Set RPN and protection */ ori %r3, %r3, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r3 isync bl zero_mas7 bl zero_mas8 tlbwe isync msync /* Switch to the final mapping */ bl 5f 5: mflr %r3 rlwinm %r3, %r3, 0, 0xfff /* Offset from boot page start */ add %r3, %r3, %r5 /* Make this virtual address */ addi %r3, %r3, 32 li %r4, 0 /* Note AS=0 */ mtspr SPR_SRR0, %r3 mtspr SPR_SRR1, %r4 rfi /* * At this point we're running at
virtual addresses KERNBASE and beyond so * it's allowed to directly access all locations the kernel was linked * against. */ /* * Invalidate temp mapping */ mr %r3, %r28 bl tlb1_inval_entry /* * Setup a temporary stack */ bl 1f .long tmpstack-. 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 stw %r1, 0(%r1) addi %r1, %r1, (TMPSTACKSZ - 16) /* * Initialise exception vector offsets */ bl ivor_setup /* * Assign our pcpu instance */ bl 1f .long ap_pcpu-. 1: mflr %r4 lwz %r3, 0(%r4) add %r3, %r3, %r4 lwz %r3, 0(%r3) mtsprg0 %r3 bl pmap_bootstrap_ap bl cpudep_ap_bootstrap /* Switch to the idle thread's kstack */ mr %r1, %r3 bl machdep_ap_bootstrap /* NOT REACHED */ 6: b 6b #endif /* SMP */ #if defined (BOOKE_E500) /* * Invalidate all entries in the given TLB. * * r3 TLBSEL */ tlb_inval_all: rlwinm %r3, %r3, 3, (1 << 3) /* TLBSEL */ ori %r3, %r3, (1 << 2) /* INVALL */ tlbivax 0, %r3 isync msync tlbsync msync blr /* * expects address to look up in r3, returns entry number in r29 * * FIXME: the hidden assumption is we are now running in AS=0, but we should * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS] */ tlb1_find_current: mfspr %r17, SPR_PID0 slwi %r17, %r17, MAS6_SPID0_SHIFT mtspr SPR_MAS6, %r17 isync tlbsx 0, %r3 mfspr %r17, SPR_MAS0 rlwinm %r29, %r17, 16, 26, 31 /* MAS0[ESEL] -> r29 */ /* Make sure we have IPROT set on the entry */ mfspr %r17, SPR_MAS1 oris %r17, %r17, MAS1_IPROT@h mtspr SPR_MAS1, %r17 isync tlbwe isync msync blr /* * Invalidates a single entry in TLB1. * * r3 ESEL * r4-r5 scratched */ tlb1_inval_entry: lis %r4, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r4, %r3, 16, 10, 15 /* Select our entry */ mtspr SPR_MAS0, %r4 isync tlbre li %r5, 0 /* MAS1[V] = 0 */ mtspr SPR_MAS1, %r5 isync tlbwe isync msync blr /* * r29 current entry number * r28 returned temp entry * r3-r5 scratched */ tlb1_temp_mapping_as1: /* Read our current translation */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r3, %r29, 16, 10, 15 /* Select our current entry */ mtspr SPR_MAS0, %r3 isync tlbre /* * Prepare and write temp entry * * FIXME this is not robust against overflow i.e. when the current * entry is the last in TLB1 */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ addi %r28, %r29, 1 /* Use next entry. */ rlwimi %r3, %r28, 16, 10, 15 /* Select temp entry */ mtspr SPR_MAS0, %r3 isync mfspr %r5, SPR_MAS1 li %r4, 1 /* AS=1 */ rlwimi %r5, %r4, 12, 19, 19 li %r4, 0 /* Global mapping, TID=0 */ rlwimi %r5, %r4, 16, 8, 15 oris %r5, %r5, (MAS1_VALID | MAS1_IPROT)@h mtspr SPR_MAS1, %r5 isync mflr %r3 bl zero_mas7 bl zero_mas8 mtlr %r3 tlbwe isync msync blr /* * Loops over TLB1, invalidates all entries skipping the one which currently * maps this code. * * r29 current entry * r3-r5 scratched */ tlb1_inval_all_but_current: mr %r6, %r3 mfspr %r3, SPR_TLB1CFG /* Get number of entries */ andi. %r3, %r3, TLBCFG_NENTRY_MASK@l li %r4, 0 /* Start from Entry 0 */ 1: lis %r5, MAS0_TLBSEL1@h rlwimi %r5, %r4, 16, 10, 15 mtspr SPR_MAS0, %r5 isync tlbre mfspr %r5, SPR_MAS1 cmpw %r4, %r29 /* our current entry? */ beq 2f rlwinm %r5, %r5, 0, 2, 31 /* clear VALID and IPROT bits */ mtspr SPR_MAS1, %r5 isync tlbwe isync msync 2: addi %r4, %r4, 1 cmpw %r4, %r3 /* Check if this is the last entry */ bne 1b blr /* * MAS7 and MAS8 conditional zeroing. 
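* MAS7 is not implemented on the e500v1 and MAS8 exists only on the e500mc and e5500, so both routines check the PVR first and skip the mtspr on cores that lack the register.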
*/ .globl zero_mas7 zero_mas7: mfpvr %r20 rlwinm %r20, %r20, 16, 16, 31 cmpli 0, 0, %r20, FSL_E500v1 beq 1f li %r20, 0 mtspr SPR_MAS7, %r20 isync 1: blr .globl zero_mas8 zero_mas8: mfpvr %r20 rlwinm %r20, %r20, 16, 16, 31 cmpli 0, 0, %r20, FSL_E500mc beq 1f cmpli 0, 0, %r20, FSL_E5500 beq 1f blr 1: li %r20, 0 mtspr SPR_MAS8, %r20 isync blr #endif #ifdef SMP .globl __boot_tlb1 /* * The __boot_tlb1 table is used to hold BSP TLB1 entries * marked with _TLB_ENTRY_SHARED flag during AP bootstrap. * The BSP fills in the table in tlb_ap_prep() function. Next, * AP loads its contents to TLB1 hardware in pmap_bootstrap_ap(). */ __boot_tlb1: .space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE __boot_page_padding: /* * Boot page needs to be exactly 4K, with the last word of this page * acting as the reset vector, so we need to stuff the remainder. * Upon release from holdoff CPU fetches the last word of the boot * page. */ .space 4092 - (__boot_page_padding - __boot_page) b __boot_page #endif /* SMP */ /************************************************************************/ /* locore subroutines */ /************************************************************************/ /* * Cache disable/enable/inval sequences according * to section 2.16 of E500CORE RM. */ ENTRY(dcache_inval) /* Invalidate d-cache */ mfspr %r3, SPR_L1CSR0 ori %r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l msync isync mtspr SPR_L1CSR0, %r3 isync 1: mfspr %r3, SPR_L1CSR0 andi. %r3, %r3, L1CSR0_DCFI bne 1b blr ENTRY(dcache_disable) /* Disable d-cache */ mfspr %r3, SPR_L1CSR0 li %r4, L1CSR0_DCE@l not %r4, %r4 and %r3, %r3, %r4 msync isync mtspr SPR_L1CSR0, %r3 isync blr ENTRY(dcache_enable) /* Enable d-cache */ mfspr %r3, SPR_L1CSR0 oris %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h ori %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l msync isync mtspr SPR_L1CSR0, %r3 isync blr ENTRY(icache_inval) /* Invalidate i-cache */ mfspr %r3, SPR_L1CSR1 ori %r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l isync mtspr SPR_L1CSR1, %r3 isync 1: mfspr %r3, SPR_L1CSR1 andi. %r3, %r3, L1CSR1_ICFI bne 1b blr ENTRY(icache_disable) /* Disable i-cache */ mfspr %r3, SPR_L1CSR1 li %r4, L1CSR1_ICE@l not %r4, %r4 and %r3, %r3, %r4 isync mtspr SPR_L1CSR1, %r3 isync blr ENTRY(icache_enable) /* Enable i-cache */ mfspr %r3, SPR_L1CSR1 oris %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h ori %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l isync mtspr SPR_L1CSR1, %r3 isync blr /* * L2 cache disable/enable/inval sequences for E500mc. */ ENTRY(l2cache_inval) mfspr %r3, SPR_L2CSR0 oris %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@h ori %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@l isync mtspr SPR_L2CSR0, %r3 isync 1: mfspr %r3, SPR_L2CSR0 andis. %r3, %r3, L2CSR0_L2FI@h bne 1b blr ENTRY(l2cache_enable) mfspr %r3, SPR_L2CSR0 oris %r3, %r3, (L2CSR0_L2E | L2CSR0_L2PE)@h isync mtspr SPR_L2CSR0, %r3 isync blr /* * Branch predictor setup. */ ENTRY(bpred_enable) mfspr %r3, SPR_BUCSR ori %r3, %r3, BUCSR_BBFI isync mtspr SPR_BUCSR, %r3 isync ori %r3, %r3, BUCSR_BPEN isync mtspr SPR_BUCSR, %r3 isync blr ENTRY(dataloss_erratum_access) /* Lock two cache lines into I-Cache */ sync mfspr %r11, SPR_L1CSR1 rlwinm %r11, %r11, 0, ~L1CSR1_ICUL sync isync mtspr SPR_L1CSR1, %r11 isync lis %r8, 2f@h ori %r8, %r8, 2f@l icbtls 0, 0, %r8 addi %r9, %r8, 64 sync mfspr %r11, SPR_L1CSR1 3: andi. %r11, %r11, L1CSR1_ICUL bne 3b icbtls 0, 0, %r9 sync mfspr %r11, SPR_L1CSR1 3: andi. 
%r11, %r11, L1CSR1_ICUL bne 3b b 2f .align 6 /* Inside a locked cacheline, wait a while, write, then wait a while */ 2: sync mfspr %r5, TBR_TBL 4: addis %r11, %r5, 0x100000@h /* wait around one million timebase ticks */ mfspr %r5, TBR_TBL subf. %r5, %r5, %r11 bgt 4b stw %r4, 0(%r3) mfspr %r5, TBR_TBL 4: addis %r11, %r5, 0x100000@h /* wait around one million timebase ticks */ mfspr %r5, TBR_TBL subf. %r5, %r5, %r11 bgt 4b sync /* * Fill out the rest of this cache line and the next with nops, * to ensure that nothing outside the locked area will be * fetched due to a branch. */ .rept 19 nop .endr icblc 0, 0, %r8 icblc 0, 0, %r9 blr /************************************************************************/ /* Data section */ /************************************************************************/ .data .align 3 GLOBAL(__startkernel) .long begin GLOBAL(__endkernel) .long end .align 4 tmpstack: .space TMPSTACKSZ tmpstackbound: .space 10240 /* XXX: this really should not be necessary */ /* * Compiled KERNBASE locations */ .globl kernbase .set kernbase, KERNBASE #include Index: stable/11/sys/sparc64/pci/sbbc.c =================================================================== --- stable/11/sys/sparc64/pci/sbbc.c (revision 330445) +++ stable/11/sys/sparc64/pci/sbbc.c (revision 330446) @@ -1,1111 +1,1111 @@ /* $OpenBSD: sbbc.c,v 1.7 2009/11/09 17:53:39 nicm Exp $ */ /*- * Copyright (c) 2008 Mark Kettenis * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Copyright (c) 2010 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "clock_if.h" #include "uart_if.h" #define SBBC_PCI_BAR PCIR_BAR(0) #define SBBC_PCI_VENDOR 0x108e #define SBBC_PCI_PRODUCT 0xc416 #define SBBC_REGS_OFFSET 0x800000 #define SBBC_REGS_SIZE 0x6230 #define SBBC_EPLD_OFFSET 0x8e0000 #define SBBC_EPLD_SIZE 0x20 #define SBBC_SRAM_OFFSET 0x900000 #define SBBC_SRAM_SIZE 0x20000 /* 128KB SRAM */ #define SBBC_PCI_INT_STATUS 0x2320 #define SBBC_PCI_INT_ENABLE 0x2330 #define SBBC_PCI_ENABLE_INT_A 0x11 #define SBBC_EPLD_INTERRUPT 0x13 #define SBBC_EPLD_INTERRUPT_ON 0x01 #define SBBC_SRAM_CONS_IN 0x00000001 #define SBBC_SRAM_CONS_OUT 0x00000002 #define SBBC_SRAM_CONS_BRK 0x00000004 #define SBBC_SRAM_CONS_SPACE_IN 0x00000008 #define SBBC_SRAM_CONS_SPACE_OUT 0x00000010 #define SBBC_TAG_KEY_SIZE 8 #define SBBC_TAG_KEY_SCSOLIE "SCSOLIE" /* SC -> OS int. enable */ #define SBBC_TAG_KEY_SCSOLIR "SCSOLIR" /* SC -> OS int. reason */ #define SBBC_TAG_KEY_SOLCONS "SOLCONS" /* OS console buffer */ #define SBBC_TAG_KEY_SOLSCIE "SOLSCIE" /* OS -> SC int. enable */ #define SBBC_TAG_KEY_SOLSCIR "SOLSCIR" /* OS -> SC int. reason */ #define SBBC_TAG_KEY_TODDATA "TODDATA" /* OS TOD struct */ #define SBBC_TAG_OFF(x) offsetof(struct sbbc_sram_tag, x) struct sbbc_sram_tag { char tag_key[SBBC_TAG_KEY_SIZE]; uint32_t tag_size; uint32_t tag_offset; } __packed; #define SBBC_TOC_MAGIC "TOCSRAM" #define SBBC_TOC_MAGIC_SIZE 8 #define SBBC_TOC_TAGS_MAX 32 #define SBBC_TOC_OFF(x) offsetof(struct sbbc_sram_toc, x) struct sbbc_sram_toc { char toc_magic[SBBC_TOC_MAGIC_SIZE]; uint8_t toc_reserved; uint8_t toc_type; uint16_t toc_version; uint32_t toc_ntags; struct sbbc_sram_tag toc_tag[SBBC_TOC_TAGS_MAX]; } __packed; #define SBBC_TOD_MAGIC 0x54443100 /* "TD1" */ #define SBBC_TOD_VERSION 1 #define SBBC_TOD_OFF(x) offsetof(struct sbbc_sram_tod, x) struct sbbc_sram_tod { uint32_t tod_magic; uint32_t tod_version; uint64_t tod_time; uint64_t tod_skew; uint32_t tod_reserved; uint32_t tod_heartbeat; uint32_t tod_timeout; } __packed; #define SBBC_CONS_MAGIC 0x434f4e00 /* "CON" */ #define SBBC_CONS_VERSION 1 #define SBBC_CONS_OFF(x) offsetof(struct sbbc_sram_cons, x) struct sbbc_sram_cons { uint32_t cons_magic; uint32_t cons_version; uint32_t cons_size; uint32_t cons_in_begin; uint32_t cons_in_end; uint32_t cons_in_rdptr; uint32_t cons_in_wrptr; uint32_t cons_out_begin; uint32_t cons_out_end; uint32_t cons_out_rdptr; uint32_t cons_out_wrptr; } __packed; struct sbbc_softc { struct resource *sc_res; }; #define SBBC_READ_N(wdth, offs) \ bus_space_read_ ## wdth((bst), (bsh), (offs)) #define SBBC_WRITE_N(wdth, offs, val) \ bus_space_write_ ## wdth((bst), (bsh), (offs), (val)) #define SBBC_READ_1(offs) \ SBBC_READ_N(1, (offs)) #define SBBC_READ_2(offs) \ bswap16(SBBC_READ_N(2, (offs))) #define SBBC_READ_4(offs) \ bswap32(SBBC_READ_N(4, (offs))) #define SBBC_READ_8(offs) \ bswap64(SBBC_READ_N(8, (offs))) #define SBBC_WRITE_1(offs, val) \ SBBC_WRITE_N(1, (offs), (val)) #define SBBC_WRITE_2(offs, val) \ SBBC_WRITE_N(2, (offs), bswap16(val)) #define SBBC_WRITE_4(offs, val) \ SBBC_WRITE_N(4, (offs), bswap32(val)) #define SBBC_WRITE_8(offs, val) \ SBBC_WRITE_N(8, (offs), bswap64(val)) #define SBBC_REGS_READ_1(offs) \ SBBC_READ_1((offs) + SBBC_REGS_OFFSET) #define SBBC_REGS_READ_2(offs) \ SBBC_READ_2((offs) + SBBC_REGS_OFFSET) #define SBBC_REGS_READ_4(offs) \ SBBC_READ_4((offs) + 
SBBC_REGS_OFFSET) #define SBBC_REGS_READ_8(offs) \ SBBC_READ_8((offs) + SBBC_REGS_OFFSET) #define SBBC_REGS_WRITE_1(offs, val) \ SBBC_WRITE_1((offs) + SBBC_REGS_OFFSET, (val)) #define SBBC_REGS_WRITE_2(offs, val) \ SBBC_WRITE_2((offs) + SBBC_REGS_OFFSET, (val)) #define SBBC_REGS_WRITE_4(offs, val) \ SBBC_WRITE_4((offs) + SBBC_REGS_OFFSET, (val)) #define SBBC_REGS_WRITE_8(offs, val) \ SBBC_WRITE_8((offs) + SBBC_REGS_OFFSET, (val)) #define SBBC_EPLD_READ_1(offs) \ SBBC_READ_1((offs) + SBBC_EPLD_OFFSET) #define SBBC_EPLD_READ_2(offs) \ SBBC_READ_2((offs) + SBBC_EPLD_OFFSET) #define SBBC_EPLD_READ_4(offs) \ SBBC_READ_4((offs) + SBBC_EPLD_OFFSET) #define SBBC_EPLD_READ_8(offs) \ SBBC_READ_8((offs) + SBBC_EPLD_OFFSET) #define SBBC_EPLD_WRITE_1(offs, val) \ SBBC_WRITE_1((offs) + SBBC_EPLD_OFFSET, (val)) #define SBBC_EPLD_WRITE_2(offs, val) \ SBBC_WRITE_2((offs) + SBBC_EPLD_OFFSET, (val)) #define SBBC_EPLD_WRITE_4(offs, val) \ SBBC_WRITE_4((offs) + SBBC_EPLD_OFFSET, (val)) #define SBBC_EPLD_WRITE_8(offs, val) \ SBBC_WRITE_8((offs) + SBBC_EPLD_OFFSET, (val)) #define SBBC_SRAM_READ_1(offs) \ SBBC_READ_1((offs) + SBBC_SRAM_OFFSET) #define SBBC_SRAM_READ_2(offs) \ SBBC_READ_2((offs) + SBBC_SRAM_OFFSET) #define SBBC_SRAM_READ_4(offs) \ SBBC_READ_4((offs) + SBBC_SRAM_OFFSET) #define SBBC_SRAM_READ_8(offs) \ SBBC_READ_8((offs) + SBBC_SRAM_OFFSET) #define SBBC_SRAM_WRITE_1(offs, val) \ SBBC_WRITE_1((offs) + SBBC_SRAM_OFFSET, (val)) #define SBBC_SRAM_WRITE_2(offs, val) \ SBBC_WRITE_2((offs) + SBBC_SRAM_OFFSET, (val)) #define SBBC_SRAM_WRITE_4(offs, val) \ SBBC_WRITE_4((offs) + SBBC_SRAM_OFFSET, (val)) #define SBBC_SRAM_WRITE_8(offs, val) \ SBBC_WRITE_8((offs) + SBBC_SRAM_OFFSET, (val)) #define SUNW_SETCONSINPUT "SUNW,set-console-input" #define SUNW_SETCONSINPUT_CLNT "CON_CLNT" #define SUNW_SETCONSINPUT_OBP "CON_OBP" static u_int sbbc_console; static uint32_t sbbc_scsolie; static uint32_t sbbc_scsolir; static uint32_t sbbc_solcons; static uint32_t sbbc_solscie; static uint32_t sbbc_solscir; static uint32_t sbbc_toddata; /* * internal helpers */ static int sbbc_parse_toc(bus_space_tag_t bst, bus_space_handle_t bsh); static inline void sbbc_send_intr(bus_space_tag_t bst, bus_space_handle_t bsh); static const char *sbbc_serengeti_set_console_input(char *new); /* * SBBC PCI interface */ static bus_activate_resource_t sbbc_bus_activate_resource; static bus_adjust_resource_t sbbc_bus_adjust_resource; static bus_deactivate_resource_t sbbc_bus_deactivate_resource; static bus_alloc_resource_t sbbc_bus_alloc_resource; static bus_release_resource_t sbbc_bus_release_resource; static bus_get_resource_list_t sbbc_bus_get_resource_list; static bus_setup_intr_t sbbc_bus_setup_intr; static bus_teardown_intr_t sbbc_bus_teardown_intr; static device_attach_t sbbc_pci_attach; static device_probe_t sbbc_pci_probe; static clock_gettime_t sbbc_tod_gettime; static clock_settime_t sbbc_tod_settime; static device_method_t sbbc_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sbbc_pci_probe), DEVMETHOD(device_attach, sbbc_pci_attach), DEVMETHOD(bus_alloc_resource, sbbc_bus_alloc_resource), DEVMETHOD(bus_activate_resource,sbbc_bus_activate_resource), DEVMETHOD(bus_deactivate_resource,sbbc_bus_deactivate_resource), DEVMETHOD(bus_adjust_resource, sbbc_bus_adjust_resource), DEVMETHOD(bus_release_resource, sbbc_bus_release_resource), DEVMETHOD(bus_setup_intr, sbbc_bus_setup_intr), DEVMETHOD(bus_teardown_intr, sbbc_bus_teardown_intr), DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_get_resource_list, 
sbbc_bus_get_resource_list), /* clock interface */ DEVMETHOD(clock_gettime, sbbc_tod_gettime), DEVMETHOD(clock_settime, sbbc_tod_settime), DEVMETHOD_END }; static devclass_t sbbc_devclass; DEFINE_CLASS_0(sbbc, sbbc_driver, sbbc_pci_methods, sizeof(struct sbbc_softc)); DRIVER_MODULE(sbbc, pci, sbbc_driver, sbbc_devclass, NULL, NULL); static int sbbc_pci_probe(device_t dev) { if (pci_get_vendor(dev) == SBBC_PCI_VENDOR && pci_get_device(dev) == SBBC_PCI_PRODUCT) { device_set_desc(dev, "Sun BootBus controller"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int sbbc_pci_attach(device_t dev) { struct sbbc_softc *sc; struct timespec ts; device_t child; bus_space_tag_t bst; bus_space_handle_t bsh; phandle_t node; int error, rid; uint32_t val; - /* Nothing to to if we're not the chosen one. */ + /* Nothing to do if we're not the chosen one. */ if ((node = OF_finddevice("/chosen")) == -1) { device_printf(dev, "failed to find /chosen\n"); return (ENXIO); } if (OF_getprop(node, "iosram", &node, sizeof(node)) == -1) { device_printf(dev, "failed to get iosram\n"); return (ENXIO); } if (node != ofw_bus_get_node(dev)) return (0); sc = device_get_softc(dev); rid = SBBC_PCI_BAR; sc->sc_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_res == NULL) { device_printf(dev, "failed to allocate resources\n"); return (ENXIO); } bst = rman_get_bustag(sc->sc_res); bsh = rman_get_bushandle(sc->sc_res); if (sbbc_console != 0) { /* Once again the interrupt pin isn't set. */ if (pci_get_intpin(dev) == 0) pci_set_intpin(dev, 1); child = device_add_child(dev, NULL, -1); if (child == NULL) device_printf(dev, "failed to add UART device\n"); error = bus_generic_attach(dev); if (error != 0) device_printf(dev, "failed to attach UART device\n"); } else { error = sbbc_parse_toc(bst, bsh); if (error != 0) { device_printf(dev, "failed to parse TOC\n"); if (sbbc_console != 0) { bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_res); return (error); } } } if (sbbc_toddata != 0) { if ((val = SBBC_SRAM_READ_4(sbbc_toddata + SBBC_TOD_OFF(tod_magic))) != SBBC_TOD_MAGIC) device_printf(dev, "invalid TOD magic %#x\n", val); else if ((val = SBBC_SRAM_READ_4(sbbc_toddata + SBBC_TOD_OFF(tod_version))) < SBBC_TOD_VERSION) device_printf(dev, "invalid TOD version %#x\n", val); else { clock_register(dev, 1000000); /* 1 sec. resolution */ if (bootverbose) { sbbc_tod_gettime(dev, &ts); device_printf(dev, "current time: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec); } } } return (0); } /* * Note that the bus methods don't pass-through the uart(4) requests but act * as if they would come from sbbc(4) in order to avoid complications with * pci(4) (actually, uart(4) isn't a real child but rather a function of * sbbc(4) anyway). 
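* In particular the single memory BAR is shared: for SYS_RES_MEMORY sbbc_bus_alloc_resource() hands out sc_res itself and the corresponding activate/deactivate/release methods are no-ops for it.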
*/ static struct resource * sbbc_bus_alloc_resource(device_t dev, device_t child __unused, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct sbbc_softc *sc; sc = device_get_softc(dev); switch (type) { case SYS_RES_IRQ: return (bus_generic_alloc_resource(dev, dev, type, rid, start, end, count, flags)); case SYS_RES_MEMORY: return (sc->sc_res); default: return (NULL); } } static int sbbc_bus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { if (type == SYS_RES_MEMORY) return (0); return (bus_generic_activate_resource(bus, child, type, rid, res)); } static int sbbc_bus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { if (type == SYS_RES_MEMORY) return (0); return (bus_generic_deactivate_resource(bus, child, type, rid, res)); } static int sbbc_bus_adjust_resource(device_t bus __unused, device_t child __unused, int type __unused, struct resource *res __unused, rman_res_t start __unused, rman_res_t end __unused) { return (ENXIO); } static int sbbc_bus_release_resource(device_t dev, device_t child __unused, int type, int rid, struct resource *res) { if (type == SYS_RES_IRQ) return (bus_generic_release_resource(dev, dev, type, rid, res)); return (0); } static struct resource_list * sbbc_bus_get_resource_list(device_t dev, device_t child __unused) { return (bus_generic_get_resource_list(dev, dev)); } static int sbbc_bus_setup_intr(device_t dev, device_t child __unused, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { return (bus_generic_setup_intr(dev, dev, res, flags, filt, intr, arg, cookiep)); } static int sbbc_bus_teardown_intr(device_t dev, device_t child __unused, struct resource *res, void *cookie) { return (bus_generic_teardown_intr(dev, dev, res, cookie)); } /* * internal helpers */ static int sbbc_parse_toc(bus_space_tag_t bst, bus_space_handle_t bsh) { char buf[MAX(SBBC_TAG_KEY_SIZE, SBBC_TOC_MAGIC_SIZE)]; bus_size_t tag; phandle_t node; uint32_t off, sram_toc; u_int i, tags; if ((node = OF_finddevice("/chosen")) == -1) return (ENXIO); /* SRAM TOC offset defaults to 0. 
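An ``iosram-toc'' property on /chosen, when present, overrides it.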
*/ if (OF_getprop(node, "iosram-toc", &sram_toc, sizeof(sram_toc)) <= 0) sram_toc = 0; bus_space_read_region_1(bst, bsh, SBBC_SRAM_OFFSET + sram_toc + SBBC_TOC_OFF(toc_magic), buf, SBBC_TOC_MAGIC_SIZE); buf[SBBC_TOC_MAGIC_SIZE - 1] = '\0'; if (strcmp(buf, SBBC_TOC_MAGIC) != 0) return (ENXIO); tags = SBBC_SRAM_READ_4(sram_toc + SBBC_TOC_OFF(toc_ntags)); for (i = 0; i < tags; i++) { tag = sram_toc + SBBC_TOC_OFF(toc_tag) + i * sizeof(struct sbbc_sram_tag); bus_space_read_region_1(bst, bsh, SBBC_SRAM_OFFSET + tag + SBBC_TAG_OFF(tag_key), buf, SBBC_TAG_KEY_SIZE); buf[SBBC_TAG_KEY_SIZE - 1] = '\0'; off = SBBC_SRAM_READ_4(tag + SBBC_TAG_OFF(tag_offset)); if (strcmp(buf, SBBC_TAG_KEY_SCSOLIE) == 0) sbbc_scsolie = off; else if (strcmp(buf, SBBC_TAG_KEY_SCSOLIR) == 0) sbbc_scsolir = off; else if (strcmp(buf, SBBC_TAG_KEY_SOLCONS) == 0) sbbc_solcons = off; else if (strcmp(buf, SBBC_TAG_KEY_SOLSCIE) == 0) sbbc_solscie = off; else if (strcmp(buf, SBBC_TAG_KEY_SOLSCIR) == 0) sbbc_solscir = off; else if (strcmp(buf, SBBC_TAG_KEY_TODDATA) == 0) sbbc_toddata = off; } return (0); } static const char * sbbc_serengeti_set_console_input(char *new) { struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t new; cell_t old; } args = { (cell_t)SUNW_SETCONSINPUT, 1, 1, }; args.new = (cell_t)new; if (ofw_entry(&args) == -1) return (NULL); return ((const char *)args.old); } static inline void sbbc_send_intr(bus_space_tag_t bst, bus_space_handle_t bsh) { SBBC_EPLD_WRITE_1(SBBC_EPLD_INTERRUPT, SBBC_EPLD_INTERRUPT_ON); bus_space_barrier(bst, bsh, SBBC_EPLD_OFFSET + SBBC_EPLD_INTERRUPT, 1, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } /* * TOD interface */ static int sbbc_tod_gettime(device_t dev, struct timespec *ts) { struct sbbc_softc *sc; bus_space_tag_t bst; bus_space_handle_t bsh; sc = device_get_softc(dev); bst = rman_get_bustag(sc->sc_res); bsh = rman_get_bushandle(sc->sc_res); ts->tv_sec = SBBC_SRAM_READ_8(sbbc_toddata + SBBC_TOD_OFF(tod_time)) + SBBC_SRAM_READ_8(sbbc_toddata + SBBC_TOD_OFF(tod_skew)); ts->tv_nsec = 0; return (0); } static int sbbc_tod_settime(device_t dev, struct timespec *ts) { struct sbbc_softc *sc; bus_space_tag_t bst; bus_space_handle_t bsh; sc = device_get_softc(dev); bst = rman_get_bustag(sc->sc_res); bsh = rman_get_bushandle(sc->sc_res); SBBC_SRAM_WRITE_8(sbbc_toddata + SBBC_TOD_OFF(tod_skew), ts->tv_sec - SBBC_SRAM_READ_8(sbbc_toddata + SBBC_TOD_OFF(tod_time))); return (0); } /* * UART bus front-end */ static device_probe_t sbbc_uart_sbbc_probe; static device_method_t sbbc_uart_sbbc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sbbc_uart_sbbc_probe), DEVMETHOD(device_attach, uart_bus_attach), DEVMETHOD(device_detach, uart_bus_detach), DEVMETHOD_END }; DEFINE_CLASS_0(uart, sbbc_uart_driver, sbbc_uart_sbbc_methods, sizeof(struct uart_softc)); DRIVER_MODULE(uart, sbbc, sbbc_uart_driver, uart_devclass, NULL, NULL); static int sbbc_uart_sbbc_probe(device_t dev) { struct uart_softc *sc; sc = device_get_softc(dev); sc->sc_class = &uart_sbbc_class; device_set_desc(dev, "Serengeti console"); return (uart_bus_probe(dev, 0, 0, SBBC_PCI_BAR, 0)); } /* * Low-level UART interface */ static int sbbc_uart_probe(struct uart_bas *bas); static void sbbc_uart_init(struct uart_bas *bas, int baudrate, int databits, int stopbits, int parity); static void sbbc_uart_term(struct uart_bas *bas); static void sbbc_uart_putc(struct uart_bas *bas, int c); static int sbbc_uart_rxready(struct uart_bas *bas); static int sbbc_uart_getc(struct uart_bas *bas, struct mtx *hwmtx); static struct 
uart_ops sbbc_uart_ops = { .probe = sbbc_uart_probe, .init = sbbc_uart_init, .term = sbbc_uart_term, .putc = sbbc_uart_putc, .rxready = sbbc_uart_rxready, .getc = sbbc_uart_getc, }; static int sbbc_uart_probe(struct uart_bas *bas) { bus_space_tag_t bst; bus_space_handle_t bsh; int error; sbbc_console = 1; bst = bas->bst; bsh = bas->bsh; error = sbbc_parse_toc(bst, bsh); if (error != 0) return (error); if (sbbc_scsolie == 0 || sbbc_scsolir == 0 || sbbc_solcons == 0 || sbbc_solscie == 0 || sbbc_solscir == 0) return (ENXIO); if (SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_magic)) != SBBC_CONS_MAGIC || SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_version)) < SBBC_CONS_VERSION) return (ENXIO); return (0); } static void sbbc_uart_init(struct uart_bas *bas, int baudrate __unused, int databits __unused, int stopbits __unused, int parity __unused) { bus_space_tag_t bst; bus_space_handle_t bsh; bst = bas->bst; bsh = bas->bsh; /* Enable output to and space in from the SC interrupts. */ SBBC_SRAM_WRITE_4(sbbc_solscie, SBBC_SRAM_READ_4(sbbc_solscie) | SBBC_SRAM_CONS_OUT | SBBC_SRAM_CONS_SPACE_IN); uart_barrier(bas); /* Take over the console input. */ sbbc_serengeti_set_console_input(SUNW_SETCONSINPUT_CLNT); } static void sbbc_uart_term(struct uart_bas *bas __unused) { /* Give back the console input. */ sbbc_serengeti_set_console_input(SUNW_SETCONSINPUT_OBP); } static void sbbc_uart_putc(struct uart_bas *bas, int c) { bus_space_tag_t bst; bus_space_handle_t bsh; uint32_t wrptr; bst = bas->bst; bsh = bas->bsh; wrptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_wrptr)); SBBC_SRAM_WRITE_1(sbbc_solcons + wrptr, c); uart_barrier(bas); if (++wrptr == SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_end))) wrptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_begin)); SBBC_SRAM_WRITE_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_wrptr), wrptr); uart_barrier(bas); SBBC_SRAM_WRITE_4(sbbc_solscir, SBBC_SRAM_READ_4(sbbc_solscir) | SBBC_SRAM_CONS_OUT); uart_barrier(bas); sbbc_send_intr(bst, bsh); } static int sbbc_uart_rxready(struct uart_bas *bas) { bus_space_tag_t bst; bus_space_handle_t bsh; bst = bas->bst; bsh = bas->bsh; if (SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_rdptr)) == SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_wrptr))) return (0); return (1); } static int sbbc_uart_getc(struct uart_bas *bas, struct mtx *hwmtx) { bus_space_tag_t bst; bus_space_handle_t bsh; int c; uint32_t rdptr; bst = bas->bst; bsh = bas->bsh; uart_lock(hwmtx); while (sbbc_uart_rxready(bas) == 0) { uart_unlock(hwmtx); DELAY(4); uart_lock(hwmtx); } rdptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_rdptr)); c = SBBC_SRAM_READ_1(sbbc_solcons + rdptr); uart_barrier(bas); if (++rdptr == SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_end))) rdptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_begin)); SBBC_SRAM_WRITE_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_rdptr), rdptr); uart_barrier(bas); SBBC_SRAM_WRITE_4(sbbc_solscir, SBBC_SRAM_READ_4(sbbc_solscir) | SBBC_SRAM_CONS_SPACE_IN); uart_barrier(bas); sbbc_send_intr(bst, bsh); uart_unlock(hwmtx); return (c); } /* * High-level UART interface */ static int sbbc_uart_bus_attach(struct uart_softc *sc); static int sbbc_uart_bus_detach(struct uart_softc *sc); static int sbbc_uart_bus_flush(struct uart_softc *sc, int what); static int sbbc_uart_bus_getsig(struct uart_softc *sc); static int sbbc_uart_bus_ioctl(struct uart_softc *sc, int request, intptr_t data); static int sbbc_uart_bus_ipend(struct uart_softc *sc); 
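/*
 * The console queues above use simple wrap-around pointer arithmetic: an
 * index runs from cons_*_begin up to (but not including) cons_*_end and then
 * wraps back to begin.  The helper below is a minimal illustrative sketch of
 * that pattern only; the name sbbc_ring_advance() is hypothetical and not
 * part of this driver, which open-codes the same
 * "if (++ptr == end) ptr = begin;" step in sbbc_uart_putc() and
 * sbbc_uart_getc().
 */
static inline uint32_t
sbbc_ring_advance(uint32_t ptr, uint32_t begin, uint32_t end)
{

	/* Step to the next slot, wrapping from end back to begin. */
	if (++ptr == end)
		ptr = begin;
	return (ptr);
}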
static int sbbc_uart_bus_param(struct uart_softc *sc, int baudrate, int databits, int stopbits, int parity); static int sbbc_uart_bus_probe(struct uart_softc *sc); static int sbbc_uart_bus_receive(struct uart_softc *sc); static int sbbc_uart_bus_setsig(struct uart_softc *sc, int sig); static int sbbc_uart_bus_transmit(struct uart_softc *sc); static kobj_method_t sbbc_uart_methods[] = { KOBJMETHOD(uart_attach, sbbc_uart_bus_attach), KOBJMETHOD(uart_detach, sbbc_uart_bus_detach), KOBJMETHOD(uart_flush, sbbc_uart_bus_flush), KOBJMETHOD(uart_getsig, sbbc_uart_bus_getsig), KOBJMETHOD(uart_ioctl, sbbc_uart_bus_ioctl), KOBJMETHOD(uart_ipend, sbbc_uart_bus_ipend), KOBJMETHOD(uart_param, sbbc_uart_bus_param), KOBJMETHOD(uart_probe, sbbc_uart_bus_probe), KOBJMETHOD(uart_receive, sbbc_uart_bus_receive), KOBJMETHOD(uart_setsig, sbbc_uart_bus_setsig), KOBJMETHOD(uart_transmit, sbbc_uart_bus_transmit), DEVMETHOD_END }; struct uart_class uart_sbbc_class = { "sbbc", sbbc_uart_methods, sizeof(struct uart_softc), .uc_ops = &sbbc_uart_ops, .uc_range = 1, .uc_rclk = 0x5bbc, /* arbitrary */ .uc_rshift = 0 }; #define SIGCHG(c, i, s, d) \ if ((c) != 0) { \ i |= (((i) & (s)) != 0) ? (s) : (s) | (d); \ } else { \ i = (((i) & (s)) != 0) ? ((i) & ~(s)) | (d) : (i); \ } static int sbbc_uart_bus_attach(struct uart_softc *sc) { struct uart_bas *bas; bus_space_tag_t bst; bus_space_handle_t bsh; uint32_t wrptr; bas = &sc->sc_bas; bst = bas->bst; bsh = bas->bsh; uart_lock(sc->sc_hwmtx); /* * Let the current output drain before enabling interrupts. Not * doing so tends to cause lost output when turning them on. */ wrptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_wrptr)); while (SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_rdptr)) != wrptr) cpu_spinwait(); /* Clear and acknowledge possibly outstanding interrupts. */ SBBC_SRAM_WRITE_4(sbbc_scsolir, 0); uart_barrier(bas); SBBC_REGS_WRITE_4(SBBC_PCI_INT_STATUS, SBBC_SRAM_READ_4(sbbc_scsolir)); uart_barrier(bas); /* Enable PCI interrupts. */ SBBC_REGS_WRITE_4(SBBC_PCI_INT_ENABLE, SBBC_PCI_ENABLE_INT_A); uart_barrier(bas); /* Enable input from and output to SC as well as break interrupts. */ SBBC_SRAM_WRITE_4(sbbc_scsolie, SBBC_SRAM_READ_4(sbbc_scsolie) | SBBC_SRAM_CONS_IN | SBBC_SRAM_CONS_BRK | SBBC_SRAM_CONS_SPACE_OUT); uart_barrier(bas); uart_unlock(sc->sc_hwmtx); return (0); } static int sbbc_uart_bus_detach(struct uart_softc *sc) { /* Give back the console input.
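 * The OBP firmware becomes the owner of console input again.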
*/ sbbc_serengeti_set_console_input(SUNW_SETCONSINPUT_OBP); return (0); } static int sbbc_uart_bus_flush(struct uart_softc *sc, int what) { struct uart_bas *bas; bus_space_tag_t bst; bus_space_handle_t bsh; bas = &sc->sc_bas; bst = bas->bst; bsh = bas->bsh; if ((what & UART_FLUSH_TRANSMITTER) != 0) return (ENODEV); if ((what & UART_FLUSH_RECEIVER) != 0) { SBBC_SRAM_WRITE_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_rdptr), SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_wrptr))); uart_barrier(bas); } return (0); } static int sbbc_uart_bus_getsig(struct uart_softc *sc) { uint32_t dummy, new, old, sig; do { old = sc->sc_hwsig; sig = old; dummy = 0; SIGCHG(dummy, sig, SER_CTS, SER_DCTS); SIGCHG(dummy, sig, SER_DCD, SER_DDCD); SIGCHG(dummy, sig, SER_DSR, SER_DDSR); new = sig & ~SER_MASK_DELTA; } while (!atomic_cmpset_32(&sc->sc_hwsig, old, new)); return (sig); } static int sbbc_uart_bus_ioctl(struct uart_softc *sc, int request, intptr_t data) { int error; error = 0; uart_lock(sc->sc_hwmtx); switch (request) { case UART_IOCTL_BAUD: *(int*)data = 9600; /* arbitrary */ break; default: error = EINVAL; break; } uart_unlock(sc->sc_hwmtx); return (error); } static int sbbc_uart_bus_ipend(struct uart_softc *sc) { struct uart_bas *bas; bus_space_tag_t bst; bus_space_handle_t bsh; int ipend; uint32_t reason, status; bas = &sc->sc_bas; bst = bas->bst; bsh = bas->bsh; uart_lock(sc->sc_hwmtx); status = SBBC_REGS_READ_4(SBBC_PCI_INT_STATUS); if (status == 0) { uart_unlock(sc->sc_hwmtx); return (0); } /* * Unfortunately, we can't use compare and swap for non-cachable * memory. */ reason = SBBC_SRAM_READ_4(sbbc_scsolir); SBBC_SRAM_WRITE_4(sbbc_scsolir, 0); uart_barrier(bas); /* Acknowledge the interrupt. */ SBBC_REGS_WRITE_4(SBBC_PCI_INT_STATUS, status); uart_barrier(bas); uart_unlock(sc->sc_hwmtx); ipend = 0; if ((reason & SBBC_SRAM_CONS_IN) != 0) ipend |= SER_INT_RXREADY; if ((reason & SBBC_SRAM_CONS_BRK) != 0) ipend |= SER_INT_BREAK; if ((reason & SBBC_SRAM_CONS_SPACE_OUT) != 0 && SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_rdptr)) == SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_wrptr))) ipend |= SER_INT_TXIDLE; return (ipend); } static int sbbc_uart_bus_param(struct uart_softc *sc __unused, int baudrate __unused, int databits __unused, int stopbits __unused, int parity __unused) { return (0); } static int sbbc_uart_bus_probe(struct uart_softc *sc) { struct uart_bas *bas; bus_space_tag_t bst; bus_space_handle_t bsh; if (sbbc_console != 0) { bas = &sc->sc_bas; bst = bas->bst; bsh = bas->bsh; sc->sc_rxfifosz = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_end)) - SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_begin)) - 1; sc->sc_txfifosz = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_end)) - SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_begin)) - 1; return (0); } return (ENXIO); } static int sbbc_uart_bus_receive(struct uart_softc *sc) { struct uart_bas *bas; bus_space_tag_t bst; bus_space_handle_t bsh; int c; uint32_t end, rdptr, wrptr; bas = &sc->sc_bas; bst = bas->bst; bsh = bas->bsh; uart_lock(sc->sc_hwmtx); end = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_end)); rdptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_rdptr)); wrptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_wrptr)); while (rdptr != wrptr) { if (uart_rx_full(sc) != 0) { sc->sc_rxbuf[sc->sc_rxput] = UART_STAT_OVERRUN; break; } c = SBBC_SRAM_READ_1(sbbc_solcons + rdptr); uart_rx_put(sc, c); if (++rdptr == end) rdptr = SBBC_SRAM_READ_4(sbbc_solcons + 
SBBC_CONS_OFF(cons_in_begin)); } uart_barrier(bas); SBBC_SRAM_WRITE_4(sbbc_solcons + SBBC_CONS_OFF(cons_in_rdptr), rdptr); uart_barrier(bas); SBBC_SRAM_WRITE_4(sbbc_solscir, SBBC_SRAM_READ_4(sbbc_solscir) | SBBC_SRAM_CONS_SPACE_IN); uart_barrier(bas); sbbc_send_intr(bst, bsh); uart_unlock(sc->sc_hwmtx); return (0); } static int sbbc_uart_bus_setsig(struct uart_softc *sc, int sig) { struct uart_bas *bas; uint32_t new, old; bas = &sc->sc_bas; do { old = sc->sc_hwsig; new = old; if ((sig & SER_DDTR) != 0) { SIGCHG(sig & SER_DTR, new, SER_DTR, SER_DDTR); } if ((sig & SER_DRTS) != 0) { SIGCHG(sig & SER_RTS, new, SER_RTS, SER_DRTS); } } while (!atomic_cmpset_32(&sc->sc_hwsig, old, new)); return (0); } static int sbbc_uart_bus_transmit(struct uart_softc *sc) { struct uart_bas *bas; bus_space_tag_t bst; bus_space_handle_t bsh; int i; uint32_t end, wrptr; bas = &sc->sc_bas; bst = bas->bst; bsh = bas->bsh; uart_lock(sc->sc_hwmtx); end = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_end)); wrptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_wrptr)); for (i = 0; i < sc->sc_txdatasz; i++) { SBBC_SRAM_WRITE_1(sbbc_solcons + wrptr, sc->sc_txbuf[i]); if (++wrptr == end) wrptr = SBBC_SRAM_READ_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_begin)); } uart_barrier(bas); SBBC_SRAM_WRITE_4(sbbc_solcons + SBBC_CONS_OFF(cons_out_wrptr), wrptr); uart_barrier(bas); SBBC_SRAM_WRITE_4(sbbc_solscir, SBBC_SRAM_READ_4(sbbc_solscir) | SBBC_SRAM_CONS_OUT); uart_barrier(bas); sbbc_send_intr(bst, bsh); sc->sc_txbusy = 1; uart_unlock(sc->sc_hwmtx); return (0); } Index: stable/11/sys/ufs/ffs/ffs_softdep.c =================================================================== --- stable/11/sys/ufs/ffs/ffs_softdep.c (revision 330445) +++ stable/11/sys/ufs/ffs/ffs_softdep.c (revision 330446) @@ -1,14469 +1,14469 @@ /*- * Copyright 1998, 2000 Marshall Kirk McKusick. * Copyright 2009, 2010 Jeffrey W. Roberson * All rights reserved. * * The soft updates code is derived from the appendix of a University * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, * "Soft Updates: A Solution to the Metadata Update Problem in File * Systems", CSE-TR-254-95, August 1995). * * Further information about soft updates can be obtained from: * * Marshall Kirk McKusick http://www.mckusick.com/softdep/ * 1614 Oxford Street mckusick@mckusick.com * Berkeley, CA 94709-1608 +1-510-843-9542 * USA * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00 */ #include __FBSDID("$FreeBSD$"); #include "opt_ffs.h" #include "opt_quota.h" #include "opt_ddb.h" /* * For now we want the safety net that the DEBUG flag provides. */ #ifndef DEBUG #define DEBUG #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define KTR_SUJ 0 /* Define to KTR_SPARE. */ #ifndef SOFTUPDATES int softdep_flushfiles(oldmnt, flags, td) struct mount *oldmnt; int flags; struct thread *td; { panic("softdep_flushfiles called"); } int softdep_mount(devvp, mp, fs, cred) struct vnode *devvp; struct mount *mp; struct fs *fs; struct ucred *cred; { return (0); } void softdep_initialize() { return; } void softdep_uninitialize() { return; } void softdep_unmount(mp) struct mount *mp; { panic("softdep_unmount called"); } void softdep_setup_sbupdate(ump, fs, bp) struct ufsmount *ump; struct fs *fs; struct buf *bp; { panic("softdep_setup_sbupdate called"); } void softdep_setup_inomapdep(bp, ip, newinum, mode) struct buf *bp; struct inode *ip; ino_t newinum; int mode; { panic("softdep_setup_inomapdep called"); } void softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; struct mount *mp; ufs2_daddr_t newblkno; int frags; int oldfrags; { panic("softdep_setup_blkmapdep called"); } void softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; ufs_lbn_t lbn; ufs2_daddr_t newblkno; ufs2_daddr_t oldblkno; long newsize; long oldsize; struct buf *bp; { panic("softdep_setup_allocdirect called"); } void softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; ufs_lbn_t lbn; ufs2_daddr_t newblkno; ufs2_daddr_t oldblkno; long newsize; long oldsize; struct buf *bp; { panic("softdep_setup_allocext called"); } void softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) struct inode *ip; ufs_lbn_t lbn; struct buf *bp; int ptrno; ufs2_daddr_t newblkno; ufs2_daddr_t oldblkno; struct buf *nbp; { panic("softdep_setup_allocindir_page called"); } void softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct buf *nbp; struct inode *ip; struct buf *bp; int ptrno; ufs2_daddr_t newblkno; { panic("softdep_setup_allocindir_meta called"); } void softdep_journal_freeblocks(ip, cred, length, flags) struct inode *ip; struct ucred *cred; off_t length; int flags; { panic("softdep_journal_freeblocks called"); } void softdep_journal_fsync(ip) struct inode *ip; { panic("softdep_journal_fsync called"); } void softdep_setup_freeblocks(ip, length, flags) struct inode *ip; off_t length; int flags; { panic("softdep_setup_freeblocks called"); } void softdep_freefile(pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; { panic("softdep_freefile called"); } int 
softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) struct buf *bp; struct inode *dp; off_t diroffset; ino_t newinum; struct buf *newdirbp; int isnewblk; { panic("softdep_setup_directory_add called"); } void softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) struct buf *bp; struct inode *dp; caddr_t base; caddr_t oldloc; caddr_t newloc; int entrysize; { panic("softdep_change_directoryentry_offset called"); } void softdep_setup_remove(bp, dp, ip, isrmdir) struct buf *bp; struct inode *dp; struct inode *ip; int isrmdir; { panic("softdep_setup_remove called"); } void softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) struct buf *bp; struct inode *dp; struct inode *ip; ino_t newinum; int isrmdir; { panic("softdep_setup_directory_change called"); } void softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) struct mount *mp; struct buf *bp; ufs2_daddr_t blkno; int frags; struct workhead *wkhd; { panic("%s called", __FUNCTION__); } void softdep_setup_inofree(mp, bp, ino, wkhd) struct mount *mp; struct buf *bp; ino_t ino; struct workhead *wkhd; { panic("%s called", __FUNCTION__); } void softdep_setup_unlink(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_setup_link(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_revert_link(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_setup_rmdir(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_revert_rmdir(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_setup_create(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_revert_create(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_setup_mkdir(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_revert_mkdir(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } void softdep_setup_dotdot_link(dp, ip) struct inode *dp; struct inode *ip; { panic("%s called", __FUNCTION__); } int softdep_prealloc(vp, waitok) struct vnode *vp; int waitok; { panic("%s called", __FUNCTION__); } int softdep_journal_lookup(mp, vpp) struct mount *mp; struct vnode **vpp; { return (ENOENT); } void softdep_change_linkcnt(ip) struct inode *ip; { panic("softdep_change_linkcnt called"); } void softdep_load_inodeblock(ip) struct inode *ip; { panic("softdep_load_inodeblock called"); } void softdep_update_inodeblock(ip, bp, waitfor) struct inode *ip; struct buf *bp; int waitfor; { panic("softdep_update_inodeblock called"); } int softdep_fsync(vp) struct vnode *vp; /* the "in_core" copy of the inode */ { return (0); } void softdep_fsync_mountdev(vp) struct vnode *vp; { return; } int softdep_flushworklist(oldmnt, countp, td) struct mount *oldmnt; int *countp; struct thread *td; { *countp = 0; return (0); } int softdep_sync_metadata(struct vnode *vp) { panic("softdep_sync_metadata called"); } int softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor) { panic("softdep_sync_buf called"); } int softdep_slowdown(vp) struct vnode *vp; { panic("softdep_slowdown called"); } int softdep_request_cleanup(fs, vp, cred, resource) struct fs *fs; struct vnode *vp; struct ucred *cred; int resource; { return (0); } int softdep_check_suspend(struct mount *mp, struct vnode *devvp, int softdep_depcnt, int 
softdep_accdepcnt, int secondary_writes, int secondary_accwrites) { struct bufobj *bo; int error; (void) softdep_depcnt, (void) softdep_accdepcnt; bo = &devvp->v_bufobj; ASSERT_BO_WLOCKED(bo); MNT_ILOCK(mp); while (mp->mnt_secondary_writes != 0) { BO_UNLOCK(bo); msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), (PUSER - 1) | PDROP, "secwr", 0); BO_LOCK(bo); MNT_ILOCK(mp); } /* * Reasons for needing more work before suspend: * - Dirty buffers on devvp. * - Secondary writes occurred after start of vnode sync loop */ error = 0; if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0 || secondary_writes != 0 || mp->mnt_secondary_writes != 0 || secondary_accwrites != mp->mnt_secondary_accwrites) error = EAGAIN; BO_UNLOCK(bo); return (error); } void softdep_get_depcounts(struct mount *mp, int *softdepactivep, int *softdepactiveaccp) { (void) mp; *softdepactivep = 0; *softdepactiveaccp = 0; } void softdep_buf_append(bp, wkhd) struct buf *bp; struct workhead *wkhd; { panic("softdep_buf_appendwork called"); } void softdep_inode_append(ip, cred, wkhd) struct inode *ip; struct ucred *cred; struct workhead *wkhd; { panic("softdep_inode_appendwork called"); } void softdep_freework(wkhd) struct workhead *wkhd; { panic("softdep_freework called"); } #else FEATURE(softupdates, "FFS soft-updates support"); static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats"); static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0, "total dependencies allocated"); static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, CTLFLAG_RW, 0, "high use dependencies allocated"); static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, "current dependencies allocated"); static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0, "current dependencies written"); unsigned long dep_current[D_LAST + 1]; unsigned long dep_highuse[D_LAST + 1]; unsigned long dep_total[D_LAST + 1]; unsigned long dep_write[D_LAST + 1]; #define SOFTDEP_TYPE(type, str, long) \ static MALLOC_DEFINE(M_ ## type, #str, long); \ SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ &dep_total[D_ ## type], 0, ""); \ SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ &dep_current[D_ ## type], 0, ""); \ SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, str, CTLFLAG_RD, \ &dep_highuse[D_ ## type], 0, ""); \ SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \ &dep_write[D_ ## type], 0, ""); SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); SOFTDEP_TYPE(BMSAFEMAP, bmsafemap, "Block or frag allocated from cyl group map"); SOFTDEP_TYPE(NEWBLK, newblk, "New block or frag allocation dependency"); SOFTDEP_TYPE(ALLOCDIRECT, allocdirect, "Block or frag dependency for an inode"); SOFTDEP_TYPE(INDIRDEP, indirdep, "Indirect block dependencies"); SOFTDEP_TYPE(ALLOCINDIR, allocindir, "Block dependency for an indirect block"); SOFTDEP_TYPE(FREEFRAG, freefrag, "Previously used frag for an inode"); SOFTDEP_TYPE(FREEBLKS, freeblks, "Blocks freed from an inode"); SOFTDEP_TYPE(FREEFILE, freefile, "Inode deallocated"); SOFTDEP_TYPE(DIRADD, diradd, "New directory entry"); SOFTDEP_TYPE(MKDIR, mkdir, "New directory"); SOFTDEP_TYPE(DIRREM, dirrem, "Directory entry deleted"); SOFTDEP_TYPE(NEWDIRBLK, newdirblk, "Unclaimed new directory block"); SOFTDEP_TYPE(FREEWORK, freework, "free an inode block"); SOFTDEP_TYPE(FREEDEP, freedep, "track a block free"); SOFTDEP_TYPE(JADDREF, jaddref, "Journal inode ref add"); SOFTDEP_TYPE(JREMREF, jremref, "Journal 
inode ref remove"); SOFTDEP_TYPE(JMVREF, jmvref, "Journal inode ref move"); SOFTDEP_TYPE(JNEWBLK, jnewblk, "Journal new block"); SOFTDEP_TYPE(JFREEBLK, jfreeblk, "Journal free block"); SOFTDEP_TYPE(JFREEFRAG, jfreefrag, "Journal free frag"); SOFTDEP_TYPE(JSEG, jseg, "Journal segment"); SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete"); SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency"); SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation"); SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete"); static MALLOC_DEFINE(M_SENTINEL, "sentinel", "Worklist sentinel"); static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes"); static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations"); static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data"); #define M_SOFTDEP_FLAGS (M_WAITOK) /* * translate from workitem type to memory type * MUST match the defines above, such that memtype[D_XXX] == M_XXX */ static struct malloc_type *memtype[] = { M_PAGEDEP, M_INODEDEP, M_BMSAFEMAP, M_NEWBLK, M_ALLOCDIRECT, M_INDIRDEP, M_ALLOCINDIR, M_FREEFRAG, M_FREEBLKS, M_FREEFILE, M_DIRADD, M_MKDIR, M_DIRREM, M_NEWDIRBLK, M_FREEWORK, M_FREEDEP, M_JADDREF, M_JREMREF, M_JMVREF, M_JNEWBLK, M_JFREEBLK, M_JFREEFRAG, M_JSEG, M_JSEGDEP, M_SBDEP, M_JTRUNC, M_JFSYNC, M_SENTINEL }; #define DtoM(type) (memtype[type]) /* * Names of malloc types. */ #define TYPENAME(type) \ ((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???") /* * End system adaptation definitions. */ #define DOTDOT_OFFSET offsetof(struct dirtemplate, dotdot_ino) #define DOT_OFFSET offsetof(struct dirtemplate, dot_ino) /* * Internal function prototypes. */ static void check_clear_deps(struct mount *); static void softdep_error(char *, int); static int softdep_process_worklist(struct mount *, int); static int softdep_waitidle(struct mount *, int); static void drain_output(struct vnode *); static struct buf *getdirtybuf(struct buf *, struct rwlock *, int); static int check_inodedep_free(struct inodedep *); static void clear_remove(struct mount *); static void clear_inodedeps(struct mount *); static void unlinked_inodedep(struct mount *, struct inodedep *); static void clear_unlinked_inodedep(struct inodedep *); static struct inodedep *first_unlinked_inodedep(struct ufsmount *); static int flush_pagedep_deps(struct vnode *, struct mount *, struct diraddhd *); static int free_pagedep(struct pagedep *); static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); static int sync_cgs(struct mount *, int); static int handle_written_filepage(struct pagedep *, struct buf *, int); static int handle_written_sbdep(struct sbdep *, struct buf *); static void initiate_write_sbdep(struct sbdep *); static void diradd_inode_written(struct diradd *, struct inodedep *); static int handle_written_indirdep(struct indirdep *, struct buf *, struct buf**, int); static int handle_written_inodeblock(struct inodedep *, struct buf *, int); static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, uint8_t *); static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int); static void handle_written_jaddref(struct jaddref *); static void handle_written_jremref(struct jremref *); static void handle_written_jseg(struct jseg *, struct buf *); static void handle_written_jnewblk(struct jnewblk *); static void handle_written_jblkdep(struct jblkdep *); static void 
handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); static void complete_jsegs(struct jseg *); static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *); static void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data); static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *); static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *); static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *); static inline void inoref_write(struct inoref *, struct jseg *, struct jrefrec *); static void handle_allocdirect_partdone(struct allocdirect *, struct workhead *); static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); static int indirblk_lookup(struct mount *, ufs2_daddr_t); static void indirblk_insert(struct freework *); static void indirblk_remove(struct freework *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); static int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *, uint8_t *); static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); static void handle_workitem_freefile(struct freefile *); static int handle_workitem_remove(struct dirrem *, int); static struct dirrem *newdirrem(struct buf *, struct inode *, struct inode *, int, struct dirrem **); static struct indirdep *indirdep_lookup(struct mount *, struct inode *, struct buf *); static void cancel_indirdep(struct indirdep *, struct buf *, struct freeblks *); static void free_indirdep(struct indirdep *); static void free_diradd(struct diradd *, struct workhead *); static void merge_diradd(struct inodedep *, struct diradd *); static void complete_diradd(struct diradd *); static struct diradd *diradd_lookup(struct pagedep *, int); static struct jremref *cancel_diradd_dotdot(struct inode *, struct dirrem *, struct jremref *); static struct jremref *cancel_mkdir_dotdot(struct inode *, struct dirrem *, struct jremref *); static void cancel_diradd(struct diradd *, struct dirrem *, struct jremref *, struct jremref *, struct jremref *); static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *, struct jremref *); static void cancel_allocindir(struct allocindir *, struct buf *bp, struct freeblks *, int); static int setup_trunc_indir(struct freeblks *, struct inode *, ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t); static void complete_trunc_indir(struct freework *); static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *, int); static void complete_mkdir(struct mkdir *); static void free_newdirblk(struct newdirblk *); static void free_jremref(struct jremref *); static void free_jaddref(struct jaddref *); static void free_jsegdep(struct jsegdep *); static void free_jsegs(struct jblocks *); static void rele_jseg(struct jseg *); static void free_jseg(struct jseg *, struct jblocks 
*); static void free_jnewblk(struct jnewblk *); static void free_jblkdep(struct jblkdep *); static void free_jfreefrag(struct jfreefrag *); static void free_freedep(struct freedep *); static void journal_jremref(struct dirrem *, struct jremref *, struct inodedep *); static void cancel_jnewblk(struct jnewblk *, struct workhead *); static int cancel_jaddref(struct jaddref *, struct inodedep *, struct workhead *); static void cancel_jfreefrag(struct jfreefrag *); static inline void setup_freedirect(struct freeblks *, struct inode *, int, int); static inline void setup_freeext(struct freeblks *, struct inode *, int, int); static inline void setup_freeindir(struct freeblks *, struct inode *, int, ufs_lbn_t, int); static inline struct freeblks *newfreeblks(struct mount *, struct inode *); static void freeblks_free(struct ufsmount *, struct freeblks *, int); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); static ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t); static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int); static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t, int, int); static void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int); static int cancel_pagedep(struct pagedep *, struct freeblks *, int); static int deallocate_dependencies(struct buf *, struct freeblks *, int); static void newblk_freefrag(struct newblk*); static void free_newblk(struct newblk *); static void cancel_allocdirect(struct allocdirectlst *, struct allocdirect *, struct freeblks *); static int check_inode_unwritten(struct inodedep *); static int free_inodedep(struct inodedep *); static void freework_freeblock(struct freework *); static void freework_enqueue(struct freework *); static int handle_workitem_freeblocks(struct freeblks *, int); static int handle_complete_freeblocks(struct freeblks *, int); static void handle_workitem_indirblk(struct freework *); static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, struct workhead *); static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *, struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, ufs2_daddr_t, ufs_lbn_t); static void handle_workitem_freefrag(struct freefrag *); static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long, ufs_lbn_t); static void allocdirect_merge(struct allocdirectlst *, struct allocdirect *, struct allocdirect *); static struct freefrag *allocindir_merge(struct allocindir *, struct allocindir *); static int bmsafemap_find(struct bmsafemap_hashhead *, int, struct bmsafemap **); static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *, int cg, struct bmsafemap *); static int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int, struct newblk **); static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **); static int inodedep_find(struct inodedep_hashhead *, ino_t, struct inodedep **); static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **); static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t, int, struct pagedep **); static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, struct pagedep **); static void pause_timer(void *); static int request_cleanup(struct mount *, int); static int softdep_request_cleanup_flush(struct mount *, 
struct ufsmount *); static void schedule_cleanup(struct mount *); static void softdep_ast_cleanup_proc(struct thread *); static struct ufsmount *softdep_bp_to_mp(struct buf *bp); static int process_worklist_item(struct mount *, int, int); static void process_removes(struct vnode *); static void process_truncates(struct vnode *); static void jwork_move(struct workhead *, struct workhead *); static void jwork_insert(struct workhead *, struct jsegdep *); static void add_to_worklist(struct worklist *, int); static void wake_worklist(struct worklist *); static void wait_worklist(struct worklist *, char *); static void remove_from_worklist(struct worklist *); static void softdep_flush(void *); static void softdep_flushjournal(struct mount *); static int softdep_speedup(struct ufsmount *); static void worklist_speedup(struct mount *); static int journal_mount(struct mount *, struct fs *, struct ucred *); static void journal_unmount(struct ufsmount *); static int journal_space(struct ufsmount *, int); static void journal_suspend(struct ufsmount *); static int journal_unsuspend(struct ufsmount *ump); static void softdep_prelink(struct vnode *, struct vnode *); static void add_to_journal(struct worklist *); static void remove_from_journal(struct worklist *); static bool softdep_excess_items(struct ufsmount *, int); static void softdep_process_journal(struct mount *, struct worklist *, int); static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, uint16_t); static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t, uint16_t); static inline struct jsegdep *inoref_jseg(struct inoref *); static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t); static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, ufs2_daddr_t, int); static void adjust_newfreework(struct freeblks *, int); static struct jtrunc *newjtrunc(struct freeblks *, off_t, int); static void move_newblock_dep(struct jaddref *, struct inodedep *); static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t); static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *, ufs2_daddr_t, long, ufs_lbn_t); static struct freework *newfreework(struct ufsmount *, struct freeblks *, struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int); static int jwait(struct worklist *, int); static struct inodedep *inodedep_lookup_ip(struct inode *); static int bmsafemap_backgroundwrite(struct bmsafemap *, struct buf *); static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); static void handle_jwork(struct workhead *); static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *, struct mkdir **); static struct jblocks *jblocks_create(void); static ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *); static void jblocks_free(struct jblocks *, struct mount *, int); static void jblocks_destroy(struct jblocks *); static void jblocks_add(struct jblocks *, ufs2_daddr_t, int); /* * Exported softdep operations. */ static void softdep_disk_io_initiation(struct buf *); static void softdep_disk_write_complete(struct buf *); static void softdep_deallocate_dependencies(struct buf *); static int softdep_count_dependencies(struct buf *bp, int); /* * Global lock over all of soft updates. 
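 * It guards the global dependency statistics (dep_current and friends) and
 * the softdepmounts list; most other state is protected by the
 * per-filesystem lock defined below.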
*/ static struct mtx lk; MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF); #define ACQUIRE_GBLLOCK(lk) mtx_lock(lk) #define FREE_GBLLOCK(lk) mtx_unlock(lk) #define GBLLOCK_OWNED(lk) mtx_assert((lk), MA_OWNED) /* * Per-filesystem soft-updates locking. */ #define LOCK_PTR(ump) (&(ump)->um_softdep->sd_fslock) #define TRY_ACQUIRE_LOCK(ump) rw_try_wlock(&(ump)->um_softdep->sd_fslock) #define ACQUIRE_LOCK(ump) rw_wlock(&(ump)->um_softdep->sd_fslock) #define FREE_LOCK(ump) rw_wunlock(&(ump)->um_softdep->sd_fslock) #define LOCK_OWNED(ump) rw_assert(&(ump)->um_softdep->sd_fslock, \ RA_WLOCKED) #define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock) #define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock) /* * Worklist queue management. * These routines require that the lock be held. */ #ifndef /* NOT */ DEBUG #define WORKLIST_INSERT(head, item) do { \ (item)->wk_state |= ONWORKLIST; \ LIST_INSERT_HEAD(head, item, wk_list); \ } while (0) #define WORKLIST_REMOVE(item) do { \ (item)->wk_state &= ~ONWORKLIST; \ LIST_REMOVE(item, wk_list); \ } while (0) #define WORKLIST_INSERT_UNLOCKED WORKLIST_INSERT #define WORKLIST_REMOVE_UNLOCKED WORKLIST_REMOVE #else /* DEBUG */ static void worklist_insert(struct workhead *, struct worklist *, int); static void worklist_remove(struct worklist *, int); #define WORKLIST_INSERT(head, item) worklist_insert(head, item, 1) #define WORKLIST_INSERT_UNLOCKED(head, item) worklist_insert(head, item, 0) #define WORKLIST_REMOVE(item) worklist_remove(item, 1) #define WORKLIST_REMOVE_UNLOCKED(item) worklist_remove(item, 0) static void worklist_insert(head, item, locked) struct workhead *head; struct worklist *item; int locked; { if (locked) LOCK_OWNED(VFSTOUFS(item->wk_mp)); if (item->wk_state & ONWORKLIST) panic("worklist_insert: %p %s(0x%X) already on list", item, TYPENAME(item->wk_type), item->wk_state); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } static void worklist_remove(item, locked) struct worklist *item; int locked; { if (locked) LOCK_OWNED(VFSTOUFS(item->wk_mp)); if ((item->wk_state & ONWORKLIST) == 0) panic("worklist_remove: %p %s(0x%X) not on list", item, TYPENAME(item->wk_type), item->wk_state); item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } #endif /* DEBUG */ /* * Merge two jsegdeps keeping only the oldest one as newer references * can't be discarded until after older references. */ static inline struct jsegdep * jsegdep_merge(struct jsegdep *one, struct jsegdep *two) { struct jsegdep *swp; if (two == NULL) return (one); if (one->jd_seg->js_seq > two->jd_seg->js_seq) { swp = one; one = two; two = swp; } WORKLIST_REMOVE(&two->jd_list); free_jsegdep(two); return (one); } /* * If two freedeps are compatible free one to reduce list size. */ static inline struct freedep * freedep_merge(struct freedep *one, struct freedep *two) { if (two == NULL) return (one); if (one->fd_freework == two->fd_freework) { WORKLIST_REMOVE(&two->fd_list); free_freedep(two); } return (one); } /* * Move journal work from one list to another. Duplicate freedeps and * jsegdeps are coalesced to keep the lists as small as possible. 
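 * Retaining only the oldest jsegdep is sufficient because, as noted at
 * jsegdep_merge() above, newer journal references cannot be discarded
 * until the older ones have been.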
*/ static void jwork_move(dst, src) struct workhead *dst; struct workhead *src; { struct freedep *freedep; struct jsegdep *jsegdep; struct worklist *wkn; struct worklist *wk; KASSERT(dst != src, ("jwork_move: dst == src")); freedep = NULL; jsegdep = NULL; LIST_FOREACH_SAFE(wk, dst, wk_list, wkn) { if (wk->wk_type == D_JSEGDEP) jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); else if (wk->wk_type == D_FREEDEP) freedep = freedep_merge(WK_FREEDEP(wk), freedep); } while ((wk = LIST_FIRST(src)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(dst, wk); if (wk->wk_type == D_JSEGDEP) { jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); continue; } if (wk->wk_type == D_FREEDEP) freedep = freedep_merge(WK_FREEDEP(wk), freedep); } } static void jwork_insert(dst, jsegdep) struct workhead *dst; struct jsegdep *jsegdep; { struct jsegdep *jsegdepn; struct worklist *wk; LIST_FOREACH(wk, dst, wk_list) if (wk->wk_type == D_JSEGDEP) break; if (wk == NULL) { WORKLIST_INSERT(dst, &jsegdep->jd_list); return; } jsegdepn = WK_JSEGDEP(wk); if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) { WORKLIST_REMOVE(wk); free_jsegdep(jsegdepn); WORKLIST_INSERT(dst, &jsegdep->jd_list); } else free_jsegdep(jsegdep); } /* * Routines for tracking and managing workitems. */ static void workitem_free(struct worklist *, int); static void workitem_alloc(struct worklist *, int, struct mount *); static void workitem_reassign(struct worklist *, int); #define WORKITEM_FREE(item, type) \ workitem_free((struct worklist *)(item), (type)) #define WORKITEM_REASSIGN(item, type) \ workitem_reassign((struct worklist *)(item), (type)) static void workitem_free(item, type) struct worklist *item; int type; { struct ufsmount *ump; #ifdef DEBUG if (item->wk_state & ONWORKLIST) panic("workitem_free: %s(0x%X) still on list", TYPENAME(item->wk_type), item->wk_state); if (item->wk_type != type && type != D_NEWBLK) panic("workitem_free: type mismatch %s != %s", TYPENAME(item->wk_type), TYPENAME(type)); #endif if (item->wk_state & IOWAITING) wakeup(item); ump = VFSTOUFS(item->wk_mp); LOCK_OWNED(ump); KASSERT(ump->softdep_deps > 0, ("workitem_free: %s: softdep_deps going negative", ump->um_fs->fs_fsmnt)); if (--ump->softdep_deps == 0 && ump->softdep_req) wakeup(&ump->softdep_deps); KASSERT(dep_current[item->wk_type] > 0, ("workitem_free: %s: dep_current[%s] going negative", ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); KASSERT(ump->softdep_curdeps[item->wk_type] > 0, ("workitem_free: %s: softdep_curdeps[%s] going negative", ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); atomic_subtract_long(&dep_current[item->wk_type], 1); ump->softdep_curdeps[item->wk_type] -= 1; free(item, DtoM(type)); } static void workitem_alloc(item, type, mp) struct worklist *item; int type; struct mount *mp; { struct ufsmount *ump; item->wk_type = type; item->wk_mp = mp; item->wk_state = 0; ump = VFSTOUFS(mp); ACQUIRE_GBLLOCK(&lk); dep_current[type]++; if (dep_current[type] > dep_highuse[type]) dep_highuse[type] = dep_current[type]; dep_total[type]++; FREE_GBLLOCK(&lk); ACQUIRE_LOCK(ump); ump->softdep_curdeps[type] += 1; ump->softdep_deps++; ump->softdep_accdeps++; FREE_LOCK(ump); } static void workitem_reassign(item, newtype) struct worklist *item; int newtype; { struct ufsmount *ump; ump = VFSTOUFS(item->wk_mp); LOCK_OWNED(ump); KASSERT(ump->softdep_curdeps[item->wk_type] > 0, ("workitem_reassign: %s: softdep_curdeps[%s] going negative", VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); ump->softdep_curdeps[item->wk_type] -= 1; 
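/* The old type was debited above; now credit the new type. */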
ump->softdep_curdeps[newtype] += 1; KASSERT(dep_current[item->wk_type] > 0, ("workitem_reassign: %s: dep_current[%s] going negative", VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); ACQUIRE_GBLLOCK(&lk); dep_current[newtype]++; dep_current[item->wk_type]--; if (dep_current[newtype] > dep_highuse[newtype]) dep_highuse[newtype] = dep_current[newtype]; dep_total[newtype]++; FREE_GBLLOCK(&lk); item->wk_type = newtype; } /* * Workitem queue management */ static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ static int proc_waiting; /* tracks whether we have a timeout posted */ static int *stat_countp; /* statistic to count in proc_waiting timeout */ static struct callout softdep_callout; static int req_clear_inodedeps; /* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */ /* * runtime statistics */ static int stat_flush_threads; /* number of softdep flushing threads */ static int stat_worklist_push; /* number of worklist cleanups */ static int stat_blk_limit_push; /* number of times block limit neared */ static int stat_ino_limit_push; /* number of times inode limit neared */ static int stat_blk_limit_hit; /* number of times block slowdown imposed */ static int stat_ino_limit_hit; /* number of times inode slowdown imposed */ static int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */ static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ static int stat_jaddref; /* bufs redirtied as ino bitmap can not write */ static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */ static int stat_journal_min; /* Times hit journal min threshold */ static int stat_journal_low; /* Times hit journal low threshold */ static int stat_journal_wait; /* Times blocked in jwait(). */ static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */ static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */ static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */ static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. 
*/ static int stat_cleanup_high_delay; /* Maximum cleanup delay (in ticks) */ static int stat_cleanup_blkrequests; /* Number of block cleanup requests */ static int stat_cleanup_inorequests; /* Number of inode cleanup requests */ static int stat_cleanup_retries; /* Number of cleanups that needed to flush */ static int stat_cleanup_failures; /* Number of cleanup requests that failed */ static int stat_emptyjblocks; /* Number of potentially empty journal blocks */ SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD, &stat_flush_threads, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,""); SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,""); SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, sync_limit_hit, CTLFLAG_RW, &stat_sync_limit_hit, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, jaddref_rollback, CTLFLAG_RW, &stat_jaddref, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, jnewblk_rollback, CTLFLAG_RW, &stat_jnewblk, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, journal_low, CTLFLAG_RW, &stat_journal_low, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW, &stat_journal_min, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW, &stat_journal_wait, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW, &stat_jwait_filepage, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW, &stat_jwait_freeblks, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW, &stat_jwait_inode, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW, &stat_jwait_newblk, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_blkrequests, CTLFLAG_RW, &stat_cleanup_blkrequests, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_inorequests, CTLFLAG_RW, &stat_cleanup_inorequests, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_high_delay, CTLFLAG_RW, &stat_cleanup_high_delay, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_retries, CTLFLAG_RW, &stat_cleanup_retries, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW, &stat_cleanup_failures, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW, &softdep_flushcache, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, emptyjblocks, CTLFLAG_RD, &stat_emptyjblocks, 0, ""); SYSCTL_DECL(_vfs_ffs); /* Whether to recompute the summary at mount time */ static int compute_summary_at_mount = 0; SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, &compute_summary_at_mount, 0, "Recompute summary at mount"); static int print_threads = 0; SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW, &print_threads, 0, "Notify flusher thread 
start/stop"); /* List of all filesystems mounted with soft updates */ static TAILQ_HEAD(, mount_softdeps) softdepmounts; /* * This function cleans the worklist for a filesystem. * Each filesystem running with soft dependencies gets its own * thread to run in this function. The thread is started up in * softdep_mount and shutdown in softdep_unmount. They show up * as part of the kernel "bufdaemon" process whose process * entry is available in bufdaemonproc. */ static int searchfailed; extern struct proc *bufdaemonproc; static void softdep_flush(addr) void *addr; { struct mount *mp; struct thread *td; struct ufsmount *ump; td = curthread; td->td_pflags |= TDP_NORUNNINGBUF; mp = (struct mount *)addr; ump = VFSTOUFS(mp); atomic_add_int(&stat_flush_threads, 1); ACQUIRE_LOCK(ump); ump->softdep_flags &= ~FLUSH_STARTING; wakeup(&ump->softdep_flushtd); FREE_LOCK(ump); if (print_threads) { if (stat_flush_threads == 1) printf("Running %s at pid %d\n", bufdaemonproc->p_comm, bufdaemonproc->p_pid); printf("Start thread %s\n", td->td_name); } for (;;) { while (softdep_process_worklist(mp, 0) > 0 || (MOUNTEDSUJ(mp) && VFSTOUFS(mp)->softdep_jblocks->jb_suspended)) kthread_suspend_check(); ACQUIRE_LOCK(ump); if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, "sdflush", hz / 2); ump->softdep_flags &= ~FLUSH_CLEANUP; /* * Check to see if we are done and need to exit. */ if ((ump->softdep_flags & FLUSH_EXIT) == 0) { FREE_LOCK(ump); continue; } ump->softdep_flags &= ~FLUSH_EXIT; FREE_LOCK(ump); wakeup(&ump->softdep_flags); if (print_threads) printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups); atomic_subtract_int(&stat_flush_threads, 1); kthread_exit(); panic("kthread_exit failed\n"); } } static void worklist_speedup(mp) struct mount *mp; { struct ufsmount *ump; ump = VFSTOUFS(mp); LOCK_OWNED(ump); if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) ump->softdep_flags |= FLUSH_CLEANUP; wakeup(&ump->softdep_flushtd); } static int softdep_speedup(ump) struct ufsmount *ump; { struct ufsmount *altump; struct mount_softdeps *sdp; LOCK_OWNED(ump); worklist_speedup(ump->um_mountp); bd_speedup(); /* * If we have global shortages, then we need other * filesystems to help with the cleanup. Here we wakeup a * flusher thread for a filesystem that is over its fair * share of resources. */ if (req_clear_inodedeps || req_clear_remove) { ACQUIRE_GBLLOCK(&lk); TAILQ_FOREACH(sdp, &softdepmounts, sd_next) { if ((altump = sdp->sd_ump) == ump) continue; if (((req_clear_inodedeps && altump->softdep_curdeps[D_INODEDEP] > max_softdeps / stat_flush_threads) || (req_clear_remove && altump->softdep_curdeps[D_DIRREM] > (max_softdeps / 2) / stat_flush_threads)) && TRY_ACQUIRE_LOCK(altump)) break; } if (sdp == NULL) { searchfailed++; FREE_GBLLOCK(&lk); } else { /* * Move to the end of the list so we pick a * different one on out next try. */ TAILQ_REMOVE(&softdepmounts, sdp, sd_next); TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next); FREE_GBLLOCK(&lk); if ((altump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) altump->softdep_flags |= FLUSH_CLEANUP; altump->um_softdep->sd_cleanups++; wakeup(&altump->softdep_flushtd); FREE_LOCK(altump); } } return (speedup_syncer()); } /* * Add an item to the end of the work queue. * This routine requires that the lock be held. * This is the only routine that adds items to the list. 
/* * Add an item to the end of the work queue. * This routine requires that the lock be held. * This is the only routine that adds items to the list. * The following routine is the only one that removes items * and does so in order from first to last. */ #define WK_HEAD 0x0001 /* Add to HEAD. */ #define WK_NODELAY 0x0002 /* Process immediately. */ static void add_to_worklist(wk, flags) struct worklist *wk; int flags; { struct ufsmount *ump; ump = VFSTOUFS(wk->wk_mp); LOCK_OWNED(ump); if (wk->wk_state & ONWORKLIST) panic("add_to_worklist: %s(0x%X) already on list", TYPENAME(wk->wk_type), wk->wk_state); wk->wk_state |= ONWORKLIST; if (ump->softdep_on_worklist == 0) { LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); ump->softdep_worklist_tail = wk; } else if (flags & WK_HEAD) { LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); } else { LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list); ump->softdep_worklist_tail = wk; } ump->softdep_on_worklist += 1; if (flags & WK_NODELAY) worklist_speedup(wk->wk_mp); } /* * Remove the item to be processed. If we are removing the last * item on the list, we need to recalculate the tail pointer. */ static void remove_from_worklist(wk) struct worklist *wk; { struct ufsmount *ump; ump = VFSTOUFS(wk->wk_mp); if (ump->softdep_worklist_tail == wk) ump->softdep_worklist_tail = (struct worklist *)wk->wk_list.le_prev; WORKLIST_REMOVE(wk); ump->softdep_on_worklist -= 1; } static void wake_worklist(wk) struct worklist *wk; { if (wk->wk_state & IOWAITING) { wk->wk_state &= ~IOWAITING; wakeup(wk); } } static void wait_worklist(wk, wmesg) struct worklist *wk; char *wmesg; { struct ufsmount *ump; ump = VFSTOUFS(wk->wk_mp); wk->wk_state |= IOWAITING; msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0); } /* * Process that runs once per second to handle items in the background queue. * * Note that we ensure that everything is done in the order in which they * appear in the queue. The code below depends on this property to ensure * that blocks of a file are freed before the inode itself is freed. This * ordering ensures that no new <vfsid, inum, lbn> triples will be generated * until all the old ones have been purged from the dependency lists. */ static int softdep_process_worklist(mp, full) struct mount *mp; int full; { int cnt, matchcnt; struct ufsmount *ump; long starttime; KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp")); if (MOUNTEDSOFTDEP(mp) == 0) return (0); matchcnt = 0; ump = VFSTOUFS(mp); ACQUIRE_LOCK(ump); starttime = time_second; softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0); check_clear_deps(mp); while (ump->softdep_on_worklist > 0) { if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0) break; else matchcnt += cnt; check_clear_deps(mp); /* * We do not generally want to stop for buffer space, but if * we are really being a buffer hog, we will stop and wait. */ if (should_yield()) { FREE_LOCK(ump); kern_yield(PRI_USER); bwillwrite(); ACQUIRE_LOCK(ump); } /* * Never allow processing to run for more than one * second. This gives the syncer thread the opportunity * to pause if appropriate. */ if (!full && starttime != time_second) break; } if (full == 0) journal_unsuspend(ump); FREE_LOCK(ump); return (matchcnt); }
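remove_from_worklist() above recovers a new tail pointer with the cast (struct worklist *)wk->wk_list.le_prev; the same trick is documented at remove_from_journal() further below. It is legal only because the LIST_ENTRY is the first member of the structure, so a pointer to the previous element's le_next field is also a pointer to that element. A self-contained userspace sketch (illustrative types, standard <sys/queue.h>):

#include <sys/queue.h>
#include <stdio.h>

/*
 * The list linkage must be the first member: le_prev points at the
 * previous element's le_next field, which then sits at offset 0 of
 * that element, making the cast below legal.
 */
struct worklist {
    LIST_ENTRY(worklist) wk_list;
    int wk_id;
};

int
main(void)
{
    LIST_HEAD(, worklist) head = LIST_HEAD_INITIALIZER(head);
    struct worklist a = { .wk_id = 1 }, b = { .wk_id = 2 }, *tail;

    LIST_INSERT_HEAD(&head, &a, wk_list);
    LIST_INSERT_AFTER(&a, &b, wk_list); /* b is now the tail */
    tail = &b;

    /* Remove the tail and recover the new tail, as the kernel does. */
    tail = (struct worklist *)tail->wk_list.le_prev;
    LIST_REMOVE(&b, wk_list);
    printf("new tail id: %d\n", tail->wk_id); /* prints 1 */
    return (0);
}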
/* * Process all removes associated with a vnode if we are running out of * journal space. Any other process which attempts to flush these will * be unable as we have the vnodes locked. */ static void process_removes(vp) struct vnode *vp; { struct inodedep *inodedep; struct dirrem *dirrem; struct ufsmount *ump; struct mount *mp; ino_t inum; mp = vp->v_mount; ump = VFSTOUFS(mp); LOCK_OWNED(ump); inum = VTOI(vp)->i_number; for (;;) { top: if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) return; LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) { /* * If another thread is trying to lock this vnode * it will fail but we must wait for it to do so * before we can proceed. */ if (dirrem->dm_state & INPROGRESS) { wait_worklist(&dirrem->dm_list, "pwrwait"); goto top; } if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == (COMPLETE | ONWORKLIST)) break; } if (dirrem == NULL) return; remove_from_worklist(&dirrem->dm_list); FREE_LOCK(ump); if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_removes: suspended filesystem"); handle_workitem_remove(dirrem, 0); vn_finished_secondary_write(mp); ACQUIRE_LOCK(ump); } } /* * Process all truncations associated with a vnode if we are running out * of journal space. This is called when the vnode lock is already held * and no other process can clear the truncation. */ static void process_truncates(vp) struct vnode *vp; { struct inodedep *inodedep; struct freeblks *freeblks; struct ufsmount *ump; struct mount *mp; ino_t inum; int cgwait; mp = vp->v_mount; ump = VFSTOUFS(mp); LOCK_OWNED(ump); inum = VTOI(vp)->i_number; for (;;) { if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) return; cgwait = 0; TAILQ_FOREACH(freeblks, &inodedep->id_freeblklst, fb_next) { /* Journal entries not yet written. */ if (!LIST_EMPTY(&freeblks->fb_jblkdephd)) { jwait(&LIST_FIRST( &freeblks->fb_jblkdephd)->jb_list, MNT_WAIT); break; } /* Another thread is executing this item. */ if (freeblks->fb_state & INPROGRESS) { wait_worklist(&freeblks->fb_list, "ptrwait"); break; } /* Freeblks is waiting on an inode write. */ if ((freeblks->fb_state & COMPLETE) == 0) { FREE_LOCK(ump); ffs_update(vp, 1); ACQUIRE_LOCK(ump); break; } if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) == (ALLCOMPLETE | ONWORKLIST)) { remove_from_worklist(&freeblks->fb_list); freeblks->fb_state |= INPROGRESS; FREE_LOCK(ump); if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_truncates: " "suspended filesystem"); handle_workitem_freeblocks(freeblks, 0); vn_finished_secondary_write(mp); ACQUIRE_LOCK(ump); break; } if (freeblks->fb_cgwait) cgwait++; } if (cgwait) { FREE_LOCK(ump); sync_cgs(mp, MNT_WAIT); ffs_sync_snap(mp, MNT_WAIT); ACQUIRE_LOCK(ump); continue; } if (freeblks == NULL) break; } return; }
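process_worklist_item() below threads a private sentinel node through the pending list: it always inspects the element after the sentinel and steps the sentinel forward, so the walk stays consistent even though the lock is dropped while each item is processed. A minimal userspace sketch of sentinel traversal (all names illustrative):

#include <sys/queue.h>
#include <stdio.h>

struct item {
    LIST_ENTRY(item) link;
    int payload;
};

int
main(void)
{
    LIST_HEAD(, item) head = LIST_HEAD_INITIALIZER(head);
    struct item sentinel = { .payload = -1 };
    struct item items[3] = {
        { .payload = 0 }, { .payload = 1 }, { .payload = 2 }
    };
    struct item *it;
    int i;

    for (i = 2; i >= 0; i--)
        LIST_INSERT_HEAD(&head, &items[i], link);
    LIST_INSERT_HEAD(&head, &sentinel, link);

    /*
     * Look at the element after the sentinel, then step the sentinel
     * past it.  A lock could be dropped while each item is handled;
     * the sentinel keeps our place and new head insertions are never
     * revisited, so the walk terminates.
     */
    while ((it = LIST_NEXT(&sentinel, link)) != NULL) {
        LIST_REMOVE(&sentinel, link);
        LIST_INSERT_AFTER(it, &sentinel, link);
        printf("processing item %d\n", it->payload);
    }
    LIST_REMOVE(&sentinel, link);
    return (0);
}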
/* * Process one item on the worklist. */ static int process_worklist_item(mp, target, flags) struct mount *mp; int target; int flags; { struct worklist sentinel; struct worklist *wk; struct ufsmount *ump; int matchcnt; int error; KASSERT(mp != NULL, ("process_worklist_item: NULL mp")); /* * If we are being called because of a process doing a * copy-on-write, then it is not safe to write as we may * recurse into the copy-on-write routine. */ if (curthread->td_pflags & TDP_COWINPROGRESS) return (-1); PHOLD(curproc); /* Don't let the stack go away. */ ump = VFSTOUFS(mp); LOCK_OWNED(ump); matchcnt = 0; sentinel.wk_mp = NULL; sentinel.wk_type = D_SENTINEL; LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sentinel, wk_list); for (wk = LIST_NEXT(&sentinel, wk_list); wk != NULL; wk = LIST_NEXT(&sentinel, wk_list)) { if (wk->wk_type == D_SENTINEL) { LIST_REMOVE(&sentinel, wk_list); LIST_INSERT_AFTER(wk, &sentinel, wk_list); continue; } if (wk->wk_state & INPROGRESS) panic("process_worklist_item: %p already in progress.", wk); wk->wk_state |= INPROGRESS; remove_from_worklist(wk); FREE_LOCK(ump); if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_worklist_item: suspended filesystem"); switch (wk->wk_type) { case D_DIRREM: /* removal of a directory entry */ error = handle_workitem_remove(WK_DIRREM(wk), flags); break; case D_FREEBLKS: /* releasing blocks and/or fragments from a file */ error = handle_workitem_freeblocks(WK_FREEBLKS(wk), flags); break; case D_FREEFRAG: /* releasing a fragment when replaced as a file grows */ handle_workitem_freefrag(WK_FREEFRAG(wk)); error = 0; break; case D_FREEFILE: /* releasing an inode when its link count drops to 0 */ handle_workitem_freefile(WK_FREEFILE(wk)); error = 0; break; default: panic("%s_process_worklist: Unknown type %s", "softdep", TYPENAME(wk->wk_type)); /* NOTREACHED */ } vn_finished_secondary_write(mp); ACQUIRE_LOCK(ump); if (error == 0) { if (++matchcnt == target) break; continue; } /* * We have to retry the worklist item later. Wake up any * waiters who may be able to complete it immediately and * add the item back to the head so we don't try to execute * it again. */ wk->wk_state &= ~INPROGRESS; wake_worklist(wk); add_to_worklist(wk, WK_HEAD); } /* Sentinel could've become the tail from remove_from_worklist. */ if (ump->softdep_worklist_tail == &sentinel) ump->softdep_worklist_tail = (struct worklist *)sentinel.wk_list.le_prev; LIST_REMOVE(&sentinel, wk_list); PRELE(curproc); return (matchcnt); } /* * Move dependencies from one buffer to another. */ int softdep_move_dependencies(oldbp, newbp) struct buf *oldbp; struct buf *newbp; { struct worklist *wk, *wktail; struct ufsmount *ump; int dirty; if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL) return (0); KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0, ("softdep_move_dependencies called on non-softdep filesystem")); dirty = 0; wktail = NULL; ump = VFSTOUFS(wk->wk_mp); ACQUIRE_LOCK(ump); while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); if (wk->wk_type == D_BMSAFEMAP && bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp)) dirty = 1; if (wktail == NULL) LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); else LIST_INSERT_AFTER(wktail, wk, wk_list); wktail = wk; } FREE_LOCK(ump); return (dirty); } /* * Purge the work list of all items associated with a particular mount point. */ int softdep_flushworklist(oldmnt, countp, td) struct mount *oldmnt; int *countp; struct thread *td; { struct vnode *devvp; struct ufsmount *ump; int count, error; /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing * creates. We continue until no more worklist dependencies * are found. 
*/ *countp = 0; error = 0; ump = VFSTOUFS(oldmnt); devvp = ump->um_devvp; while ((count = softdep_process_worklist(oldmnt, 1)) > 0) { *countp += count; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(devvp, MNT_WAIT, td); VOP_UNLOCK(devvp, 0); if (error != 0) break; } return (error); } #define SU_WAITIDLE_RETRIES 20 static int softdep_waitidle(struct mount *mp, int flags __unused) { struct ufsmount *ump; struct vnode *devvp; struct thread *td; int error, i; ump = VFSTOUFS(mp); devvp = ump->um_devvp; td = curthread; error = 0; ACQUIRE_LOCK(ump); for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) { ump->softdep_req = 1; KASSERT((flags & FORCECLOSE) == 0 || ump->softdep_on_worklist == 0, ("softdep_waitidle: work added after flush")); msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP, "softdeps", 10 * hz); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(devvp, MNT_WAIT, td); VOP_UNLOCK(devvp, 0); ACQUIRE_LOCK(ump); if (error != 0) break; } ump->softdep_req = 0; if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) { error = EBUSY; printf("softdep_waitidle: Failed to flush worklist for %p\n", mp); } FREE_LOCK(ump); return (error); } /* * Flush all vnodes and worklist items associated with a specified mount point. */ int softdep_flushfiles(oldmnt, flags, td) struct mount *oldmnt; int flags; struct thread *td; { #ifdef QUOTA struct ufsmount *ump; int i; #endif int error, early, depcount, loopcnt, retry_flush_count, retry; int morework; KASSERT(MOUNTEDSOFTDEP(oldmnt) != 0, ("softdep_flushfiles called on non-softdep filesystem")); loopcnt = 10; retry_flush_count = 3; retry_flush: error = 0; /* * Alternately flush the vnodes associated with the mount * point and process any dependencies that the flushing * creates. In theory, this loop can happen at most twice, * but we give it a few extra just to be sure. */ for (; loopcnt > 0; loopcnt--) { /* * Do another flush in case any vnodes were brought in * as part of the cleanup operations. */ early = retry_flush_count == 1 || (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) == 0 ? 0 : EARLYFLUSH; if ((error = ffs_flushfiles(oldmnt, flags | early, td)) != 0) break; if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 || depcount == 0) break; } /* * If we are unmounting then it is an error to fail. If we * are simply trying to downgrade to read-only, then filesystem * activity can keep us busy forever, so we just fail with EBUSY. */ if (loopcnt == 0) { if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) panic("softdep_flushfiles: looping"); error = EBUSY; } if (!error) error = softdep_waitidle(oldmnt, flags); if (!error) { if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) { retry = 0; MNT_ILOCK(oldmnt); KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0, ("softdep_flushfiles: !MNTK_NOINSMNTQ")); morework = oldmnt->mnt_nvnodelistsize > 0; #ifdef QUOTA ump = VFSTOUFS(oldmnt); UFS_LOCK(ump); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] != NULLVP) morework = 1; } UFS_UNLOCK(ump); #endif if (morework) { if (--retry_flush_count > 0) { retry = 1; loopcnt = 3; } else error = EBUSY; } MNT_IUNLOCK(oldmnt); if (retry) goto retry_flush; } } return (error); } /* * Structure hashing. * * There are four types of structures that can be looked up: * 1) pagedep structures identified by mount point, inode number, * and logical block. * 2) inodedep structures identified by mount point and inode number. * 3) newblk structures identified by mount point and * physical block number. 
* 4) bmsafemap structures identified by mount point and * cylinder group number. * * The "pagedep" and "inodedep" dependency structures are hashed * separately from the file blocks and inodes to which they correspond. * This separation helps when the in-memory copy of an inode or * file block must be replaced. It also obviates the need to access * an inode or file page when simply updating (or de-allocating) * dependency structures. Lookup of newblk structures is needed to * find newly allocated blocks when trying to associate them with * their allocdirect or allocindir structure. * * The lookup routines optionally create and hash a new instance when * an existing entry is not found. The bmsafemap lookup routine always * allocates a new structure if an existing one is not found. */ #define DEPALLOC 0x0001 /* allocate structure if lookup fails */ /* * Structures and routines associated with pagedep caching. */ #define PAGEDEP_HASH(ump, inum, lbn) \ (&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size]) static int pagedep_find(pagedephd, ino, lbn, pagedeppp) struct pagedep_hashhead *pagedephd; ino_t ino; ufs_lbn_t lbn; struct pagedep **pagedeppp; { struct pagedep *pagedep; LIST_FOREACH(pagedep, pagedephd, pd_hash) { if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) { *pagedeppp = pagedep; return (1); } } *pagedeppp = NULL; return (0); } /* * Look up a pagedep. Return 1 if found, 0 otherwise. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in pagedeppp. * This routine must be called with splbio interrupts blocked. */ static int pagedep_lookup(mp, bp, ino, lbn, flags, pagedeppp) struct mount *mp; struct buf *bp; ino_t ino; ufs_lbn_t lbn; int flags; struct pagedep **pagedeppp; { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; struct worklist *wk; struct ufsmount *ump; int ret; int i; ump = VFSTOUFS(mp); LOCK_OWNED(ump); if (bp) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type == D_PAGEDEP) { *pagedeppp = WK_PAGEDEP(wk); return (1); } } } pagedephd = PAGEDEP_HASH(ump, ino, lbn); ret = pagedep_find(pagedephd, ino, lbn, pagedeppp); if (ret) { if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp) WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list); return (1); } if ((flags & DEPALLOC) == 0) return (0); FREE_LOCK(ump); pagedep = malloc(sizeof(struct pagedep), M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp); ACQUIRE_LOCK(ump); ret = pagedep_find(pagedephd, ino, lbn, pagedeppp); if (*pagedeppp) { /* * This should never happen since we only create pagedeps * with the vnode lock held. Could be an assert. */ WORKITEM_FREE(pagedep, D_PAGEDEP); return (ret); } pagedep->pd_ino = ino; pagedep->pd_lbn = lbn; LIST_INIT(&pagedep->pd_dirremhd); LIST_INIT(&pagedep->pd_pendinghd); for (i = 0; i < DAHASHSZ; i++) LIST_INIT(&pagedep->pd_diraddhd[i]); LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); *pagedeppp = pagedep; return (0); } /* * Structures and routines associated with inodedep caching. */ #define INODEDEP_HASH(ump, inum) \ (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size]) static int inodedep_find(inodedephd, inum, inodedeppp) struct inodedep_hashhead *inodedephd; ino_t inum; struct inodedep **inodedeppp; { struct inodedep *inodedep; LIST_FOREACH(inodedep, inodedephd, id_hash) if (inum == inodedep->id_ino) break; if (inodedep) { *inodedeppp = inodedep; return (1); } *inodedeppp = NULL; return (0); } /* * Look up an inodedep. 
Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in inodedeppp. * This routine must be called with splbio interrupts blocked. */ static int inodedep_lookup(mp, inum, flags, inodedeppp) struct mount *mp; ino_t inum; int flags; struct inodedep **inodedeppp; { struct inodedep *inodedep; struct inodedep_hashhead *inodedephd; struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); LOCK_OWNED(ump); fs = ump->um_fs; inodedephd = INODEDEP_HASH(ump, inum); if (inodedep_find(inodedephd, inum, inodedeppp)) return (1); if ((flags & DEPALLOC) == 0) return (0); /* * If the system is over its limit and our filesystem is * responsible for more than our share of that usage and * we are not in a rush, request some inodedep cleanup. */ if (softdep_excess_items(ump, D_INODEDEP)) schedule_cleanup(mp); else FREE_LOCK(ump); inodedep = malloc(sizeof(struct inodedep), M_INODEDEP, M_SOFTDEP_FLAGS); workitem_alloc(&inodedep->id_list, D_INODEDEP, mp); ACQUIRE_LOCK(ump); if (inodedep_find(inodedephd, inum, inodedeppp)) { WORKITEM_FREE(inodedep, D_INODEDEP); return (1); } inodedep->id_fs = fs; inodedep->id_ino = inum; inodedep->id_state = ALLCOMPLETE; inodedep->id_nlinkdelta = 0; inodedep->id_savedino1 = NULL; inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; inodedep->id_savednlink = -1; inodedep->id_bmsafemap = NULL; inodedep->id_mkdiradd = NULL; LIST_INIT(&inodedep->id_dirremhd); LIST_INIT(&inodedep->id_pendinghd); LIST_INIT(&inodedep->id_inowait); LIST_INIT(&inodedep->id_bufwait); TAILQ_INIT(&inodedep->id_inoreflst); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); TAILQ_INIT(&inodedep->id_extupdt); TAILQ_INIT(&inodedep->id_newextupdt); TAILQ_INIT(&inodedep->id_freeblklst); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); *inodedeppp = inodedep; return (0); } /* * Structures and routines associated with newblk caching. */ #define NEWBLK_HASH(ump, inum) \ (&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size]) static int newblk_find(newblkhd, newblkno, flags, newblkpp) struct newblk_hashhead *newblkhd; ufs2_daddr_t newblkno; int flags; struct newblk **newblkpp; { struct newblk *newblk; LIST_FOREACH(newblk, newblkhd, nb_hash) { if (newblkno != newblk->nb_newblkno) continue; /* * If we're creating a new dependency don't match those that * have already been converted to allocdirects. This is for * a frag extend. */ if ((flags & DEPALLOC) && newblk->nb_list.wk_type != D_NEWBLK) continue; break; } if (newblk) { *newblkpp = newblk; return (1); } *newblkpp = NULL; return (0); } /* * Look up a newblk. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in newblkpp. 
*/ static int newblk_lookup(mp, newblkno, flags, newblkpp) struct mount *mp; ufs2_daddr_t newblkno; int flags; struct newblk **newblkpp; { struct newblk *newblk; struct newblk_hashhead *newblkhd; struct ufsmount *ump; ump = VFSTOUFS(mp); LOCK_OWNED(ump); newblkhd = NEWBLK_HASH(ump, newblkno); if (newblk_find(newblkhd, newblkno, flags, newblkpp)) return (1); if ((flags & DEPALLOC) == 0) return (0); if (softdep_excess_items(ump, D_NEWBLK) || softdep_excess_items(ump, D_ALLOCDIRECT) || softdep_excess_items(ump, D_ALLOCINDIR)) schedule_cleanup(mp); else FREE_LOCK(ump); newblk = malloc(sizeof(union allblk), M_NEWBLK, M_SOFTDEP_FLAGS | M_ZERO); workitem_alloc(&newblk->nb_list, D_NEWBLK, mp); ACQUIRE_LOCK(ump); if (newblk_find(newblkhd, newblkno, flags, newblkpp)) { WORKITEM_FREE(newblk, D_NEWBLK); return (1); } newblk->nb_freefrag = NULL; LIST_INIT(&newblk->nb_indirdeps); LIST_INIT(&newblk->nb_newdirblk); LIST_INIT(&newblk->nb_jwork); newblk->nb_state = ATTACHED; newblk->nb_newblkno = newblkno; LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); *newblkpp = newblk; return (0); } /* * Structures and routines associated with freed indirect block caching. */ #define INDIR_HASH(ump, blkno) \ (&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size]) /* * Look up an indirect block in the indir hash table. The freework is * removed and potentially freed. The caller must do a blocking journal * write before writing to the blkno. */ static int indirblk_lookup(mp, blkno) struct mount *mp; ufs2_daddr_t blkno; { struct freework *freework; struct indir_hashhead *wkhd; struct ufsmount *ump; ump = VFSTOUFS(mp); wkhd = INDIR_HASH(ump, blkno); TAILQ_FOREACH(freework, wkhd, fw_next) { if (freework->fw_blkno != blkno) continue; indirblk_remove(freework); return (1); } return (0); } /* * Insert an indirect block represented by freework into the indirblk * hash table so that it may prevent the block from being re-used prior * to the journal being written. */ static void indirblk_insert(freework) struct freework *freework; { struct jblocks *jblocks; struct jseg *jseg; struct ufsmount *ump; ump = VFSTOUFS(freework->fw_list.wk_mp); jblocks = ump->softdep_jblocks; jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst); if (jseg == NULL) return; LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs); TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next); freework->fw_state &= ~DEPCOMPLETE; } static void indirblk_remove(freework) struct freework *freework; { struct ufsmount *ump; ump = VFSTOUFS(freework->fw_list.wk_mp); LIST_REMOVE(freework, fw_segs); TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next); freework->fw_state |= DEPCOMPLETE; if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) WORKITEM_FREE(freework, D_FREEWORK); } /* * Executed during filesystem initialization before * mounting any filesystems. */ void softdep_initialize() { TAILQ_INIT(&softdepmounts); #ifdef __LP64__ max_softdeps = desiredvnodes * 4; #else max_softdeps = desiredvnodes * 2; #endif /* initialize bioops hack */ bioops.io_start = softdep_disk_io_initiation; bioops.io_complete = softdep_disk_write_complete; bioops.io_deallocate = softdep_deallocate_dependencies; bioops.io_countdeps = softdep_count_dependencies; softdep_ast_cleanup = softdep_ast_cleanup_proc; /* Initialize the callout with an mtx. */ callout_init_mtx(&softdep_callout, &lk, 0); } /* * Executed after all filesystems have been unmounted during * filesystem module unload. 
*/ void softdep_uninitialize() { /* clear bioops hack */ bioops.io_start = NULL; bioops.io_complete = NULL; bioops.io_deallocate = NULL; bioops.io_countdeps = NULL; softdep_ast_cleanup = NULL; callout_drain(&softdep_callout); } /* * Called at mount time to notify the dependency code that a * filesystem wishes to use it. */ int softdep_mount(devvp, mp, fs, cred) struct vnode *devvp; struct mount *mp; struct fs *fs; struct ucred *cred; { struct csum_total cstotal; struct mount_softdeps *sdp; struct ufsmount *ump; struct cg *cgp; struct buf *bp; int i, error, cyl; sdp = malloc(sizeof(struct mount_softdeps), M_MOUNTDATA, M_WAITOK | M_ZERO); MNT_ILOCK(mp); mp->mnt_flag = (mp->mnt_flag & ~MNT_ASYNC) | MNT_SOFTDEP; if ((mp->mnt_kern_flag & MNTK_SOFTDEP) == 0) { mp->mnt_kern_flag = (mp->mnt_kern_flag & ~MNTK_ASYNC) | MNTK_SOFTDEP | MNTK_NOASYNC; } ump = VFSTOUFS(mp); ump->um_softdep = sdp; MNT_IUNLOCK(mp); rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock"); sdp->sd_ump = ump; LIST_INIT(&ump->softdep_workitem_pending); LIST_INIT(&ump->softdep_journal_pending); TAILQ_INIT(&ump->softdep_unlinked); LIST_INIT(&ump->softdep_dirtycg); ump->softdep_worklist_tail = NULL; ump->softdep_on_worklist = 0; ump->softdep_deps = 0; LIST_INIT(&ump->softdep_mkdirlisthd); ump->pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &ump->pagedep_hash_size); ump->pagedep_nextclean = 0; ump->inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &ump->inodedep_hash_size); ump->inodedep_nextclean = 0; ump->newblk_hashtbl = hashinit(max_softdeps / 2, M_NEWBLK, &ump->newblk_hash_size); ump->bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &ump->bmsafemap_hash_size); i = 1 << (ffs(desiredvnodes / 10) - 1); ump->indir_hashtbl = malloc(i * sizeof(struct indir_hashhead), M_FREEWORK, M_WAITOK); ump->indir_hash_size = i - 1; for (i = 0; i <= ump->indir_hash_size; i++) TAILQ_INIT(&ump->indir_hashtbl[i]); ACQUIRE_GBLLOCK(&lk); TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next); FREE_GBLLOCK(&lk); if ((fs->fs_flags & FS_SUJ) && (error = journal_mount(mp, fs, cred)) != 0) { printf("Failed to start journal: %d\n", error); softdep_unmount(mp); return (error); } /* * Start our flushing thread in the bufdaemon process. */ ACQUIRE_LOCK(ump); ump->softdep_flags |= FLUSH_STARTING; FREE_LOCK(ump); kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc, &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker", mp->mnt_stat.f_mntonname); ACQUIRE_LOCK(ump); while ((ump->softdep_flags & FLUSH_STARTING) != 0) { msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, "sdstart", hz / 2); } FREE_LOCK(ump); /* * When doing soft updates, the counters in the * superblock may have gotten out of sync. Recomputation * can take a long time and can be deferred for background * fsck. However, the old behavior of scanning the cylinder * groups and recalculating them at mount time is available * by setting vfs.ffs.compute_summary_at_mount to one. 
*/ if (compute_summary_at_mount == 0 || fs->fs_clean != 0) return (0); bzero(&cstotal, sizeof cstotal); for (cyl = 0; cyl < fs->fs_ncg; cyl++) { if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), fs->fs_cgsize, cred, &bp)) != 0) { brelse(bp); softdep_unmount(mp); return (error); } cgp = (struct cg *)bp->b_data; cstotal.cs_nffree += cgp->cg_cs.cs_nffree; cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree; cstotal.cs_nifree += cgp->cg_cs.cs_nifree; cstotal.cs_ndir += cgp->cg_cs.cs_ndir; fs->fs_cs(fs, cyl) = cgp->cg_cs; brelse(bp); } #ifdef DEBUG if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) printf("%s: superblock summary recomputed\n", fs->fs_fsmnt); #endif bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal); return (0); } void softdep_unmount(mp) struct mount *mp; { struct ufsmount *ump; #ifdef INVARIANTS int i; #endif KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_unmount called on non-softdep filesystem")); ump = VFSTOUFS(mp); MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_SOFTDEP; if (MOUNTEDSUJ(mp) == 0) { MNT_IUNLOCK(mp); } else { mp->mnt_flag &= ~MNT_SUJ; MNT_IUNLOCK(mp); journal_unmount(ump); } /* * Shut down our flushing thread. The check for NULL is needed in case * softdep_mount errors out before the thread has been created. */ if (ump->softdep_flushtd != NULL) { ACQUIRE_LOCK(ump); ump->softdep_flags |= FLUSH_EXIT; wakeup(&ump->softdep_flushtd); msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP, "sdwait", 0); KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0, ("Thread shutdown failed")); } /* * Free up our resources. */ ACQUIRE_GBLLOCK(&lk); TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next); FREE_GBLLOCK(&lk); rw_destroy(LOCK_PTR(ump)); hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size); hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size); hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size); hashdestroy(ump->bmsafemap_hashtbl, M_BMSAFEMAP, ump->bmsafemap_hash_size); free(ump->indir_hashtbl, M_FREEWORK); #ifdef INVARIANTS for (i = 0; i <= D_LAST; i++) KASSERT(ump->softdep_curdeps[i] == 0, ("Unmount %s: Dep type %s != 0 (%ld)", ump->um_fs->fs_fsmnt, TYPENAME(i), ump->softdep_curdeps[i])); #endif free(ump->um_softdep, M_MOUNTDATA); } static struct jblocks * jblocks_create(void) { struct jblocks *jblocks; jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO); TAILQ_INIT(&jblocks->jb_segs); jblocks->jb_avail = 10; jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail, M_JBLOCKS, M_WAITOK | M_ZERO); return (jblocks); } static ufs2_daddr_t jblocks_alloc(jblocks, bytes, actual) struct jblocks *jblocks; int bytes; int *actual; { ufs2_daddr_t daddr; struct jextent *jext; int freecnt; int blocks; blocks = bytes / DEV_BSIZE; jext = &jblocks->jb_extent[jblocks->jb_head]; freecnt = jext->je_blocks - jblocks->jb_off; if (freecnt == 0) { jblocks->jb_off = 0; if (++jblocks->jb_head > jblocks->jb_used) jblocks->jb_head = 0; jext = &jblocks->jb_extent[jblocks->jb_head]; freecnt = jext->je_blocks; } if (freecnt > blocks) freecnt = blocks; *actual = freecnt * DEV_BSIZE; daddr = jext->je_daddr + jblocks->jb_off; jblocks->jb_off += freecnt; jblocks->jb_free -= freecnt; return (daddr); } static void jblocks_free(jblocks, mp, bytes) struct jblocks *jblocks; struct mount *mp; int bytes; { LOCK_OWNED(VFSTOUFS(mp)); jblocks->jb_free += bytes / DEV_BSIZE; if (jblocks->jb_suspended) worklist_speedup(mp); wakeup(jblocks); } static void jblocks_destroy(jblocks) struct jblocks *jblocks; { if (jblocks->jb_extent) free(jblocks->jb_extent, M_JBLOCKS); 
free(jblocks, M_JBLOCKS); } static void jblocks_add(jblocks, daddr, blocks) struct jblocks *jblocks; ufs2_daddr_t daddr; int blocks; { struct jextent *jext; jblocks->jb_blocks += blocks; jblocks->jb_free += blocks; jext = &jblocks->jb_extent[jblocks->jb_used]; /* Adding the first block. */ if (jext->je_daddr == 0) { jext->je_daddr = daddr; jext->je_blocks = blocks; return; } /* Extending the last extent. */ if (jext->je_daddr + jext->je_blocks == daddr) { jext->je_blocks += blocks; return; } /* Adding a new extent. */ if (++jblocks->jb_used == jblocks->jb_avail) { jblocks->jb_avail *= 2; jext = malloc(sizeof(struct jextent) * jblocks->jb_avail, M_JBLOCKS, M_WAITOK | M_ZERO); memcpy(jext, jblocks->jb_extent, sizeof(struct jextent) * jblocks->jb_used); free(jblocks->jb_extent, M_JBLOCKS); jblocks->jb_extent = jext; } jext = &jblocks->jb_extent[jblocks->jb_used]; jext->je_daddr = daddr; jext->je_blocks = blocks; return; } int softdep_journal_lookup(mp, vpp) struct mount *mp; struct vnode **vpp; { struct componentname cnp; struct vnode *dvp; ino_t sujournal; int error; error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp); if (error) return (error); bzero(&cnp, sizeof(cnp)); cnp.cn_nameiop = LOOKUP; cnp.cn_flags = ISLASTCN; cnp.cn_thread = curthread; cnp.cn_cred = curthread->td_ucred; cnp.cn_pnbuf = SUJ_FILE; cnp.cn_nameptr = SUJ_FILE; cnp.cn_namelen = strlen(SUJ_FILE); error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal); vput(dvp); if (error != 0) return (error); error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp); return (error); } /* * Open and verify the journal file. */ static int journal_mount(mp, fs, cred) struct mount *mp; struct fs *fs; struct ucred *cred; { struct jblocks *jblocks; struct ufsmount *ump; struct vnode *vp; struct inode *ip; ufs2_daddr_t blkno; int bcount; int error; int i; ump = VFSTOUFS(mp); ump->softdep_journal_tail = NULL; ump->softdep_on_journal = 0; ump->softdep_accdeps = 0; ump->softdep_req = 0; ump->softdep_jblocks = NULL; error = softdep_journal_lookup(mp, &vp); if (error != 0) { printf("Failed to find journal. Use tunefs to create one\n"); return (error); } ip = VTOI(vp); if (ip->i_size < SUJ_MIN) { error = ENOSPC; goto out; } bcount = lblkno(fs, ip->i_size); /* Only use whole blocks. */ jblocks = jblocks_create(); for (i = 0; i < bcount; i++) { error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL); if (error) break; jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag)); } if (error) { jblocks_destroy(jblocks); goto out; } jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */ jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */ ump->softdep_jblocks = jblocks; out: if (error == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_SUJ; mp->mnt_flag &= ~MNT_SOFTDEP; MNT_IUNLOCK(mp); /* * Only validate the journal contents if the * filesystem is clean, otherwise we write the logs * but they'll never be used. If the filesystem was * still dirty when we mounted it the journal is * invalid and a new journal can only be valid if it * starts from a clean mount. */ if (fs->fs_clean) { DIP_SET(ip, i_modrev, fs->fs_mtime); ip->i_flags |= IN_MODIFIED; ffs_update(vp, 1); } } vput(vp); return (error); } static void journal_unmount(ump) struct ufsmount *ump; { if (ump->softdep_jblocks) jblocks_destroy(ump->softdep_jblocks); ump->softdep_jblocks = NULL; } /* * Called when a journal record is ready to be written. Space is allocated * and the journal entry is created when the journal is flushed to stable * store. 
*/ static void add_to_journal(wk) struct worklist *wk; { struct ufsmount *ump; ump = VFSTOUFS(wk->wk_mp); LOCK_OWNED(ump); if (wk->wk_state & ONWORKLIST) panic("add_to_journal: %s(0x%X) already on list", TYPENAME(wk->wk_type), wk->wk_state); wk->wk_state |= ONWORKLIST | DEPCOMPLETE; if (LIST_EMPTY(&ump->softdep_journal_pending)) { ump->softdep_jblocks->jb_age = ticks; LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list); } else LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list); ump->softdep_journal_tail = wk; ump->softdep_on_journal += 1; } /* * Remove an arbitrary item from the journal worklist while maintaining the * tail pointer. This happens when a new operation obviates the need to * journal an old operation. */ static void remove_from_journal(wk) struct worklist *wk; { struct ufsmount *ump; ump = VFSTOUFS(wk->wk_mp); LOCK_OWNED(ump); #ifdef SUJ_DEBUG { struct worklist *wkn; LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list) if (wkn == wk) break; if (wkn == NULL) panic("remove_from_journal: %p is not in journal", wk); } #endif /* * We emulate a TAILQ to save space in most structures which do not * require TAILQ semantics. Here we must update the tail position * when removing the tail which is not the final entry. This works * only if the worklist linkage is at the beginning of the structure. */ if (ump->softdep_journal_tail == wk) ump->softdep_journal_tail = (struct worklist *)wk->wk_list.le_prev; WORKLIST_REMOVE(wk); ump->softdep_on_journal -= 1; } /* * Check for journal space as well as dependency limits so the prelink * code can throttle both journaled and non-journaled filesystems. * Threshold is 0 for low and 1 for min. */ static int journal_space(ump, thresh) struct ufsmount *ump; int thresh; { struct jblocks *jblocks; int limit, avail; jblocks = ump->softdep_jblocks; if (jblocks == NULL) return (1); /* * We use a tighter restriction here to prevent request_cleanup() * running in threads from running into locks we currently hold. * We have to be over the limit and our filesystem has to be * responsible for more than our share of that usage. */ limit = (max_softdeps / 10) * 9; if (dep_current[D_INODEDEP] > limit && ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads) return (0); if (thresh) thresh = jblocks->jb_min; else thresh = jblocks->jb_low; avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE; avail = jblocks->jb_free - avail; return (avail > thresh); } static void journal_suspend(ump) struct ufsmount *ump; { struct jblocks *jblocks; struct mount *mp; mp = UFSTOVFS(ump); jblocks = ump->softdep_jblocks; MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { stat_journal_min++; mp->mnt_kern_flag |= MNTK_SUSPEND; mp->mnt_susp_owner = ump->softdep_flushtd; } jblocks->jb_suspended = 1; MNT_IUNLOCK(mp); } static int journal_unsuspend(struct ufsmount *ump) { struct jblocks *jblocks; struct mount *mp; mp = UFSTOVFS(ump); jblocks = ump->softdep_jblocks; if (jblocks != NULL && jblocks->jb_suspended && journal_space(ump, jblocks->jb_min)) { jblocks->jb_suspended = 0; FREE_LOCK(ump); mp->mnt_susp_owner = curthread; vfs_write_resume(mp, 0); ACQUIRE_LOCK(ump); return (1); } return (0); }
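journal_space() above converts the records still waiting to be written (softdep_on_journal * JREC_SIZE bytes) into DEV_BSIZE device blocks, charges them against the free journal blocks, and compares the remainder with the low or min watermark that journal_mount() derived from the journal size. A worked numeric sketch, with every figure a made-up example:

#include <stdio.h>

#define JREC_SIZE 32  /* journal record size, as in the kernel */
#define DEV_BSIZE 512

int
main(void)
{
    /* All figures are made-up examples. */
    int jb_total = 2048;        /* journal size in blocks at mount */
    int jb_low = jb_total / 3;  /* speed up below 33% (cf. journal_mount) */
    int jb_min = jb_total / 10; /* suspend below 10% (cf. journal_mount) */
    int jb_free = 900;          /* blocks currently free */
    int on_journal = 5000;      /* records waiting to be written */
    int avail;

    /* Charge pending records, in device blocks, against free space. */
    avail = (on_journal * JREC_SIZE) / DEV_BSIZE; /* 312 blocks */
    avail = jb_free - avail;                      /* 588 blocks */

    printf("avail %d, low %d, min %d\n", avail, jb_low, jb_min);
    printf("above low watermark: %d\n", avail > jb_low); /* 0: speed up */
    printf("above min watermark: %d\n", avail > jb_min); /* 1: no suspend */
    return (0);
}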
/* * Called before any allocation function to be certain that there is * sufficient space in the journal prior to creating any new records. * Since in the case of block allocation we may have multiple locked * buffers at the time of the actual allocation, we cannot block * when the journal records are created. Doing so would create a deadlock * if any of these buffers needed to be flushed to reclaim space. Instead * we require a sufficiently large amount of available space such that * each thread in the system could have passed this allocation check and * still have sufficient free space. With 20% of a minimum journal size * of 1MB we have 6553 records available. */ int softdep_prealloc(vp, waitok) struct vnode *vp; int waitok; { struct ufsmount *ump; KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, ("softdep_prealloc called on non-softdep filesystem")); /* * Nothing to do if we are not running journaled soft updates. * If we currently hold the snapshot lock, we must avoid * handling other resources that could cause deadlock. Do not * touch the quota vnode since it is typically recursed with * other vnode locks held. */ if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)) || (vp->v_vflag & VV_SYSTEM) != 0) return (0); ump = VFSTOUFS(vp->v_mount); ACQUIRE_LOCK(ump); if (journal_space(ump, 0)) { FREE_LOCK(ump); return (0); } stat_journal_low++; FREE_LOCK(ump); if (waitok == MNT_NOWAIT) return (ENOSPC); /* * Attempt to sync this vnode once to flush any journal * work attached to it. */ if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0) ffs_syncvnode(vp, waitok, 0); ACQUIRE_LOCK(ump); process_removes(vp); process_truncates(vp); if (journal_space(ump, 0) == 0) { softdep_speedup(ump); if (journal_space(ump, 1) == 0) journal_suspend(ump); } FREE_LOCK(ump); return (0); } /* * Before adjusting a link count on a vnode verify that we have sufficient * journal space. If not, process operations that depend on the currently * locked pair of vnodes to try to flush space as the syncer, buf daemon, * and softdep flush threads can not acquire these locks to reclaim space. */ static void softdep_prelink(dvp, vp) struct vnode *dvp; struct vnode *vp; { struct ufsmount *ump; ump = VFSTOUFS(dvp->v_mount); LOCK_OWNED(ump); /* * Nothing to do if we have sufficient journal space. * If we currently hold the snapshot lock, we must avoid * handling other resources that could cause deadlock. */ if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp)))) return; stat_journal_low++; FREE_LOCK(ump); if (vp) ffs_syncvnode(vp, MNT_NOWAIT, 0); ffs_syncvnode(dvp, MNT_WAIT, 0); ACQUIRE_LOCK(ump); /* Process vp before dvp as it may create .. removes. 
*/ if (vp) { process_removes(vp); process_truncates(vp); } process_removes(dvp); process_truncates(dvp); softdep_speedup(ump); process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT); if (journal_space(ump, 0) == 0) { softdep_speedup(ump); if (journal_space(ump, 1) == 0) journal_suspend(ump); } } static void jseg_write(ump, jseg, data) struct ufsmount *ump; struct jseg *jseg; uint8_t *data; { struct jsegrec *rec; rec = (struct jsegrec *)data; rec->jsr_seq = jseg->js_seq; rec->jsr_oldest = jseg->js_oldseq; rec->jsr_cnt = jseg->js_cnt; rec->jsr_blocks = jseg->js_size / ump->um_devvp->v_bufobj.bo_bsize; rec->jsr_crc = 0; rec->jsr_time = ump->um_fs->fs_mtime; } static inline void inoref_write(inoref, jseg, rec) struct inoref *inoref; struct jseg *jseg; struct jrefrec *rec; { inoref->if_jsegdep->jd_seg = jseg; rec->jr_ino = inoref->if_ino; rec->jr_parent = inoref->if_parent; rec->jr_nlink = inoref->if_nlink; rec->jr_mode = inoref->if_mode; rec->jr_diroff = inoref->if_diroff; } static void jaddref_write(jaddref, jseg, data) struct jaddref *jaddref; struct jseg *jseg; uint8_t *data; { struct jrefrec *rec; rec = (struct jrefrec *)data; rec->jr_op = JOP_ADDREF; inoref_write(&jaddref->ja_ref, jseg, rec); } static void jremref_write(jremref, jseg, data) struct jremref *jremref; struct jseg *jseg; uint8_t *data; { struct jrefrec *rec; rec = (struct jrefrec *)data; rec->jr_op = JOP_REMREF; inoref_write(&jremref->jr_ref, jseg, rec); } static void jmvref_write(jmvref, jseg, data) struct jmvref *jmvref; struct jseg *jseg; uint8_t *data; { struct jmvrec *rec; rec = (struct jmvrec *)data; rec->jm_op = JOP_MVREF; rec->jm_ino = jmvref->jm_ino; rec->jm_parent = jmvref->jm_parent; rec->jm_oldoff = jmvref->jm_oldoff; rec->jm_newoff = jmvref->jm_newoff; } static void jnewblk_write(jnewblk, jseg, data) struct jnewblk *jnewblk; struct jseg *jseg; uint8_t *data; { struct jblkrec *rec; jnewblk->jn_jsegdep->jd_seg = jseg; rec = (struct jblkrec *)data; rec->jb_op = JOP_NEWBLK; rec->jb_ino = jnewblk->jn_ino; rec->jb_blkno = jnewblk->jn_blkno; rec->jb_lbn = jnewblk->jn_lbn; rec->jb_frags = jnewblk->jn_frags; rec->jb_oldfrags = jnewblk->jn_oldfrags; } static void jfreeblk_write(jfreeblk, jseg, data) struct jfreeblk *jfreeblk; struct jseg *jseg; uint8_t *data; { struct jblkrec *rec; jfreeblk->jf_dep.jb_jsegdep->jd_seg = jseg; rec = (struct jblkrec *)data; rec->jb_op = JOP_FREEBLK; rec->jb_ino = jfreeblk->jf_ino; rec->jb_blkno = jfreeblk->jf_blkno; rec->jb_lbn = jfreeblk->jf_lbn; rec->jb_frags = jfreeblk->jf_frags; rec->jb_oldfrags = 0; } static void jfreefrag_write(jfreefrag, jseg, data) struct jfreefrag *jfreefrag; struct jseg *jseg; uint8_t *data; { struct jblkrec *rec; jfreefrag->fr_jsegdep->jd_seg = jseg; rec = (struct jblkrec *)data; rec->jb_op = JOP_FREEBLK; rec->jb_ino = jfreefrag->fr_ino; rec->jb_blkno = jfreefrag->fr_blkno; rec->jb_lbn = jfreefrag->fr_lbn; rec->jb_frags = jfreefrag->fr_frags; rec->jb_oldfrags = 0; } static void jtrunc_write(jtrunc, jseg, data) struct jtrunc *jtrunc; struct jseg *jseg; uint8_t *data; { struct jtrncrec *rec; jtrunc->jt_dep.jb_jsegdep->jd_seg = jseg; rec = (struct jtrncrec *)data; rec->jt_op = JOP_TRUNC; rec->jt_ino = jtrunc->jt_ino; rec->jt_size = jtrunc->jt_size; rec->jt_extsize = jtrunc->jt_extsize; } static void jfsync_write(jfsync, jseg, data) struct jfsync *jfsync; struct jseg *jseg; uint8_t *data; { struct jtrncrec *rec; rec = (struct jtrncrec *)data; rec->jt_op = JOP_SYNC; rec->jt_ino = jfsync->jfs_ino; rec->jt_size = jfsync->jfs_size; rec->jt_extsize = jfsync->jfs_extsize; } 
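The j*_write() routines above each marshal one fixed-size record into the next JREC_SIZE-byte slot of a journal buffer; softdep_process_journal() below reserves the first slot of every device block for a segment header and sizes each write from the pending record count. A self-contained sketch of that layout and arithmetic follows; the record struct and all sizes here are illustrative stand-ins, not the on-disk SUJ format.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEV_BSIZE 512
#define FS_BSIZE 32768
#define REC_SIZE 32 /* illustrative stand-in for JREC_SIZE */
#define howmany(x, y) (((x) + ((y) - 1)) / (y))

/* Illustrative fixed-size record, padded out to REC_SIZE bytes. */
struct rec {
    uint32_t r_op; /* 0 = segment header, 1 = payload */
    uint32_t r_ino;
    uint64_t r_blkno;
    uint8_t r_pad[REC_SIZE - 16];
};

int
main(void)
{
    _Alignas(uint64_t) uint8_t block[DEV_BSIZE];
    struct rec *rec;
    int jrecmin, jrecmax, cnt, off, size;

    /* One slot per device block is reserved for the segment header. */
    jrecmin = DEV_BSIZE / REC_SIZE - 1;         /* 15 */
    jrecmax = (FS_BSIZE / DEV_BSIZE) * jrecmin; /* 64 * 15 = 960 */

    /* Size a write the way softdep_process_journal() does. */
    cnt = 100; /* pending records */
    if (cnt == 0)
        size = DEV_BSIZE;
    else if (cnt < jrecmax)
        size = howmany(cnt, jrecmin) * DEV_BSIZE; /* 7 * 512 = 3584 */
    else
        size = FS_BSIZE;

    /* Fill one device block: header first, then payload records. */
    memset(block, 0, sizeof(block));
    rec = (struct rec *)block;
    rec->r_op = 0;
    for (off = REC_SIZE; off < DEV_BSIZE; off += REC_SIZE) {
        rec = (struct rec *)(block + off);
        rec->r_op = 1;
        rec->r_ino = 42;
        rec->r_blkno = 1234;
    }
    printf("jrecmin %d, jrecmax %d, %d records -> %d byte write\n",
        jrecmin, jrecmax, cnt, size);
    return (0);
}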
static void softdep_flushjournal(mp) struct mount *mp; { struct jblocks *jblocks; struct ufsmount *ump; if (MOUNTEDSUJ(mp) == 0) return; ump = VFSTOUFS(mp); jblocks = ump->softdep_jblocks; ACQUIRE_LOCK(ump); while (ump->softdep_on_journal) { jblocks->jb_needseg = 1; softdep_process_journal(mp, NULL, MNT_WAIT); } FREE_LOCK(ump); } static void softdep_synchronize_completed(struct bio *); static void softdep_synchronize(struct bio *, struct ufsmount *, void *); static void softdep_synchronize_completed(bp) struct bio *bp; { struct jseg *oldest; struct jseg *jseg; struct ufsmount *ump; /* * caller1 marks the last segment written before we issued the * synchronize cache. */ jseg = bp->bio_caller1; if (jseg == NULL) { g_destroy_bio(bp); return; } ump = VFSTOUFS(jseg->js_list.wk_mp); ACQUIRE_LOCK(ump); oldest = NULL; /* * Mark all the journal entries waiting on the synchronize cache * as completed so they may continue on. */ while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) { jseg->js_state |= COMPLETE; oldest = jseg; jseg = TAILQ_PREV(jseg, jseglst, js_next); } /* * Restart deferred journal entry processing from the oldest * completed jseg. */ if (oldest) complete_jsegs(oldest); FREE_LOCK(ump); g_destroy_bio(bp); } /* * Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering * barriers. The journal must be written prior to any blocks that depend * on it and the journal cannot be released until the blocks have been * written. This code handles both barriers simultaneously. */ static void softdep_synchronize(bp, ump, caller1) struct bio *bp; struct ufsmount *ump; void *caller1; { bp->bio_cmd = BIO_FLUSH; bp->bio_flags |= BIO_ORDERED; bp->bio_data = NULL; bp->bio_offset = ump->um_cp->provider->mediasize; bp->bio_length = 0; bp->bio_done = softdep_synchronize_completed; bp->bio_caller1 = caller1; g_io_request(bp, (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private); } /* * Flush some journal records to disk. */ static void softdep_process_journal(mp, needwk, flags) struct mount *mp; struct worklist *needwk; int flags; { struct jblocks *jblocks; struct ufsmount *ump; struct worklist *wk; struct jseg *jseg; struct buf *bp; struct bio *bio; uint8_t *data; struct fs *fs; int shouldflush; int segwritten; int jrecmin; /* Minimum records per block. */ int jrecmax; /* Maximum records per block. */ int size; int cnt; int off; int devbsize; if (MOUNTEDSUJ(mp) == 0) return; shouldflush = softdep_flushcache; bio = NULL; jseg = NULL; ump = VFSTOUFS(mp); LOCK_OWNED(ump); fs = ump->um_fs; jblocks = ump->softdep_jblocks; devbsize = ump->um_devvp->v_bufobj.bo_bsize; /* * We write anywhere between a disk block and fs block. The upper * bound is picked to prevent buffer cache fragmentation and limit * processing time per I/O. */ jrecmin = (devbsize / JREC_SIZE) - 1; /* -1 for seg header */ jrecmax = (fs->fs_bsize / devbsize) * jrecmin; segwritten = 0; for (;;) { cnt = ump->softdep_on_journal; /* * Criteria for writing a segment: * 1) We have a full block. * 2) We're called from jwait() and haven't found the * journal item yet. * 3) Always write if needseg is set. * 4) If we are called from process_worklist and have * not yet written anything we write a partial block * to enforce a 1 second maximum latency on journal * entries. */ if (cnt < (jrecmax - 1) && needwk == NULL && jblocks->jb_needseg == 0 && (segwritten || cnt == 0)) break; cnt++; /* * Verify some free journal space. 
softdep_prealloc() should * guarantee that we don't run out so this is indicative of * a problem with the flow control. Try to recover * gracefully in any event. */ while (jblocks->jb_free == 0) { if (flags != MNT_WAIT) break; printf("softdep: Out of journal space!\n"); softdep_speedup(ump); msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz); } FREE_LOCK(ump); jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS); workitem_alloc(&jseg->js_list, D_JSEG, mp); LIST_INIT(&jseg->js_entries); LIST_INIT(&jseg->js_indirs); jseg->js_state = ATTACHED; if (shouldflush == 0) jseg->js_state |= COMPLETE; else if (bio == NULL) bio = g_alloc_bio(); jseg->js_jblocks = jblocks; bp = geteblk(fs->fs_bsize, 0); ACQUIRE_LOCK(ump); /* * If there was a race while we were allocating the block * and jseg, the entry we care about was likely written. * We bail out in both the WAIT and NOWAIT case and assume * the caller will loop if the entry it cares about is * not written. */ cnt = ump->softdep_on_journal; if (cnt + jblocks->jb_needseg == 0 || jblocks->jb_free == 0) { bp->b_flags |= B_INVAL | B_NOCACHE; WORKITEM_FREE(jseg, D_JSEG); FREE_LOCK(ump); brelse(bp); ACQUIRE_LOCK(ump); break; } /* * Calculate the disk block size required for the available * records rounded to the min size. */ if (cnt == 0) size = devbsize; else if (cnt < jrecmax) size = howmany(cnt, jrecmin) * devbsize; else size = fs->fs_bsize; /* * Allocate a disk block for this journal data and account * for truncation of the requested size if enough contiguous * space was not available. */ bp->b_blkno = jblocks_alloc(jblocks, size, &size); bp->b_lblkno = bp->b_blkno; bp->b_offset = bp->b_blkno * DEV_BSIZE; bp->b_bcount = size; bp->b_flags &= ~B_INVAL; bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY; /* * Initialize our jseg with cnt records. Assign the next * sequence number to it and link it in-order. */ cnt = MIN(cnt, (size / devbsize) * jrecmin); jseg->js_buf = bp; jseg->js_cnt = cnt; jseg->js_refs = cnt + 1; /* Self ref. */ jseg->js_size = size; jseg->js_seq = jblocks->jb_nextseq++; if (jblocks->jb_oldestseg == NULL) jblocks->jb_oldestseg = jseg; jseg->js_oldseq = jblocks->jb_oldestseg->js_seq; TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next); if (jblocks->jb_writeseg == NULL) jblocks->jb_writeseg = jseg; /* * Start filling in records from the pending list. */ data = bp->b_data; off = 0; /* * Always put a header on the first block. * XXX As with below, there might not be a chance to get * into the loop. Ensure that something valid is written. */ jseg_write(ump, jseg, data); off += JREC_SIZE; data = bp->b_data + off; /* * XXX Something is wrong here. There's no work to do, * but we need to perform an I/O and allow it to complete * anyway. */ if (LIST_EMPTY(&ump->softdep_journal_pending)) stat_emptyjblocks++; while ((wk = LIST_FIRST(&ump->softdep_journal_pending)) != NULL) { if (cnt == 0) break; /* Place a segment header on every device block. 
*/ if ((off % devbsize) == 0) { jseg_write(ump, jseg, data); off += JREC_SIZE; data = bp->b_data + off; } if (wk == needwk) needwk = NULL; remove_from_journal(wk); wk->wk_state |= INPROGRESS; WORKLIST_INSERT(&jseg->js_entries, wk); switch (wk->wk_type) { case D_JADDREF: jaddref_write(WK_JADDREF(wk), jseg, data); break; case D_JREMREF: jremref_write(WK_JREMREF(wk), jseg, data); break; case D_JMVREF: jmvref_write(WK_JMVREF(wk), jseg, data); break; case D_JNEWBLK: jnewblk_write(WK_JNEWBLK(wk), jseg, data); break; case D_JFREEBLK: jfreeblk_write(WK_JFREEBLK(wk), jseg, data); break; case D_JFREEFRAG: jfreefrag_write(WK_JFREEFRAG(wk), jseg, data); break; case D_JTRUNC: jtrunc_write(WK_JTRUNC(wk), jseg, data); break; case D_JFSYNC: jfsync_write(WK_JFSYNC(wk), jseg, data); break; default: panic("process_journal: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } off += JREC_SIZE; data = bp->b_data + off; cnt--; } /* Clear any remaining space so we don't leak kernel data */ if (size > off) bzero(data, size - off); /* * Write this one buffer and continue. */ segwritten = 1; jblocks->jb_needseg = 0; WORKLIST_INSERT(&bp->b_dep, &jseg->js_list); FREE_LOCK(ump); pbgetvp(ump->um_devvp, bp); /* * We only do the blocking wait once we find the journal * entry we're looking for. */ if (needwk == NULL && flags == MNT_WAIT) bwrite(bp); else bawrite(bp); ACQUIRE_LOCK(ump); } /* * If we wrote a segment issue a synchronize cache so the journal * is reflected on disk before the data is written. Since reclaiming * journal space also requires writing a journal record this * process also enforces a barrier before reclamation. */ if (segwritten && shouldflush) { softdep_synchronize(bio, ump, TAILQ_LAST(&jblocks->jb_segs, jseglst)); } else if (bio) g_destroy_bio(bio); /* * If we've suspended the filesystem because we ran out of journal * space either try to sync it here to make some progress or * unsuspend it if we already have. */ if (flags == 0 && jblocks->jb_suspended) { if (journal_unsuspend(ump)) return; FREE_LOCK(ump); VFS_SYNC(mp, MNT_NOWAIT); ffs_sbupdate(ump, MNT_WAIT, 0); ACQUIRE_LOCK(ump); } } /* * Complete a jseg, allowing all dependencies awaiting journal writes * to proceed. Each journal dependency also attaches a jsegdep to dependent * structures so that the journal segment can be freed to reclaim space. */ static void complete_jseg(jseg) struct jseg *jseg; { struct worklist *wk; struct jmvref *jmvref; #ifdef INVARIANTS int i = 0; #endif while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) { WORKLIST_REMOVE(wk); wk->wk_state &= ~INPROGRESS; wk->wk_state |= COMPLETE; KASSERT(i++ < jseg->js_cnt, ("handle_written_jseg: overflow %d >= %d", i - 1, jseg->js_cnt)); switch (wk->wk_type) { case D_JADDREF: handle_written_jaddref(WK_JADDREF(wk)); break; case D_JREMREF: handle_written_jremref(WK_JREMREF(wk)); break; case D_JMVREF: rele_jseg(jseg); /* No jsegdep. */ jmvref = WK_JMVREF(wk); LIST_REMOVE(jmvref, jm_deps); if ((jmvref->jm_pagedep->pd_state & ONWORKLIST) == 0) free_pagedep(jmvref->jm_pagedep); WORKITEM_FREE(jmvref, D_JMVREF); break; case D_JNEWBLK: handle_written_jnewblk(WK_JNEWBLK(wk)); break; case D_JFREEBLK: handle_written_jblkdep(&WK_JFREEBLK(wk)->jf_dep); break; case D_JTRUNC: handle_written_jblkdep(&WK_JTRUNC(wk)->jt_dep); break; case D_JFSYNC: rele_jseg(jseg); /* No jsegdep. 
*/ WORKITEM_FREE(wk, D_JFSYNC); break; case D_JFREEFRAG: handle_written_jfreefrag(WK_JFREEFRAG(wk)); break; default: panic("handle_written_jseg: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } /* Release the self reference so the structure may be freed. */ rele_jseg(jseg); } /* * Determine which jsegs are ready for completion processing. Waits for * synchronize cache to complete as well as forcing in-order completion * of journal entries. */ static void complete_jsegs(jseg) struct jseg *jseg; { struct jblocks *jblocks; struct jseg *jsegn; jblocks = jseg->js_jblocks; /* * Don't allow out of order completions. If this isn't the first * block wait for it to write before we're done. */ if (jseg != jblocks->jb_writeseg) return; /* Iterate through available jsegs processing their entries. */ while (jseg && (jseg->js_state & ALLCOMPLETE) == ALLCOMPLETE) { jblocks->jb_oldestwrseq = jseg->js_oldseq; jsegn = TAILQ_NEXT(jseg, js_next); complete_jseg(jseg); jseg = jsegn; } jblocks->jb_writeseg = jseg; /* * Attempt to free jsegs now that oldestwrseq may have advanced. */ free_jsegs(jblocks); } /* * Mark a jseg as DEPCOMPLETE and throw away the buffer. Attempt to handle * the final completions. */ static void handle_written_jseg(jseg, bp) struct jseg *jseg; struct buf *bp; { if (jseg->js_refs == 0) panic("handle_written_jseg: No self-reference on %p", jseg); jseg->js_state |= DEPCOMPLETE; /* * We'll never need this buffer again, set flags so it will be * discarded. */ bp->b_flags |= B_INVAL | B_NOCACHE; pbrelvp(bp); complete_jsegs(jseg); } static inline struct jsegdep * inoref_jseg(inoref) struct inoref *inoref; { struct jsegdep *jsegdep; jsegdep = inoref->if_jsegdep; inoref->if_jsegdep = NULL; return (jsegdep); } /* * Called once a jremref has made it to stable store. The jremref is marked * complete and we attempt to free it. Any pagedeps writes sleeping waiting * for the jremref to complete will be awoken by free_jremref. */ static void handle_written_jremref(jremref) struct jremref *jremref; { struct inodedep *inodedep; struct jsegdep *jsegdep; struct dirrem *dirrem; /* Grab the jsegdep. */ jsegdep = inoref_jseg(&jremref->jr_ref); /* * Remove us from the inoref list. */ if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0, &inodedep) == 0) panic("handle_written_jremref: Lost inodedep"); TAILQ_REMOVE(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); /* * Complete the dirrem. */ dirrem = jremref->jr_dirrem; jremref->jr_dirrem = NULL; LIST_REMOVE(jremref, jr_deps); jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; jwork_insert(&dirrem->dm_jwork, jsegdep); if (LIST_EMPTY(&dirrem->dm_jremrefhd) && (dirrem->dm_state & COMPLETE) != 0) add_to_worklist(&dirrem->dm_list, 0); free_jremref(jremref); } /* * Called once a jaddref has made it to stable store. The dependency is * marked complete and any dependent structures are added to the inode * bufwait list to be completed as soon as it is written. If a bitmap write * depends on this entry we move the inode into the inodedephd of the * bmsafemap dependency and attempt to remove the jaddref from the bmsafemap. */ static void handle_written_jaddref(jaddref) struct jaddref *jaddref; { struct jsegdep *jsegdep; struct inodedep *inodedep; struct diradd *diradd; struct mkdir *mkdir; /* Grab the jsegdep. 
*/ jsegdep = inoref_jseg(&jaddref->ja_ref); mkdir = NULL; diradd = NULL; if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, 0, &inodedep) == 0) panic("handle_written_jaddref: Lost inodedep."); if (jaddref->ja_diradd == NULL) panic("handle_written_jaddref: No dependency"); if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) { diradd = jaddref->ja_diradd; WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list); } else if (jaddref->ja_state & MKDIR_PARENT) { mkdir = jaddref->ja_mkdir; WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list); } else if (jaddref->ja_state & MKDIR_BODY) mkdir = jaddref->ja_mkdir; else panic("handle_written_jaddref: Unknown dependency %p", jaddref->ja_diradd); jaddref->ja_diradd = NULL; /* also clears ja_mkdir */ /* * Remove us from the inode list. */ TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); /* * The mkdir may be waiting on the jaddref to clear before freeing. */ if (mkdir) { KASSERT(mkdir->md_list.wk_type == D_MKDIR, ("handle_written_jaddref: Incorrect type for mkdir %s", TYPENAME(mkdir->md_list.wk_type))); mkdir->md_jaddref = NULL; diradd = mkdir->md_diradd; mkdir->md_state |= DEPCOMPLETE; complete_mkdir(mkdir); } jwork_insert(&diradd->da_jwork, jsegdep); if (jaddref->ja_state & NEWBLOCK) { inodedep->id_state |= ONDEPLIST; LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd, inodedep, id_deps); } free_jaddref(jaddref); } /* * Called once a jnewblk journal is written. The allocdirect or allocindir * is placed in the bmsafemap to await notification of a written bitmap. If * the operation was canceled we add the segdep to the appropriate * dependency to free the journal space once the canceling operation * completes. */ static void handle_written_jnewblk(jnewblk) struct jnewblk *jnewblk; { struct bmsafemap *bmsafemap; struct freefrag *freefrag; struct freework *freework; struct jsegdep *jsegdep; struct newblk *newblk; /* Grab the jsegdep. */ jsegdep = jnewblk->jn_jsegdep; jnewblk->jn_jsegdep = NULL; if (jnewblk->jn_dep == NULL) panic("handle_written_jnewblk: No dependency for the segdep."); switch (jnewblk->jn_dep->wk_type) { case D_NEWBLK: case D_ALLOCDIRECT: case D_ALLOCINDIR: /* * Add the written block to the bmsafemap so it can * be notified when the bitmap is on disk. */ newblk = WK_NEWBLK(jnewblk->jn_dep); newblk->nb_jnewblk = NULL; if ((newblk->nb_state & GOINGAWAY) == 0) { bmsafemap = newblk->nb_bmsafemap; newblk->nb_state |= ONDEPLIST; LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); } jwork_insert(&newblk->nb_jwork, jsegdep); break; case D_FREEFRAG: /* * A newblock being removed by a freefrag when replaced by * frag extension. */ freefrag = WK_FREEFRAG(jnewblk->jn_dep); freefrag->ff_jdep = NULL; jwork_insert(&freefrag->ff_jwork, jsegdep); break; case D_FREEWORK: /* * A direct block was removed by truncate. */ freework = WK_FREEWORK(jnewblk->jn_dep); freework->fw_jnewblk = NULL; jwork_insert(&freework->fw_freeblks->fb_jwork, jsegdep); break; default: panic("handle_written_jnewblk: Unknown type %d.", jnewblk->jn_dep->wk_type); } jnewblk->jn_dep = NULL; free_jnewblk(jnewblk); } /* * Cancel a jfreefrag that won't be needed, probably due to colliding with * an in-flight allocation that has not yet been committed. Divorce us * from the freefrag and mark it DEPCOMPLETE so that it may be added * to the worklist. 
*/ static void cancel_jfreefrag(jfreefrag) struct jfreefrag *jfreefrag; { struct freefrag *freefrag; if (jfreefrag->fr_jsegdep) { free_jsegdep(jfreefrag->fr_jsegdep); jfreefrag->fr_jsegdep = NULL; } freefrag = jfreefrag->fr_freefrag; jfreefrag->fr_freefrag = NULL; free_jfreefrag(jfreefrag); freefrag->ff_state |= DEPCOMPLETE; CTR1(KTR_SUJ, "cancel_jfreefrag: blkno %jd", freefrag->ff_blkno); } /* * Free a jfreefrag when the parent freefrag is rendered obsolete. */ static void free_jfreefrag(jfreefrag) struct jfreefrag *jfreefrag; { if (jfreefrag->fr_state & INPROGRESS) WORKLIST_REMOVE(&jfreefrag->fr_list); else if (jfreefrag->fr_state & ONWORKLIST) remove_from_journal(&jfreefrag->fr_list); if (jfreefrag->fr_freefrag != NULL) panic("free_jfreefrag: Still attached to a freefrag."); WORKITEM_FREE(jfreefrag, D_JFREEFRAG); } /* * Called when the journal write for a jfreefrag completes. The parent * freefrag is added to the worklist if this completes its dependencies. */ static void handle_written_jfreefrag(jfreefrag) struct jfreefrag *jfreefrag; { struct jsegdep *jsegdep; struct freefrag *freefrag; /* Grab the jsegdep. */ jsegdep = jfreefrag->fr_jsegdep; jfreefrag->fr_jsegdep = NULL; freefrag = jfreefrag->fr_freefrag; if (freefrag == NULL) panic("handle_written_jfreefrag: No freefrag."); freefrag->ff_state |= DEPCOMPLETE; freefrag->ff_jdep = NULL; jwork_insert(&freefrag->ff_jwork, jsegdep); if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freefrag->ff_list, 0); jfreefrag->fr_freefrag = NULL; free_jfreefrag(jfreefrag); } /* * Called when the journal write for a jfreeblk completes. The jfreeblk * is removed from the freeblks list of pending journal writes and the * jsegdep is moved to the freeblks jwork to be completed when all blocks * have been reclaimed. */ static void handle_written_jblkdep(jblkdep) struct jblkdep *jblkdep; { struct freeblks *freeblks; struct jsegdep *jsegdep; /* Grab the jsegdep. */ jsegdep = jblkdep->jb_jsegdep; jblkdep->jb_jsegdep = NULL; freeblks = jblkdep->jb_freeblks; LIST_REMOVE(jblkdep, jb_deps); jwork_insert(&freeblks->fb_jwork, jsegdep); /* * If the freeblks is all journaled, we can add it to the worklist. */ if (LIST_EMPTY(&freeblks->fb_jblkdephd) && (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freeblks->fb_list, WK_NODELAY); free_jblkdep(jblkdep); } static struct jsegdep * newjsegdep(struct worklist *wk) { struct jsegdep *jsegdep; jsegdep = malloc(sizeof(*jsegdep), M_JSEGDEP, M_SOFTDEP_FLAGS); workitem_alloc(&jsegdep->jd_list, D_JSEGDEP, wk->wk_mp); jsegdep->jd_seg = NULL; return (jsegdep); } static struct jmvref * newjmvref(dp, ino, oldoff, newoff) struct inode *dp; ino_t ino; off_t oldoff; off_t newoff; { struct jmvref *jmvref; jmvref = malloc(sizeof(*jmvref), M_JMVREF, M_SOFTDEP_FLAGS); workitem_alloc(&jmvref->jm_list, D_JMVREF, ITOVFS(dp)); jmvref->jm_list.wk_state = ATTACHED | DEPCOMPLETE; jmvref->jm_parent = dp->i_number; jmvref->jm_ino = ino; jmvref->jm_oldoff = oldoff; jmvref->jm_newoff = newoff; return (jmvref); } /* * Allocate a new jremref that tracks the removal of ip from dp with the * directory entry offset of diroff. Mark the entry as ATTACHED and * DEPCOMPLETE as we have all the information required for the journal write * and the directory has already been removed from the buffer. The caller * is responsible for linking the jremref into the pagedep and adding it * to the journal to write. 
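* * (Editorial aside with a hypothetical caller sketch; the real callers * live in the directory removal paths elsewhere in this file and the * nlink value shown is illustrative only: * *	jremref = newjremref(dirrem, dp, ip, diroff, ip->i_effnlink - 1); *	LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, jr_deps); *	add_to_journal(&jremref->jr_list); * * handle_written_jremref() above undoes the jr_deps linkage once the * record reaches stable store.)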
The MKDIR_PARENT flag is set if we're doing * a DOTDOT addition so handle_workitem_remove() can properly assign * the jsegdep when we're done. */ static struct jremref * newjremref(struct dirrem *dirrem, struct inode *dp, struct inode *ip, off_t diroff, nlink_t nlink) { struct jremref *jremref; jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS); workitem_alloc(&jremref->jr_list, D_JREMREF, ITOVFS(dp)); jremref->jr_state = ATTACHED; newinoref(&jremref->jr_ref, ip->i_number, dp->i_number, diroff, nlink, ip->i_mode); jremref->jr_dirrem = dirrem; return (jremref); } static inline void newinoref(struct inoref *inoref, ino_t ino, ino_t parent, off_t diroff, nlink_t nlink, uint16_t mode) { inoref->if_jsegdep = newjsegdep(&inoref->if_list); inoref->if_diroff = diroff; inoref->if_ino = ino; inoref->if_parent = parent; inoref->if_nlink = nlink; inoref->if_mode = mode; } /* * Allocate a new jaddref to track the addition of ino to dp at diroff. The * directory offset may not be known until later. The caller is responsible * for adding the entry to the journal when this information is available. * nlink should be the link count prior to the addition and mode is only * required to have the correct FMT. */ static struct jaddref * newjaddref(struct inode *dp, ino_t ino, off_t diroff, int16_t nlink, uint16_t mode) { struct jaddref *jaddref; jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS); workitem_alloc(&jaddref->ja_list, D_JADDREF, ITOVFS(dp)); jaddref->ja_state = ATTACHED; jaddref->ja_mkdir = NULL; newinoref(&jaddref->ja_ref, ino, dp->i_number, diroff, nlink, mode); return (jaddref); } /* * Create a new free dependency for a freework. The caller is responsible * for adjusting the reference count when it has the lock held. The freedep * will track an outstanding bitmap write that will ultimately clear the * freework to continue. */ static struct freedep * newfreedep(struct freework *freework) { struct freedep *freedep; freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS); workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp); freedep->fd_freework = freework; return (freedep); } /* * Free a freedep structure once the buffer it is linked to is written. If * this is the last reference to the freework, schedule it for completion. */ static void free_freedep(freedep) struct freedep *freedep; { struct freework *freework; freework = freedep->fd_freework; freework->fw_freeblks->fb_cgwait--; if (--freework->fw_ref == 0) freework_enqueue(freework); WORKITEM_FREE(freedep, D_FREEDEP); } /* * Allocate a new freework structure that may be a level in an indirect * when parent is not NULL or a top level block when it is. The top level * freework structures are allocated without the per-filesystem lock held * and before the freeblks is visible outside of softdep_setup_freeblocks(). */ static struct freework * newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal) struct ufsmount *ump; struct freeblks *freeblks; struct freework *parent; ufs_lbn_t lbn; ufs2_daddr_t nb; int frags; int off; int journal; { struct freework *freework; freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); freework->fw_state = ATTACHED; freework->fw_jnewblk = NULL; freework->fw_freeblks = freeblks; freework->fw_parent = parent; freework->fw_lbn = lbn; freework->fw_blkno = nb; freework->fw_frags = frags; freework->fw_indir = NULL; freework->fw_ref = (MOUNTEDSUJ(UFSTOVFS(ump)) == 0 || lbn >= -NXADDR) ?
0 : NINDIR(ump->um_fs) + 1; freework->fw_start = freework->fw_off = off; if (journal) newjfreeblk(freeblks, lbn, nb, frags); if (parent == NULL) { ACQUIRE_LOCK(ump); WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); freeblks->fb_ref++; FREE_LOCK(ump); } return (freework); } /* * Eliminate a jfreeblk for a block that does not need journaling. */ static void cancel_jfreeblk(freeblks, blkno) struct freeblks *freeblks; ufs2_daddr_t blkno; { struct jfreeblk *jfreeblk; struct jblkdep *jblkdep; LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) { if (jblkdep->jb_list.wk_type != D_JFREEBLK) continue; jfreeblk = WK_JFREEBLK(&jblkdep->jb_list); if (jfreeblk->jf_blkno == blkno) break; } if (jblkdep == NULL) return; CTR1(KTR_SUJ, "cancel_jfreeblk: blkno %jd", blkno); free_jsegdep(jblkdep->jb_jsegdep); LIST_REMOVE(jblkdep, jb_deps); WORKITEM_FREE(jfreeblk, D_JFREEBLK); } /* * Allocate a new jfreeblk to journal top level block pointer when truncating * a file. The caller must add this to the worklist when the per-filesystem * lock is held. */ static struct jfreeblk * newjfreeblk(freeblks, lbn, blkno, frags) struct freeblks *freeblks; ufs_lbn_t lbn; ufs2_daddr_t blkno; int frags; { struct jfreeblk *jfreeblk; jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS); workitem_alloc(&jfreeblk->jf_dep.jb_list, D_JFREEBLK, freeblks->fb_list.wk_mp); jfreeblk->jf_dep.jb_jsegdep = newjsegdep(&jfreeblk->jf_dep.jb_list); jfreeblk->jf_dep.jb_freeblks = freeblks; jfreeblk->jf_ino = freeblks->fb_inum; jfreeblk->jf_lbn = lbn; jfreeblk->jf_blkno = blkno; jfreeblk->jf_frags = frags; LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jfreeblk->jf_dep, jb_deps); return (jfreeblk); } /* * The journal is only prepared to handle full-size block numbers, so we * have to adjust the record to reflect the change to a full-size block. * For example, suppose we have a block made up of fragments 8-15 and * want to free its last two fragments. We are given a request that says: * FREEBLK ino=5, blkno=14, lbn=0, frags=2, oldfrags=0 * where frags are the number of fragments to free and oldfrags are the * number of fragments to keep. To block align it, we have to change it to * have a valid full-size blkno, so it becomes: * FREEBLK ino=5, blkno=8, lbn=0, frags=2, oldfrags=6 */ static void adjust_newfreework(freeblks, frag_offset) struct freeblks *freeblks; int frag_offset; { struct jfreeblk *jfreeblk; KASSERT((LIST_FIRST(&freeblks->fb_jblkdephd) != NULL && LIST_FIRST(&freeblks->fb_jblkdephd)->jb_list.wk_type == D_JFREEBLK), ("adjust_newfreework: Missing freeblks dependency")); jfreeblk = WK_JFREEBLK(LIST_FIRST(&freeblks->fb_jblkdephd)); jfreeblk->jf_blkno -= frag_offset; jfreeblk->jf_frags += frag_offset; } /* * Allocate a new jtrunc to track a partial truncation. */ static struct jtrunc * newjtrunc(freeblks, size, extsize) struct freeblks *freeblks; off_t size; int extsize; { struct jtrunc *jtrunc; jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS); workitem_alloc(&jtrunc->jt_dep.jb_list, D_JTRUNC, freeblks->fb_list.wk_mp); jtrunc->jt_dep.jb_jsegdep = newjsegdep(&jtrunc->jt_dep.jb_list); jtrunc->jt_dep.jb_freeblks = freeblks; jtrunc->jt_ino = freeblks->fb_inum; jtrunc->jt_size = size; jtrunc->jt_extsize = extsize; LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jtrunc->jt_dep, jb_deps); return (jtrunc); } /* * If we're canceling a new bitmap we have to search for another ref * to move into the bmsafemap dep. This might be better expressed * with another structure. 
*/ static void move_newblock_dep(jaddref, inodedep) struct jaddref *jaddref; struct inodedep *inodedep; { struct inoref *inoref; struct jaddref *jaddrefn; jaddrefn = NULL; for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; inoref = TAILQ_NEXT(inoref, if_deps)) { if ((jaddref->ja_state & NEWBLOCK) && inoref->if_list.wk_type == D_JADDREF) { jaddrefn = (struct jaddref *)inoref; break; } } if (jaddrefn == NULL) return; jaddrefn->ja_state &= ~(ATTACHED | UNDONE); jaddrefn->ja_state |= jaddref->ja_state & (ATTACHED | UNDONE | NEWBLOCK); jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK); jaddref->ja_state |= ATTACHED; LIST_REMOVE(jaddref, ja_bmdeps); LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, jaddrefn, ja_bmdeps); } /* * Cancel a jaddref either before it has been written or while it is being * written. This happens when a link is removed before the add reaches * the disk. The jaddref dependency is kept linked into the bmsafemap * and inode to prevent the link count or bitmap from reaching the disk * until handle_workitem_remove() re-adjusts the counts and bitmaps as * required. * * Returns 1 if the canceled addref requires journaling of the remove and * 0 otherwise. */ static int cancel_jaddref(jaddref, inodedep, wkhd) struct jaddref *jaddref; struct inodedep *inodedep; struct workhead *wkhd; { struct inoref *inoref; struct jsegdep *jsegdep; int needsj; KASSERT((jaddref->ja_state & COMPLETE) == 0, ("cancel_jaddref: Canceling complete jaddref")); if (jaddref->ja_state & (INPROGRESS | COMPLETE)) needsj = 1; else needsj = 0; if (inodedep == NULL) if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, 0, &inodedep) == 0) panic("cancel_jaddref: Lost inodedep"); /* * We must adjust the nlink of any reference operation that follows * us so that it is consistent with the in-memory reference. This * ensures that inode nlink rollbacks always have the correct link. */ if (needsj == 0) { for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; inoref = TAILQ_NEXT(inoref, if_deps)) { if (inoref->if_state & GOINGAWAY) break; inoref->if_nlink--; } } jsegdep = inoref_jseg(&jaddref->ja_ref); if (jaddref->ja_state & NEWBLOCK) move_newblock_dep(jaddref, inodedep); wake_worklist(&jaddref->ja_list); jaddref->ja_mkdir = NULL; if (jaddref->ja_state & INPROGRESS) { jaddref->ja_state &= ~INPROGRESS; WORKLIST_REMOVE(&jaddref->ja_list); jwork_insert(wkhd, jsegdep); } else { free_jsegdep(jsegdep); if (jaddref->ja_state & DEPCOMPLETE) remove_from_journal(&jaddref->ja_list); } jaddref->ja_state |= (GOINGAWAY | DEPCOMPLETE); /* * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove * can arrange for them to be freed with the bitmap. Otherwise we * no longer need this addref attached to the inoreflst and it * will incorrectly adjust nlink if we leave it. */ if ((jaddref->ja_state & NEWBLOCK) == 0) { TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); jaddref->ja_state |= COMPLETE; free_jaddref(jaddref); return (needsj); } /* * Leave the head of the list for jsegdeps for fast merging. */ if (LIST_FIRST(wkhd) != NULL) { jaddref->ja_state |= ONWORKLIST; LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list); } else WORKLIST_INSERT(wkhd, &jaddref->ja_list); return (needsj); } /* * Attempt to free a jaddref structure when some work completes. This * should only succeed once the entry is written and all dependencies have * been notified. 
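* * Editorial note: ALLCOMPLETE is the usual softdep compound state mask, * assumed here (per softdep.h) to be ATTACHED | COMPLETE | DEPCOMPLETE, * so the guard below is conceptually: * *	if ((jaddref->ja_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) != *	    (ATTACHED | COMPLETE | DEPCOMPLETE)) *		return;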
*/ static void free_jaddref(jaddref) struct jaddref *jaddref; { if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (jaddref->ja_ref.if_jsegdep) panic("free_jaddref: segdep attached to jaddref %p(0x%X)\n", jaddref, jaddref->ja_state); if (jaddref->ja_state & NEWBLOCK) LIST_REMOVE(jaddref, ja_bmdeps); if (jaddref->ja_state & (INPROGRESS | ONWORKLIST)) panic("free_jaddref: Bad state %p(0x%X)", jaddref, jaddref->ja_state); if (jaddref->ja_mkdir != NULL) panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state); WORKITEM_FREE(jaddref, D_JADDREF); } /* * Free a jremref structure once it has been written or discarded. */ static void free_jremref(jremref) struct jremref *jremref; { if (jremref->jr_ref.if_jsegdep) free_jsegdep(jremref->jr_ref.if_jsegdep); if (jremref->jr_state & INPROGRESS) panic("free_jremref: IO still pending"); WORKITEM_FREE(jremref, D_JREMREF); } /* * Free a jnewblk structure. */ static void free_jnewblk(jnewblk) struct jnewblk *jnewblk; { if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE) return; LIST_REMOVE(jnewblk, jn_deps); if (jnewblk->jn_dep != NULL) panic("free_jnewblk: Dependency still attached."); WORKITEM_FREE(jnewblk, D_JNEWBLK); } /* * Cancel a jnewblk which has been made redundant by frag extension. */ static void cancel_jnewblk(jnewblk, wkhd) struct jnewblk *jnewblk; struct workhead *wkhd; { struct jsegdep *jsegdep; CTR1(KTR_SUJ, "cancel_jnewblk: blkno %jd", jnewblk->jn_blkno); jsegdep = jnewblk->jn_jsegdep; if (jnewblk->jn_jsegdep == NULL || jnewblk->jn_dep == NULL) panic("cancel_jnewblk: Invalid state"); jnewblk->jn_jsegdep = NULL; jnewblk->jn_dep = NULL; jnewblk->jn_state |= GOINGAWAY; if (jnewblk->jn_state & INPROGRESS) { jnewblk->jn_state &= ~INPROGRESS; WORKLIST_REMOVE(&jnewblk->jn_list); jwork_insert(wkhd, jsegdep); } else { free_jsegdep(jsegdep); remove_from_journal(&jnewblk->jn_list); } wake_worklist(&jnewblk->jn_list); WORKLIST_INSERT(wkhd, &jnewblk->jn_list); } static void free_jblkdep(jblkdep) struct jblkdep *jblkdep; { if (jblkdep->jb_list.wk_type == D_JFREEBLK) WORKITEM_FREE(jblkdep, D_JFREEBLK); else if (jblkdep->jb_list.wk_type == D_JTRUNC) WORKITEM_FREE(jblkdep, D_JTRUNC); else panic("free_jblkdep: Unexpected type %s", TYPENAME(jblkdep->jb_list.wk_type)); } /* * Free a single jseg once it is no longer referenced in memory or on * disk. Reclaim journal blocks and dependencies waiting for the segment * to disappear. */ static void free_jseg(jseg, jblocks) struct jseg *jseg; struct jblocks *jblocks; { struct freework *freework; /* * Free freework structures that were lingering to indicate freed * indirect blocks that forced journal write ordering on reallocate. */ while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL) indirblk_remove(freework); if (jblocks->jb_oldestseg == jseg) jblocks->jb_oldestseg = TAILQ_NEXT(jseg, js_next); TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); jblocks_free(jblocks, jseg->js_list.wk_mp, jseg->js_size); KASSERT(LIST_EMPTY(&jseg->js_entries), ("free_jseg: Freed jseg has valid entries.")); WORKITEM_FREE(jseg, D_JSEG); } /* * Free all jsegs that meet the criteria for being reclaimed and update * oldestseg. */ static void free_jsegs(jblocks) struct jblocks *jblocks; { struct jseg *jseg; /* * Free only those jsegs which have none allocated before them to * preserve the journal space ordering. */ while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) { /* * Only reclaim space when nothing depends on this journal * set and another set has written that it is no longer * valid. 
*/ if (jseg->js_refs != 0) { jblocks->jb_oldestseg = jseg; return; } if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE) break; if (jseg->js_seq > jblocks->jb_oldestwrseq) break; /* * We can free jsegs that didn't write entries when * oldestwrseq == js_seq. */ if (jseg->js_seq == jblocks->jb_oldestwrseq && jseg->js_cnt != 0) break; free_jseg(jseg, jblocks); } /* * If we exited the loop above we still must discover the * oldest valid segment. */ if (jseg) for (jseg = jblocks->jb_oldestseg; jseg != NULL; jseg = TAILQ_NEXT(jseg, js_next)) if (jseg->js_refs != 0) break; jblocks->jb_oldestseg = jseg; /* * The journal has no valid records but some jsegs may still be * waiting on oldestwrseq to advance. We force a small record * out to permit these lingering records to be reclaimed. */ if (jblocks->jb_oldestseg == NULL && !TAILQ_EMPTY(&jblocks->jb_segs)) jblocks->jb_needseg = 1; } /* * Release one reference to a jseg and free it if the count reaches 0. This * should eventually reclaim journal space as well. */ static void rele_jseg(jseg) struct jseg *jseg; { KASSERT(jseg->js_refs > 0, ("free_jseg: Invalid refcnt %d", jseg->js_refs)); if (--jseg->js_refs != 0) return; free_jsegs(jseg->js_jblocks); } /* * Release a jsegdep and decrement the jseg count. */ static void free_jsegdep(jsegdep) struct jsegdep *jsegdep; { if (jsegdep->jd_seg) rele_jseg(jsegdep->jd_seg); WORKITEM_FREE(jsegdep, D_JSEGDEP); } /* * Wait for a journal item to make it to disk. Initiate journal processing * if required. */ static int jwait(wk, waitfor) struct worklist *wk; int waitfor; { LOCK_OWNED(VFSTOUFS(wk->wk_mp)); /* * Blocking journal waits cause slow synchronous behavior. Record * stats on the frequency of these blocking operations. */ if (waitfor == MNT_WAIT) { stat_journal_wait++; switch (wk->wk_type) { case D_JREMREF: case D_JMVREF: stat_jwait_filepage++; break; case D_JTRUNC: case D_JFREEBLK: stat_jwait_freeblks++; break; case D_JNEWBLK: stat_jwait_newblk++; break; case D_JADDREF: stat_jwait_inode++; break; default: break; } } /* * If IO has not started we process the journal. We can't mark the * worklist item as IOWAITING because we drop the lock while * processing the journal and the worklist entry may be freed after * this point. The caller may call back in and re-issue the request. */ if ((wk->wk_state & INPROGRESS) == 0) { softdep_process_journal(wk->wk_mp, wk, waitfor); if (waitfor != MNT_WAIT) return (EBUSY); return (0); } if (waitfor != MNT_WAIT) return (EBUSY); wait_worklist(wk, "jwait"); return (0); } /* * Lookup an inodedep based on an inode pointer and set the nlinkdelta as * appropriate. This is a convenience function to reduce duplicate code * for the setup and revert functions below. */ static struct inodedep * inodedep_lookup_ip(ip) struct inode *ip; { struct inodedep *inodedep; KASSERT(ip->i_nlink >= ip->i_effnlink, ("inodedep_lookup_ip: bad delta")); (void) inodedep_lookup(ITOVFS(ip), ip->i_number, DEPALLOC, &inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked")); return (inodedep); } /* * Called prior to creating a new inode and linking it to a directory. The * jaddref structure must already be allocated by softdep_setup_inomapdep * and it is discovered here so we can initialize the mode and update * nlinkdelta. 
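* * Editorial sketch of the rough call order during a create; the VFS-side * callers live outside this file and are assumed, not shown: * *	softdep_setup_inomapdep()	orders the bitmap write, makes jaddref *	softdep_setup_create()		finds the jaddref, records nlinkdelta *	softdep_setup_directory_add()	supplies the directory entry offset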
*/ void softdep_setup_create(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *jaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_setup_create called on non-softdep filesystem")); KASSERT(ip->i_nlink == 1, ("softdep_setup_create: Invalid link count.")); dvp = ITOV(dp); ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(ip); if (DOINGSUJ(dvp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, ("softdep_setup_create: No addref structure present.")); } softdep_prelink(dvp, NULL); FREE_LOCK(ITOUMP(dp)); } /* * Create a jaddref structure to track the addition of a DOTDOT link when * we are reparenting an inode as part of a rename. This jaddref will be * found by softdep_setup_directory_change. Adjusts nlinkdelta for * non-journaling softdep. */ void softdep_setup_dotdot_link(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *jaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_setup_dotdot_link called on non-softdep filesystem")); dvp = ITOV(dp); jaddref = NULL; /* * We don't set MKDIR_PARENT as this is not tied to a mkdir and * is used as a normal link would be. */ if (DOINGSUJ(dvp)) jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, dp->i_effnlink - 1, dp->i_mode); ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(dp); if (jaddref) TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); softdep_prelink(dvp, ITOV(ip)); FREE_LOCK(ITOUMP(dp)); } /* * Create a jaddref structure to track a new link to an inode. The directory * offset is not known until softdep_setup_directory_add or * softdep_setup_directory_change. Adjusts nlinkdelta for non-journaling * softdep. */ void softdep_setup_link(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *jaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_setup_link called on non-softdep filesystem")); dvp = ITOV(dp); jaddref = NULL; if (DOINGSUJ(dvp)) jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1, ip->i_mode); ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(ip); if (jaddref) TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); softdep_prelink(dvp, ITOV(ip)); FREE_LOCK(ITOUMP(dp)); } /* * Called to create the jaddref structures to track . and .. references as * well as lookup and further initialize the incomplete jaddref created * by softdep_setup_inomapdep when the inode was allocated. Adjusts * nlinkdelta for non-journaling softdep. 
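* * Editorial summary of the three journal references a mkdir involves, as * built below: dotaddref journals the "." entry (its parent is the new * directory itself, MKDIR_BODY); dotdotaddref journals the ".." entry * naming the parent (MKDIR_PARENT); and the jaddref created earlier by * softdep_setup_inomapdep() covers the new directory's entry in dp.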
*/ void softdep_setup_mkdir(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *dotdotaddref; struct jaddref *dotaddref; struct jaddref *jaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_setup_mkdir called on non-softdep filesystem")); dvp = ITOV(dp); dotaddref = dotdotaddref = NULL; if (DOINGSUJ(dvp)) { dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1, ip->i_mode); dotaddref->ja_state |= MKDIR_BODY; dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, dp->i_effnlink - 1, dp->i_mode); dotdotaddref->ja_state |= MKDIR_PARENT; } ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(ip); if (DOINGSUJ(dvp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref != NULL, ("softdep_setup_mkdir: No addref structure present.")); KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_setup_mkdir: bad parent %ju", (uintmax_t)jaddref->ja_parent)); TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref, if_deps); } inodedep = inodedep_lookup_ip(dp); if (DOINGSUJ(dvp)) TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &dotdotaddref->ja_ref, if_deps); softdep_prelink(ITOV(dp), NULL); FREE_LOCK(ITOUMP(dp)); } /* * Called to track nlinkdelta of the inode and parent directories prior to * unlinking a directory. */ void softdep_setup_rmdir(dp, ip) struct inode *dp; struct inode *ip; { struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_setup_rmdir called on non-softdep filesystem")); dvp = ITOV(dp); ACQUIRE_LOCK(ITOUMP(dp)); (void) inodedep_lookup_ip(ip); (void) inodedep_lookup_ip(dp); softdep_prelink(dvp, ITOV(ip)); FREE_LOCK(ITOUMP(dp)); } /* * Called to track nlinkdelta of the inode and parent directories prior to * unlink. */ void softdep_setup_unlink(dp, ip) struct inode *dp; struct inode *ip; { struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_setup_unlink called on non-softdep filesystem")); dvp = ITOV(dp); ACQUIRE_LOCK(ITOUMP(dp)); (void) inodedep_lookup_ip(ip); (void) inodedep_lookup_ip(dp); softdep_prelink(dvp, ITOV(ip)); FREE_LOCK(ITOUMP(dp)); } /* * Called to release the journal structures created by a failed non-directory * creation. Adjusts nlinkdelta for non-journaling softdep. */ void softdep_revert_create(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *jaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS((dp))) != 0, ("softdep_revert_create called on non-softdep filesystem")); dvp = ITOV(dp); ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(ip); if (DOINGSUJ(dvp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_revert_create: addref parent mismatch")); cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); } FREE_LOCK(ITOUMP(dp)); } /* * Called to release the journal structures created by a failed link * addition. Adjusts nlinkdelta for non-journaling softdep. 
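* * Editorial sketch of the revert pattern shared by these routines: the * most recently journaled reference sits at the tail of the inodedep's * id_inoreflst, so backing it out is simply: * *	jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, *	    inoreflst); *	cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);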
*/ void softdep_revert_link(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *jaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_revert_link called on non-softdep filesystem")); dvp = ITOV(dp); ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(ip); if (DOINGSUJ(dvp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_revert_link: addref parent mismatch")); cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); } FREE_LOCK(ITOUMP(dp)); } /* * Called to release the journal structures created by a failed mkdir * attempt. Adjusts nlinkdelta for non-journaling softdep. */ void softdep_revert_mkdir(dp, ip) struct inode *dp; struct inode *ip; { struct inodedep *inodedep; struct jaddref *jaddref; struct jaddref *dotaddref; struct vnode *dvp; KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_revert_mkdir called on non-softdep filesystem")); dvp = ITOV(dp); ACQUIRE_LOCK(ITOUMP(dp)); inodedep = inodedep_lookup_ip(dp); if (DOINGSUJ(dvp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref->ja_parent == ip->i_number, ("softdep_revert_mkdir: dotdot addref parent mismatch")); cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); } inodedep = inodedep_lookup_ip(ip); if (DOINGSUJ(dvp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_revert_mkdir: addref parent mismatch")); dotaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, inoreflst, if_deps); cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); KASSERT(dotaddref->ja_parent == ip->i_number, ("softdep_revert_mkdir: dot addref parent mismatch")); cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait); } FREE_LOCK(ITOUMP(dp)); } /* * Called to correct nlinkdelta after a failed rmdir. */ void softdep_revert_rmdir(dp, ip) struct inode *dp; struct inode *ip; { KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, ("softdep_revert_rmdir called on non-softdep filesystem")); ACQUIRE_LOCK(ITOUMP(dp)); (void) inodedep_lookup_ip(ip); (void) inodedep_lookup_ip(dp); FREE_LOCK(ITOUMP(dp)); } /* * Protecting the freemaps (or bitmaps). * * To eliminate the need to execute fsck before mounting a filesystem * after a power failure, one must (conservatively) guarantee that the * on-disk copy of the bitmaps never indicates that a live inode or block is * free. So, when a block or inode is allocated, the bitmap should be * updated (on disk) before any new pointers. When a block or inode is * freed, the bitmap should not be updated until all pointers have been * reset. The latter dependency is handled by the delayed de-allocation * approach described below for block and inode de-allocation. The former * dependency is handled by calling the following procedure when a block or * inode is allocated. When an inode is allocated an "inodedep" is created * with its DEPCOMPLETE flag cleared until its bitmap is written to disk. * Each "inodedep" is also inserted into the hash indexing structure so * that any additional link additions can be made dependent on the inode * allocation. * * The ufs filesystem maintains a number of free block counts (e.g., per * cylinder group, per cylinder and per <cylinder, rotational position> * pair) in addition to the bitmaps. These counts are used to improve * efficiency during allocation and therefore must be consistent with the * bitmaps. 
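* * (Editorial sketch, not in the original: the allocation-side ordering * rule can be seen in softdep_setup_inomapdep() below, where the inodedep * is hung off the cylinder group buffer's bmsafemap with DEPCOMPLETE * cleared, * *	inodedep->id_bmsafemap = bmsafemap; *	inodedep->id_state &= ~DEPCOMPLETE; * * so structures pointing at the new inode are held back until the bitmap * write completes.)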
* There is no convenient way to guarantee post-crash consistency of these * counts with simple update ordering, for two main reasons: (1) The counts * and bitmaps for a single cylinder group block are not in the same disk * sector. If a disk write is interrupted (e.g., by power failure), one may * be written and the other not. (2) Some of the counts are located in the * superblock rather than the cylinder group block. So, we focus our soft * updates implementation on protecting the bitmaps. When mounting a * filesystem, we recompute the auxiliary counts from the bitmaps. */ /* * Called just after updating the cylinder group block to allocate an inode. */ void softdep_setup_inomapdep(bp, ip, newinum, mode) struct buf *bp; /* buffer for cylgroup block with inode map */ struct inode *ip; /* inode related to allocation */ ino_t newinum; /* new inode number being allocated */ int mode; /* mode of newly allocated inode */ { struct inodedep *inodedep; struct bmsafemap *bmsafemap; struct jaddref *jaddref; struct mount *mp; struct fs *fs; mp = ITOVFS(ip); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_inomapdep called on non-softdep filesystem")); fs = VFSTOUFS(mp)->um_fs; jaddref = NULL; /* * Allocate the journal reference add structure so that the bitmap * can be dependent on it. */ if (MOUNTEDSUJ(mp)) { jaddref = newjaddref(ip, newinum, 0, 0, mode); jaddref->ja_state |= NEWBLOCK; } /* * Create a dependency for the newly allocated inode. * Panic if it already exists as something is seriously wrong. * Otherwise add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. * * We have to preallocate a bmsafemap entry in case it is needed * in bmsafemap_lookup since once we allocate the inodedep, we * have to finish initializing it before we can FREE_LOCK(). * By preallocating, we avoid FREE_LOCK() while doing a malloc * in bmsafemap_lookup. We cannot call bmsafemap_lookup before * creating the inodedep as it can be freed during the time * that we FREE_LOCK() while allocating the inodedep. We must * call workitem_alloc() before entering the locked section as * it also acquires the lock and we must avoid trying to do so * recursively. */ bmsafemap = malloc(sizeof(struct bmsafemap), M_BMSAFEMAP, M_SOFTDEP_FLAGS); workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); ACQUIRE_LOCK(ITOUMP(ip)); if ((inodedep_lookup(mp, newinum, DEPALLOC, &inodedep))) panic("softdep_setup_inomapdep: dependency %p for new " "inode already exists", inodedep); bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum), bmsafemap); if (jaddref) { LIST_INSERT_HEAD(&bmsafemap->sm_jaddrefhd, jaddref, ja_bmdeps); TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); } else { inodedep->id_state |= ONDEPLIST; LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); } inodedep->id_bmsafemap = bmsafemap; inodedep->id_state &= ~DEPCOMPLETE; FREE_LOCK(ITOUMP(ip)); } /* * Called just after updating the cylinder group block to * allocate a block or fragment. */ void softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; /* buffer for cylgroup block with block map */ struct mount *mp; /* filesystem doing allocation */ ufs2_daddr_t newblkno; /* number of newly allocated block */ int frags; /* Number of fragments. */ int oldfrags; /* Previous number of fragments for extend. 
*/ { struct newblk *newblk; struct bmsafemap *bmsafemap; struct jnewblk *jnewblk; struct ufsmount *ump; struct fs *fs; KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_blkmapdep called on non-softdep filesystem")); ump = VFSTOUFS(mp); fs = ump->um_fs; jnewblk = NULL; /* * Create a dependency for the newly allocated block. * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ if (MOUNTEDSUJ(mp)) { jnewblk = malloc(sizeof(*jnewblk), M_JNEWBLK, M_SOFTDEP_FLAGS); workitem_alloc(&jnewblk->jn_list, D_JNEWBLK, mp); jnewblk->jn_jsegdep = newjsegdep(&jnewblk->jn_list); jnewblk->jn_state = ATTACHED; jnewblk->jn_blkno = newblkno; jnewblk->jn_frags = frags; jnewblk->jn_oldfrags = oldfrags; #ifdef SUJ_DEBUG { struct cg *cgp; uint8_t *blksfree; long bno; int i; cgp = (struct cg *)bp->b_data; blksfree = cg_blksfree(cgp); bno = dtogd(fs, jnewblk->jn_blkno); for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) { if (isset(blksfree, bno + i)) panic("softdep_setup_blkmapdep: " "free fragment %d from %d-%d " "state 0x%X dep %p", i, jnewblk->jn_oldfrags, jnewblk->jn_frags, jnewblk->jn_state, jnewblk->jn_dep); } } #endif } CTR3(KTR_SUJ, "softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d", newblkno, frags, oldfrags); ACQUIRE_LOCK(ump); if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, newblkno), NULL); if (jnewblk) { jnewblk->jn_dep = (struct worklist *)newblk; LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk, jn_deps); } else { newblk->nb_state |= ONDEPLIST; LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); } newblk->nb_bmsafemap = bmsafemap; newblk->nb_jnewblk = jnewblk; FREE_LOCK(ump); } #define BMSAFEMAP_HASH(ump, cg) \ (&(ump)->bmsafemap_hashtbl[(cg) & (ump)->bmsafemap_hash_size]) static int bmsafemap_find(bmsafemaphd, cg, bmsafemapp) struct bmsafemap_hashhead *bmsafemaphd; int cg; struct bmsafemap **bmsafemapp; { struct bmsafemap *bmsafemap; LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash) if (bmsafemap->sm_cg == cg) break; if (bmsafemap) { *bmsafemapp = bmsafemap; return (1); } *bmsafemapp = NULL; return (0); } /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when * this routine is called and this routine must be called with * the softdep lock held. To avoid giving up the lock while * allocating a new bmsafemap, a preallocated bmsafemap may be * provided. If it is provided but not needed, it is freed. 
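* * Editorial sketch of the preallocation pattern, taken from the caller * softdep_setup_inomapdep() above: * *	bmsafemap = malloc(sizeof(struct bmsafemap), M_BMSAFEMAP, *	    M_SOFTDEP_FLAGS); *	workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); *	ACQUIRE_LOCK(ump); *	... lookups that must not drop the lock ... *	bmsafemap = bmsafemap_lookup(mp, bp, cg, bmsafemap); * * If an existing entry is found, the preallocated one is released with * WORKITEM_FREE() inside the lookup.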
*/ static struct bmsafemap * bmsafemap_lookup(mp, bp, cg, newbmsafemap) struct mount *mp; struct buf *bp; int cg; struct bmsafemap *newbmsafemap; { struct bmsafemap_hashhead *bmsafemaphd; struct bmsafemap *bmsafemap, *collision; struct worklist *wk; struct ufsmount *ump; ump = VFSTOUFS(mp); LOCK_OWNED(ump); KASSERT(bp != NULL, ("bmsafemap_lookup: missing buffer")); LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type == D_BMSAFEMAP) { if (newbmsafemap) WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP); return (WK_BMSAFEMAP(wk)); } } bmsafemaphd = BMSAFEMAP_HASH(ump, cg); if (bmsafemap_find(bmsafemaphd, cg, &bmsafemap) == 1) { if (newbmsafemap) WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP); return (bmsafemap); } if (newbmsafemap) { bmsafemap = newbmsafemap; } else { FREE_LOCK(ump); bmsafemap = malloc(sizeof(struct bmsafemap), M_BMSAFEMAP, M_SOFTDEP_FLAGS); workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); ACQUIRE_LOCK(ump); } bmsafemap->sm_buf = bp; LIST_INIT(&bmsafemap->sm_inodedephd); LIST_INIT(&bmsafemap->sm_inodedepwr); LIST_INIT(&bmsafemap->sm_newblkhd); LIST_INIT(&bmsafemap->sm_newblkwr); LIST_INIT(&bmsafemap->sm_jaddrefhd); LIST_INIT(&bmsafemap->sm_jnewblkhd); LIST_INIT(&bmsafemap->sm_freehd); LIST_INIT(&bmsafemap->sm_freewr); if (bmsafemap_find(bmsafemaphd, cg, &collision) == 1) { WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); return (collision); } bmsafemap->sm_cg = cg; LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } /* * Direct block allocation dependencies. * * When a new block is allocated, the corresponding disk locations must be * initialized (with zeros or new data) before the on-disk inode points to * them. Also, the freemap from which the block was allocated must be * updated (on disk) before the inode's pointer. These two dependencies are * independent of each other and are needed for all file blocks and indirect * blocks that are pointed to directly by the inode. Just before the * "in-core" version of the inode is updated with a newly allocated block * number, a procedure (below) is called to setup allocation dependency * structures. These structures are removed when the corresponding * dependencies are satisfied or when the block allocation becomes obsolete * (i.e., the file is deleted, the block is de-allocated, or the block is a * fragment that gets upgraded). All of these cases are handled in * procedures described later. * * When a file extension causes a fragment to be upgraded, either to a larger * fragment or to a full block, the on-disk location may change (if the * previous fragment could not simply be extended). In this case, the old * fragment must be de-allocated, but not until after the inode's pointer has * been updated. In most cases, this is handled by later procedures, which * will construct a "freefrag" structure to be added to the workitem queue * when the inode update is complete (or obsolete). The main exception to * this is when an allocation occurs while a pending allocation dependency * (for the same block pointer) remains. This case is handled in the main * allocation dependency setup procedure by immediately freeing the * unreferenced fragments. 
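* * Editorial example of the fragment-upgrade case: if lbn 0 held a 2KB * fragment at (illustrative) blkno 100 and the file grows into a full * block at blkno 200, the routine below is entered with oldblkno = 100 * and newblkno = 200 and immediately builds a freefrag: * *	if (oldblkno && oldblkno != newblkno) *		freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); * * The old fragment is only released once the inode's new pointer is * safely on disk or the allocation becomes obsolete.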
*/ void softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; /* inode to which block is being added */ ufs_lbn_t off; /* block pointer within inode */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 unless frag */ long newsize; /* size of new block */ long oldsize; /* size of old block */ struct buf *bp; /* bp for allocated block */ { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; struct freefrag *freefrag; struct inodedep *inodedep; struct pagedep *pagedep; struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; ufs_lbn_t lbn; lbn = bp->b_lblkno; mp = ITOVFS(ip); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_allocdirect called on non-softdep filesystem")); if (oldblkno && oldblkno != newblkno) freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); else freefrag = NULL; CTR6(KTR_SUJ, "softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd " "off %jd newsize %ld oldsize %ld", ip->i_number, newblkno, oldblkno, off, newsize, oldsize); ACQUIRE_LOCK(ITOUMP(ip)); if (off >= NDADDR) { if (lbn > 0) panic("softdep_setup_allocdirect: bad lbn %jd, off %jd", lbn, off); /* allocating an indirect block */ if (oldblkno != 0) panic("softdep_setup_allocdirect: non-zero indir"); } else { if (off != lbn) panic("softdep_setup_allocdirect: lbn %jd != off %jd", lbn, off); /* * Allocating a direct block. * * If we are allocating a directory block, then we must * allocate an associated pagedep to track additions and * deletions. */ if ((ip->i_mode & IFMT) == IFDIR) pagedep_lookup(mp, bp, ip->i_number, off, DEPALLOC, &pagedep); } if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); KASSERT(newblk->nb_list.wk_type == D_NEWBLK, ("softdep_setup_allocdirect: newblk already initialized")); /* * Convert the newblk to an allocdirect. */ WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT); adp = (struct allocdirect *)newblk; newblk->nb_freefrag = freefrag; adp->ad_offset = off; adp->ad_oldblkno = oldblkno; adp->ad_newsize = newsize; adp->ad_oldsize = oldsize; /* * Finish initializing the journal. */ if ((jnewblk = newblk->nb_jnewblk) != NULL) { jnewblk->jn_ino = ip->i_number; jnewblk->jn_lbn = lbn; add_to_journal(&jnewblk->jn_list); } if (freefrag && freefrag->ff_jdep != NULL && freefrag->ff_jdep->wk_type == D_JFREEFRAG) add_to_journal(freefrag->ff_jdep); inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); adp->ad_inodedep = inodedep; WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the * first uncommitted block (the size of the file stored on disk * ends at the end of the lowest committed fragment, or if there * are no fragments, at the end of the highest committed block). * Since files generally grow, the typical case is that the new * block is to be added at the end of the list. We speed this * special case by checking against the last allocdirect in the * list before laboriously traversing the list looking for the * insertion point. 
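* * Editorial note: with oldadp the current list tail, the test below, * *	oldadp == NULL || oldadp->ad_offset <= off * * is the common append-at-tail fast path; equality falls through to * allocdirect_merge().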
*/ adphead = &inodedep->id_newinoupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); if (oldadp != NULL && oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(ITOUMP(ip)); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocdirect: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(ITOUMP(ip)); } /* * Merge a newer and older journal record to be stored either in a * newblock or freefrag. This handles aggregating journal records for * fragment allocation into a second record as well as replacing a * journal free with an aborted journal allocation. A segment for the * oldest record will be placed on wkhd if it has been written. If not * the segment for the newer record will suffice. */ static struct worklist * jnewblk_merge(new, old, wkhd) struct worklist *new; struct worklist *old; struct workhead *wkhd; { struct jnewblk *njnewblk; struct jnewblk *jnewblk; /* Handle NULLs to simplify callers. */ if (new == NULL) return (old); if (old == NULL) return (new); /* Replace a jfreefrag with a jnewblk. */ if (new->wk_type == D_JFREEFRAG) { if (WK_JNEWBLK(old)->jn_blkno != WK_JFREEFRAG(new)->fr_blkno) panic("jnewblk_merge: blkno mismatch: %p, %p", old, new); cancel_jfreefrag(WK_JFREEFRAG(new)); return (old); } if (old->wk_type != D_JNEWBLK || new->wk_type != D_JNEWBLK) panic("jnewblk_merge: Bad type: old %d new %d\n", old->wk_type, new->wk_type); /* * Handle merging of two jnewblk records that describe * different sets of fragments in the same block. */ jnewblk = WK_JNEWBLK(old); njnewblk = WK_JNEWBLK(new); if (jnewblk->jn_blkno != njnewblk->jn_blkno) panic("jnewblk_merge: Merging disparate blocks."); /* * The record may be rolled back in the cg. */ if (jnewblk->jn_state & UNDONE) { jnewblk->jn_state &= ~UNDONE; njnewblk->jn_state |= UNDONE; njnewblk->jn_state &= ~ATTACHED; } /* * We modify the newer addref and free the older so that if neither * has been written the most up-to-date copy will be on disk. If * both have been written but rolled back we only temporarily need * one of them to fix the bits when the cg write completes. */ jnewblk->jn_state |= ATTACHED | COMPLETE; njnewblk->jn_oldfrags = jnewblk->jn_oldfrags; cancel_jnewblk(jnewblk, wkhd); WORKLIST_REMOVE(&jnewblk->jn_list); free_jnewblk(jnewblk); return (new); } /* * Replace an old allocdirect dependency with a newer one. * This routine must be called with splbio interrupts blocked. 
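* * Editorial sketch of the invariant checked on entry: the new dependency * must be replacing exactly the block the old one installed, * *	newadp->ad_oldblkno == oldadp->ad_newblkno *	newadp->ad_oldsize == oldadp->ad_newsize * * and anything else panics as a lost or misordered dependency.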
*/ static void allocdirect_merge(adphead, newadp, oldadp) struct allocdirectlst *adphead; /* head of list holding allocdirects */ struct allocdirect *newadp; /* allocdirect being added */ struct allocdirect *oldadp; /* existing allocdirect being checked */ { struct worklist *wk; struct freefrag *freefrag; freefrag = NULL; LOCK_OWNED(VFSTOUFS(newadp->ad_list.wk_mp)); if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_offset >= NDADDR) panic("%s %jd != new %jd || old size %ld != new %ld", "allocdirect_merge: old blkno", (intmax_t)newadp->ad_oldblkno, (intmax_t)oldadp->ad_newblkno, newadp->ad_oldsize, oldadp->ad_newsize); newadp->ad_oldblkno = oldadp->ad_oldblkno; newadp->ad_oldsize = oldadp->ad_oldsize; /* * If the old dependency had a fragment to free or had never * previously had a block allocated, then the new dependency * can immediately post its freefrag and adopt the old freefrag. * This action is done by swapping the freefrag dependencies. * The new dependency gains the old one's freefrag, and the * old one gets the new one and then immediately puts it on * the worklist when it is freed by free_newblk. It is * not possible to do this swap when the old dependency had a * non-zero size but no previous fragment to free. This condition * arises when the new block is an extension of the old block. * Here, the first part of the fragment allocated to the new * dependency is part of the block currently claimed on disk by * the old dependency, so cannot legitimately be freed until the * conditions for the new dependency are fulfilled. */ freefrag = newadp->ad_freefrag; if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } /* * If we are tracking a new directory-block allocation, * move it from the old allocdirect to the new allocdirect. */ if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { WORKLIST_REMOVE(wk); if (!LIST_EMPTY(&oldadp->ad_newdirblk)) panic("allocdirect_merge: extra newdirblk"); WORKLIST_INSERT(&newadp->ad_newdirblk, wk); } TAILQ_REMOVE(adphead, oldadp, ad_next); /* * We need to move any journal dependencies over to the freefrag * that releases this block if it exists. Otherwise we are * extending an existing block and we'll wait until that is * complete to release the journal space and extend the * new journal to cover this old space as well. */ if (freefrag == NULL) { if (oldadp->ad_newblkno != newadp->ad_newblkno) panic("allocdirect_merge: %jd != %jd", oldadp->ad_newblkno, newadp->ad_newblkno); newadp->ad_block.nb_jnewblk = (struct jnewblk *) jnewblk_merge(&newadp->ad_block.nb_jnewblk->jn_list, &oldadp->ad_block.nb_jnewblk->jn_list, &newadp->ad_block.nb_jwork); oldadp->ad_block.nb_jnewblk = NULL; cancel_newblk(&oldadp->ad_block, NULL, &newadp->ad_block.nb_jwork); } else { wk = (struct worklist *) cancel_newblk(&oldadp->ad_block, &freefrag->ff_list, &freefrag->ff_jwork); freefrag->ff_jdep = jnewblk_merge(freefrag->ff_jdep, wk, &freefrag->ff_jwork); } free_newblk(&oldadp->ad_block); } /* * Allocate a jfreefrag structure to journal a single block free. 
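* * Editorial note: for SUJ mounts, newfreefrag() below pairs each freefrag * with one of these records, * *	freefrag->ff_jdep = (struct worklist *) *	    newjfreefrag(freefrag, ip, blkno, size, lbn); * * while non-journaling mounts mark the freefrag DEPCOMPLETE immediately.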
*/ static struct jfreefrag * newjfreefrag(freefrag, ip, blkno, size, lbn) struct freefrag *freefrag; struct inode *ip; ufs2_daddr_t blkno; long size; ufs_lbn_t lbn; { struct jfreefrag *jfreefrag; struct fs *fs; fs = ITOFS(ip); jfreefrag = malloc(sizeof(struct jfreefrag), M_JFREEFRAG, M_SOFTDEP_FLAGS); workitem_alloc(&jfreefrag->fr_list, D_JFREEFRAG, ITOVFS(ip)); jfreefrag->fr_jsegdep = newjsegdep(&jfreefrag->fr_list); jfreefrag->fr_state = ATTACHED | DEPCOMPLETE; jfreefrag->fr_ino = ip->i_number; jfreefrag->fr_lbn = lbn; jfreefrag->fr_blkno = blkno; jfreefrag->fr_frags = numfrags(fs, size); jfreefrag->fr_freefrag = freefrag; return (jfreefrag); } /* * Allocate a new freefrag structure. */ static struct freefrag * newfreefrag(ip, blkno, size, lbn) struct inode *ip; ufs2_daddr_t blkno; long size; ufs_lbn_t lbn; { struct freefrag *freefrag; struct ufsmount *ump; struct fs *fs; CTR4(KTR_SUJ, "newfreefrag: ino %d blkno %jd size %ld lbn %jd", ip->i_number, blkno, size, lbn); ump = ITOUMP(ip); fs = ump->um_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); freefrag = malloc(sizeof(struct freefrag), M_FREEFRAG, M_SOFTDEP_FLAGS); workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ump)); freefrag->ff_state = ATTACHED; LIST_INIT(&freefrag->ff_jwork); freefrag->ff_inum = ip->i_number; freefrag->ff_vtype = ITOV(ip)->v_type; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; if (MOUNTEDSUJ(UFSTOVFS(ump))) { freefrag->ff_jdep = (struct worklist *) newjfreefrag(freefrag, ip, blkno, size, lbn); } else { freefrag->ff_state |= DEPCOMPLETE; freefrag->ff_jdep = NULL; } return (freefrag); } /* * This workitem de-allocates fragments that were replaced during * file block allocation. */ static void handle_workitem_freefrag(freefrag) struct freefrag *freefrag; { struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp); struct workhead wkhd; CTR3(KTR_SUJ, "handle_workitem_freefrag: ino %d blkno %jd size %ld", freefrag->ff_inum, freefrag->ff_blkno, freefrag->ff_fragsize); /* * It would be illegal to add new completion items to the * freefrag after it was scheduled to be done, so it must be * safe to modify the list head here. */ LIST_INIT(&wkhd); ACQUIRE_LOCK(ump); LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list); /* * If the journal has not been written we must cancel it here. */ if (freefrag->ff_jdep) { if (freefrag->ff_jdep->wk_type != D_JNEWBLK) panic("handle_workitem_freefrag: Unexpected type %d\n", freefrag->ff_jdep->wk_type); cancel_jnewblk(WK_JNEWBLK(freefrag->ff_jdep), &wkhd); } FREE_LOCK(ump); ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno, freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd); ACQUIRE_LOCK(ump); WORKITEM_FREE(freefrag, D_FREEFRAG); FREE_LOCK(ump); } /* * Set up a dependency structure for an external attributes data block. * This routine follows much of the structure of softdep_setup_allocdirect. * See the description of softdep_setup_allocdirect above for details. 
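* * Editorial note: the flow below mirrors softdep_setup_allocdirect() * except that the allocdirect is flagged EXTDATA and is sorted into * id_newextupdt rather than id_newinoupdt: * *	adp->ad_state |= EXTDATA; *	adphead = &inodedep->id_newextupdt;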
*/ void softdep_setup_allocext(ip, off, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; ufs_lbn_t off; ufs2_daddr_t newblkno; ufs2_daddr_t oldblkno; long newsize; long oldsize; struct buf *bp; { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; struct freefrag *freefrag; struct inodedep *inodedep; struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; struct ufsmount *ump; ufs_lbn_t lbn; mp = ITOVFS(ip); ump = VFSTOUFS(mp); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_allocext called on non-softdep filesystem")); KASSERT(off < NXADDR, ("softdep_setup_allocext: lbn %lld > NXADDR", (long long)off)); lbn = bp->b_lblkno; if (oldblkno && oldblkno != newblkno) freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); else freefrag = NULL; ACQUIRE_LOCK(ump); if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocext: lost block"); KASSERT(newblk->nb_list.wk_type == D_NEWBLK, ("softdep_setup_allocext: newblk already initialized")); /* * Convert the newblk to an allocdirect. */ WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT); adp = (struct allocdirect *)newblk; newblk->nb_freefrag = freefrag; adp->ad_offset = off; adp->ad_oldblkno = oldblkno; adp->ad_newsize = newsize; adp->ad_oldsize = oldsize; adp->ad_state |= EXTDATA; /* * Finish initializing the journal. */ if ((jnewblk = newblk->nb_jnewblk) != NULL) { jnewblk->jn_ino = ip->i_number; jnewblk->jn_lbn = lbn; add_to_journal(&jnewblk->jn_list); } if (freefrag && freefrag->ff_jdep != NULL && freefrag->ff_jdep->wk_type == D_JFREEFRAG) add_to_journal(freefrag->ff_jdep); inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); adp->ad_inodedep = inodedep; WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the * first uncommitted block (the size of the file stored on disk * ends at the end of the lowest committed fragment, or if there * are no fragments, at the end of the highest committed block). * Since files generally grow, the typical case is that the new * block is to be added at the end of the list. We speed this * special case by checking against the last allocdirect in the * list before laboriously traversing the list looking for the * insertion point. */ adphead = &inodedep->id_newextupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); if (oldadp != NULL && oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(ump); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocext: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(ump); } /* * Indirect block allocation dependencies. * * The same dependencies that exist for a direct block also exist when * a new block is allocated and pointed to by an entry in a block of * indirect pointers. The undo/redo states described above are also * used here. Because an indirect block contains many pointers that * may have dependencies, a second copy of the entire in-memory indirect * block is kept. The buffer cache copy is always completely up-to-date. 
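* * (Editorial sketch: the second copy is the ir_savebp made in * indirdep_lookup() below, * *	newindirdep->ir_savebp = getblk(ump->um_devvp, bp->b_blkno, *	    bp->b_bcount, 0, 0, 0); *	bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); * * and unsafe pointers are rolled back in that copy before each write.)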
* The second copy, which is used only as a source for disk writes, * contains only the safe pointers (i.e., those that have no remaining * update dependencies). The second copy is freed when all pointers * are safe. The cache is not allowed to replace indirect blocks with * pending update dependencies. If a buffer containing an indirect * block with dependencies is written, these routines will mark it * dirty again. It can only be successfully written once all the * dependencies are removed. The ffs_fsync routine, in conjunction with * softdep_sync_metadata, works to get all the dependencies * removed so that a file can be successfully written to disk. Three * procedures are used when setting up indirect block pointer * dependencies. The division is necessary because of the organization * of the "balloc" routine and because of the distinction between file * pages and file metadata blocks. */ /* * Allocate a new allocindir structure. */ static struct allocindir * newallocindir(ip, ptrno, newblkno, oldblkno, lbn) struct inode *ip; /* inode for file being extended */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ ufs_lbn_t lbn; { struct newblk *newblk; struct allocindir *aip; struct freefrag *freefrag; struct jnewblk *jnewblk; if (oldblkno) freefrag = newfreefrag(ip, oldblkno, ITOFS(ip)->fs_bsize, lbn); else freefrag = NULL; ACQUIRE_LOCK(ITOUMP(ip)); if (newblk_lookup(ITOVFS(ip), newblkno, 0, &newblk) == 0) panic("newallocindir: lost block"); KASSERT(newblk->nb_list.wk_type == D_NEWBLK, ("newallocindir: newblk already initialized")); WORKITEM_REASSIGN(newblk, D_ALLOCINDIR); newblk->nb_freefrag = freefrag; aip = (struct allocindir *)newblk; aip->ai_offset = ptrno; aip->ai_oldblkno = oldblkno; aip->ai_lbn = lbn; if ((jnewblk = newblk->nb_jnewblk) != NULL) { jnewblk->jn_ino = ip->i_number; jnewblk->jn_lbn = lbn; add_to_journal(&jnewblk->jn_list); } if (freefrag && freefrag->ff_jdep != NULL && freefrag->ff_jdep->wk_type == D_JFREEFRAG) add_to_journal(freefrag->ff_jdep); return (aip); } /* * Called just before setting an indirect block pointer * to a newly allocated file page. */ void softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) struct inode *ip; /* inode for file being extended */ ufs_lbn_t lbn; /* allocated block number within file */ struct buf *bp; /* buffer with indirect blk referencing page */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ struct buf *nbp; /* buffer holding allocated page */ { struct inodedep *inodedep; struct freefrag *freefrag; struct allocindir *aip; struct pagedep *pagedep; struct mount *mp; struct ufsmount *ump; mp = ITOVFS(ip); ump = VFSTOUFS(mp); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_allocindir_page called on non-softdep filesystem")); KASSERT(lbn == nbp->b_lblkno, ("softdep_setup_allocindir_page: lbn %jd != lblkno %jd", lbn, nbp->b_lblkno)); CTR4(KTR_SUJ, "softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd " "lbn %jd", ip->i_number, newblkno, oldblkno, lbn); ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page"); aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn); (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); /* * If we are allocating a directory page, then we must * allocate an associated pagedep to track additions and * deletions. 
*/ if ((ip->i_mode & IFMT) == IFDIR) pagedep_lookup(mp, nbp, ip->i_number, lbn, DEPALLOC, &pagedep); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(ump); if (freefrag) handle_workitem_freefrag(freefrag); } /* * Called just before setting an indirect block pointer to a * newly allocated indirect block. */ void softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct buf *nbp; /* newly allocated indirect block */ struct inode *ip; /* inode for file being extended */ struct buf *bp; /* indirect block referencing allocated block */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ { struct inodedep *inodedep; struct allocindir *aip; struct ufsmount *ump; ufs_lbn_t lbn; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_setup_allocindir_meta called on non-softdep filesystem")); CTR3(KTR_SUJ, "softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d", ip->i_number, newblkno, ptrno); lbn = nbp->b_lblkno; ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); aip = newallocindir(ip, ptrno, newblkno, 0, lbn); inodedep_lookup(UFSTOVFS(ump), ip->i_number, DEPALLOC, &inodedep); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn)) panic("softdep_setup_allocindir_meta: Block already existed"); FREE_LOCK(ump); } static void indirdep_complete(indirdep) struct indirdep *indirdep; { struct allocindir *aip; LIST_REMOVE(indirdep, ir_next); indirdep->ir_state |= DEPCOMPLETE; while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { LIST_REMOVE(aip, ai_next); free_newblk(&aip->ai_block); } /* * If this indirdep is not attached to a buf it was simply waiting * on completion to clear completehd. free_indirdep() asserts * that nothing is dangling. */ if ((indirdep->ir_state & ONWORKLIST) == 0) free_indirdep(indirdep); } static struct indirdep * indirdep_lookup(mp, ip, bp) struct mount *mp; struct inode *ip; struct buf *bp; { struct indirdep *indirdep, *newindirdep; struct newblk *newblk; struct ufsmount *ump; struct worklist *wk; struct fs *fs; ufs2_daddr_t blkno; ump = VFSTOUFS(mp); LOCK_OWNED(ump); indirdep = NULL; newindirdep = NULL; fs = ump->um_fs; for (;;) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type != D_INDIRDEP) continue; indirdep = WK_INDIRDEP(wk); break; } /* Found on the buffer worklist, no new structure to free. */ if (indirdep != NULL && newindirdep == NULL) return (indirdep); if (indirdep != NULL && newindirdep != NULL) panic("indirdep_lookup: simultaneous create"); /* None found on the buffer and a new structure is ready. */ if (indirdep == NULL && newindirdep != NULL) break; /* None found and no new structure available. 
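 * (The loop here is the classic unlock/allocate/retry pattern: the
 * per-mount softdep lock cannot be held across a sleeping allocation,
 * so we drop it, allocate, reacquire it, and re-scan in case another
 * thread created the structure while the lock was dropped.  A minimal
 * user-space sketch of the same pattern, with hypothetical names and
 * a pthread mutex standing in for the kernel lock, might read:
 *
 *	#include <pthread.h>
 *	#include <stdlib.h>
 *
 *	static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
 *	static struct node { struct node *next; int key; } *head;
 *
 *	static struct node *
 *	node_lookup(int key)
 *	{
 *		struct node *n, *fresh = NULL;
 *
 *		pthread_mutex_lock(&lk);
 *		for (;;) {
 *			for (n = head; n != NULL; n = n->next)
 *				if (n->key == key)
 *					break;
 *			if (n != NULL)
 *				break;		// found; discard fresh below
 *			if (fresh != NULL) {	// install the one we built
 *				fresh->next = head;
 *				head = n = fresh;
 *				fresh = NULL;
 *				break;
 *			}
 *			pthread_mutex_unlock(&lk);	// allocation may sleep
 *			if ((fresh = malloc(sizeof(struct node))) == NULL)
 *				abort();
 *			fresh->key = key;
 *			pthread_mutex_lock(&lk);	// re-scan; we may have raced
 *		}
 *		pthread_mutex_unlock(&lk);
 *		free(fresh);	// NULL or the loser of the race; both safe
 *		return (n);
 *	})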
*/ FREE_LOCK(ump); newindirdep = malloc(sizeof(struct indirdep), M_INDIRDEP, M_SOFTDEP_FLAGS); workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); newindirdep->ir_state = ATTACHED; if (I_IS_UFS1(ip)) newindirdep->ir_state |= UFS1FMT; TAILQ_INIT(&newindirdep->ir_trunc); newindirdep->ir_saveddata = NULL; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); LIST_INIT(&newindirdep->ir_writehd); LIST_INIT(&newindirdep->ir_completehd); if (bp->b_blkno == bp->b_lblkno) { ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; } newindirdep->ir_freeblks = NULL; newindirdep->ir_savebp = getblk(ump->um_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0); newindirdep->ir_bp = bp; BUF_KERNPROC(newindirdep->ir_savebp); bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); ACQUIRE_LOCK(ump); } indirdep = newindirdep; WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); /* * If the block is not yet allocated we don't set DEPCOMPLETE so * that we don't free dependencies until the pointers are valid. * This could search b_dep for D_ALLOCDIRECT/D_ALLOCINDIR rather * than using the hash. */ if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)) LIST_INSERT_HEAD(&newblk->nb_indirdeps, indirdep, ir_next); else indirdep->ir_state |= DEPCOMPLETE; return (indirdep); } /* * Called to finish the allocation of the "aip" allocated * by one of the two routines above. */ static struct freefrag * setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) struct buf *bp; /* in-memory copy of the indirect block */ struct inode *ip; /* inode for file being extended */ struct inodedep *inodedep; /* Inodedep for ip */ struct allocindir *aip; /* allocindir allocated by the above routines */ ufs_lbn_t lbn; /* Logical block number for this block. */ { struct fs *fs; struct indirdep *indirdep; struct allocindir *oldaip; struct freefrag *freefrag; struct mount *mp; struct ufsmount *ump; mp = ITOVFS(ip); ump = VFSTOUFS(mp); LOCK_OWNED(ump); fs = ump->um_fs; if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); KASSERT(aip->ai_offset >= 0 && aip->ai_offset < NINDIR(fs), ("setup_allocindir_phase2: Bad offset %d", aip->ai_offset)); indirdep = indirdep_lookup(mp, ip, bp); KASSERT(indirdep->ir_savebp != NULL, ("setup_allocindir_phase2 NULL ir_savebp")); aip->ai_indirdep = indirdep; /* * Check for an unwritten dependency for this indirect offset. If * there is, merge the old dependency into the new one. This happens * as a result of reallocblk only. */ freefrag = NULL; if (aip->ai_oldblkno != 0) { LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) { if (oldaip->ai_offset == aip->ai_offset) { freefrag = allocindir_merge(aip, oldaip); goto done; } } LIST_FOREACH(oldaip, &indirdep->ir_donehd, ai_next) { if (oldaip->ai_offset == aip->ai_offset) { freefrag = allocindir_merge(aip, oldaip); goto done; } } } done: LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); return (freefrag); } /* * Merge two allocindirs which refer to the same block. Move newblock * dependencies and setup the freefrags appropriately. 
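 * (Such a duplicate dependency on the same indirect offset arises only as a result of reallocblk, as the comment in setup_allocindir_phase2() above notes.)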
*/ static struct freefrag * allocindir_merge(aip, oldaip) struct allocindir *aip; struct allocindir *oldaip; { struct freefrag *freefrag; struct worklist *wk; if (oldaip->ai_newblkno != aip->ai_oldblkno) panic("allocindir_merge: blkno"); aip->ai_oldblkno = oldaip->ai_oldblkno; freefrag = aip->ai_freefrag; aip->ai_freefrag = oldaip->ai_freefrag; oldaip->ai_freefrag = NULL; KASSERT(freefrag != NULL, ("allocindir_merge: No freefrag")); /* * If we are tracking a new directory-block allocation, * move it from the old allocindir to the new allocindir. */ if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) { WORKLIST_REMOVE(wk); if (!LIST_EMPTY(&oldaip->ai_newdirblk)) panic("allocindir_merge: extra newdirblk"); WORKLIST_INSERT(&aip->ai_newdirblk, wk); } /* * We can skip journaling for this freefrag and just complete * any pending journal work for the allocindir that is being * removed after the freefrag completes. */ if (freefrag->ff_jdep) cancel_jfreefrag(WK_JFREEFRAG(freefrag->ff_jdep)); LIST_REMOVE(oldaip, ai_next); freefrag->ff_jdep = (struct worklist *)cancel_newblk(&oldaip->ai_block, &freefrag->ff_list, &freefrag->ff_jwork); free_newblk(&oldaip->ai_block); return (freefrag); } static inline void setup_freedirect(freeblks, ip, i, needj) struct freeblks *freeblks; struct inode *ip; int i; int needj; { struct ufsmount *ump; ufs2_daddr_t blkno; int frags; blkno = DIP(ip, i_db[i]); if (blkno == 0) return; DIP_SET(ip, i_db[i], 0); ump = ITOUMP(ip); frags = sblksize(ump->um_fs, ip->i_size, i); frags = numfrags(ump->um_fs, frags); newfreework(ump, freeblks, NULL, i, blkno, frags, 0, needj); } static inline void setup_freeext(freeblks, ip, i, needj) struct freeblks *freeblks; struct inode *ip; int i; int needj; { struct ufsmount *ump; ufs2_daddr_t blkno; int frags; blkno = ip->i_din2->di_extb[i]; if (blkno == 0) return; ip->i_din2->di_extb[i] = 0; ump = ITOUMP(ip); frags = sblksize(ump->um_fs, ip->i_din2->di_extsize, i); frags = numfrags(ump->um_fs, frags); newfreework(ump, freeblks, NULL, -1 - i, blkno, frags, 0, needj); } static inline void setup_freeindir(freeblks, ip, i, lbn, needj) struct freeblks *freeblks; struct inode *ip; int i; ufs_lbn_t lbn; int needj; { struct ufsmount *ump; ufs2_daddr_t blkno; blkno = DIP(ip, i_ib[i]); if (blkno == 0) return; DIP_SET(ip, i_ib[i], 0); ump = ITOUMP(ip); newfreework(ump, freeblks, NULL, lbn, blkno, ump->um_fs->fs_frag, 0, needj); } static inline struct freeblks * newfreeblks(mp, ip) struct mount *mp; struct inode *ip; { struct freeblks *freeblks; freeblks = malloc(sizeof(struct freeblks), M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); LIST_INIT(&freeblks->fb_jblkdephd); LIST_INIT(&freeblks->fb_jwork); freeblks->fb_ref = 0; freeblks->fb_cgwait = 0; freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; freeblks->fb_inum = ip->i_number; freeblks->fb_vtype = ITOV(ip)->v_type; freeblks->fb_modrev = DIP(ip, i_modrev); freeblks->fb_devvp = ITODEVVP(ip); freeblks->fb_chkcnt = 0; freeblks->fb_len = 0; return (freeblks); } static void trunc_indirdep(indirdep, freeblks, bp, off) struct indirdep *indirdep; struct freeblks *freeblks; struct buf *bp; int off; { struct allocindir *aip, *aipn; /* * The first set of allocindirs won't be in savedbp.
*/ LIST_FOREACH_SAFE(aip, &indirdep->ir_deplisthd, ai_next, aipn) if (aip->ai_offset > off) cancel_allocindir(aip, bp, freeblks, 1); LIST_FOREACH_SAFE(aip, &indirdep->ir_donehd, ai_next, aipn) if (aip->ai_offset > off) cancel_allocindir(aip, bp, freeblks, 1); /* * These will exist in savedbp. */ LIST_FOREACH_SAFE(aip, &indirdep->ir_writehd, ai_next, aipn) if (aip->ai_offset > off) cancel_allocindir(aip, NULL, freeblks, 0); LIST_FOREACH_SAFE(aip, &indirdep->ir_completehd, ai_next, aipn) if (aip->ai_offset > off) cancel_allocindir(aip, NULL, freeblks, 0); } /* * Follow the chain of indirects down to lastlbn creating a freework * structure for each. This will be used to start indir_trunc() at * the right offset and create the journal records for the partial * truncation. A second step will handle the truncated dependencies. */ static int setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno) struct freeblks *freeblks; struct inode *ip; ufs_lbn_t lbn; ufs_lbn_t lastlbn; ufs2_daddr_t blkno; { struct indirdep *indirdep; struct indirdep *indirn; struct freework *freework; struct newblk *newblk; struct mount *mp; struct ufsmount *ump; struct buf *bp; uint8_t *start; uint8_t *end; ufs_lbn_t lbnadd; int level; int error; int off; freework = NULL; if (blkno == 0) return (0); mp = freeblks->fb_list.wk_mp; ump = VFSTOUFS(mp); bp = getblk(ITOV(ip), lbn, mp->mnt_stat.f_iosize, 0, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { bp->b_blkno = blkptrtodb(VFSTOUFS(mp), blkno); bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); racct_add_buf(curproc, bp, 0); PROC_UNLOCK(curproc); } #endif /* RACCT */ curthread->td_ru.ru_inblock++; error = bufwait(bp); if (error) { brelse(bp); return (error); } } level = lbn_level(lbn); lbnadd = lbn_offset(ump->um_fs, level); /* * Compute the offset of the last block we want to keep. Store * in the freework the first block we want to completely free. */ off = (lastlbn - -(lbn + level)) / lbnadd; if (off + 1 == NINDIR(ump->um_fs)) goto nowork; freework = newfreework(ump, freeblks, NULL, lbn, blkno, 0, off + 1, 0); /* * Link the freework into the indirdep. This will prevent any new * allocations from proceeding until we are finished with the * truncate and the block is written. */ ACQUIRE_LOCK(ump); indirdep = indirdep_lookup(mp, ip, bp); if (indirdep->ir_freeblks) panic("setup_trunc_indir: indirdep already truncated."); TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next); freework->fw_indir = indirdep; /* * Cancel any allocindirs that will not make it to disk. * We have to do this for all copies of the indirdep that * live on this newblk. */ if ((indirdep->ir_state & DEPCOMPLETE) == 0) { newblk_lookup(mp, dbtofsb(ump->um_fs, bp->b_blkno), 0, &newblk); LIST_FOREACH(indirn, &newblk->nb_indirdeps, ir_next) trunc_indirdep(indirn, freeblks, bp, off); } else trunc_indirdep(indirdep, freeblks, bp, off); FREE_LOCK(ump); /* * Creation is protected by the buf lock. The saveddata is only * needed if a full truncation follows a partial truncation but it * is difficult to allocate in that case so we fetch it anyway. */ if (indirdep->ir_saveddata == NULL) indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, M_SOFTDEP_FLAGS); nowork: /* Fetch the blkno of the child and the zero start offset.
*/ if (I_IS_UFS1(ip)) { blkno = ((ufs1_daddr_t *)bp->b_data)[off]; start = (uint8_t *)&((ufs1_daddr_t *)bp->b_data)[off+1]; } else { blkno = ((ufs2_daddr_t *)bp->b_data)[off]; start = (uint8_t *)&((ufs2_daddr_t *)bp->b_data)[off+1]; } if (freework) { /* Zero the truncated pointers. */ end = bp->b_data + bp->b_bcount; bzero(start, end - start); bdwrite(bp); } else bqrelse(bp); if (level == 0) return (0); lbn++; /* adjust level */ lbn -= (off * lbnadd); return setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno); } /* * Complete the partial truncation of an indirect block setup by * setup_trunc_indir(). This zeros the truncated pointers in the saved * copy and writes them to disk before the freeblks is allowed to complete. */ static void complete_trunc_indir(freework) struct freework *freework; { struct freework *fwn; struct indirdep *indirdep; struct ufsmount *ump; struct buf *bp; uintptr_t start; int count; ump = VFSTOUFS(freework->fw_list.wk_mp); LOCK_OWNED(ump); indirdep = freework->fw_indir; for (;;) { bp = indirdep->ir_bp; /* See if the block was discarded. */ if (bp == NULL) break; /* Inline part of getdirtybuf(). We don't want bremfree. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, LOCK_PTR(ump)) == 0) BUF_UNLOCK(bp); ACQUIRE_LOCK(ump); } freework->fw_state |= DEPCOMPLETE; TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next); /* * Zero the pointers in the saved copy. */ if (indirdep->ir_state & UFS1FMT) start = sizeof(ufs1_daddr_t); else start = sizeof(ufs2_daddr_t); start *= freework->fw_start; count = indirdep->ir_savebp->b_bcount - start; start += (uintptr_t)indirdep->ir_savebp->b_data; bzero((char *)start, count); /* * We need to start the next truncation in the list if it has not * been started yet. */ fwn = TAILQ_FIRST(&indirdep->ir_trunc); if (fwn != NULL) { if (fwn->fw_freeblks == indirdep->ir_freeblks) TAILQ_REMOVE(&indirdep->ir_trunc, fwn, fw_next); if ((fwn->fw_state & ONWORKLIST) == 0) freework_enqueue(fwn); } /* * If bp is NULL the block was fully truncated, restore * the saved block list; otherwise free it if it is no * longer needed. */ if (TAILQ_EMPTY(&indirdep->ir_trunc)) { if (bp == NULL) bcopy(indirdep->ir_saveddata, indirdep->ir_savebp->b_data, indirdep->ir_savebp->b_bcount); free(indirdep->ir_saveddata, M_INDIRDEP); indirdep->ir_saveddata = NULL; } /* * When bp is NULL there is a full truncation pending. We * must wait for this full truncation to be journaled before * we can release this freework because the disk pointers will * never be written as zero. */ if (bp == NULL) { if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd)) handle_written_freework(freework); else WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd, &freework->fw_list); } else { /* Complete when the real copy is written. */ WORKLIST_INSERT(&bp->b_dep, &freework->fw_list); BUF_UNLOCK(bp); } } /* * Calculate the number of blocks we are going to release where datablocks * is the current total and length is the new file size. */ static ufs2_daddr_t blkcount(fs, datablocks, length) struct fs *fs; ufs2_daddr_t datablocks; off_t length; { off_t totblks, numblks; totblks = 0; numblks = howmany(length, fs->fs_bsize); if (numblks <= NDADDR) { totblks = howmany(length, fs->fs_fsize); goto out; } totblks = blkstofrags(fs, numblks); numblks -= NDADDR; /* * Count all single, then double, then triple indirects required. * Subtracting one indirect's worth of blocks for each pass * acknowledges one of each pointed to by the inode.
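 *
 * As a worked example (assuming a UFS2 filesystem with 16 KB blocks,
 * so NINDIR(fs) = 2048), take numblks = 5000 blocks beyond NDADDR:
 * the first pass adds howmany(5000, 2048) = 3 single indirects and
 * leaves numblks = howmany(5000 - 2048, 2048) = 2; the second pass
 * adds howmany(2, 2048) = 1 double indirect, and 2 - 2048 <= 0 ends
 * the loop.  That matches the tree layout: one single indirect hung
 * directly off the inode, two more beneath the double indirect, and
 * the double indirect itself.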
*/ for (;;) { totblks += blkstofrags(fs, howmany(numblks, NINDIR(fs))); numblks -= NINDIR(fs); if (numblks <= 0) break; numblks = howmany(numblks, NINDIR(fs)); } out: totblks = fsbtodb(fs, totblks); /* * Handle sparse files. We can't reclaim more blocks than the inode * references. We will correct it later in handle_complete_freeblocks() * when we know the real count. */ if (totblks > datablocks) return (0); return (datablocks - totblks); } /* * Handle freeblocks for journaled softupdate filesystems. * * Contrary to normal softupdates, we must preserve the block pointers in * indirects until their subordinates are free. This is to avoid journaling * every block that is freed which may consume more space than the journal * itself. The recovery program will see the free block journals at the * base of the truncated area and traverse them to reclaim space. The * pointers in the inode may be cleared immediately after the journal * records are written because each direct and indirect pointer in the * inode is recorded in a journal. This permits full truncation to proceed * asynchronously. The write order is journal -> inode -> cgs -> indirects. * * The algorithm is as follows: * 1) Traverse the in-memory state and create journal entries to release * the relevant blocks and full indirect trees. * 2) Traverse the indirect block chain adding partial truncation freework * records to indirects in the path to lastlbn. The freework will * prevent new allocation dependencies from being satisfied in this * indirect until the truncation completes. * 3) Read and lock the inode block, performing an update with the new size * and pointers. This prevents truncated data from becoming valid on * disk through step 4. * 4) Reap unsatisfied dependencies that are beyond the truncated area, * eliminate journal work for those records that do not require it. * 5) Schedule the journal records to be written followed by the inode block. * 6) Allocate any necessary frags for the end of file. * 7) Zero any partially truncated blocks. * * From this point truncation proceeds asynchronously using the freework and * indir_trunc machinery. The file will not be extended again into a * partially truncated indirect block until all work is completed but * the normal dependency mechanism ensures that it is rolled back/forward * as appropriate. Further truncation may occur without delay and is * serialized in indir_trunc(). */ void softdep_journal_freeblocks(ip, cred, length, flags) struct inode *ip; /* The inode whose length is to be reduced */ struct ucred *cred; off_t length; /* The new length for the file */ int flags; /* IO_EXT and/or IO_NORMAL */ { struct freeblks *freeblks, *fbn; struct worklist *wk, *wkn; struct inodedep *inodedep; struct jblkdep *jblkdep; struct allocdirect *adp, *adpn; struct ufsmount *ump; struct fs *fs; struct buf *bp; struct vnode *vp; struct mount *mp; ufs2_daddr_t extblocks, datablocks; ufs_lbn_t tmpval, lbn, lastlbn; int frags, lastoff, iboff, allocblock, needj, error, i; ump = ITOUMP(ip); mp = UFSTOVFS(ump); fs = ump->um_fs; KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_journal_freeblocks called on non-softdep filesystem")); vp = ITOV(ip); needj = 1; iboff = -1; allocblock = 0; extblocks = 0; datablocks = 0; frags = 0; freeblks = newfreeblks(mp, ip); ACQUIRE_LOCK(ump); /* * If we're truncating a removed file that will never be written * we don't need to journal the block frees. The canceled journals * for the allocations will suffice.
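 * (An inodedep that is UNLINKED but not yet DEPCOMPLETE still has its
 * inode-bitmap dependency outstanding, so the inode has never been
 * committed to disk and no on-disk structure can reference the blocks
 * being freed.)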
*/ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED && length == 0) needj = 0; CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d", ip->i_number, length, needj); FREE_LOCK(ump); /* * Calculate the lbn that we are truncating to. This results in -1 * if we're truncating to 0 bytes. So it is the last lbn we want * to keep, not the first lbn we want to truncate. For example, with * 32 KB blocks, a length of 0 gives lastlbn -1 while any length up * to one full block gives lastlbn 0. */ lastlbn = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastoff = blkoff(fs, length); /* * Compute frags we are keeping in lastlbn. 0 means all. */ if (lastlbn >= 0 && lastlbn < NDADDR) { frags = fragroundup(fs, lastoff); /* adp offset of last valid allocdirect. */ iboff = lastlbn; } else if (lastlbn > 0) iboff = NDADDR; if (fs->fs_magic == FS_UFS2_MAGIC) extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); /* * Handle normal data blocks and indirects. This section saves * values used after the inode update to complete frag and indirect * truncation. */ if ((flags & IO_NORMAL) != 0) { /* * Handle truncation of whole direct and indirect blocks. */ for (i = iboff + 1; i < NDADDR; i++) setup_freedirect(freeblks, ip, i, needj); for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, lbn += tmpval, tmpval *= NINDIR(fs)) { /* Release a whole indirect tree. */ if (lbn > lastlbn) { setup_freeindir(freeblks, ip, i, -lbn -i, needj); continue; } iboff = i + NDADDR; /* * Traverse partially truncated indirect tree. */ if (lbn <= lastlbn && lbn + tmpval - 1 > lastlbn) setup_trunc_indir(freeblks, ip, -lbn - i, lastlbn, DIP(ip, i_ib[i])); } /* * Handle partial truncation to a frag boundary. */ if (frags) { ufs2_daddr_t blkno; long oldfrags; oldfrags = blksize(fs, ip, lastlbn); blkno = DIP(ip, i_db[lastlbn]); if (blkno && oldfrags != frags) { oldfrags -= frags; oldfrags = numfrags(fs, oldfrags); blkno += numfrags(fs, frags); newfreework(ump, freeblks, NULL, lastlbn, blkno, oldfrags, 0, needj); if (needj) adjust_newfreework(freeblks, numfrags(fs, frags)); } else if (blkno == 0) allocblock = 1; } /* * Add a journal record for partial truncate if we are * handling indirect blocks. Non-indirects need no extra * journaling. */ if (length != 0 && lastlbn >= NDADDR) { ip->i_flag |= IN_TRUNCATED; newjtrunc(freeblks, length, 0); } ip->i_size = length; DIP_SET(ip, i_size, ip->i_size); datablocks = DIP(ip, i_blocks) - extblocks; if (length != 0) datablocks = blkcount(fs, datablocks, length); freeblks->fb_len = length; } if ((flags & IO_EXT) != 0) { for (i = 0; i < NXADDR; i++) setup_freeext(freeblks, ip, i, needj); ip->i_din2->di_extsize = 0; datablocks += extblocks; } #ifdef QUOTA /* Reference the quotas in case the block count is wrong in the end. */ quotaref(vp, freeblks->fb_quota); (void) chkdq(ip, -datablocks, NOCRED, 0); #endif freeblks->fb_chkcnt = -datablocks; UFS_LOCK(ump); fs->fs_pendingblocks += datablocks; UFS_UNLOCK(ump); DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); /* * Handle truncation of incomplete alloc direct dependencies. We * hold the inode block locked to prevent incomplete dependencies * from reaching the disk while we are eliminating those that * have been truncated. This is a partially inlined ffs_update().
*/ ufs_itimes(vp); ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, cred, &bp); if (error) { brelse(bp); softdep_error("softdep_journal_freeblocks", error); return; } if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; softdep_update_inodeblock(ip, bp, 0); if (ump->um_fstype == UFS1) *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; else *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; ACQUIRE_LOCK(ump); (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_journal_freeblocks: inode busy"); /* * Add the freeblks structure to the list of operations that * must await the zero'ed inode being written to disk. If we * still have a bitmap dependency (needj), then the inode * has never been written to disk, so we can process the * freeblks below once we have deleted the dependencies. */ if (needj) WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); else freeblks->fb_state |= COMPLETE; if ((flags & IO_NORMAL) != 0) { TAILQ_FOREACH_SAFE(adp, &inodedep->id_inoupdt, ad_next, adpn) { if (adp->ad_offset > iboff) cancel_allocdirect(&inodedep->id_inoupdt, adp, freeblks); /* * Truncate the allocdirect. We could eliminate * or modify journal records as well. */ else if (adp->ad_offset == iboff && frags) adp->ad_newsize = frags; } } if ((flags & IO_EXT) != 0) while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) cancel_allocdirect(&inodedep->id_extupdt, adp, freeblks); /* * Scan the bufwait list for newblock dependencies that will never * make it to disk. */ LIST_FOREACH_SAFE(wk, &inodedep->id_bufwait, wk_list, wkn) { if (wk->wk_type != D_ALLOCDIRECT) continue; adp = WK_ALLOCDIRECT(wk); if (((flags & IO_NORMAL) != 0 && (adp->ad_offset > iboff)) || ((flags & IO_EXT) != 0 && (adp->ad_state & EXTDATA))) { cancel_jfreeblk(freeblks, adp->ad_newblkno); cancel_newblk(WK_NEWBLK(wk), NULL, &freeblks->fb_jwork); WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); } } /* * Add journal work. */ LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) add_to_journal(&jblkdep->jb_list); FREE_LOCK(ump); bdwrite(bp); /* * Truncate dependency structures beyond length. */ trunc_dependencies(ip, freeblks, lastlbn, frags, flags); /* * This is only set when we need to allocate a fragment because * none existed at the end of a frag-sized file. It handles only * allocating a new, zero filled block. */ if (allocblock) { ip->i_size = length - lastoff; DIP_SET(ip, i_size, ip->i_size); error = UFS_BALLOC(vp, length - 1, 1, cred, BA_CLRBUF, &bp); if (error != 0) { softdep_error("softdep_journal_freeblocks", error); return; } ip->i_size = length; DIP_SET(ip, i_size, length); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, frags); ffs_update(vp, 0); bawrite(bp); } else if (lastoff != 0 && vp->v_type != VDIR) { int size; /* * Zero the end of a truncated frag or block. */ size = sblksize(fs, length, lastlbn); error = bread(vp, lastlbn, size, cred, &bp); if (error) { softdep_error("softdep_journal_freeblocks", error); return; } bzero((char *)bp->b_data + lastoff, size - lastoff); bawrite(bp); } ACQUIRE_LOCK(ump); inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next); freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST; /* * We zero earlier truncations so they don't erroneously * update i_blocks.
*/ if (freeblks->fb_len == 0 && (flags & IO_NORMAL) != 0) TAILQ_FOREACH(fbn, &inodedep->id_freeblklst, fb_next) fbn->fb_len = 0; if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) freeblks->fb_state |= INPROGRESS; else freeblks = NULL; FREE_LOCK(ump); if (freeblks) handle_workitem_freeblocks(freeblks, 0); trunc_pages(ip, length, extblocks, flags); } /* * Flush a JOP_SYNC to the journal. */ void softdep_journal_fsync(ip) struct inode *ip; { struct jfsync *jfsync; struct ufsmount *ump; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_journal_fsync called on non-softdep filesystem")); if ((ip->i_flag & IN_TRUNCATED) == 0) return; ip->i_flag &= ~IN_TRUNCATED; jfsync = malloc(sizeof(*jfsync), M_JFSYNC, M_SOFTDEP_FLAGS | M_ZERO); workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ump)); jfsync->jfs_size = ip->i_size; jfsync->jfs_ino = ip->i_number; ACQUIRE_LOCK(ump); add_to_journal(&jfsync->jfs_list); jwait(&jfsync->jfs_list, MNT_WAIT); FREE_LOCK(ump); } /* * Block de-allocation dependencies. * * When blocks are de-allocated, the on-disk pointers must be nullified before * the blocks are made available for use by other files. (The true * requirement is that old pointers must be nullified before new on-disk * pointers are set. We chose this slightly more stringent requirement to * reduce complexity.) Our implementation handles this dependency by updating * the inode (or indirect block) appropriately but delaying the actual block * de-allocation (i.e., freemap and free space count manipulation) until * after the updated versions reach stable storage. After the disk is * updated, the blocks can be safely de-allocated whenever it is convenient. * This implementation handles only the common case of reducing a file's * length to zero. Other cases are handled by the conventional synchronous * write approach. * * The ffs implementation with which we worked double-checks * the state of the block pointers and file size as it reduces * a file's length. Some of this code is replicated here in our * soft updates implementation. The freeblks->fb_chkcnt field is * used to transfer a part of this information to the procedure * that eventually de-allocates the blocks. * * This routine should be called from the routine that shortens * a file's length, before the inode's size or block pointers * are modified. It will save the block pointer information for * later release and zero the inode so that the calling routine * can release it. 
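 *
 * A rough sketch of the expected call, from a caller such as
 * ffs_truncate() (simplified, not verbatim from this tree):
 *
 *	if (DOINGSOFTDEP(vp)) {
 *		// must run before i_size or the block pointers change
 *		softdep_setup_freeblocks(ip, (off_t)0, IO_NORMAL | IO_EXT);
 *		...
 *	}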
*/ void softdep_setup_freeblocks(ip, length, flags) struct inode *ip; /* The inode whose length is to be reduced */ off_t length; /* The new length for the file */ int flags; /* IO_EXT and/or IO_NORMAL */ { struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; struct ufsmount *ump; struct buf *bp; struct fs *fs; ufs2_daddr_t extblocks, datablocks; struct mount *mp; int i, delay, error; ufs_lbn_t tmpval; ufs_lbn_t lbn; ump = ITOUMP(ip); mp = UFSTOVFS(ump); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_freeblocks called on non-softdep filesystem")); CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld", ip->i_number, length); KASSERT(length == 0, ("softdep_setup_freeblocks: non-zero length")); fs = ump->um_fs; if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); softdep_error("softdep_setup_freeblocks", error); return; } freeblks = newfreeblks(mp, ip); extblocks = 0; datablocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC) extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); if ((flags & IO_NORMAL) != 0) { for (i = 0; i < NDADDR; i++) setup_freedirect(freeblks, ip, i, 0); for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, lbn += tmpval, tmpval *= NINDIR(fs)) setup_freeindir(freeblks, ip, i, -lbn -i, 0); ip->i_size = 0; DIP_SET(ip, i_size, 0); datablocks = DIP(ip, i_blocks) - extblocks; } if ((flags & IO_EXT) != 0) { for (i = 0; i < NXADDR; i++) setup_freeext(freeblks, ip, i, 0); ip->i_din2->di_extsize = 0; datablocks += extblocks; } #ifdef QUOTA /* Reference the quotas in case the block count is wrong in the end. */ quotaref(ITOV(ip), freeblks->fb_quota); (void) chkdq(ip, -datablocks, NOCRED, 0); #endif freeblks->fb_chkcnt = -datablocks; UFS_LOCK(ump); fs->fs_pendingblocks += datablocks; UFS_UNLOCK(ump); DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); /* - * Push the zero'ed inode to to its disk buffer so that we are free + * Push the zero'ed inode to its disk buffer so that we are free * to delete its dependencies below. Once the dependencies are gone * the buffer can be safely released. */ if (ump->um_fstype == UFS1) { dp1 = ((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); ip->i_din1->di_freelink = dp1->di_freelink; *dp1 = *ip->i_din1; } else { dp2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); ip->i_din2->di_freelink = dp2->di_freelink; *dp2 = *ip->i_din2; } /* * Find and eliminate any inode dependencies. */ ACQUIRE_LOCK(ump); (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); /* * Add the freeblks structure to the list of operations that * must await the zero'ed inode being written to disk. If we * still have a bitmap dependency (delay == 0), then the inode * has never been written to disk, so we can process the * freeblks below once we have deleted the dependencies. */ delay = (inodedep->id_state & DEPCOMPLETE); if (delay) WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); else freeblks->fb_state |= COMPLETE; /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. 
* If we still have a bitmap dependency, then the inode has never * been written to disk, so we can free any fragments without delay. */ if (flags & IO_NORMAL) { merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) cancel_allocdirect(&inodedep->id_inoupdt, adp, freeblks); } if (flags & IO_EXT) { merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) cancel_allocdirect(&inodedep->id_extupdt, adp, freeblks); } FREE_LOCK(ump); bdwrite(bp); trunc_dependencies(ip, freeblks, -1, 0, flags); ACQUIRE_LOCK(ump); if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); freeblks->fb_state |= DEPCOMPLETE; /* * If the inode with zeroed block pointers is now on disk * we can start freeing blocks. */ if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) freeblks->fb_state |= INPROGRESS; else freeblks = NULL; FREE_LOCK(ump); if (freeblks) handle_workitem_freeblocks(freeblks, 0); trunc_pages(ip, length, extblocks, flags); } /* * Eliminate pages from the page cache that back parts of this inode and * adjust the vnode pager's idea of our size. This prevents stale data * from hanging around in the page cache. */ static void trunc_pages(ip, length, extblocks, flags) struct inode *ip; off_t length; ufs2_daddr_t extblocks; int flags; { struct vnode *vp; struct fs *fs; ufs_lbn_t lbn; off_t end, extend; vp = ITOV(ip); fs = ITOFS(ip); extend = OFF_TO_IDX(lblktosize(fs, -extblocks)); if ((flags & IO_EXT) != 0) vn_pages_remove(vp, extend, 0); if ((flags & IO_NORMAL) == 0) return; BO_LOCK(&vp->v_bufobj); drain_output(vp); BO_UNLOCK(&vp->v_bufobj); /* * The vnode pager eliminates file pages; we eliminate indirects * below. */ vnode_pager_setsize(vp, length); /* * Calculate the end based on the last indirect we want to keep. If * the block extends into indirects we can just use the negative of * its lbn. Doubles and triples exist at lower numbers so we must * be careful not to remove those, if they exist. Double and triple * indirect lbns do not overlap with others so it is not important * to verify how many levels are required. */ lbn = lblkno(fs, length); if (lbn >= NDADDR) { /* Calculate the virtual lbn of the triple indirect. */ lbn = -lbn - (NIADDR - 1); end = OFF_TO_IDX(lblktosize(fs, lbn)); } else end = extend; vn_pages_remove(vp, OFF_TO_IDX(OFF_MAX), end); } /* * See if the buf bp is in the range eliminated by truncation. */ static int trunc_check_buf(bp, blkoffp, lastlbn, lastoff, flags) struct buf *bp; int *blkoffp; ufs_lbn_t lastlbn; int lastoff; int flags; { ufs_lbn_t lbn; *blkoffp = 0; /* Only match ext/normal blocks as appropriate. */ if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) || ((flags & IO_NORMAL) == 0 && (bp->b_xflags & BX_ALTDATA) == 0)) return (0); /* ALTDATA is always a full truncation. */ if ((bp->b_xflags & BX_ALTDATA) != 0) return (1); /* -1 is full truncation. */ if (lastlbn == -1) return (1); /* * If this is a partial truncate we only want those * blocks and indirect blocks that cover the range * we're after. */ lbn = bp->b_lblkno; if (lbn < 0) lbn = -(lbn + lbn_level(lbn)); if (lbn < lastlbn) return (0); /* Here we only truncate lblkno if it's partial.
*/ if (lbn == lastlbn) { if (lastoff == 0) return (0); *blkoffp = lastoff; } return (1); } /* * Eliminate any dependencies that exist in memory beyond lblkno:off */ static void trunc_dependencies(ip, freeblks, lastlbn, lastoff, flags) struct inode *ip; struct freeblks *freeblks; ufs_lbn_t lastlbn; int lastoff; int flags; { struct bufobj *bo; struct vnode *vp; struct buf *bp; int blkoff; /* * We must wait for any I/O in progress to finish so that * all potential buffers on the dirty list will be visible. * Once they are all there, walk the list and get rid of * any dependencies. */ vp = ITOV(ip); bo = &vp->v_bufobj; BO_LOCK(bo); drain_output(vp); TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) bp->b_vflags &= ~BV_SCANNED; restart: TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { if (bp->b_vflags & BV_SCANNED) continue; if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) { bp->b_vflags |= BV_SCANNED; continue; } KASSERT(bp->b_bufobj == bo, ("Wrong object in buffer")); if ((bp = getdirtybuf(bp, BO_LOCKPTR(bo), MNT_WAIT)) == NULL) goto restart; BO_UNLOCK(bo); if (deallocate_dependencies(bp, freeblks, blkoff)) bqrelse(bp); else brelse(bp); BO_LOCK(bo); goto restart; } /* * Now do the work of vtruncbuf while also matching indirect blocks. */ TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) bp->b_vflags &= ~BV_SCANNED; cleanrestart: TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) { if (bp->b_vflags & BV_SCANNED) continue; if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) { bp->b_vflags |= BV_SCANNED; continue; } if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); goto cleanrestart; } bp->b_vflags |= BV_SCANNED; bremfree(bp); if (blkoff != 0) { allocbuf(bp, blkoff); bqrelse(bp); } else { bp->b_flags |= B_INVAL | B_NOCACHE | B_RELBUF; brelse(bp); } BO_LOCK(bo); goto cleanrestart; } drain_output(vp); BO_UNLOCK(bo); } static int cancel_pagedep(pagedep, freeblks, blkoff) struct pagedep *pagedep; struct freeblks *freeblks; int blkoff; { struct jremref *jremref; struct jmvref *jmvref; struct dirrem *dirrem, *tmp; int i; /* * Copy any directory remove dependencies to the list * to be processed after the freeblks proceeds. If * the directory entries never made it to disk they * can be dumped directly onto the work list. */ LIST_FOREACH_SAFE(dirrem, &pagedep->pd_dirremhd, dm_next, tmp) { /* Skip this directory removal if it is intended to remain. */ if (dirrem->dm_offset < blkoff) continue; /* * If there are any dirrems we wait for the journal write * to complete and then restart the buf scan as the lock * has been dropped. */ while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { jwait(&jremref->jr_list, MNT_WAIT); return (ERESTART); } LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; WORKLIST_INSERT(&freeblks->fb_freeworkhd, &dirrem->dm_list); } while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { jwait(&jmvref->jm_list, MNT_WAIT); return (ERESTART); } /* * When we're partially truncating a pagedep we just want to flush * journal entries and return. There can not be any adds in the * truncated portion of the directory and newblk must remain if * part of the block remains.
*/ if (blkoff != 0) { struct diradd *dap; LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) if (dap->da_offset > blkoff) panic("cancel_pagedep: diradd %p off %d > %d", dap, dap->da_offset, blkoff); for (i = 0; i < DAHASHSZ; i++) LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) if (dap->da_offset > blkoff) panic("cancel_pagedep: diradd %p off %d > %d", dap, dap->da_offset, blkoff); return (0); } /* * There should be no directory add dependencies present * as the directory could not be truncated until all * children were removed. */ KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, ("deallocate_dependencies: pendinghd != NULL")); for (i = 0; i < DAHASHSZ; i++) KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, ("deallocate_dependencies: diraddhd != NULL")); if ((pagedep->pd_state & NEWBLOCK) != 0) free_newdirblk(pagedep->pd_newdirblk); if (free_pagedep(pagedep) == 0) panic("Failed to free pagedep %p", pagedep); return (0); } /* * Reclaim any dependency structures from a buffer that is about to * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's * associated with related dependencies do not occur. */ static int deallocate_dependencies(bp, freeblks, off) struct buf *bp; struct freeblks *freeblks; int off; { struct indirdep *indirdep; struct pagedep *pagedep; struct worklist *wk, *wkn; struct ufsmount *ump; ump = softdep_bp_to_mp(bp); if (ump == NULL) goto done; ACQUIRE_LOCK(ump); LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) { switch (wk->wk_type) { case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); cancel_indirdep(indirdep, bp, freeblks); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); if (cancel_pagedep(pagedep, freeblks, off)) { FREE_LOCK(ump); return (ERESTART); } continue; case D_ALLOCINDIR: /* * Simply remove the allocindir, we'll find it via * the indirdep where we can clear pointers if * needed. */ WORKLIST_REMOVE(wk); continue; case D_FREEWORK: /* * A truncation is waiting for the zero'd pointers * to be written. It can be freed when the freeblks * is journaled. */ WORKLIST_REMOVE(wk); wk->wk_state |= ONDEPLIST; WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); break; case D_ALLOCDIRECT: if (off != 0) continue; /* FALLTHROUGH */ default: panic("deallocate_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } FREE_LOCK(ump); done: /* * Don't throw away this buf, we were partially truncating and * some deps may always remain. */ if (off) { allocbuf(bp, off); bp->b_vflags |= BV_SCANNED; return (EBUSY); } bp->b_flags |= B_INVAL | B_NOCACHE; return (0); } /* * An allocdirect is being canceled due to a truncate. We must make sure * the journal entry is released in concert with the blkfree that releases * the storage. Completed journal entries must not be released until the * space is no longer pointed to by the inode or in the bitmap. */ static void cancel_allocdirect(adphead, adp, freeblks) struct allocdirectlst *adphead; struct allocdirect *adp; struct freeblks *freeblks; { struct freework *freework; struct newblk *newblk; struct worklist *wk; TAILQ_REMOVE(adphead, adp, ad_next); newblk = (struct newblk *)adp; freework = NULL; /* * Find the correct freework structure. 
*/ LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { if (wk->wk_type != D_FREEWORK) continue; freework = WK_FREEWORK(wk); if (freework->fw_blkno == newblk->nb_newblkno) break; } if (freework == NULL) panic("cancel_allocdirect: Freework not found"); /* * If a newblk exists at all we still have the journal entry that * initiated the allocation so we do not need to journal the free. */ cancel_jfreeblk(freeblks, freework->fw_blkno); /* * If the journal hasn't been written the jnewblk must be passed * to the call to ffs_blkfree that reclaims the space. We accomplish * this by linking the journal dependency into the freework to be * freed when freework_freeblock() is called. If the journal has * been written we can simply reclaim the journal space when the * freeblks work is complete. */ freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list, &freeblks->fb_jwork); WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); } /* * Cancel a new block allocation. May be an indirect or direct block. We * remove it from various lists and return any journal record that needs to * be resolved by the caller. * * A special consideration is made for indirects which were never pointed * at on disk and will never be found once this block is released. */ static struct jnewblk * cancel_newblk(newblk, wk, wkhd) struct newblk *newblk; struct worklist *wk; struct workhead *wkhd; { struct jnewblk *jnewblk; CTR1(KTR_SUJ, "cancel_newblk: blkno %jd", newblk->nb_newblkno); newblk->nb_state |= GOINGAWAY; /* * Previously we traversed the completedhd on each indirdep * attached to this newblk to cancel them and gather journal * work. Since we need only the oldest journal segment and * the lowest point on the tree will always have the oldest * journal segment we are free to release the segments * of any subordinates and may leave the indirdep list to * indirdep_complete() when this newblk is freed. */ if (newblk->nb_state & ONDEPLIST) { newblk->nb_state &= ~ONDEPLIST; LIST_REMOVE(newblk, nb_deps); } if (newblk->nb_state & ONWORKLIST) WORKLIST_REMOVE(&newblk->nb_list); /* * If the journal entry hasn't been written we save a pointer to * the dependency that frees it until it is written or the * superseding operation completes. */ jnewblk = newblk->nb_jnewblk; if (jnewblk != NULL && wk != NULL) { newblk->nb_jnewblk = NULL; jnewblk->jn_dep = wk; } if (!LIST_EMPTY(&newblk->nb_jwork)) jwork_move(wkhd, &newblk->nb_jwork); /* * When truncating we must free the newdirblk early to remove * the pagedep from the hash before returning. */ if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) free_newdirblk(WK_NEWDIRBLK(wk)); if (!LIST_EMPTY(&newblk->nb_newdirblk)) panic("cancel_newblk: extra newdirblk"); return (jnewblk); } /* * Schedule the freefrag associated with a newblk to be released once * the pointers are written and the previous block is no longer needed. */ static void newblk_freefrag(newblk) struct newblk *newblk; { struct freefrag *freefrag; if (newblk->nb_freefrag == NULL) return; freefrag = newblk->nb_freefrag; newblk->nb_freefrag = NULL; freefrag->ff_state |= COMPLETE; if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freefrag->ff_list, 0); } /* * Free a newblk. Generate a new freefrag work request if appropriate. * This must be called after the inode pointer and any direct block pointers * are valid or fully removed via truncate or frag extension. 
*/ static void free_newblk(newblk) struct newblk *newblk; { struct indirdep *indirdep; struct worklist *wk; KASSERT(newblk->nb_jnewblk == NULL, ("free_newblk: jnewblk %p still attached", newblk->nb_jnewblk)); KASSERT(newblk->nb_list.wk_type != D_NEWBLK, ("free_newblk: unclaimed newblk")); LOCK_OWNED(VFSTOUFS(newblk->nb_list.wk_mp)); newblk_freefrag(newblk); if (newblk->nb_state & ONDEPLIST) LIST_REMOVE(newblk, nb_deps); if (newblk->nb_state & ONWORKLIST) WORKLIST_REMOVE(&newblk->nb_list); LIST_REMOVE(newblk, nb_hash); if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) free_newdirblk(WK_NEWDIRBLK(wk)); if (!LIST_EMPTY(&newblk->nb_newdirblk)) panic("free_newblk: extra newdirblk"); while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) indirdep_complete(indirdep); handle_jwork(&newblk->nb_jwork); WORKITEM_FREE(newblk, D_NEWBLK); } /* * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep. * This routine must be called with splbio interrupts blocked. */ static void free_newdirblk(newdirblk) struct newdirblk *newdirblk; { struct pagedep *pagedep; struct diradd *dap; struct worklist *wk; LOCK_OWNED(VFSTOUFS(newdirblk->db_list.wk_mp)); WORKLIST_REMOVE(&newdirblk->db_list); /* * If the pagedep is still linked onto the directory buffer * dependency chain, then some of the entries on the * pd_pendinghd list may not be committed to disk yet. In * this case, we will simply clear the NEWBLOCK flag and * let the pd_pendinghd list be processed when the pagedep * is next written. If the pagedep is no longer on the buffer * dependency chain, then all the entries on the pd_pending * list are committed to disk and we can free them here. */ pagedep = newdirblk->db_pagedep; pagedep->pd_state &= ~NEWBLOCK; if ((pagedep->pd_state & ONWORKLIST) == 0) { while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap, NULL); /* * If no dependencies remain, the pagedep will be freed. */ free_pagedep(pagedep); } /* Should only ever be one item in the list. */ while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { WORKLIST_REMOVE(wk); handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); } WORKITEM_FREE(newdirblk, D_NEWDIRBLK); } /* * Prepare an inode to be freed. The actual free operation is not * done until the zero'ed inode has been written to disk. */ void softdep_freefile(pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; { struct inode *ip = VTOI(pvp); struct inodedep *inodedep; struct freefile *freefile; struct freeblks *freeblks; struct ufsmount *ump; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_freefile called on non-softdep filesystem")); /* * This sets up the inode de-allocation dependency. */ freefile = malloc(sizeof(struct freefile), M_FREEFILE, M_SOFTDEP_FLAGS); workitem_alloc(&freefile->fx_list, D_FREEFILE, pvp->v_mount); freefile->fx_mode = mode; freefile->fx_oldinum = ino; freefile->fx_devvp = ump->um_devvp; LIST_INIT(&freefile->fx_jwork); UFS_LOCK(ump); ump->um_fs->fs_pendinginodes += 1; UFS_UNLOCK(ump); /* * If the inodedep does not exist, then the zero'ed inode has * been written to disk. If the allocated inode has never been * written to disk, then the on-disk inode is zero'ed. In either * case we can free the file immediately. If the journal was * canceled before being written the inode will never make it to * disk and we must send the canceled journal entries to * ffs_freefile() to be cleared in conjunction with the bitmap. * Any blocks waiting on the inode to write can be safely freed * here as it will never be written.
*/ ACQUIRE_LOCK(ump); inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); if (inodedep) { /* * Clear out freeblks that no longer need to reference * this inode. */ while ((freeblks = TAILQ_FIRST(&inodedep->id_freeblklst)) != NULL) { TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next); freeblks->fb_state &= ~ONDEPLIST; } /* * Remove this inode from the unlinked list. */ if (inodedep->id_state & UNLINKED) { /* * Save the journal work to be freed with the bitmap * before we clear UNLINKED. Otherwise it can be lost * if the inode block is written. */ handle_bufwait(inodedep, &freefile->fx_jwork); clear_unlinked_inodedep(inodedep); /* * Re-acquire inodedep as we've dropped the * per-filesystem lock in clear_unlinked_inodedep(). */ inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); } } if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(ump); handle_workitem_freefile(freefile); return; } if ((inodedep->id_state & DEPCOMPLETE) == 0) inodedep->id_state |= GOINGAWAY; WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(ump); if (ip->i_number == ino) ip->i_flag |= IN_MODIFIED; } /* * Check to see if an inode has never been written to disk. If * so free the inodedep and return success, otherwise return failure. * This routine must be called with splbio interrupts blocked. * * If we still have a bitmap dependency, then the inode has never * been written to disk. Drop the dependency as it is no longer * necessary since the inode is being deallocated. We set the * ALLCOMPLETE flags since the bitmap now properly shows that the * inode is not allocated. Even if the inode is actively being * written, it has been rolled back to its zero'ed state, so we * are ensured that a zero inode is what is on the disk. For short * lived files, this change will usually result in removing all the * dependencies from the inode so that it can be freed immediately. */ static int check_inode_unwritten(inodedep) struct inodedep *inodedep; { LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp)); if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 || !LIST_EMPTY(&inodedep->id_dirremhd) || !LIST_EMPTY(&inodedep->id_pendinghd) || !LIST_EMPTY(&inodedep->id_bufwait) || !LIST_EMPTY(&inodedep->id_inowait) || !TAILQ_EMPTY(&inodedep->id_inoreflst) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || !TAILQ_EMPTY(&inodedep->id_freeblklst) || inodedep->id_mkdiradd != NULL || inodedep->id_nlinkdelta != 0) return (0); /* * Another process might be in initiate_write_inodeblock_ufs[12] * trying to allocate memory without holding "Softdep Lock". 
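 * Hence, if IOSTARTED is set but id_savedino1 has not yet been
 * attached, that write is still in flight and the inodedep must be
 * treated as busy, as the check below does.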
*/ if ((inodedep->id_state & IOSTARTED) != 0 && inodedep->id_savedino1 == NULL) return (0); if (inodedep->id_state & ONDEPLIST) LIST_REMOVE(inodedep, id_deps); inodedep->id_state &= ~ONDEPLIST; inodedep->id_state |= ALLCOMPLETE; inodedep->id_bmsafemap = NULL; if (inodedep->id_state & ONWORKLIST) WORKLIST_REMOVE(&inodedep->id_list); if (inodedep->id_savedino1 != NULL) { free(inodedep->id_savedino1, M_SAVEDINO); inodedep->id_savedino1 = NULL; } if (free_inodedep(inodedep) == 0) panic("check_inode_unwritten: busy inode"); return (1); } static int check_inodedep_free(inodedep) struct inodedep *inodedep; { LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp)); if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || !LIST_EMPTY(&inodedep->id_dirremhd) || !LIST_EMPTY(&inodedep->id_pendinghd) || !LIST_EMPTY(&inodedep->id_bufwait) || !LIST_EMPTY(&inodedep->id_inowait) || !TAILQ_EMPTY(&inodedep->id_inoreflst) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || !TAILQ_EMPTY(&inodedep->id_freeblklst) || inodedep->id_mkdiradd != NULL || inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) return (0); return (1); } /* * Try to free an inodedep structure. Return 1 if it could be freed. */ static int free_inodedep(inodedep) struct inodedep *inodedep; { LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp)); if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 || !check_inodedep_free(inodedep)) return (0); if (inodedep->id_state & ONDEPLIST) LIST_REMOVE(inodedep, id_deps); LIST_REMOVE(inodedep, id_hash); WORKITEM_FREE(inodedep, D_INODEDEP); return (1); } /* * Free the block referenced by a freework structure. The parent freeblks * structure is released and completed when the final cg bitmap reaches * the disk. This routine may be freeing a jnewblk which never made it to * disk in which case we do not have to wait as the operation is undone * in memory immediately. */ static void freework_freeblock(freework) struct freework *freework; { struct freeblks *freeblks; struct jnewblk *jnewblk; struct ufsmount *ump; struct workhead wkhd; struct fs *fs; int bsize; int needj; ump = VFSTOUFS(freework->fw_list.wk_mp); LOCK_OWNED(ump); /* * Handle partial truncate separately. */ if (freework->fw_indir) { complete_trunc_indir(freework); return; } freeblks = freework->fw_freeblks; fs = ump->um_fs; needj = MOUNTEDSUJ(freeblks->fb_list.wk_mp) != 0; bsize = lfragtosize(fs, freework->fw_frags); LIST_INIT(&wkhd); /* * DEPCOMPLETE is cleared in indirblk_insert() if the block lives * on the indirblk hashtable and prevents premature freeing. */ freework->fw_state |= DEPCOMPLETE; /* * SUJ needs to wait for the segment referencing freed indirect * blocks to expire so that we know the checker will not confuse * a re-allocated indirect block with its old contents. */ if (needj && freework->fw_lbn <= -NDADDR) indirblk_insert(freework); /* * If we are canceling an existing jnewblk pass it to the free * routine, otherwise pass the freeblk which will ultimately * release the freeblks. If we're not journaling, we can just * free the freeblks immediately. 
*/ jnewblk = freework->fw_jnewblk; if (jnewblk != NULL) { cancel_jnewblk(jnewblk, &wkhd); needj = 0; } else if (needj) { freework->fw_state |= DELAYEDFREE; freeblks->fb_cgwait++; WORKLIST_INSERT(&wkhd, &freework->fw_list); } FREE_LOCK(ump); freeblks_free(ump, freeblks, btodb(bsize)); CTR4(KTR_SUJ, "freework_freeblock: ino %d blkno %jd lbn %jd size %ld", freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize); ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, freeblks->fb_inum, freeblks->fb_vtype, &wkhd); ACQUIRE_LOCK(ump); /* * The jnewblk will be discarded and the bits in the map never * made it to disk. We can immediately free the freeblk. */ if (needj == 0) handle_written_freework(freework); } /* * We enqueue freework items that need processing back on the freeblks and * add the freeblks to the worklist. This makes it easier to find all work * required to flush a truncation in process_truncates(). */ static void freework_enqueue(freework) struct freework *freework; { struct freeblks *freeblks; freeblks = freework->fw_freeblks; if ((freework->fw_state & INPROGRESS) == 0) WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); if ((freeblks->fb_state & (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) add_to_worklist(&freeblks->fb_list, WK_NODELAY); } /* * Start, continue, or finish the process of freeing an indirect block tree. * The free operation may be paused at any point with fw_off containing the * offset to restart from. This enables us to implement some flow control * for large truncates which may fan out and generate a huge number of * dependencies. */ static void handle_workitem_indirblk(freework) struct freework *freework; { struct freeblks *freeblks; struct ufsmount *ump; struct fs *fs; freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; if (freework->fw_state & DEPCOMPLETE) { handle_written_freework(freework); return; } if (freework->fw_off == NINDIR(fs)) { freework_freeblock(freework); return; } freework->fw_state |= INPROGRESS; FREE_LOCK(ump); indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), freework->fw_lbn); ACQUIRE_LOCK(ump); } /* * Called when a freework structure attached to a cg buf is written. The * ref on either the parent or the freeblks structure is released and * the freeblks is added back to the worklist if there is more work to do. */ static void handle_written_freework(freework) struct freework *freework; { struct freeblks *freeblks; struct freework *parent; freeblks = freework->fw_freeblks; parent = freework->fw_parent; if (freework->fw_state & DELAYEDFREE) freeblks->fb_cgwait--; freework->fw_state |= COMPLETE; if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) WORKITEM_FREE(freework, D_FREEWORK); if (parent) { if (--parent->fw_ref == 0) freework_enqueue(parent); return; } if (--freeblks->fb_ref != 0) return; if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST | INPROGRESS)) == ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) add_to_worklist(&freeblks->fb_list, WK_NODELAY); } /* * This workitem routine performs the block de-allocation. * The workitem is added to the pending list after the updated * inode block has been written to disk. As mentioned above, * checks regarding the number of blocks de-allocated (compared * to the number of blocks allocated for the file) are also * performed in this function. 
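 * (The freework items drained below ultimately release their
 * references through handle_written_freework() above.  The
 * parent/child reference cascade used there boils down to roughly
 * the following self-contained pattern, with hypothetical names:
 *
 *	#include <stdlib.h>
 *
 *	struct work { struct work *parent; int ref; };
 *
 *	static void
 *	work_done(struct work *w)	// drop one reference
 *	{
 *		struct work *p;
 *
 *		while (w != NULL && --w->ref == 0) {
 *			p = w->parent;
 *			free(w);	// retire this level
 *			w = p;		// parent may now complete too
 *		}
 *	})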
*/ static int handle_workitem_freeblocks(freeblks, flags) struct freeblks *freeblks; int flags; { struct freework *freework; struct newblk *newblk; struct allocindir *aip; struct ufsmount *ump; struct worklist *wk; KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd), ("handle_workitem_freeblocks: Journal entries not written.")); ump = VFSTOUFS(freeblks->fb_list.wk_mp); ACQUIRE_LOCK(ump); while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_DIRREM: wk->wk_state |= COMPLETE; add_to_worklist(wk, 0); continue; case D_ALLOCDIRECT: free_newblk(WK_NEWBLK(wk)); continue; case D_ALLOCINDIR: aip = WK_ALLOCINDIR(wk); freework = NULL; if (aip->ai_state & DELAYEDFREE) { FREE_LOCK(ump); freework = newfreework(ump, freeblks, NULL, aip->ai_lbn, aip->ai_newblkno, ump->um_fs->fs_frag, 0, 0); ACQUIRE_LOCK(ump); } newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk) { freework->fw_jnewblk = newblk->nb_jnewblk; newblk->nb_jnewblk->jn_dep = &freework->fw_list; newblk->nb_jnewblk = NULL; } free_newblk(newblk); continue; case D_FREEWORK: freework = WK_FREEWORK(wk); if (freework->fw_lbn <= -NDADDR) handle_workitem_indirblk(freework); else freework_freeblock(freework); continue; default: panic("handle_workitem_freeblocks: Unknown type %s", TYPENAME(wk->wk_type)); } } if (freeblks->fb_ref != 0) { freeblks->fb_state &= ~INPROGRESS; wake_worklist(&freeblks->fb_list); freeblks = NULL; } FREE_LOCK(ump); if (freeblks) return handle_complete_freeblocks(freeblks, flags); return (0); } /* * Handle completion of block free via truncate. This allows fs_pending * to track the actual free block count more closely than if we only updated * it at the end. We must be careful to handle cases where the block count * on free was incorrect. */ static void freeblks_free(ump, freeblks, blocks) struct ufsmount *ump; struct freeblks *freeblks; int blocks; { struct fs *fs; ufs2_daddr_t remain; UFS_LOCK(ump); remain = -freeblks->fb_chkcnt; freeblks->fb_chkcnt += blocks; if (remain > 0) { if (remain < blocks) blocks = remain; fs = ump->um_fs; fs->fs_pendingblocks -= blocks; } UFS_UNLOCK(ump); } /* * Once all of the freework workitems are complete we can retire the * freeblocks dependency and any journal work awaiting completion. This * can not be called until all other dependencies are stable on disk. */ static int handle_complete_freeblocks(freeblks, flags) struct freeblks *freeblks; int flags; { struct inodedep *inodedep; struct inode *ip; struct vnode *vp; struct fs *fs; struct ufsmount *ump; ufs2_daddr_t spare; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; flags = LK_EXCLUSIVE | flags; spare = freeblks->fb_chkcnt; /* * If we did not release the expected number of blocks we may have * to adjust the inode block count here. Only do so if it wasn't * a truncation to zero and the modrev still matches. */ if (spare && freeblks->fb_len != 0) { if (ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_inum, flags, &vp, FFSV_FORCEINSMQ) != 0) return (EBUSY); ip = VTOI(vp); if (DIP(ip, i_modrev) == freeblks->fb_modrev) { DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - spare); ip->i_flag |= IN_CHANGE; /* * We must wait so this happens before the * journal is reclaimed. */ ffs_update(vp, 1); } vput(vp); } if (spare < 0) { UFS_LOCK(ump); fs->fs_pendingblocks += spare; UFS_UNLOCK(ump); } #ifdef QUOTA /* Handle spare. 
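 * Any remaining discrepancy (spare) is applied to the quota with the
 * opposite sign, mirroring the i_blocks adjustment made above.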
*/ if (spare) quotaadj(freeblks->fb_quota, ump, -spare); quotarele(freeblks->fb_quota); #endif ACQUIRE_LOCK(ump); if (freeblks->fb_state & ONDEPLIST) { inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum, 0, &inodedep); TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next); freeblks->fb_state &= ~ONDEPLIST; if (TAILQ_EMPTY(&inodedep->id_freeblklst)) free_inodedep(inodedep); } /* * All of the freeblock deps must be complete prior to this call * so it's now safe to complete earlier outstanding journal entries. */ handle_jwork(&freeblks->fb_jwork); WORKITEM_FREE(freeblks, D_FREEBLKS); FREE_LOCK(ump); return (0); } /* * Release blocks associated with the freeblks and stored in the indirect * block dbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * This handles partial and complete truncation of blocks. Partial is noted * with goingaway == 0. In this case the freework is completed after the * zero'd indirects are written to disk. For full truncation the freework * is completed after the block is freed. */ static void indir_trunc(freework, dbn, lbn) struct freework *freework; ufs2_daddr_t dbn; ufs_lbn_t lbn; { struct freework *nfreework; struct workhead wkhd; struct freeblks *freeblks; struct buf *bp; struct fs *fs; struct indirdep *indirdep; struct ufsmount *ump; ufs1_daddr_t *bap1; ufs2_daddr_t nb, nnb, *bap2; ufs_lbn_t lbnadd, nlbn; int i, nblocks, ufs1fmt; int freedblocks; int goingaway; int freedeps; int needj; int level; int cnt; freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; /* * Get buffer of block pointers to be freed. There are three cases: * * 1) Partial truncate caches the indirdep pointer in the freework * which provides us a back copy to the save bp which holds the * pointers we want to clear. When this completes the zero * pointers are written to the real copy. * 2) The indirect is being completely truncated, cancel_indirdep() * eliminated the real copy and placed the indirdep on the saved * copy. The indirdep and buf are discarded when this completes. * 3) The indirect was not in memory, we read a copy off of the disk * using the devvp and drop and invalidate the buffer when we're * done. */ goingaway = 1; indirdep = NULL; if (freework->fw_indir != NULL) { goingaway = 0; indirdep = freework->fw_indir; bp = indirdep->ir_savebp; if (bp == NULL || bp->b_blkno != dbn) panic("indir_trunc: Bad saved buf %p blkno %jd", bp, (intmax_t)dbn); } else if ((bp = incore(&freeblks->fb_devvp->v_bufobj, dbn)) != NULL) { /* * The lock prevents the buf dep list from changing and * indirects on devvp should only ever have one dependency. */ indirdep = WK_INDIRDEP(LIST_FIRST(&bp->b_dep)); if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: Bad indirdep %p from buf %p", indirdep, bp); } else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp) != 0) { brelse(bp); return; } ACQUIRE_LOCK(ump); /* Protects against a race with complete_trunc_indir(). */ freework->fw_state &= ~INPROGRESS; /* * If we have an indirdep we need to enforce the truncation order * and discard it when it is complete. */ if (indirdep) { if (freework != TAILQ_FIRST(&indirdep->ir_trunc) && !TAILQ_EMPTY(&indirdep->ir_trunc)) { /* * Add the complete truncate to the list on the * indirdep to enforce in-order processing. 
*/ if (freework->fw_indir == NULL) TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next); FREE_LOCK(ump); return; } /* * If we're goingaway, free the indirdep. Otherwise it will * linger until the write completes. */ if (goingaway) free_indirdep(indirdep); } FREE_LOCK(ump); /* Initialize pointers depending on block size. */ if (ump->um_fstype == UFS1) { bap1 = (ufs1_daddr_t *)bp->b_data; nb = bap1[freework->fw_off]; ufs1fmt = 1; bap2 = NULL; } else { bap2 = (ufs2_daddr_t *)bp->b_data; nb = bap2[freework->fw_off]; ufs1fmt = 0; bap1 = NULL; } level = lbn_level(lbn); needj = MOUNTEDSUJ(UFSTOVFS(ump)) != 0; lbnadd = lbn_offset(fs, level); nblocks = btodb(fs->fs_bsize); nfreework = freework; freedeps = 0; cnt = 0; /* * Reclaim blocks. Traverses into nested indirect levels and * arranges for the current level to be freed when subordinates * are free when journaling. */ for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { if (i != NINDIR(fs) - 1) { if (ufs1fmt) nnb = bap1[i+1]; else nnb = bap2[i+1]; } else nnb = 0; if (nb == 0) continue; cnt++; if (level != 0) { nlbn = (lbn + 1) - (i * lbnadd); if (needj != 0) { nfreework = newfreework(ump, freeblks, freework, nlbn, nb, fs->fs_frag, 0, 0); freedeps++; } indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); } else { struct freedep *freedep; /* * Attempt to aggregate freedep dependencies for * all blocks being released to the same CG. */ LIST_INIT(&wkhd); if (needj != 0 && (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) { freedep = newfreedep(freework); WORKLIST_INSERT_UNLOCKED(&wkhd, &freedep->fd_list); freedeps++; } CTR3(KTR_SUJ, "indir_trunc: ino %d blkno %jd size %ld", freeblks->fb_inum, nb, fs->fs_bsize); ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize, freeblks->fb_inum, freeblks->fb_vtype, &wkhd); } } if (goingaway) { bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } freedblocks = 0; if (level == 0) freedblocks = (nblocks * cnt); if (needj == 0) freedblocks += nblocks; freeblks_free(ump, freeblks, freedblocks); /* * If we are journaling set up the ref counts and offset so this * indirect can be completed when its children are free. */ if (needj) { ACQUIRE_LOCK(ump); freework->fw_off = i; freework->fw_ref += freedeps; freework->fw_ref -= NINDIR(fs) + 1; if (level == 0) freeblks->fb_cgwait += freedeps; if (freework->fw_ref == 0) freework_freeblock(freework); FREE_LOCK(ump); return; } /* * If we're not journaling we can free the indirect now. */ dbn = dbtofsb(fs, dbn); CTR3(KTR_SUJ, "indir_trunc 2: ino %d blkno %jd size %ld", freeblks->fb_inum, dbn, fs->fs_bsize); ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, freeblks->fb_inum, freeblks->fb_vtype, NULL); /* Non SUJ softdep does single-threaded truncations. */ if (freework->fw_blkno == dbn) { freework->fw_state |= ALLCOMPLETE; ACQUIRE_LOCK(ump); handle_written_freework(freework); FREE_LOCK(ump); } return; } /* * Cancel an allocindir when it is removed via truncation. When bp is not * NULL the indirect never appeared on disk and is scheduled to be freed * independently of the indir so we can more easily track journal work. */ static void cancel_allocindir(aip, bp, freeblks, trunc) struct allocindir *aip; struct buf *bp; struct freeblks *freeblks; int trunc; { struct indirdep *indirdep; struct freefrag *freefrag; struct newblk *newblk; newblk = (struct newblk *)aip; LIST_REMOVE(aip, ai_next); /* * We must eliminate the pointer in bp if it must be freed on its * own due to partial truncate or pending journal work. 
*/ if (bp && (trunc || newblk->nb_jnewblk)) { /* * Clear the pointer and mark the aip to be freed * directly if it never existed on disk. */ aip->ai_state |= DELAYEDFREE; indirdep = aip->ai_indirdep; if (indirdep->ir_state & UFS1FMT) ((ufs1_daddr_t *)bp->b_data)[aip->ai_offset] = 0; else ((ufs2_daddr_t *)bp->b_data)[aip->ai_offset] = 0; } /* * When truncating the previous pointer will be freed via * savedbp. Eliminate the freefrag which would dup free. */ if (trunc && (freefrag = newblk->nb_freefrag) != NULL) { newblk->nb_freefrag = NULL; if (freefrag->ff_jdep) cancel_jfreefrag( WK_JFREEFRAG(freefrag->ff_jdep)); jwork_move(&freeblks->fb_jwork, &freefrag->ff_jwork); WORKITEM_FREE(freefrag, D_FREEFRAG); } /* * If the journal hasn't been written the jnewblk must be passed * to the call to ffs_blkfree that reclaims the space. We accomplish * this by leaving the journal dependency on the newblk to be freed * when a freework is created in handle_workitem_freeblocks(). */ cancel_newblk(newblk, NULL, &freeblks->fb_jwork); WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); } /* * Create the mkdir dependencies for . and .. in a new directory. Link them * in to a newdirblk so any subsequent additions are tracked properly. The * caller is responsible for adding the mkdir1 dependency to the journal * and updating id_mkdiradd. This function returns with the per-filesystem * lock held. */ static struct mkdir * setup_newdir(dap, newinum, dinum, newdirbp, mkdirp) struct diradd *dap; ino_t newinum; ino_t dinum; struct buf *newdirbp; struct mkdir **mkdirp; { struct newblk *newblk; struct pagedep *pagedep; struct inodedep *inodedep; struct newdirblk *newdirblk; struct mkdir *mkdir1, *mkdir2; struct worklist *wk; struct jaddref *jaddref; struct ufsmount *ump; struct mount *mp; mp = dap->da_list.wk_mp; ump = VFSTOUFS(mp); newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, M_SOFTDEP_FLAGS); workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); LIST_INIT(&newdirblk->db_mkdir); mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); mkdir1->md_state = ATTACHED | MKDIR_BODY; mkdir1->md_diradd = dap; mkdir1->md_jaddref = NULL; mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); mkdir2->md_state = ATTACHED | MKDIR_PARENT; mkdir2->md_diradd = dap; mkdir2->md_jaddref = NULL; if (MOUNTEDSUJ(mp) == 0) { mkdir1->md_state |= DEPCOMPLETE; mkdir2->md_state |= DEPCOMPLETE; } /* * Dependency on "." and ".." being written to disk. */ mkdir1->md_buf = newdirbp; ACQUIRE_LOCK(VFSTOUFS(mp)); LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir1, md_mkdirs); /* * We must link the pagedep, allocdirect, and newdirblk for * the initial file page so the pointer to the new directory * is not written until the directory contents are live and * any subsequent additions are not marked live until the * block is reachable via the inode. 
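 *
 * A rough picture of the linkage established below (arrows read
 * "is tracked by" / "must complete first"):
 *
 *	first dir block (newblk) -> newdirblk -> pagedep
 *	newdirblk -> mkdir1 (MKDIR_BODY, the "." write)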
*/ if (pagedep_lookup(mp, newdirbp, newinum, 0, 0, &pagedep) == 0) panic("setup_newdir: lost pagedep"); LIST_FOREACH(wk, &newdirbp->b_dep, wk_list) if (wk->wk_type == D_ALLOCDIRECT) break; if (wk == NULL) panic("setup_newdir: lost allocdirect"); if (pagedep->pd_state & NEWBLOCK) panic("setup_newdir: NEWBLOCK already set"); newblk = WK_NEWBLK(wk); pagedep->pd_state |= NEWBLOCK; pagedep->pd_newdirblk = newdirblk; newdirblk->db_pagedep = pagedep; WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); WORKLIST_INSERT(&newdirblk->db_mkdir, &mkdir1->md_list); /* * Look up the inodedep for the parent directory so that we * can link mkdir2 into the pending dotdot jaddref or * the inode write if there is none. If the inode is * ALLCOMPLETE and no jaddref is present all dependencies have * been satisfied and mkdir2 can be freed. */ inodedep_lookup(mp, dinum, 0, &inodedep); if (MOUNTEDSUJ(mp)) { if (inodedep == NULL) panic("setup_newdir: Lost parent."); jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == newinum && (jaddref->ja_state & MKDIR_PARENT), ("setup_newdir: bad dotdot jaddref %p", jaddref)); LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs); mkdir2->md_jaddref = jaddref; jaddref->ja_mkdir = mkdir2; } else if (inodedep == NULL || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; WORKITEM_FREE(mkdir2, D_MKDIR); mkdir2 = NULL; } else { LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs); WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list); } *mkdirp = mkdir2; return (mkdir1); } /* * Directory entry addition dependencies. * * When adding a new directory entry, the inode (with its incremented link * count) must be written to disk before the directory entry's pointer to it. * Also, if the inode is newly allocated, the corresponding freemap must be * updated (on disk) before the directory entry's pointer. These requirements * are met via undo/redo on the directory entry's pointer, which consists * simply of the inode number. * * As directory entries are added and deleted, the free space within a * directory block can become fragmented. The ufs filesystem will compact * a fragmented directory block to make space for a new entry. When this * occurs, the offsets of previously added entries change. Any "diradd" * dependency structures corresponding to these entries must be updated with * the new offsets. */ /* * This routine is called after the in-memory inode's link * count has been incremented, but before the directory entry's * pointer to the inode has been set. 
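 *
 * A hedged sketch of the expected caller ordering (simplified; in this
 * tree the caller is the directory-entry code, e.g. ufs_direnter()):
 *
 *	ip->i_effnlink++;			   new reference taken
 *	softdep_setup_directory_add(...);	   dependency recorded
 *	... directory block written later ...	   pointer commits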
*/ int softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for directory */ off_t diroffset; /* offset of new entry in directory */ ino_t newinum; /* inode referenced by new directory entry */ struct buf *newdirbp; /* non-NULL => contents of new mkdir */ int isnewblk; /* entry is in a newly allocated block */ { int offset; /* offset of new entry within directory block */ ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; struct newblk *newblk; struct pagedep *pagedep; struct inodedep *inodedep; struct newdirblk *newdirblk; struct mkdir *mkdir1, *mkdir2; struct jaddref *jaddref; struct ufsmount *ump; struct mount *mp; int isindir; mp = ITOVFS(dp); ump = VFSTOUFS(mp); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_directory_add called on non-softdep filesystem")); /* * Whiteouts have no dependencies. */ if (newinum == WINO) { if (newdirbp != NULL) bdwrite(newdirbp); return (0); } jaddref = NULL; mkdir1 = mkdir2 = NULL; fs = ump->um_fs; lbn = lblkno(fs, diroffset); offset = blkoff(fs, diroffset); dap = malloc(sizeof(struct diradd), M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&dap->da_list, D_DIRADD, mp); dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; LIST_INIT(&dap->da_jwork); isindir = bp->b_lblkno >= NDADDR; newdirblk = NULL; if (isnewblk && (isindir ? blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) { newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, M_SOFTDEP_FLAGS); workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); LIST_INIT(&newdirblk->db_mkdir); } /* * If we're creating a new directory setup the dependencies and set * the dap state to wait for them. Otherwise it's COMPLETE and * we can move on. */ if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(ump); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp, &mkdir2); } /* * Link into parent directory pagedep to await its being written. */ pagedep_lookup(mp, bp, dp->i_number, lbn, DEPALLOC, &pagedep); #ifdef DEBUG if (diradd_lookup(pagedep, offset) != NULL) panic("softdep_setup_directory_add: %p already at off %d\n", diradd_lookup(pagedep, offset), offset); #endif dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); /* * If we're journaling, link the diradd into the jaddref so it * may be completed after the journal entry is written. Otherwise, * link the diradd into its inodedep. If the inode is not yet * written place it on the bufwait list, otherwise do the post-inode * write processing to put it on the id_pendinghd list. */ if (MOUNTEDSUJ(mp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, ("softdep_setup_directory_add: bad jaddref %p", jaddref)); jaddref->ja_diroff = diroffset; jaddref->ja_diradd = dap; add_to_journal(&jaddref->ja_list); } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); /* * Add the journal entries for . and .. links now that the primary * link is written. 
*/ if (mkdir1 != NULL && MOUNTEDSUJ(mp)) { jaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, inoreflst, if_deps); KASSERT(jaddref != NULL && jaddref->ja_ino == jaddref->ja_parent && (jaddref->ja_state & MKDIR_BODY), ("softdep_setup_directory_add: bad dot jaddref %p", jaddref)); mkdir1->md_jaddref = jaddref; jaddref->ja_mkdir = mkdir1; /* * It is important that the dotdot journal entry * is added prior to the dot entry since dot writes * both the dot and dotdot links. These both must * be added after the primary link for the journal * to remain consistent. */ add_to_journal(&mkdir2->md_jaddref->ja_list); add_to_journal(&jaddref->ja_list); } /* * If we are adding a new directory remember this diradd so that if * we rename it we can keep the dot and dotdot dependencies. If * we are adding a new name for an inode that has a mkdiradd we * must be in rename and we have to move the dot and dotdot * dependencies to this new name. The old name is being orphaned * soon. */ if (mkdir1 != NULL) { if (inodedep->id_mkdiradd != NULL) panic("softdep_setup_directory_add: Existing mkdir"); inodedep->id_mkdiradd = dap; } else if (inodedep->id_mkdiradd) merge_diradd(inodedep, dap); if (newdirblk != NULL) { /* * There is nothing to do if we are already tracking * this block. */ if ((pagedep->pd_state & NEWBLOCK) != 0) { WORKITEM_FREE(newdirblk, D_NEWDIRBLK); FREE_LOCK(ump); return (0); } if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk) == 0) panic("softdep_setup_directory_add: lost entry"); WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); pagedep->pd_state |= NEWBLOCK; pagedep->pd_newdirblk = newdirblk; newdirblk->db_pagedep = pagedep; FREE_LOCK(ump); /* * If we extended into an indirect signal direnter to sync. */ if (isindir) return (1); return (0); } FREE_LOCK(ump); return (0); } /* * This procedure is called to change the offset of a directory * entry when compacting a directory block which must be owned * exclusively by the caller. Note that the actual entry movement * must be done in this procedure to ensure that no I/O completions * occur while the move is in progress. */ void softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) struct buf *bp; /* Buffer holding directory block. */ struct inode *dp; /* inode for directory */ caddr_t base; /* address of dp->i_offset */ caddr_t oldloc; /* address of old directory location */ caddr_t newloc; /* address of new directory location */ int entrysize; /* size of directory entry */ { int offset, oldoffset, newoffset; struct pagedep *pagedep; struct jmvref *jmvref; struct diradd *dap; struct direct *de; struct mount *mp; struct ufsmount *ump; ufs_lbn_t lbn; int flags; mp = ITOVFS(dp); ump = VFSTOUFS(mp); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_change_directoryentry_offset called on " "non-softdep filesystem")); de = (struct direct *)oldloc; jmvref = NULL; flags = 0; /* * Moves are always journaled as it would be too complex to * determine if any affected adds or removes are present in the * journal. 
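 *
 * Conceptually, the jmvref allocated below records both locations of
 * the entry:
 *
 *	jmvref = { d_ino,
 *		   old = i_offset + (oldloc - base),
 *		   new = i_offset + (newloc - base) };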
*/ if (MOUNTEDSUJ(mp)) { flags = DEPALLOC; jmvref = newjmvref(dp, de->d_ino, dp->i_offset + (oldloc - base), dp->i_offset + (newloc - base)); } lbn = lblkno(ump->um_fs, dp->i_offset); offset = blkoff(ump->um_fs, dp->i_offset); oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); ACQUIRE_LOCK(ump); if (pagedep_lookup(mp, bp, dp->i_number, lbn, flags, &pagedep) == 0) goto done; dap = diradd_lookup(pagedep, oldoffset); if (dap) { dap->da_offset = newoffset; newoffset = DIRADDHASH(newoffset); oldoffset = DIRADDHASH(oldoffset); if ((dap->da_state & ALLCOMPLETE) != ALLCOMPLETE && newoffset != oldoffset) { LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_diraddhd[newoffset], dap, da_pdlist); } } done: if (jmvref) { jmvref->jm_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_jmvrefhd, jmvref, jm_deps); add_to_journal(&jmvref->jm_list); } bcopy(oldloc, newloc, entrysize); FREE_LOCK(ump); } /* * Move the mkdir dependencies and journal work from one diradd to another * when renaming a directory. The new name must depend on the mkdir deps * completing as the old name did. Directories can only have one valid link * at a time so one must be canonical. */ static void merge_diradd(inodedep, newdap) struct inodedep *inodedep; struct diradd *newdap; { struct diradd *olddap; struct mkdir *mkdir, *nextmd; struct ufsmount *ump; short state; olddap = inodedep->id_mkdiradd; inodedep->id_mkdiradd = newdap; if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { newdap->da_state &= ~DEPCOMPLETE; ump = VFSTOUFS(inodedep->id_list.wk_mp); for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if (mkdir->md_diradd != olddap) continue; mkdir->md_diradd = newdap; state = mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY); newdap->da_state |= state; olddap->da_state &= ~state; if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) break; } if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("merge_diradd: unfound ref"); } /* * Any mkdir related journal items are not safe to be freed until * the new name is stable. */ jwork_move(&newdap->da_jwork, &olddap->da_jwork); olddap->da_state |= DEPCOMPLETE; complete_diradd(olddap); } /* * Move the diradd to the pending list when all diradd dependencies are * complete. */ static void complete_diradd(dap) struct diradd *dap; { struct pagedep *pagedep; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } } /* * Cancel a diradd when a dirrem overlaps with it. We must cancel the journal * add entries and conditionally journal the remove. */ static void cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref) struct diradd *dap; struct dirrem *dirrem; struct jremref *jremref; struct jremref *dotremref; struct jremref *dotdotremref; { struct inodedep *inodedep; struct jaddref *jaddref; struct inoref *inoref; struct ufsmount *ump; struct mkdir *mkdir; /* * If no remove references were allocated we're on a non-journaled * filesystem and can skip the cancel step. */ if (jremref == NULL) { free_diradd(dap, NULL); return; } /* * Cancel the primary name and free it if it does not require * journaling. */ if (inodedep_lookup(dap->da_list.wk_mp, dap->da_newinum, 0, &inodedep) != 0) { /* Abort the addref that references this diradd.
*/ TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if (inoref->if_list.wk_type != D_JADDREF) continue; jaddref = (struct jaddref *)inoref; if (jaddref->ja_diradd != dap) continue; if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork) == 0) { free_jremref(jremref); jremref = NULL; } break; } } /* * Cancel subordinate names and free them if they do not require * journaling. */ if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { ump = VFSTOUFS(dap->da_list.wk_mp); LIST_FOREACH(mkdir, &ump->softdep_mkdirlisthd, md_mkdirs) { if (mkdir->md_diradd != dap) continue; if ((jaddref = mkdir->md_jaddref) == NULL) continue; mkdir->md_jaddref = NULL; if (mkdir->md_state & MKDIR_PARENT) { if (cancel_jaddref(jaddref, NULL, &dirrem->dm_jwork) == 0) { free_jremref(dotdotremref); dotdotremref = NULL; } } else { if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork) == 0) { free_jremref(dotremref); dotremref = NULL; } } } } if (jremref) journal_jremref(dirrem, jremref, inodedep); if (dotremref) journal_jremref(dirrem, dotremref, inodedep); if (dotdotremref) journal_jremref(dirrem, dotdotremref, NULL); jwork_move(&dirrem->dm_jwork, &dap->da_jwork); free_diradd(dap, &dirrem->dm_jwork); } /* * Free a diradd dependency structure. This routine must be called * with splbio interrupts blocked. */ static void free_diradd(dap, wkhd) struct diradd *dap; struct workhead *wkhd; { struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; struct mkdir *mkdir, *nextmd; struct ufsmount *ump; ump = VFSTOUFS(dap->da_list.wk_mp); LOCK_OWNED(ump); LIST_REMOVE(dap, da_pdlist); if (dap->da_state & ONWORKLIST) WORKLIST_REMOVE(&dap->da_list); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; } else { dirrem = dap->da_previous; pagedep = dirrem->dm_pagedep; dirrem->dm_dirinum = pagedep->pd_ino; dirrem->dm_state |= COMPLETE; if (LIST_EMPTY(&dirrem->dm_jremrefhd)) add_to_worklist(&dirrem->dm_list, 0); } if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum, 0, &inodedep) != 0) if (inodedep->id_mkdiradd == dap) inodedep->id_mkdiradd = NULL; if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if (mkdir->md_diradd != dap) continue; dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); LIST_REMOVE(mkdir, md_mkdirs); if (mkdir->md_state & ONWORKLIST) WORKLIST_REMOVE(&mkdir->md_list); if (mkdir->md_jaddref != NULL) panic("free_diradd: Unexpected jaddref"); WORKITEM_FREE(mkdir, D_MKDIR); if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) break; } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } if (inodedep) free_inodedep(inodedep); /* * Free any journal segments waiting for the directory write. */ handle_jwork(&dap->da_jwork); WORKITEM_FREE(dap, D_DIRADD); } /* * Directory entry removal dependencies. * * When removing a directory entry, the entry's inode pointer must be * zero'ed on disk before the corresponding inode's link count is decremented * (possibly freeing the inode for re-use). This dependency is handled by * updating the directory entry but delaying the inode count reduction until * after the directory block has been written to disk. After this point, the * inode count can be decremented whenever it is convenient. */ /* * This routine should be called immediately after removing * a directory entry. 
The inode's link count should not be * decremented by the calling procedure -- the soft updates * code will do this task when it is safe. */ void softdep_setup_remove(bp, dp, ip, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ int isrmdir; /* indicates if doing RMDIR */ { struct dirrem *dirrem, *prevdirrem; struct inodedep *inodedep; struct ufsmount *ump; int direct; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_setup_remove called on non-softdep filesystem")); /* * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want * newdirrem() to set up the full directory remove which requires * isrmdir > 1. */ dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); /* * Add the dirrem to the inodedep's pending remove list for quick * discovery later. */ if (inodedep_lookup(UFSTOVFS(ump), ip->i_number, 0, &inodedep) == 0) panic("softdep_setup_remove: Lost inodedep."); KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked")); dirrem->dm_state |= ONDEPLIST; LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the COMPLETE flag is clear, then there were no active * entries and we want to roll back to a zeroed entry until * the new inode is committed to disk. If the COMPLETE flag is * set then we have deleted an entry that never made it to * disk. If the entry we deleted resulted from a name change, * then the old name still resides on disk. We cannot delete * its inode (returned to us in prevdirrem) until the zeroed * directory entry gets to disk. The new inode has never been * referenced on the disk, so it can be deleted immediately. */ if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); FREE_LOCK(ump); } else { if (prevdirrem != NULL) LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, prevdirrem, dm_next); dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; direct = LIST_EMPTY(&dirrem->dm_jremrefhd); FREE_LOCK(ump); if (direct) handle_workitem_remove(dirrem, 0); } } /* * Check for an entry matching 'offset' on both the pd_diraddhd list and the * pd_pendinghd list of a pagedep. */ static struct diradd * diradd_lookup(pagedep, offset) struct pagedep *pagedep; int offset; { struct diradd *dap; LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) if (dap->da_offset == offset) return (dap); LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) if (dap->da_offset == offset) return (dap); return (NULL); } /* * Search for a .. diradd dependency in a directory that is being removed. * If the directory was renamed to a new parent we have a diradd rather * than a mkdir for the .. entry. We need to cancel it now before * it is found in truncate(). */ static struct jremref * cancel_diradd_dotdot(ip, dirrem, jremref) struct inode *ip; struct dirrem *dirrem; struct jremref *jremref; { struct pagedep *pagedep; struct diradd *dap; struct worklist *wk; if (pagedep_lookup(ITOVFS(ip), NULL, ip->i_number, 0, 0, &pagedep) == 0) return (jremref); dap = diradd_lookup(pagedep, DOTDOT_OFFSET); if (dap == NULL) return (jremref); cancel_diradd(dap, dirrem, jremref, NULL, NULL); /* * Mark any journal work as belonging to the parent so it is freed * with the .. reference.
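 *
 * MKDIR_PARENT is reused below purely as a marker on the moved
 * worklist items; handle_workitem_remove() strips it again when the
 * saved work is requeued for the parent's ".." removal.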
*/ LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) wk->wk_state |= MKDIR_PARENT; return (NULL); } /* * Cancel the MKDIR_PARENT mkdir component of a diradd when we're going to * replace it with a dirrem/diradd pair as a result of re-parenting a * directory. This ensures that we don't simultaneously have a mkdir and * a diradd for the same .. entry. */ static struct jremref * cancel_mkdir_dotdot(ip, dirrem, jremref) struct inode *ip; struct dirrem *dirrem; struct jremref *jremref; { struct inodedep *inodedep; struct jaddref *jaddref; struct ufsmount *ump; struct mkdir *mkdir; struct diradd *dap; struct mount *mp; mp = ITOVFS(ip); if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) return (jremref); dap = inodedep->id_mkdiradd; if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0) return (jremref); ump = VFSTOUFS(inodedep->id_list.wk_mp); for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir; mkdir = LIST_NEXT(mkdir, md_mkdirs)) if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT) break; if (mkdir == NULL) panic("cancel_mkdir_dotdot: Unable to find mkdir\n"); if ((jaddref = mkdir->md_jaddref) != NULL) { mkdir->md_jaddref = NULL; jaddref->ja_state &= ~MKDIR_PARENT; if (inodedep_lookup(mp, jaddref->ja_ino, 0, &inodedep) == 0) panic("cancel_mkdir_dotdot: Lost parent inodedep"); if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork)) { journal_jremref(dirrem, jremref, inodedep); jremref = NULL; } } if (mkdir->md_state & ONWORKLIST) WORKLIST_REMOVE(&mkdir->md_list); mkdir->md_state |= ALLCOMPLETE; complete_mkdir(mkdir); return (jremref); } static void journal_jremref(dirrem, jremref, inodedep) struct dirrem *dirrem; struct jremref *jremref; struct inodedep *inodedep; { if (inodedep == NULL) if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0, &inodedep) == 0) panic("journal_jremref: Lost inodedep"); LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, jr_deps); TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); add_to_journal(&jremref->jr_list); } static void dirrem_journal(dirrem, jremref, dotremref, dotdotremref) struct dirrem *dirrem; struct jremref *jremref; struct jremref *dotremref; struct jremref *dotdotremref; { struct inodedep *inodedep; if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0, &inodedep) == 0) panic("dirrem_journal: Lost inodedep"); journal_jremref(dirrem, jremref, inodedep); if (dotremref) journal_jremref(dirrem, dotremref, inodedep); if (dotdotremref) journal_jremref(dirrem, dotdotremref, NULL); } /* * Allocate a new dirrem if appropriate and return it along with * its associated pagedep. Called without a lock, returns with lock. */ static struct dirrem * newdirrem(bp, dp, ip, isrmdir, prevdirremp) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ int isrmdir; /* indicates if doing RMDIR */ struct dirrem **prevdirremp; /* previously referenced inode, if any */ { int offset; ufs_lbn_t lbn; struct diradd *dap; struct dirrem *dirrem; struct pagedep *pagedep; struct jremref *jremref; struct jremref *dotremref; struct jremref *dotdotremref; struct vnode *dvp; struct ufsmount *ump; /* * Whiteouts have no deletion dependencies. */ if (ip == NULL) panic("newdirrem: whiteout"); dvp = ITOV(dp); ump = ITOUMP(dp); /* * If the system is over its limit and our filesystem is * responsible for more than our share of that usage and * we are not a snapshot, request some inodedep cleanup. 
* Limiting the number of dirrem structures will also limit * the number of freefile and freeblks structures. */ ACQUIRE_LOCK(ump); if (!IS_SNAPSHOT(ip) && softdep_excess_items(ump, D_DIRREM)) schedule_cleanup(UFSTOVFS(ump)); else FREE_LOCK(ump); dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS | M_ZERO); workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount); LIST_INIT(&dirrem->dm_jremrefhd); LIST_INIT(&dirrem->dm_jwork); dirrem->dm_state = isrmdir ? RMDIR : 0; dirrem->dm_oldinum = ip->i_number; *prevdirremp = NULL; /* * Allocate remove reference structures to track journal write * dependencies. We will always have one for the link and * when doing directories we will always have one more for dot. * When renaming a directory we skip the dotdot link change so * this is not needed. */ jremref = dotremref = dotdotremref = NULL; if (DOINGSUJ(dvp)) { if (isrmdir) { jremref = newjremref(dirrem, dp, ip, dp->i_offset, ip->i_effnlink + 2); dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET, ip->i_effnlink + 1); dotdotremref = newjremref(dirrem, ip, dp, DOTDOT_OFFSET, dp->i_effnlink + 1); dotdotremref->jr_state |= MKDIR_PARENT; } else jremref = newjremref(dirrem, dp, ip, dp->i_offset, ip->i_effnlink + 1); } ACQUIRE_LOCK(ump); lbn = lblkno(ump->um_fs, dp->i_offset); offset = blkoff(ump->um_fs, dp->i_offset); pagedep_lookup(UFSTOVFS(ump), bp, dp->i_number, lbn, DEPALLOC, &pagedep); dirrem->dm_pagedep = pagedep; dirrem->dm_offset = offset; /* * If we're renaming a .. link to a new directory, cancel any * existing MKDIR_PARENT mkdir. If it has already been canceled * the jremref is preserved for any potential diradd in this * location. This can not coincide with a rmdir. */ if (dp->i_offset == DOTDOT_OFFSET) { if (isrmdir) panic("newdirrem: .. directory change during remove?"); jremref = cancel_mkdir_dotdot(dp, dirrem, jremref); } /* * If we're removing a directory search for the .. dependency now and * cancel it. Any pending journal work will be added to the dirrem * to be completed when the workitem remove completes. */ if (isrmdir) dotdotremref = cancel_diradd_dotdot(ip, dirrem, dotdotremref); /* * Check for a diradd dependency for the same directory entry. * If present, then both dependencies become obsolete and can * be de-allocated. */ dap = diradd_lookup(pagedep, offset); if (dap == NULL) { /* * Link the jremref structures into the dirrem so they are * written prior to the pagedep. */ if (jremref) dirrem_journal(dirrem, jremref, dotremref, dotdotremref); return (dirrem); } /* * Must be ATTACHED at this point. */ if ((dap->da_state & ATTACHED) == 0) panic("newdirrem: not ATTACHED"); if (dap->da_newinum != ip->i_number) panic("newdirrem: inum %ju should be %ju", (uintmax_t)ip->i_number, (uintmax_t)dap->da_newinum); /* * If we are deleting a changed name that never made it to disk, * then return the dirrem describing the previous inode (which * represents the inode currently referenced from this entry on disk). */ if ((dap->da_state & DIRCHG) != 0) { *prevdirremp = dap->da_previous; dap->da_state &= ~DIRCHG; dap->da_pagedep = pagedep; } /* * We are deleting an entry that never made it to disk. * Mark it COMPLETE so we can delete its inode immediately. 
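 *
 * Here the pending diradd and the new dirrem cancel each other: the
 * name was created and deleted without ever reaching the disk, so no
 * rollback is needed, only cancellation of any queued journal adds.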
*/ dirrem->dm_state |= COMPLETE; cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref); #ifdef SUJ_DEBUG if (isrmdir == 0) { struct worklist *wk; LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) if (wk->wk_state & (MKDIR_BODY | MKDIR_PARENT)) panic("bad wk %p (0x%X)\n", wk, wk->wk_state); } #endif return (dirrem); } /* * Directory entry change dependencies. * * Changing an existing directory entry requires that an add operation * be completed first followed by a deletion. The semantics for the addition * are identical to the description of adding a new entry above except * that the rollback is to the old inode number rather than zero. Once * the addition dependency is completed, the removal is done as described * in the removal routine above. */ /* * This routine should be called immediately after changing * a directory entry. The inode's link count should not be * decremented by the calling procedure -- the soft updates * code will perform this task when it is safe. */ void softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ ino_t newinum; /* new inode number for changed entry */ int isrmdir; /* indicates if doing RMDIR */ { int offset; struct diradd *dap = NULL; struct dirrem *dirrem, *prevdirrem; struct pagedep *pagedep; struct inodedep *inodedep; struct jaddref *jaddref; struct mount *mp; struct ufsmount *ump; mp = ITOVFS(dp); ump = VFSTOUFS(mp); offset = blkoff(ump->um_fs, dp->i_offset); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_directory_change called on non-softdep filesystem")); /* * Whiteouts do not need diradd dependencies. */ if (newinum != WINO) { dap = malloc(sizeof(struct diradd), M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&dap->da_list, D_DIRADD, mp); dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; dap->da_offset = offset; dap->da_newinum = newinum; LIST_INIT(&dap->da_jwork); } /* * Allocate a new dirrem and ACQUIRE_LOCK. */ dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); pagedep = dirrem->dm_pagedep; /* * The possible values for isrmdir: * 0 - non-directory file rename * 1 - directory rename within same directory * inum - directory rename to new directory of given inode number * When renaming to a new directory, we are both deleting and * creating a new directory entry, so the link count on the new * directory should not change. Thus we do not need the followup * dirrem which is usually done in handle_workitem_remove. We set * the DIRCHG flag to tell handle_workitem_remove to skip the * followup dirrem. */ if (isrmdir > 1) dirrem->dm_state |= DIRCHG; /* * Whiteouts have no additional dependencies, * so just put the dirrem on the correct list. */ if (newinum == WINO) { if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem, dm_next); } else { dirrem->dm_dirinum = pagedep->pd_ino; if (LIST_EMPTY(&dirrem->dm_jremrefhd)) add_to_worklist(&dirrem->dm_list, 0); } FREE_LOCK(ump); return; } /* * Add the dirrem to the inodedep's pending remove list for quick * discovery later. A valid nlinkdelta ensures that this lookup * will not fail. 
*/ if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) panic("softdep_setup_directory_change: Lost inodedep."); dirrem->dm_state |= ONDEPLIST; LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the COMPLETE flag is clear, then there were no active * entries and we want to roll back to the previous inode until * the new inode is committed to disk. If the COMPLETE flag is * set, then we have deleted an entry that never made it to disk. * If the entry we deleted resulted from a name change, then the old * inode reference still resides on disk. Any rollback that we do * needs to be to that old inode (returned to us in prevdirrem). If * the entry we deleted resulted from a create, then there is * no entry on the disk, so we want to roll back to zero rather * than the uncommitted inode. In either of the COMPLETE cases we * want to immediately free the unwritten and unreferenced inode. */ if ((dirrem->dm_state & COMPLETE) == 0) { dap->da_previous = dirrem; } else { if (prevdirrem != NULL) { dap->da_previous = prevdirrem; } else { dap->da_state &= ~DIRCHG; dap->da_pagedep = pagedep; } dirrem->dm_dirinum = pagedep->pd_ino; if (LIST_EMPTY(&dirrem->dm_jremrefhd)) add_to_worklist(&dirrem->dm_list, 0); } /* * Lookup the jaddref for this journal entry. We must finish * initializing it and make the diradd write dependent on it. * If we're not journaling, put it on the id_bufwait list if the * inode is not yet written. If it is written, do the post-inode * write processing to put it on the id_pendinghd list. */ inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); if (MOUNTEDSUJ(mp)) { jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, ("softdep_setup_directory_change: bad jaddref %p", jaddref)); jaddref->ja_diroff = dp->i_offset; jaddref->ja_diradd = dap; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); add_to_journal(&jaddref->ja_list); } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } else { LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); } /* * If we're making a new name for a directory that has not been * committed we need to move the dot and dotdot references to * this new name. */ if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET) merge_diradd(inodedep, dap); FREE_LOCK(ump); } /* * Called whenever the link count on an inode is changed. * It creates an inode dependency so that the new reference(s) * to the inode cannot be committed to disk until the updated * inode has been written. */ void softdep_change_linkcnt(ip) struct inode *ip; /* the inode with the increased link count */ { struct inodedep *inodedep; struct ufsmount *ump; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_change_linkcnt called on non-softdep filesystem")); ACQUIRE_LOCK(ump); inodedep_lookup(UFSTOVFS(ump), ip->i_number, DEPALLOC, &inodedep); if (ip->i_nlink < ip->i_effnlink) panic("softdep_change_linkcnt: bad delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; FREE_LOCK(ump); } /* * Attach a sbdep dependency to the superblock buf so that we can keep * track of the head of the linked list of referenced but unlinked inodes.
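 *
 * On disk, these inodes form a singly linked list threaded through
 * the inodes themselves; conceptually:
 *
 *	fs->fs_sujfree -> ino A (di_freelink) -> ino B (di_freelink) -> 0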
*/ void softdep_setup_sbupdate(ump, fs, bp) struct ufsmount *ump; struct fs *fs; struct buf *bp; { struct sbdep *sbdep; struct worklist *wk; KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_setup_sbupdate called on non-softdep filesystem")); LIST_FOREACH(wk, &bp->b_dep, wk_list) if (wk->wk_type == D_SBDEP) break; if (wk != NULL) return; sbdep = malloc(sizeof(struct sbdep), M_SBDEP, M_SOFTDEP_FLAGS); workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump)); sbdep->sb_fs = fs; sbdep->sb_ump = ump; ACQUIRE_LOCK(ump); WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list); FREE_LOCK(ump); } /* * Return the first unlinked inodedep which is ready to be the head of the * list. The inodedep and all those after it must have valid next pointers. */ static struct inodedep * first_unlinked_inodedep(ump) struct ufsmount *ump; { struct inodedep *inodedep; struct inodedep *idp; LOCK_OWNED(ump); for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst); inodedep; inodedep = idp) { if ((inodedep->id_state & UNLINKNEXT) == 0) return (NULL); idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); if (idp == NULL || (idp->id_state & UNLINKNEXT) == 0) break; if ((inodedep->id_state & UNLINKPREV) == 0) break; } return (inodedep); } /* * Set the sujfree unlinked head pointer prior to writing a superblock. */ static void initiate_write_sbdep(sbdep) struct sbdep *sbdep; { struct inodedep *inodedep; struct fs *bpfs; struct fs *fs; bpfs = sbdep->sb_fs; fs = sbdep->sb_ump->um_fs; inodedep = first_unlinked_inodedep(sbdep->sb_ump); if (inodedep) { fs->fs_sujfree = inodedep->id_ino; inodedep->id_state |= UNLINKPREV; } else fs->fs_sujfree = 0; bpfs->fs_sujfree = fs->fs_sujfree; } /* * After a superblock is written determine whether it must be written again * due to a changing unlinked list head. */ static int handle_written_sbdep(sbdep, bp) struct sbdep *sbdep; struct buf *bp; { struct inodedep *inodedep; struct fs *fs; LOCK_OWNED(sbdep->sb_ump); fs = sbdep->sb_fs; /* * If the superblock doesn't match the in-memory list start over. */ inodedep = first_unlinked_inodedep(sbdep->sb_ump); if ((inodedep && fs->fs_sujfree != inodedep->id_ino) || (inodedep == NULL && fs->fs_sujfree != 0)) { bdirty(bp); return (1); } WORKITEM_FREE(sbdep, D_SBDEP); if (fs->fs_sujfree == 0) return (0); /* * Now that we have a record of this inode in stable store allow it * to be written to free up pending work. Inodes may see a lot of * write activity after they are unlinked which we must not hold up. */ for (; inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) { if ((inodedep->id_state & UNLINKLINKS) != UNLINKLINKS) panic("handle_written_sbdep: Bad inodedep %p (0x%X)", inodedep, inodedep->id_state); if (inodedep->id_state & UNLINKONLIST) break; inodedep->id_state |= DEPCOMPLETE | UNLINKONLIST; } return (0); } /* * Mark an inodedep as unlinked and insert it into the in-memory unlinked list. */ static void unlinked_inodedep(mp, inodedep) struct mount *mp; struct inodedep *inodedep; { struct ufsmount *ump; ump = VFSTOUFS(mp); LOCK_OWNED(ump); if (MOUNTEDSUJ(mp) == 0) return; ump->um_fs->fs_fmod = 1; if (inodedep->id_state & UNLINKED) panic("unlinked_inodedep: %p already unlinked\n", inodedep); inodedep->id_state |= UNLINKED; TAILQ_INSERT_HEAD(&ump->softdep_unlinked, inodedep, id_unlinked); } /* * Remove an inodedep from the unlinked inodedep list. This may require * disk writes if the inode has made it that far. 
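 *
 * Unlinking from the middle of the on-disk list means rewriting the
 * predecessor, so the loop below proceeds roughly as:
 *
 *	find the predecessor (superblock or an UNLINKNEXT inode);
 *	getblk()/bread() its block with the softdep lock dropped;
 *	re-validate the list; if it changed, brelse() and retry;
 *	point the predecessor's di_freelink (or fs_sujfree) past us;
 *	bwrite(), then clear fs_sujfree too if it still names us.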
*/ static void clear_unlinked_inodedep(inodedep) struct inodedep *inodedep; { struct ufsmount *ump; struct inodedep *idp; struct inodedep *idn; struct fs *fs; struct buf *bp; ino_t ino; ino_t nino; ino_t pino; int error; ump = VFSTOUFS(inodedep->id_list.wk_mp); fs = ump->um_fs; ino = inodedep->id_ino; error = 0; for (;;) { LOCK_OWNED(ump); KASSERT((inodedep->id_state & UNLINKED) != 0, ("clear_unlinked_inodedep: inodedep %p not unlinked", inodedep)); /* * If nothing has yet been written simply remove us from * the in memory list and return. This is the most common * case where handle_workitem_remove() loses the final * reference. */ if ((inodedep->id_state & UNLINKLINKS) == 0) break; /* * If we have a NEXT pointer and no PREV pointer we can simply * clear NEXT's PREV and remove ourselves from the list. Be * careful not to clear PREV if the superblock points at * next as well. */ idn = TAILQ_NEXT(inodedep, id_unlinked); if ((inodedep->id_state & UNLINKLINKS) == UNLINKNEXT) { if (idn && fs->fs_sujfree != idn->id_ino) idn->id_state &= ~UNLINKPREV; break; } /* * Here we have an inodedep which is actually linked into * the list. We must remove it by forcing a write to the * link before us, whether it be the superblock or an inode. * Unfortunately the list may change while we're waiting * on the buf lock for either resource so we must loop until * we lock the right one. If both the superblock and an * inode point to this inode we must clear the inode first * followed by the superblock. */ idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); pino = 0; if (idp && (idp->id_state & UNLINKNEXT)) pino = idp->id_ino; FREE_LOCK(ump); if (pino == 0) { bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); } else { error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, pino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) brelse(bp); } ACQUIRE_LOCK(ump); if (error) break; /* If the list has changed restart the loop. */ idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); nino = 0; if (idp && (idp->id_state & UNLINKNEXT)) nino = idp->id_ino; if (nino != pino || (inodedep->id_state & UNLINKPREV) != UNLINKPREV) { FREE_LOCK(ump); brelse(bp); ACQUIRE_LOCK(ump); continue; } nino = 0; idn = TAILQ_NEXT(inodedep, id_unlinked); if (idn) nino = idn->id_ino; /* * Remove us from the in memory list. After this we cannot * access the inodedep. */ KASSERT((inodedep->id_state & UNLINKED) != 0, ("clear_unlinked_inodedep: inodedep %p not unlinked", inodedep)); inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST); TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); FREE_LOCK(ump); /* * The predecessor's next pointer is manually updated here * so that the NEXT flag is never cleared for an element * that is in the list. */ if (pino == 0) { bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, ump); softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); } else if (fs->fs_magic == FS_UFS1_MAGIC) ((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, pino))->di_freelink = nino; else ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, pino))->di_freelink = nino; /* * If the bwrite fails we have no recourse to recover. The * filesystem is corrupted already. */ bwrite(bp); ACQUIRE_LOCK(ump); /* * If the superblock pointer still needs to be cleared force * a write here. 
*/ if (fs->fs_sujfree == ino) { FREE_LOCK(ump); bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, ump); softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); bwrite(bp); ACQUIRE_LOCK(ump); } if (fs->fs_sujfree != ino) return; panic("clear_unlinked_inodedep: Failed to clear free head"); } if (inodedep->id_ino == fs->fs_sujfree) panic("clear_unlinked_inodedep: Freeing head of free list"); inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST); TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); return; } /* * This workitem decrements the inode's link count. * If the link count reaches zero, the file is removed. */ static int handle_workitem_remove(dirrem, flags) struct dirrem *dirrem; int flags; { struct inodedep *inodedep; struct workhead dotdotwk; struct worklist *wk; struct ufsmount *ump; struct mount *mp; struct vnode *vp; struct inode *ip; ino_t oldinum; if (dirrem->dm_state & ONWORKLIST) panic("handle_workitem_remove: dirrem %p still on worklist", dirrem); oldinum = dirrem->dm_oldinum; mp = dirrem->dm_list.wk_mp; ump = VFSTOUFS(mp); flags |= LK_EXCLUSIVE; if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0) return (EBUSY); ip = VTOI(vp); ACQUIRE_LOCK(ump); if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0) panic("handle_workitem_remove: lost inodedep"); if (dirrem->dm_state & ONDEPLIST) LIST_REMOVE(dirrem, dm_inonext); KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), ("handle_workitem_remove: Journal entries not written.")); /* * Move all dependencies waiting on the remove to complete * from the dirrem to the inode inowait list to be completed * after the inode has been updated and written to disk. Any * marked MKDIR_PARENT are saved to be completed when the .. ref * is removed. */ LIST_INIT(&dotdotwk); while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) { WORKLIST_REMOVE(wk); if (wk->wk_state & MKDIR_PARENT) { wk->wk_state &= ~MKDIR_PARENT; WORKLIST_INSERT(&dotdotwk, wk); continue; } WORKLIST_INSERT(&inodedep->id_inowait, wk); } LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list); /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); if (ip->i_nlink == 0) unlinked_inodedep(mp, inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; KASSERT(LIST_EMPTY(&dirrem->dm_jwork), ("handle_workitem_remove: worklist not empty. %s", TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type))); WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(ump); goto out; } /* * Directory deletion. Decrement reference count for both the * just deleted parent directory entry and the reference for ".". * Arrange to have the reference count on the parent decremented * to account for the loss of "..". */ ip->i_nlink -= 2; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); if (ip->i_nlink == 0) unlinked_inodedep(mp, inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; /* * Rename a directory to a new parent. Since we are both deleting * and creating a new directory entry, the link count on the new * directory should not change. Thus we skip the followup dirrem.
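 *
 * Otherwise the dirrem is recycled below to drive the ".." link count
 * decrement: dm_oldinum is rewritten to the parent's inode number and
 * the work is queued on the parent's inodedep.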
*/ if (dirrem->dm_state & DIRCHG) { KASSERT(LIST_EMPTY(&dirrem->dm_jwork), ("handle_workitem_remove: DIRCHG and worklist not empty.")); WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(ump); goto out; } dirrem->dm_state = ONDEPLIST; dirrem->dm_oldinum = dirrem->dm_dirinum; /* * Place the dirrem on the parent's diremhd list. */ if (inodedep_lookup(mp, dirrem->dm_oldinum, 0, &inodedep) == 0) panic("handle_workitem_remove: lost dir inodedep"); LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the allocated inode has never been written to disk, then * the on-disk inode is zero'ed and we can remove the file * immediately. When journaling if the inode has been marked * unlinked and not DEPCOMPLETE we know it can never be written. */ inodedep_lookup(mp, oldinum, 0, &inodedep); if (inodedep == NULL || (inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED || check_inode_unwritten(inodedep)) { FREE_LOCK(ump); vput(vp); return handle_workitem_remove(dirrem, flags); } WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); FREE_LOCK(ump); ip->i_flag |= IN_CHANGE; out: ffs_update(vp, 0); vput(vp); return (0); } /* * Inode de-allocation dependencies. * * When an inode's link count is reduced to zero, it can be de-allocated. We * found it convenient to postpone de-allocation until after the inode is * written to disk with its new link count (zero). At this point, all of the * on-disk inode's block pointers are nullified and, with careful dependency * list ordering, all dependencies related to the inode will be satisfied and * the corresponding dependency structures de-allocated. So, if/when the * inode is reused, there will be no mixing of old dependencies with new * ones. This artificial dependency is set up by the block de-allocation * procedure above (softdep_setup_freeblocks) and completed by the * following procedure. */ static void handle_workitem_freefile(freefile) struct freefile *freefile; { struct workhead wkhd; struct fs *fs; struct inodedep *idp; struct ufsmount *ump; int error; ump = VFSTOUFS(freefile->fx_list.wk_mp); fs = ump->um_fs; #ifdef DEBUG ACQUIRE_LOCK(ump); error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp); FREE_LOCK(ump); if (error) panic("handle_workitem_freefile: inodedep %p survived", idp); #endif UFS_LOCK(ump); fs->fs_pendinginodes -= 1; UFS_UNLOCK(ump); LIST_INIT(&wkhd); LIST_SWAP(&freefile->fx_jwork, &wkhd, worklist, wk_list); if ((error = ffs_freefile(ump, fs, freefile->fx_devvp, freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0) softdep_error("handle_workitem_freefile", error); ACQUIRE_LOCK(ump); WORKITEM_FREE(freefile, D_FREEFILE); FREE_LOCK(ump); } /* * Helper function which unlinks marker element from work list and returns * the next element on the list. */ static __inline struct worklist * markernext(struct worklist *marker) { struct worklist *next; next = LIST_NEXT(marker, wk_list); LIST_REMOVE(marker, wk_list); return next; } /* * Disk writes. * * The dependency structures constructed above are most actively used when file * system blocks are written to disk. No constraints are placed on when a * block can be written, but unsatisfied update dependencies are made safe by * modifying (or replacing) the source memory for the duration of the disk * write. When the disk write completes, the memory block is again brought * up-to-date. * * In-core inode structure reclamation. * * Because there are a finite number of "in-core" inode structures, they are * reused regularly. 
By transferring all inode-related dependencies to the * in-memory inode block and indexing them separately (via "inodedep"s), we * can allow "in-core" inode structures to be reused at any time and avoid * any increase in contention. * * Called just before entering the device driver to initiate a new disk I/O. * The buffer must be locked, thus, no I/O completion operations can occur * while we are manipulating its associated dependencies. */ static void softdep_disk_io_initiation(bp) struct buf *bp; /* structure describing disk write to occur */ { struct worklist *wk; struct worklist marker; struct inodedep *inodedep; struct freeblks *freeblks; struct jblkdep *jblkdep; struct newblk *newblk; struct ufsmount *ump; /* * We only care about write operations. There should never * be dependencies for reads. */ if (bp->b_iocmd != BIO_WRITE) panic("softdep_disk_io_initiation: not write"); if (bp->b_vflags & BV_BKGRDINPROG) panic("softdep_disk_io_initiation: Writing buffer with " "background write in progress: %p", bp); ump = softdep_bp_to_mp(bp); if (ump == NULL) return; marker.wk_type = D_LAST + 1; /* Not a normal workitem */ PHOLD(curproc); /* Don't swap out kernel stack */ ACQUIRE_LOCK(ump); /* * Do any necessary pre-I/O processing. */ for (wk = LIST_FIRST(&bp->b_dep); wk != NULL; wk = markernext(&marker)) { LIST_INSERT_AFTER(wk, &marker, wk_list); switch (wk->wk_type) { case D_PAGEDEP: initiate_write_filepage(WK_PAGEDEP(wk), bp); continue; case D_INODEDEP: inodedep = WK_INODEDEP(wk); if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) initiate_write_inodeblock_ufs1(inodedep, bp); else initiate_write_inodeblock_ufs2(inodedep, bp); continue; case D_INDIRDEP: initiate_write_indirdep(WK_INDIRDEP(wk), bp); continue; case D_BMSAFEMAP: initiate_write_bmsafemap(WK_BMSAFEMAP(wk), bp); continue; case D_JSEG: WK_JSEG(wk)->js_buf = NULL; continue; case D_FREEBLKS: freeblks = WK_FREEBLKS(wk); jblkdep = LIST_FIRST(&freeblks->fb_jblkdephd); /* * We have to wait for the freeblks to be journaled * before we can write an inodeblock with updated * pointers. Be careful to arrange the marker so * we revisit the freeblks if it's not removed by * the first jwait(). */ if (jblkdep != NULL) { LIST_REMOVE(&marker, wk_list); LIST_INSERT_BEFORE(wk, &marker, wk_list); jwait(&jblkdep->jb_list, MNT_WAIT); } continue; case D_ALLOCDIRECT: case D_ALLOCINDIR: /* * We have to wait for the jnewblk to be journaled * before we can write to a block if the contents * may be confused with an earlier file's indirect * at recovery time. Handle the marker as described * above. */ newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk != NULL && indirblk_lookup(newblk->nb_list.wk_mp, newblk->nb_newblkno)) { LIST_REMOVE(&marker, wk_list); LIST_INSERT_BEFORE(wk, &marker, wk_list); jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); } continue; case D_SBDEP: initiate_write_sbdep(WK_SBDEP(wk)); continue; case D_MKDIR: case D_FREEWORK: case D_FREEDEP: case D_JSEGDEP: continue; default: panic("handle_disk_io_initiation: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } FREE_LOCK(ump); PRELE(curproc); /* Allow swapout of kernel stack */ } /* * Called from within the procedure above to deal with unsatisfied * allocation dependencies in a directory. The buffer must be locked, * thus, no I/O completion operations can occur while we are * manipulating its associated dependencies. 
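 */

/*
 * The marker-based traversal in softdep_disk_io_initiation() above can
 * be restated in a self-contained form. This is only an illustrative
 * sketch: the xitem/xwalk names are invented and appear nowhere in
 * this file.
 */
#include <sys/queue.h>

struct xitem {
	LIST_ENTRY(xitem) x_link;
};
LIST_HEAD(xhead, xitem);

/* Unlink the marker and return whatever now follows it. */
static struct xitem *
xmarkernext(struct xitem *marker)
{
	struct xitem *next;

	next = LIST_NEXT(marker, x_link);
	LIST_REMOVE(marker, x_link);
	return (next);
}

/*
 * Walk a list that may change while the per-item work sleeps: park the
 * marker after the current item, do the work, then resume from the
 * marker's successor, which reflects any concurrent insertions and
 * removals.
 */
static void
xwalk(struct xhead *head, struct xitem *marker,
    void (*work)(struct xitem *))
{
	struct xitem *ip;

	for (ip = LIST_FIRST(head); ip != NULL; ip = xmarkernext(marker)) {
		LIST_INSERT_AFTER(ip, marker, x_link);
		(*work)(ip);	/* may sleep; neighbors may change */
	}
}

/* (end of illustrative sketch)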
*/ static void initiate_write_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; { struct jremref *jremref; struct jmvref *jmvref; struct dirrem *dirrem; struct diradd *dap; struct direct *ep; int i; if (pagedep->pd_state & IOSTARTED) { /* * This can only happen if there is a driver that does not * understand chaining. Here biodone will reissue the call * to strategy for the incomplete buffers. */ printf("initiate_write_filepage: already started\n"); return; } pagedep->pd_state |= IOSTARTED; /* * Wait for all journal remove dependencies to hit the disk. * We cannot allow any potentially conflicting directory adds * to be visible before removes, and rollback is too difficult. * The per-filesystem lock may be dropped and re-acquired; however, * we hold the buf locked so the dependency cannot go away. */ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) jwait(&jremref->jr_list, MNT_WAIT); while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) jwait(&jmvref->jm_list, MNT_WAIT); for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { ep = (struct direct *) ((char *)bp->b_data + dap->da_offset); if (ep->d_ino != dap->da_newinum) panic("%s: dir inum %ju != new %ju", "initiate_write_filepage", (uintmax_t)ep->d_ino, (uintmax_t)dap->da_newinum); if (dap->da_state & DIRCHG) ep->d_ino = dap->da_previous->dm_oldinum; else ep->d_ino = 0; dap->da_state &= ~ATTACHED; dap->da_state |= UNDONE; } } } /* * Version of initiate_write_inodeblock that handles UFS1 dinodes. * Note that any bug fixes made to this routine must be done in the * version found below. * * Called from within the procedure above to deal with unsatisfied * allocation dependencies in an inodeblock. The buffer must be * locked, thus, no I/O completion operations can occur while we * are manipulating its associated dependencies. */ static void initiate_write_inodeblock_ufs1(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* The inode block */ { struct allocdirect *adp, *lastadp; struct ufs1_dinode *dp; struct ufs1_dinode *sip; struct inoref *inoref; struct ufsmount *ump; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS ufs_lbn_t prevlbn = 0; #endif int deplist; if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock_ufs1: already started"); inodedep->id_state |= IOSTARTED; fs = inodedep->id_fs; ump = VFSTOUFS(inodedep->id_list.wk_mp); LOCK_OWNED(ump); dp = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); /* * If we're on the unlinked list but have not yet written our * next pointer initialize it here. */ if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { struct inodedep *inon; inon = TAILQ_NEXT(inodedep, id_unlinked); dp->di_freelink = inon ? inon->id_ino : 0; } /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { if (inodedep->id_savedino1 != NULL) panic("initiate_write_inodeblock_ufs1: I/O underway"); FREE_LOCK(ump); sip = malloc(sizeof(struct ufs1_dinode), M_SAVEDINO, M_SOFTDEP_FLAGS); ACQUIRE_LOCK(ump); inodedep->id_savedino1 = sip; *inodedep->id_savedino1 = *dp; bzero((caddr_t)dp, sizeof(struct ufs1_dinode)); dp->di_gen = inodedep->id_savedino1->di_gen; dp->di_freelink = inodedep->id_savedino1->di_freelink; return; } /* * If no dependencies, then there is nothing to roll back.
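 *
 * (The id_saved* fields set below stash the up-to-date values so the
 * roll-forward in handle_written_inodeblock() can reinstall them. For
 * example, with invented numbers: if the last two direct blocks of a
 * file are not yet journaled, the on-disk di_size is trimmed to cover
 * only the blocks known to be safe, while id_savedsize remembers the
 * real size for after the write.)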
*/ inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = 0; inodedep->id_savednlink = dp->di_nlink; if (TAILQ_EMPTY(&inodedep->id_inoupdt) && TAILQ_EMPTY(&inodedep->id_inoreflst)) return; /* * Revert the link count to that of the first unwritten journal entry. */ inoref = TAILQ_FIRST(&inodedep->id_inoreflst); if (inoref) dp->di_nlink = inoref->if_nlink; /* * Set the dependencies to busy. */ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); prevlbn = adp->ad_offset; if (adp->ad_offset < NDADDR && dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", (intmax_t)adp->ad_offset, dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); if (adp->ad_offset >= NDADDR && dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s: indirect pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", (intmax_t)adp->ad_offset - NDADDR, dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); #endif /* INVARIANTS */ adp->ad_state &= ~ATTACHED; adp->ad_state |= UNDONE; } /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_offset >= NDADDR) break; dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); #endif /* INVARIANTS */ dp->di_db[i] = 0; } for (i = 0; i < NIADDR; i++) { #ifdef INVARIANTS if (dp->di_ib[i] != 0 && (deplist & ((1 << NDADDR) << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); #endif /* INVARIANTS */ dp->di_ib[i] = 0; } return; } /* * If we have zero'ed out the last allocated block of the file, * roll back the size to the last currently allocated block. * We know that this last allocated block is full-sized, as * we already checked for fragments in the loop above. */ if (lastadp != NULL && dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; } /* * The only dependencies are for indirect blocks. * * The file size for indirect block additions is not guaranteed. * Such a guarantee would be non-trivial to achieve. The conventional * synchronous write implementation also does not make this guarantee. * Fsck should catch and fix discrepancies. Arguably, the file size * can be over-estimated without destroying integrity when the file * moves into the indirect blocks (i.e., is large). If we want to * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_offset - NDADDR] = 0; } /* * Version of initiate_write_inodeblock that handles UFS2 dinodes.
* Note that any bug fixes made to this routine must be done in the * version found above. * * Called from within the procedure above to deal with unsatisfied * allocation dependencies in an inodeblock. The buffer must be * locked, thus, no I/O completion operations can occur while we * are manipulating its associated dependencies. */ static void initiate_write_inodeblock_ufs2(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* The inode block */ { struct allocdirect *adp, *lastadp; struct ufs2_dinode *dp; struct ufs2_dinode *sip; struct inoref *inoref; struct ufsmount *ump; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS ufs_lbn_t prevlbn = 0; #endif int deplist; if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock_ufs2: already started"); inodedep->id_state |= IOSTARTED; fs = inodedep->id_fs; ump = VFSTOUFS(inodedep->id_list.wk_mp); LOCK_OWNED(ump); dp = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); /* * If we're on the unlinked list but have not yet written our * next pointer initialize it here. */ if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { struct inodedep *inon; inon = TAILQ_NEXT(inodedep, id_unlinked); dp->di_freelink = inon ? inon->id_ino : 0; } /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { if (inodedep->id_savedino2 != NULL) panic("initiate_write_inodeblock_ufs2: I/O underway"); FREE_LOCK(ump); sip = malloc(sizeof(struct ufs2_dinode), M_SAVEDINO, M_SOFTDEP_FLAGS); ACQUIRE_LOCK(ump); inodedep->id_savedino2 = sip; *inodedep->id_savedino2 = *dp; bzero((caddr_t)dp, sizeof(struct ufs2_dinode)); dp->di_gen = inodedep->id_savedino2->di_gen; dp->di_freelink = inodedep->id_savedino2->di_freelink; return; } /* * If no dependencies, then there is nothing to roll back. */ inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = dp->di_extsize; inodedep->id_savednlink = dp->di_nlink; if (TAILQ_EMPTY(&inodedep->id_inoupdt) && TAILQ_EMPTY(&inodedep->id_extupdt) && TAILQ_EMPTY(&inodedep->id_inoreflst)) return; /* * Revert the link count to that of the first unwritten journal entry. */ inoref = TAILQ_FIRST(&inodedep->id_inoreflst); if (inoref) dp->di_nlink = inoref->if_nlink; /* * Set the ext data dependencies to busy. */ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); prevlbn = adp->ad_offset; if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", (intmax_t)adp->ad_offset, (intmax_t)dp->di_extb[adp->ad_offset], (intmax_t)adp->ad_newblkno); deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); #endif /* INVARIANTS */ adp->ad_state &= ~ATTACHED; adp->ad_state |= UNDONE; } /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the ext * data which would corrupt the filesystem. 
*/ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { dp->di_extb[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; dp->di_extsize = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; for (i = adp->ad_offset + 1; i < NXADDR; i++) { #ifdef INVARIANTS if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); #endif /* INVARIANTS */ dp->di_extb[i] = 0; } lastadp = NULL; break; } /* * If we have zero'ed out the last allocated block of the ext * data, roll back the size to the last currently allocated block. * We know that this last allocated block is full-sized, as * we already checked for fragments in the loop above. */ if (lastadp != NULL && dp->di_extsize <= (lastadp->ad_offset + 1) * fs->fs_bsize) { for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_extb[i] != 0) break; dp->di_extsize = (i + 1) * fs->fs_bsize; } /* * Set the file data dependencies to busy. */ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); if ((adp->ad_state & ATTACHED) == 0) panic("inodedep %p and adp %p not attached", inodedep, adp); prevlbn = adp->ad_offset; if (adp->ad_offset < NDADDR && dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", (intmax_t)adp->ad_offset, (intmax_t)dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); if (adp->ad_offset >= NDADDR && dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s indirect pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock:", (intmax_t)adp->ad_offset - NDADDR, (intmax_t)dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); #endif /* INVARIANTS */ adp->ad_state &= ~ATTACHED; adp->ad_state |= UNDONE; } /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_offset >= NDADDR) break; dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); #endif /* INVARIANTS */ dp->di_db[i] = 0; } for (i = 0; i < NIADDR; i++) { #ifdef INVARIANTS if (dp->di_ib[i] != 0 && (deplist & ((1 << NDADDR) << i)) == 0) panic("softdep_write_inodeblock: lost dep3"); #endif /* INVARIANTS */ dp->di_ib[i] = 0; } return; } /* * If we have zero'ed out the last allocated block of the file, * roll back the size to the last currently allocated block. * We know that this last allocated block is full-sized, as * we already checked for fragments in the loop above.
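 *
 * Worked example (invented numbers): with fs_bsize = 32768, a rollback
 * at direct block 3 to an old 4096-byte fragment yields di_size =
 * 32768 * 3 + 4096 = 102400 in the loop above; if the rolled-back
 * block becomes zero instead, the scan below walks back to the last
 * nonzero di_db[] entry and claims (i + 1) full blocks.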
*/ if (lastadp != NULL && dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; } /* * The only dependencies are for indirect blocks. * * The file size for indirect block additions is not guaranteed. * Such a guarantee would be non-trivial to achieve. The conventional * synchronous write implementation also does not make this guarantee. * Fsck should catch and fix discrepancies. Arguably, the file size * can be over-estimated without destroying integrity when the file * moves into the indirect blocks (i.e., is large). If we want to * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_offset - NDADDR] = 0; } /* * Cancel an indirdep as a result of truncation. Release all of the * children allocindirs and place their journal work on the appropriate * list. */ static void cancel_indirdep(indirdep, bp, freeblks) struct indirdep *indirdep; struct buf *bp; struct freeblks *freeblks; { struct allocindir *aip; /* * None of the indirect pointers will ever be visible, * so they can simply be tossed. GOINGAWAY ensures * that allocated pointers will be saved in the buffer * cache until they are freed. Note that they can * only be found by their physical address * since the inode mapping the logical address will * be gone. The save buffer used for the safe copy * was allocated in setup_allocindir_phase2 using * the physical address so it could be used for this * purpose. Hence we swap the safe copy with the real * copy, allowing the safe copy to be freed and holding * on to the real copy for later use in indir_trunc. */ if (indirdep->ir_state & GOINGAWAY) panic("cancel_indirdep: already gone"); if ((indirdep->ir_state & DEPCOMPLETE) == 0) { indirdep->ir_state |= DEPCOMPLETE; LIST_REMOVE(indirdep, ir_next); } indirdep->ir_state |= GOINGAWAY; /* * Pass in bp for blocks that still have journal writes * pending so we can cancel them on their own. */ while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != NULL) cancel_allocindir(aip, bp, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) cancel_allocindir(aip, NULL, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) cancel_allocindir(aip, NULL, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) cancel_allocindir(aip, NULL, freeblks, 0); /* * If there are pending partial truncations we need to keep the * old block copy around until they complete. This is because * the current b_data is not a perfect superset of the available * blocks. */ if (TAILQ_EMPTY(&indirdep->ir_trunc)) bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); else bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); WORKLIST_REMOVE(&indirdep->ir_list); WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list); indirdep->ir_bp = NULL; indirdep->ir_freeblks = freeblks; } /* * Free an indirdep once it no longer has new pointers to track.
*/ static void free_indirdep(indirdep) struct indirdep *indirdep; { KASSERT(TAILQ_EMPTY(&indirdep->ir_trunc), ("free_indirdep: Indir trunc list not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_completehd), ("free_indirdep: Complete head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_writehd), ("free_indirdep: write head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_donehd), ("free_indirdep: done head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd), ("free_indirdep: deplist head not empty.")); KASSERT((indirdep->ir_state & DEPCOMPLETE), ("free_indirdep: %p still on newblk list.", indirdep)); KASSERT(indirdep->ir_saveddata == NULL, ("free_indirdep: %p still has saved data.", indirdep)); if (indirdep->ir_state & ONWORKLIST) WORKLIST_REMOVE(&indirdep->ir_list); WORKITEM_FREE(indirdep, D_INDIRDEP); } /* * Called before a write to an indirdep. This routine is responsible for * rolling back pointers to a safe state which includes only those * allocindirs which have been completed. */ static void initiate_write_indirdep(indirdep, bp) struct indirdep *indirdep; struct buf *bp; { struct ufsmount *ump; indirdep->ir_state |= IOSTARTED; if (indirdep->ir_state & GOINGAWAY) panic("disk_io_initiation: indirdep gone"); /* * If there are no remaining dependencies, this will be writing * the real pointers. */ if (LIST_EMPTY(&indirdep->ir_deplisthd) && TAILQ_EMPTY(&indirdep->ir_trunc)) return; /* * Replace up-to-date version with safe version. */ if (indirdep->ir_saveddata == NULL) { ump = VFSTOUFS(indirdep->ir_list.wk_mp); LOCK_OWNED(ump); FREE_LOCK(ump); indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, M_SOFTDEP_FLAGS); ACQUIRE_LOCK(ump); } indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); bcopy(indirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount); } /* * Called when an inode has been cleared in a cg bitmap. This finally * eliminates any canceled jaddrefs */ void softdep_setup_inofree(mp, bp, ino, wkhd) struct mount *mp; struct buf *bp; ino_t ino; struct workhead *wkhd; { struct worklist *wk, *wkn; struct inodedep *inodedep; struct ufsmount *ump; uint8_t *inosused; struct cg *cgp; struct fs *fs; KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_setup_inofree called on non-softdep filesystem")); ump = VFSTOUFS(mp); ACQUIRE_LOCK(ump); fs = ump->um_fs; cgp = (struct cg *)bp->b_data; inosused = cg_inosused(cgp); if (isset(inosused, ino % fs->fs_ipg)) panic("softdep_setup_inofree: inode %ju not freed.", (uintmax_t)ino); if (inodedep_lookup(mp, ino, 0, &inodedep)) panic("softdep_setup_inofree: ino %ju has existing inodedep %p", (uintmax_t)ino, inodedep); if (wkhd) { LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { if (wk->wk_type != D_JADDREF) continue; WORKLIST_REMOVE(wk); /* * We can free immediately even if the jaddref * isn't attached in a background write as now * the bitmaps are reconciled. */ wk->wk_state |= COMPLETE | ATTACHED; free_jaddref(WK_JADDREF(wk)); } jwork_move(&bp->b_dep, wkhd); } FREE_LOCK(ump); } /* * Called via ffs_blkfree() after a set of frags has been cleared from a cg * map. Any dependencies waiting for the write to clear are added to the * buf's list and any jnewblks that are being canceled are discarded * immediately. 
*/ void softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) struct mount *mp; struct buf *bp; ufs2_daddr_t blkno; int frags; struct workhead *wkhd; { struct bmsafemap *bmsafemap; struct jnewblk *jnewblk; struct ufsmount *ump; struct worklist *wk; struct fs *fs; #ifdef SUJ_DEBUG uint8_t *blksfree; struct cg *cgp; ufs2_daddr_t jstart; ufs2_daddr_t jend; ufs2_daddr_t end; long bno; int i; #endif CTR3(KTR_SUJ, "softdep_setup_blkfree: blkno %jd frags %d wk head %p", blkno, frags, wkhd); ump = VFSTOUFS(mp); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_setup_blkfree called on non-softdep filesystem")); ACQUIRE_LOCK(ump); /* Lookup the bmsafemap so we track when it is dirty. */ fs = ump->um_fs; bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL); /* * Detach any jnewblks which have been canceled. They must linger * until the bitmap is cleared again by ffs_blkfree() to prevent * an unjournaled allocation from hitting the disk. */ if (wkhd) { while ((wk = LIST_FIRST(wkhd)) != NULL) { CTR2(KTR_SUJ, "softdep_setup_blkfree: blkno %jd wk type %d", blkno, wk->wk_type); WORKLIST_REMOVE(wk); if (wk->wk_type != D_JNEWBLK) { WORKLIST_INSERT(&bmsafemap->sm_freehd, wk); continue; } jnewblk = WK_JNEWBLK(wk); KASSERT(jnewblk->jn_state & GOINGAWAY, ("softdep_setup_blkfree: jnewblk not canceled.")); #ifdef SUJ_DEBUG /* * Assert that this block is free in the bitmap * before we discard the jnewblk. */ cgp = (struct cg *)bp->b_data; blksfree = cg_blksfree(cgp); bno = dtogd(fs, jnewblk->jn_blkno); for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) { if (isset(blksfree, bno + i)) continue; panic("softdep_setup_blkfree: not free"); } #endif /* * Even if it's not attached we can free immediately * as the new bitmap is correct. */ wk->wk_state |= COMPLETE | ATTACHED; free_jnewblk(jnewblk); } } #ifdef SUJ_DEBUG /* * Assert that we are not freeing a block which has an outstanding * allocation dependency. */ fs = VFSTOUFS(mp)->um_fs; bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL); end = blkno + frags; LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { /* * Don't match against blocks that will be freed when the * background write is done. */ if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) == (COMPLETE | DEPCOMPLETE)) continue; jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags; jend = jnewblk->jn_blkno + jnewblk->jn_frags; if ((blkno >= jstart && blkno < jend) || (end > jstart && end <= jend)) { printf("state 0x%X %jd - %d %d dep %p\n", jnewblk->jn_state, jnewblk->jn_blkno, jnewblk->jn_oldfrags, jnewblk->jn_frags, jnewblk->jn_dep); panic("softdep_setup_blkfree: " "%jd-%jd(%d) overlaps with %jd-%jd", blkno, end, frags, jstart, jend); } } #endif FREE_LOCK(ump); } /* * Revert a block allocation when the journal record that describes it * is not yet written. */ static int jnewblk_rollback(jnewblk, fs, cgp, blksfree) struct jnewblk *jnewblk; struct fs *fs; struct cg *cgp; uint8_t *blksfree; { ufs1_daddr_t fragno; long cgbno, bbase; int frags, blk; int i; frags = 0; cgbno = dtogd(fs, jnewblk->jn_blkno); /* * We have to test which frags need to be rolled back. We may * be operating on a stale copy when doing background writes. */ for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) if (isclr(blksfree, cgbno + i)) frags++; if (frags == 0) return (0); /* * This is mostly ffs_blkfree() sans some validation and * superblock updates. 
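 *
 * Worked example (invented numbers): with fs_frag = 8, a jnewblk that
 * journaled fragments [jn_oldfrags = 2, jn_frags = 5) rolls back three
 * fragments; their bits are set in blksfree again, cs_nffree is
 * credited by 3 and ffs_fragacct() is re-run around the change, unless
 * all eight fragments of the block come free, in which case the block
 * is re-assembled and counted in cs_nbfree instead.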
*/ if (frags == fs->fs_frag) { fragno = fragstoblks(fs, cgbno); ffs_setblock(fs, blksfree, fragno); ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; } else { cgbno += jnewblk->jn_oldfrags; bbase = cgbno - fragnum(fs, cgbno); /* Decrement the old frags. */ blk = blkmap(fs, blksfree, bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, -1); /* Deallocate the fragment */ for (i = 0; i < frags; i++) setbit(blksfree, cgbno + i); cgp->cg_cs.cs_nffree += frags; /* Add back in counts associated with the new frags */ blk = blkmap(fs, blksfree, bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, 1); /* If a complete block has been reassembled, account for it. */ fragno = fragstoblks(fs, bbase); if (ffs_isblock(fs, blksfree, fragno)) { cgp->cg_cs.cs_nffree -= fs->fs_frag; ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; } } stat_jnewblk++; jnewblk->jn_state &= ~ATTACHED; jnewblk->jn_state |= UNDONE; return (frags); } static void initiate_write_bmsafemap(bmsafemap, bp) struct bmsafemap *bmsafemap; struct buf *bp; /* The cg block. */ { struct jaddref *jaddref; struct jnewblk *jnewblk; uint8_t *inosused; uint8_t *blksfree; struct cg *cgp; struct fs *fs; ino_t ino; /* * If this is a background write, we did this at the time that * the copy was made, so do not need to do it again. */ if (bmsafemap->sm_state & IOSTARTED) return; bmsafemap->sm_state |= IOSTARTED; /* * Clear any inode allocations which are pending journal writes. */ if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) { cgp = (struct cg *)bp->b_data; fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; inosused = cg_inosused(cgp); LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { ino = jaddref->ja_ino % fs->fs_ipg; if (isset(inosused, ino)) { if ((jaddref->ja_mode & IFMT) == IFDIR) cgp->cg_cs.cs_ndir--; cgp->cg_cs.cs_nifree++; clrbit(inosused, ino); jaddref->ja_state &= ~ATTACHED; jaddref->ja_state |= UNDONE; stat_jaddref++; } else panic("initiate_write_bmsafemap: inode %ju " "marked free", (uintmax_t)jaddref->ja_ino); } } /* * Clear any block allocations which are pending journal writes. */ if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { cgp = (struct cg *)bp->b_data; fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; blksfree = cg_blksfree(cgp); LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { if (jnewblk_rollback(jnewblk, fs, cgp, blksfree)) continue; panic("initiate_write_bmsafemap: block %jd " "marked free", jnewblk->jn_blkno); } } /* * Move allocation lists to the written lists so they can be * cleared once the block write is complete. */ LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr, inodedep, id_deps); LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, newblk, nb_deps); LIST_SWAP(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist, wk_list); } /* * This routine is called during the completion interrupt * service routine for a disk write (from the procedure called * by the device driver to inform the filesystem caches of * a request completion). It should be called early in this * procedure, before the block is made available to other * processes or other routines are called. 
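 */

/*
 * The drain-and-reattach pattern used by this completion routine can
 * be sketched in a self-contained form (hypothetical names, not part
 * of this file): every dependency is popped off the buffer's list, its
 * handler decides whether it is finished, and unfinished items are
 * re-attached for the buffer's next write.
 */
#include <sys/queue.h>

struct witem {
	LIST_ENTRY(witem) w_link;
};
LIST_HEAD(whead, witem);

static void
drain_and_reattach(struct whead *dep, int (*handler)(struct witem *))
{
	struct whead reattach;
	struct witem *wk;

	LIST_INIT(&reattach);
	while ((wk = LIST_FIRST(dep)) != NULL) {
		LIST_REMOVE(wk, w_link);
		if ((*handler)(wk) != 0)	/* nonzero: redo later */
			LIST_INSERT_HEAD(&reattach, wk, w_link);
	}
	/* Re-attach any requests that must be redone. */
	while ((wk = LIST_FIRST(&reattach)) != NULL) {
		LIST_REMOVE(wk, w_link);
		LIST_INSERT_HEAD(dep, wk, w_link);
	}
}

/* (end of illustrative sketch)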
* */ static void softdep_disk_write_complete(bp) struct buf *bp; /* describes the completed disk write */ { struct worklist *wk; struct worklist *owk; struct ufsmount *ump; struct workhead reattach; struct freeblks *freeblks; struct buf *sbp; ump = softdep_bp_to_mp(bp); if (ump == NULL) return; /* * If an error occurred while doing the write, then the data * has not hit the disk and the dependencies cannot be processed. * But we do have to go through and roll forward any dependencies * that were rolled back before the disk write. */ ACQUIRE_LOCK(ump); if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { switch (wk->wk_type) { case D_PAGEDEP: handle_written_filepage(WK_PAGEDEP(wk), bp, 0); continue; case D_INODEDEP: handle_written_inodeblock(WK_INODEDEP(wk), bp, 0); continue; case D_BMSAFEMAP: handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp, 0); continue; case D_INDIRDEP: handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp, 0); continue; default: /* nothing to roll forward */ continue; } } FREE_LOCK(ump); return; } LIST_INIT(&reattach); /* * Ump SU lock must not be released anywhere in this code segment. */ sbp = NULL; owk = NULL; while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); atomic_add_long(&dep_write[wk->wk_type], 1); if (wk == owk) panic("duplicate worklist: %p\n", wk); owk = wk; switch (wk->wk_type) { case D_PAGEDEP: if (handle_written_filepage(WK_PAGEDEP(wk), bp, WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_INODEDEP: if (handle_written_inodeblock(WK_INODEDEP(wk), bp, WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_BMSAFEMAP: if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp, WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_MKDIR: handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); continue; case D_ALLOCDIRECT: wk->wk_state |= COMPLETE; handle_allocdirect_partdone(WK_ALLOCDIRECT(wk), NULL); continue; case D_ALLOCINDIR: wk->wk_state |= COMPLETE; handle_allocindir_partdone(WK_ALLOCINDIR(wk)); continue; case D_INDIRDEP: if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp, WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_FREEBLKS: wk->wk_state |= COMPLETE; freeblks = WK_FREEBLKS(wk); if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) add_to_worklist(wk, WK_NODELAY); continue; case D_FREEWORK: handle_written_freework(WK_FREEWORK(wk)); break; case D_JSEGDEP: free_jsegdep(WK_JSEGDEP(wk)); continue; case D_JSEG: handle_written_jseg(WK_JSEG(wk), bp); continue; case D_SBDEP: if (handle_written_sbdep(WK_SBDEP(wk), bp)) WORKLIST_INSERT(&reattach, wk); continue; case D_FREEDEP: free_freedep(WK_FREEDEP(wk)); continue; default: panic("handle_disk_write_complete: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } /* * Reattach any requests that must be redone. */ while ((wk = LIST_FIRST(&reattach)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(&bp->b_dep, wk); } FREE_LOCK(ump); if (sbp) brelse(sbp); } /* * Called from within softdep_disk_write_complete above. Note that * this routine is always called from interrupt level with further * splbio interrupts blocked. */ static void handle_allocdirect_partdone(adp, wkhd) struct allocdirect *adp; /* the completed allocdirect */ struct workhead *wkhd; /* Work to do when inode is written.
*/ { struct allocdirectlst *listhead; struct allocdirect *listadp; struct inodedep *inodedep; long bsize; if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. Thus, we cannot free any * allocdirects after one whose ad_oldblkno claims a fragment as * these blocks must be rolled back to zero before writing the inode. * We check the currently active set of allocdirects in id_inoupdt * or id_extupdt as appropriate. */ inodedep = adp->ad_inodedep; bsize = inodedep->id_fs->fs_bsize; if (adp->ad_state & EXTDATA) listhead = &inodedep->id_extupdt; else listhead = &inodedep->id_inoupdt; TAILQ_FOREACH(listadp, listhead, ad_next) { /* found our block */ if (listadp == adp) break; /* continue if the old block is not a fragment */ if (listadp->ad_oldsize == 0 || listadp->ad_oldsize == bsize) continue; /* hit a fragment */ return; } /* * If we have reached the end of the current list without * finding the just finished dependency, then it must be * on the future dependency list. Future dependencies cannot * be freed until they are moved to the current list. */ if (listadp == NULL) { #ifdef DEBUG if (adp->ad_state & EXTDATA) listhead = &inodedep->id_newextupdt; else listhead = &inodedep->id_newinoupdt; TAILQ_FOREACH(listadp, listhead, ad_next) /* found our block */ if (listadp == adp) break; if (listadp == NULL) panic("handle_allocdirect_partdone: lost dep"); #endif /* DEBUG */ return; } /* * If we have found the just finished dependency, then queue * it along with anything that follows it that is complete. * Since the pointer has not yet been written in the inode * as the dependency prevents it, place the allocdirect on the * bufwait list where it will be freed once the pointer is * valid. */ if (wkhd == NULL) wkhd = &inodedep->id_bufwait; for (; adp; adp = listadp) { listadp = TAILQ_NEXT(adp, ad_next); if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; TAILQ_REMOVE(listhead, adp, ad_next); WORKLIST_INSERT(wkhd, &adp->ad_block.nb_list); } } /* * Called from within softdep_disk_write_complete above. This routine * completes successfully written allocindirs. */ static void handle_allocindir_partdone(aip) struct allocindir *aip; /* the completed allocindir */ { struct indirdep *indirdep; if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; indirdep = aip->ai_indirdep; LIST_REMOVE(aip, ai_next); /* * Don't set a pointer while the buffer is undergoing IO or while * we have active truncations. */ if (indirdep->ir_state & UNDONE || !TAILQ_EMPTY(&indirdep->ir_trunc)) { LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } if (indirdep->ir_state & UFS1FMT) ((ufs1_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = aip->ai_newblkno; else ((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = aip->ai_newblkno; /* * Await the pointer write before freeing the allocindir. */ LIST_INSERT_HEAD(&indirdep->ir_writehd, aip, ai_next); } /* * Release segments held on a jwork list.
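 */

/*
 * The ordering rule enforced by handle_allocdirect_partdone() above
 * can be restated on a plain array (hypothetical helper with invented
 * names): completed entries are releasable only up to the first entry
 * whose old block was a fragment, because the rolled-back inode may
 * not claim anything beyond that fragment.
 */
static int
releasable_prefix(const long *oldsize, int n, long bsize)
{
	int i;

	for (i = 0; i < n; i++)
		if (oldsize[i] != 0 && oldsize[i] != bsize)
			break;		/* a fragment blocks the rest */
	return (i);
}

/* (end of illustrative sketch)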
*/ static void handle_jwork(wkhd) struct workhead *wkhd; { struct worklist *wk; while ((wk = LIST_FIRST(wkhd)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_JSEGDEP: free_jsegdep(WK_JSEGDEP(wk)); continue; case D_FREEDEP: free_freedep(WK_FREEDEP(wk)); continue; case D_FREEFRAG: rele_jseg(WK_JSEG(WK_FREEFRAG(wk)->ff_jdep)); WORKITEM_FREE(wk, D_FREEFRAG); continue; case D_FREEWORK: handle_written_freework(WK_FREEWORK(wk)); continue; default: panic("handle_jwork: Unknown type %s\n", TYPENAME(wk->wk_type)); } } } /* * Handle the bufwait list on an inode when it is safe to release items * held there. This normally happens after an inode block is written but * may be delayed and handled later if there are pending journal items that * are not yet safe to be released. */ static struct freefile * handle_bufwait(inodedep, refhd) struct inodedep *inodedep; struct workhead *refhd; { struct jaddref *jaddref; struct freefile *freefile; struct worklist *wk; freefile = NULL; while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_FREEFILE: /* * We defer adding freefile to the worklist * until all other additions have been made to * ensure that it will be done after all the * old blocks have been freed. */ if (freefile != NULL) panic("handle_bufwait: freefile"); freefile = WK_FREEFILE(wk); continue; case D_MKDIR: handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); continue; case D_DIRADD: diradd_inode_written(WK_DIRADD(wk), inodedep); continue; case D_FREEFRAG: wk->wk_state |= COMPLETE; if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(wk, 0); continue; case D_DIRREM: wk->wk_state |= COMPLETE; add_to_worklist(wk, 0); continue; case D_ALLOCDIRECT: case D_ALLOCINDIR: free_newblk(WK_NEWBLK(wk)); continue; case D_JNEWBLK: wk->wk_state |= COMPLETE; free_jnewblk(WK_JNEWBLK(wk)); continue; /* * Save freed journal segments and add references on * the supplied list which will delay their release * until the cg bitmap is cleared on disk. */ case D_JSEGDEP: if (refhd == NULL) free_jsegdep(WK_JSEGDEP(wk)); else WORKLIST_INSERT(refhd, wk); continue; case D_JADDREF: jaddref = WK_JADDREF(wk); TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); /* * Transfer any jaddrefs to the list to be freed with * the bitmap if we're handling a removed file. */ if (refhd == NULL) { wk->wk_state |= COMPLETE; free_jaddref(jaddref); } else WORKLIST_INSERT(refhd, wk); continue; default: panic("handle_bufwait: Unknown type %p(%s)", wk, TYPENAME(wk->wk_type)); /* NOTREACHED */ } } return (freefile); } /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note * that this routine is always called from interrupt level with further * interrupts from this device blocked. * * If the write did not succeed, we will do all the roll-forward * operations, but we will not take the actions that will allow its * dependencies to be processed. 
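 */

/*
 * The contract shared by the handle_written_*() routines can be
 * sketched as follows (a hypothetical restatement, not a function in
 * this file): roll the buffer contents forward whether or not the
 * write succeeded, but only a successful write may release
 * dependencies; a nonzero return asks the caller to keep the buffer
 * dirty and try again.
 */
static int
written_common(int writesucceeded, int hadchanges,
    void (*rollforward)(void), void (*process_deps)(void))
{

	(*rollforward)();
	if (!writesucceeded)
		return (1);		/* redo the write later */
	(*process_deps)();
	return (hadchanges);
}

/* (end of illustrative sketch)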
*/ static int handle_written_inodeblock(inodedep, bp, flags) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ int flags; { struct freefile *freefile; struct allocdirect *adp, *nextadp; struct ufs1_dinode *dp1 = NULL; struct ufs2_dinode *dp2 = NULL; struct workhead wkhd; int hadchanges, fstype; ino_t freelink; LIST_INIT(&wkhd); hadchanges = 0; freefile = NULL; if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) { fstype = UFS1; dp1 = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); freelink = dp1->di_freelink; } else { fstype = UFS2; dp2 = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); freelink = dp2->di_freelink; } /* * Leave this inodeblock dirty until it's in the list. */ if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED && (flags & WRITESUCCEEDED)) { struct inodedep *inon; inon = TAILQ_NEXT(inodedep, id_unlinked); if ((inon == NULL && freelink == 0) || (inon && inon->id_ino == freelink)) { if (inon) inon->id_state |= UNLINKPREV; inodedep->id_state |= UNLINKNEXT; } hadchanges = 1; } /* * If we had to rollback the inode allocation because of * bitmaps being incomplete, then simply restore it. * Keep the block dirty so that it will not be reclaimed until * all associated dependencies have been cleared and the * corresponding updates written to disk. */ if (inodedep->id_savedino1 != NULL) { hadchanges = 1; if (fstype == UFS1) *dp1 = *inodedep->id_savedino1; else *dp2 = *inodedep->id_savedino2; free(inodedep->id_savedino1, M_SAVEDINO); inodedep->id_savedino1 = NULL; if ((bp->b_flags & B_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); /* * If the inode is clear here and GOINGAWAY it will never * be written. Process the bufwait and clear any pending * work which may include the freefile. */ if (inodedep->id_state & GOINGAWAY) goto bufwait; return (1); } if (flags & WRITESUCCEEDED) inodedep->id_state |= COMPLETE; /* * Roll forward anything that had to be rolled back before * the inode could be updated. 
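 *
 * Each entry below reinstalls its saved ad_newblkno over the rolled-
 * back value and flips from UNDONE back to ATTACHED; finding anything
 * other than ad_oldblkno on disk would mean the rollback was lost,
 * which is exactly what the panics guard against.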
*/ for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (fstype == UFS1) { if (adp->ad_offset < NDADDR) { if (dp1->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s %s #%jd mismatch %d != %jd", "handle_written_inodeblock:", "direct pointer", (intmax_t)adp->ad_offset, dp1->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); dp1->di_db[adp->ad_offset] = adp->ad_newblkno; } else { if (dp1->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %d", "handle_written_inodeblock", "indirect pointer", (intmax_t)adp->ad_offset - NDADDR, dp1->di_ib[adp->ad_offset - NDADDR]); dp1->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } else { if (adp->ad_offset < NDADDR) { if (dp2->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s: %s #%jd %s %jd != %jd", "handle_written_inodeblock", "direct pointer", (intmax_t)adp->ad_offset, "mismatch", (intmax_t)dp2->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); dp2->di_db[adp->ad_offset] = adp->ad_newblkno; } else { if (dp2->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %jd", "handle_written_inodeblock", "indirect pointer", (intmax_t)adp->ad_offset - NDADDR, (intmax_t) dp2->di_ib[adp->ad_offset - NDADDR]); dp2->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; } for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (dp2->di_extb[adp->ad_offset] != adp->ad_oldblkno) panic("%s: direct pointers #%jd %s %jd != %jd", "handle_written_inodeblock", (intmax_t)adp->ad_offset, "mismatch", (intmax_t)dp2->di_extb[adp->ad_offset], (intmax_t)adp->ad_oldblkno); dp2->di_extb[adp->ad_offset] = adp->ad_newblkno; adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; } if (hadchanges && (bp->b_flags & B_DELWRI) == 0) stat_direct_blk_ptrs++; /* * Reset the file size to its most up-to-date value. */ if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1) panic("handle_written_inodeblock: bad size"); if (inodedep->id_savednlink > LINK_MAX) panic("handle_written_inodeblock: Invalid link count " "%jd for inodedep %p", (uintmax_t)inodedep->id_savednlink, inodedep); if (fstype == UFS1) { if (dp1->di_nlink != inodedep->id_savednlink) { dp1->di_nlink = inodedep->id_savednlink; hadchanges = 1; } if (dp1->di_size != inodedep->id_savedsize) { dp1->di_size = inodedep->id_savedsize; hadchanges = 1; } } else { if (dp2->di_nlink != inodedep->id_savednlink) { dp2->di_nlink = inodedep->id_savednlink; hadchanges = 1; } if (dp2->di_size != inodedep->id_savedsize) { dp2->di_size = inodedep->id_savedsize; hadchanges = 1; } if (dp2->di_extsize != inodedep->id_savedextsize) { dp2->di_extsize = inodedep->id_savedextsize; hadchanges = 1; } } inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; inodedep->id_savednlink = -1; /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that it will eventually get written back in * its correct form. */ if (hadchanges) bdirty(bp); bufwait: /* * If the write did not succeed, we have done all the roll-forward * operations, but we cannot take the actions that will allow its * dependencies to be processed. */ if ((flags & WRITESUCCEEDED) == 0) return (hadchanges); /* * Process any allocdirects that completed during the update.
*/ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) handle_allocdirect_partdone(adp, &wkhd); if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) handle_allocdirect_partdone(adp, &wkhd); /* * Process deallocations that were held pending until the * inode had been written to disk. Freeing of the inode * is delayed until after all blocks have been freed to * avoid creation of new <vfsid, ino, lbn> triples * before the old ones have been deleted. Completely * unlinked inodes are not processed until the unlinked * inode list is written or the last reference is removed. */ if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) != UNLINKED) { freefile = handle_bufwait(inodedep, NULL); if (freefile && !LIST_EMPTY(&wkhd)) { WORKLIST_INSERT(&wkhd, &freefile->fx_list); freefile = NULL; } } /* * Move rolled forward dependency completions to the bufwait list * now that those that were already written have been processed. */ if (!LIST_EMPTY(&wkhd) && hadchanges == 0) panic("handle_written_inodeblock: bufwait but no changes"); jwork_move(&inodedep->id_bufwait, &wkhd); if (freefile != NULL) { /* * If the inode is goingaway it was never written. Fake up * the state here so free_inodedep() can succeed. */ if (inodedep->id_state & GOINGAWAY) inodedep->id_state |= COMPLETE | DEPCOMPLETE; if (free_inodedep(inodedep) == 0) panic("handle_written_inodeblock: live inodedep %p", inodedep); add_to_worklist(&freefile->fx_list, 0); return (0); } /* * If no outstanding dependencies, free it. */ if (free_inodedep(inodedep) || (TAILQ_FIRST(&inodedep->id_inoreflst) == 0 && TAILQ_FIRST(&inodedep->id_inoupdt) == 0 && TAILQ_FIRST(&inodedep->id_extupdt) == 0 && LIST_FIRST(&inodedep->id_bufwait) == 0)) return (0); return (hadchanges); } /* * Perform needed roll-forwards and kick off any dependencies that * can now be processed. * * If the write did not succeed, we will do all the roll-forward * operations, but we will not take the actions that will allow its * dependencies to be processed. */ static int handle_written_indirdep(indirdep, bp, bpp, flags) struct indirdep *indirdep; struct buf *bp; struct buf **bpp; int flags; { struct allocindir *aip; struct buf *sbp; int chgs; if (indirdep->ir_state & GOINGAWAY) panic("handle_written_indirdep: indirdep gone"); if ((indirdep->ir_state & IOSTARTED) == 0) panic("handle_written_indirdep: IO not started"); chgs = 0; /* * If there were rollbacks, revert them here. */ if (indirdep->ir_saveddata) { bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); if (TAILQ_EMPTY(&indirdep->ir_trunc)) { free(indirdep->ir_saveddata, M_INDIRDEP); indirdep->ir_saveddata = NULL; } chgs = 1; } indirdep->ir_state &= ~(UNDONE | IOSTARTED); indirdep->ir_state |= ATTACHED; /* * If the write did not succeed, we have done all the roll-forward * operations, but we cannot take the actions that will allow its * dependencies to be processed. */ if ((flags & WRITESUCCEEDED) == 0) { stat_indir_blk_ptrs++; bdirty(bp); return (1); } /* * Move allocindirs with written pointers to the completehd if * the indirdep's pointer is not yet written. Otherwise * free them here. */ while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) { LIST_REMOVE(aip, ai_next); if ((indirdep->ir_state & DEPCOMPLETE) == 0) { LIST_INSERT_HEAD(&indirdep->ir_completehd, aip, ai_next); newblk_freefrag(&aip->ai_block); continue; } free_newblk(&aip->ai_block); } /* * Move allocindirs that have finished dependency processing from * the done list to the write list after updating the pointers.
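 *
 * The loop below relies on handle_allocindir_partdone() consuming the
 * head of ir_donehd each time (freeing it or moving it to ir_writehd);
 * if the head were unchanged after the call the loop could spin
 * forever, so that condition panics instead.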
*/ if (TAILQ_EMPTY(&indirdep->ir_trunc)) { while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) { handle_allocindir_partdone(aip); if (aip == LIST_FIRST(&indirdep->ir_donehd)) panic("disk_write_complete: not gone"); chgs = 1; } } /* * Preserve the indirdep if there were any changes or if it is not * yet valid on disk. */ if (chgs) { stat_indir_blk_ptrs++; bdirty(bp); return (1); } /* * If there were no changes we can discard the savedbp and detach * ourselves from the buf. We are only carrying completed pointers * in this case. */ sbp = indirdep->ir_savebp; sbp->b_flags |= B_INVAL | B_NOCACHE; indirdep->ir_savebp = NULL; indirdep->ir_bp = NULL; if (*bpp != NULL) panic("handle_written_indirdep: bp already exists."); *bpp = sbp; /* * The indirdep may not be freed until its parent points at it. */ if (indirdep->ir_state & DEPCOMPLETE) free_indirdep(indirdep); return (0); } /* * Process a diradd entry after its dependent inode has been written. * This routine must be called with splbio interrupts blocked. */ static void diradd_inode_written(dap, inodedep) struct diradd *dap; struct inodedep *inodedep; { dap->da_state |= COMPLETE; complete_diradd(dap); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } /* * Returns true if the bmsafemap will have rollbacks when written. Must only * be called with the per-filesystem lock and the buf lock on the cg held. */ static int bmsafemap_backgroundwrite(bmsafemap, bp) struct bmsafemap *bmsafemap; struct buf *bp; { int dirty; LOCK_OWNED(VFSTOUFS(bmsafemap->sm_list.wk_mp)); dirty = !LIST_EMPTY(&bmsafemap->sm_jaddrefhd) | !LIST_EMPTY(&bmsafemap->sm_jnewblkhd); /* * If we're initiating a background write we need to process the * rollbacks as they exist now, not as they exist when IO starts. * No other consumers will look at the contents of the shadowed * buf so this is safe to do here. */ if (bp->b_xflags & BX_BKGRDMARKER) initiate_write_bmsafemap(bmsafemap, bp); return (dirty); } /* * Re-apply an allocation when a cg write is complete. */ static int jnewblk_rollforward(jnewblk, fs, cgp, blksfree) struct jnewblk *jnewblk; struct fs *fs; struct cg *cgp; uint8_t *blksfree; { ufs1_daddr_t fragno; ufs2_daddr_t blkno; long cgbno, bbase; int frags, blk; int i; frags = 0; cgbno = dtogd(fs, jnewblk->jn_blkno); for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) { if (isclr(blksfree, cgbno + i)) panic("jnewblk_rollforward: re-allocated fragment"); frags++; } if (frags == fs->fs_frag) { blkno = fragstoblks(fs, cgbno); ffs_clrblock(fs, blksfree, (long)blkno); ffs_clusteracct(fs, cgp, blkno, -1); cgp->cg_cs.cs_nbfree--; } else { bbase = cgbno - fragnum(fs, cgbno); cgbno += jnewblk->jn_oldfrags; /* If a complete block had been reassembled, account for it. */ fragno = fragstoblks(fs, bbase); if (ffs_isblock(fs, blksfree, fragno)) { cgp->cg_cs.cs_nffree += fs->fs_frag; ffs_clusteracct(fs, cgp, fragno, -1); cgp->cg_cs.cs_nbfree--; } /* Decrement the old frags. */ blk = blkmap(fs, blksfree, bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, -1); /* Allocate the fragment */ for (i = 0; i < frags; i++) clrbit(blksfree, cgbno + i); cgp->cg_cs.cs_nffree -= frags; /* Add back in counts associated with the new frags */ blk = blkmap(fs, blksfree, bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, 1); } return (frags); } /* * Complete a write to a bmsafemap structure. Roll forward any bitmap * changes if it's not a background write. Set all written dependencies * to DEPCOMPLETE and free the structure if possible. 
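 */

/*
 * The per-fragment roll-forward done by jnewblk_rollforward() above
 * can be restated on a plain byte-per-fragment map (hypothetical
 * helper; the kernel uses the cg bitmap macros instead). Every
 * journaled fragment in [oldfrags, frags) must currently be free and
 * is marked allocated again; a fragment already allocated indicates
 * the "re-allocated fragment" bug the kernel panics on.
 */
static int
reapply_frags(unsigned char *freemap, int oldfrags, int frags)
{
	int i, n;

	for (i = oldfrags, n = 0; i < frags; i++, n++) {
		if (freemap[i] == 0)
			return (-1);	/* already allocated: bug */
		freemap[i] = 0;		/* 0 means allocated */
	}
	return (n);
}

/* (end of illustrative sketch)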
* * If the write did not succeed, we will do all the roll-forward * operations, but we will not take the actions that will allow its * dependencies to be processed. */ static int handle_written_bmsafemap(bmsafemap, bp, flags) struct bmsafemap *bmsafemap; struct buf *bp; int flags; { struct newblk *newblk; struct inodedep *inodedep; struct jaddref *jaddref, *jatmp; struct jnewblk *jnewblk, *jntmp; struct ufsmount *ump; uint8_t *inosused; uint8_t *blksfree; struct cg *cgp; struct fs *fs; ino_t ino; int foreground; int chgs; if ((bmsafemap->sm_state & IOSTARTED) == 0) panic("handle_written_bmsafemap: Not started\n"); ump = VFSTOUFS(bmsafemap->sm_list.wk_mp); chgs = 0; bmsafemap->sm_state &= ~IOSTARTED; foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0; /* * If write was successful, release journal work that was waiting * on the write. Otherwise move the work back. */ if (flags & WRITESUCCEEDED) handle_jwork(&bmsafemap->sm_freewr); else LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist, wk_list); /* * Restore unwritten inode allocation pending jaddref writes. */ if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { cgp = (struct cg *)bp->b_data; fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; inosused = cg_inosused(cgp); LIST_FOREACH_SAFE(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps, jatmp) { if ((jaddref->ja_state & UNDONE) == 0) continue; ino = jaddref->ja_ino % fs->fs_ipg; if (isset(inosused, ino)) panic("handle_written_bmsafemap: " "re-allocated inode"); /* Do the roll-forward only if it's a real copy. */ if (foreground) { if ((jaddref->ja_mode & IFMT) == IFDIR) cgp->cg_cs.cs_ndir++; cgp->cg_cs.cs_nifree--; setbit(inosused, ino); chgs = 1; } jaddref->ja_state &= ~UNDONE; jaddref->ja_state |= ATTACHED; free_jaddref(jaddref); } } /* * Restore any block allocations which are pending journal writes. */ if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { cgp = (struct cg *)bp->b_data; fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; blksfree = cg_blksfree(cgp); LIST_FOREACH_SAFE(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps, jntmp) { if ((jnewblk->jn_state & UNDONE) == 0) continue; /* Do the roll-forward only if it's a real copy. */ if (foreground && jnewblk_rollforward(jnewblk, fs, cgp, blksfree)) chgs = 1; jnewblk->jn_state &= ~(UNDONE | NEWBLOCK); jnewblk->jn_state |= ATTACHED; free_jnewblk(jnewblk); } } /* * If the write did not succeed, we have done all the roll-forward * operations, but we cannot take the actions that will allow its * dependencies to be processed. 
*/ if ((flags & WRITESUCCEEDED) == 0) { LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, newblk, nb_deps); LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist, wk_list); if (foreground) bdirty(bp); return (1); } while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { newblk->nb_state |= DEPCOMPLETE; newblk->nb_state &= ~ONDEPLIST; newblk->nb_bmsafemap = NULL; LIST_REMOVE(newblk, nb_deps); if (newblk->nb_list.wk_type == D_ALLOCDIRECT) handle_allocdirect_partdone( WK_ALLOCDIRECT(&newblk->nb_list), NULL); else if (newblk->nb_list.wk_type == D_ALLOCINDIR) handle_allocindir_partdone( WK_ALLOCINDIR(&newblk->nb_list)); else if (newblk->nb_list.wk_type != D_NEWBLK) panic("handle_written_bmsafemap: Unexpected type: %s", TYPENAME(newblk->nb_list.wk_type)); } while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedepwr)) != NULL) { inodedep->id_state |= DEPCOMPLETE; inodedep->id_state &= ~ONDEPLIST; LIST_REMOVE(inodedep, id_deps); inodedep->id_bmsafemap = NULL; } LIST_REMOVE(bmsafemap, sm_next); if (chgs == 0 && LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && LIST_EMPTY(&bmsafemap->sm_newblkhd) && LIST_EMPTY(&bmsafemap->sm_inodedephd) && LIST_EMPTY(&bmsafemap->sm_freehd)) { LIST_REMOVE(bmsafemap, sm_hash); WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); return (0); } LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next); if (foreground) bdirty(bp); return (1); } /* * Try to free a mkdir dependency. */ static void complete_mkdir(mkdir) struct mkdir *mkdir; { struct diradd *dap; if ((mkdir->md_state & ALLCOMPLETE) != ALLCOMPLETE) return; LIST_REMOVE(mkdir, md_mkdirs); dap = mkdir->md_diradd; dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) { dap->da_state |= DEPCOMPLETE; complete_diradd(dap); } WORKITEM_FREE(mkdir, D_MKDIR); } /* * Handle the completion of a mkdir dependency. */ static void handle_written_mkdir(mkdir, type) struct mkdir *mkdir; int type; { if ((mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)) != type) panic("handle_written_mkdir: bad type"); mkdir->md_state |= COMPLETE; complete_mkdir(mkdir); } static int free_pagedep(pagedep) struct pagedep *pagedep; { int i; if (pagedep->pd_state & NEWBLOCK) return (0); if (!LIST_EMPTY(&pagedep->pd_dirremhd)) return (0); for (i = 0; i < DAHASHSZ; i++) if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) return (0); if (!LIST_EMPTY(&pagedep->pd_pendinghd)) return (0); if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) return (0); if (pagedep->pd_state & ONWORKLIST) WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); return (1); } /* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. * Note that this routine is always called from interrupt level * with further interrupts from this device blocked. * * If the write did not succeed, we will do all the roll-forward * operations, but we will not take the actions that will allow its * dependencies to be processed. 
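 */

/*
 * The rollback/roll-forward pair applied to one uncommitted directory
 * entry can be sketched with simplified, invented types (the real code
 * works on struct direct inside the page buffer): before the page is
 * written the entry points at its previous inode (rename) or at
 * nothing, and after the write the new inode number is put back so the
 * in-memory copy is current again.
 */
struct xdirent {
	unsigned int xd_ino;		/* 0 means an empty entry */
};

static void
xentry_rollback(struct xdirent *ep, unsigned int old_ino, int dirchg)
{

	ep->xd_ino = dirchg ? old_ino : 0;	/* hide uncommitted inode */
}

static void
xentry_rollforward(struct xdirent *ep, unsigned int new_ino)
{

	ep->xd_ino = new_ino;		/* restore after the write */
}

/* (end of illustrative sketch)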
*/ static int handle_written_filepage(pagedep, bp, flags) struct pagedep *pagedep; struct buf *bp; /* buffer containing the written page */ int flags; { struct dirrem *dirrem; struct diradd *dap, *nextdap; struct direct *ep; int i, chgs; if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; if ((flags & WRITESUCCEEDED) == 0) goto rollforward; /* * Process any directory removals that have been committed. */ while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { LIST_REMOVE(dirrem, dm_next); dirrem->dm_state |= COMPLETE; dirrem->dm_dirinum = pagedep->pd_ino; KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), ("handle_written_filepage: Journal entries not written.")); add_to_worklist(&dirrem->dm_list, 0); } /* * Free any directory additions that have been committed. * If it is a newly allocated block, we have to wait until * the on-disk directory inode claims the new block. */ if ((pagedep->pd_state & NEWBLOCK) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap, NULL); rollforward: /* * Uncommitted directory entries must be restored. */ for (chgs = 0, i = 0; i < DAHASHSZ; i++) { for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; dap = nextdap) { nextdap = LIST_NEXT(dap, da_pdlist); if (dap->da_state & ATTACHED) panic("handle_written_filepage: attached"); ep = (struct direct *) ((char *)bp->b_data + dap->da_offset); ep->d_ino = dap->da_newinum; dap->da_state &= ~UNDONE; dap->da_state |= ATTACHED; chgs = 1; /* * If the inode referenced by the directory has * been written out, then the dependency can be * moved to the pending list. */ if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } } } /* * If there were any rollbacks in the directory, then it must be * marked dirty so that it will eventually get written back in * its correct form. */ if (chgs || (flags & WRITESUCCEEDED) == 0) { if ((bp->b_flags & B_DELWRI) == 0) stat_dir_entry++; bdirty(bp); return (1); } /* * If we are not waiting for a new directory block to be * claimed by its inode, then the pagedep will be freed. * Otherwise it will remain to track any new entries on * the page in case they are fsync'ed. */ free_pagedep(pagedep); return (0); } /* * Writing back in-core inode structures. * * The filesystem only accesses an inode's contents when it occupies an * "in-core" inode structure. These "in-core" structures are separate from * the page frames used to cache inode blocks. Only the latter are * transferred to/from the disk. So, when the updated contents of the * "in-core" inode structure are copied to the corresponding in-memory inode * block, the dependencies are also transferred. The following procedure is * called when copying a dirty "in-core" inode to a cached inode block. */ /* * Called when an inode is loaded from disk. If the effective link count * differed from the actual link count when it was last flushed, then we * need to ensure that the correct effective link count is put back. */ void softdep_load_inodeblock(ip) struct inode *ip; /* the "in_core" copy of the inode */ { struct inodedep *inodedep; struct ufsmount *ump; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_load_inodeblock called on non-softdep filesystem")); /* * Check for alternate nlink count.
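 *
 * The subtraction performed below can be pictured with a trivial,
 * hypothetical sketch (nlinkdelta stands in for id_nlinkdelta):
 */
#include <assert.h>

struct mini_inode {
	int nlink;	/* link count as recorded on disk */
	int effnlink;	/* live names; lags nlink while removals pend */
};

/* On load, recover the effective count from the on-disk count. */
static void
mini_load(struct mini_inode *ip, int nlinkdelta)
{
	ip->effnlink = ip->nlink - nlinkdelta;
	assert(ip->effnlink >= 0);
}

/*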
*/ ip->i_effnlink = ip->i_nlink; ACQUIRE_LOCK(ump); if (inodedep_lookup(UFSTOVFS(ump), ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(ump); return; } ip->i_effnlink -= inodedep->id_nlinkdelta; FREE_LOCK(ump); } /* * This routine is called just before the "in-core" inode * information is to be copied to the in-memory inode block. * Recall that an inode block contains several inodes. If * the force flag is set, then the dependencies will be * cleared so that the update can always be made. Note that * the buffer is locked when this routine is called, so we * will never be in the middle of writing the inode block * to disk. */ void softdep_update_inodeblock(ip, bp, waitfor) struct inode *ip; /* the "in_core" copy of the inode */ struct buf *bp; /* the buffer containing the inode block */ int waitfor; /* nonzero => update must be allowed */ { struct inodedep *inodedep; struct inoref *inoref; struct ufsmount *ump; struct worklist *wk; struct mount *mp; struct buf *ibp; struct fs *fs; int error; ump = ITOUMP(ip); mp = UFSTOVFS(ump); KASSERT(MOUNTEDSOFTDEP(mp) != 0, ("softdep_update_inodeblock called on non-softdep filesystem")); fs = ump->um_fs; /* * Preserve the freelink that is on disk. clear_unlinked_inodedep() * does not have access to the in-core ip so must write directly into * the inode block buffer when setting freelink. */ if (fs->fs_magic == FS_UFS1_MAGIC) DIP_SET(ip, i_freelink, ((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number))->di_freelink); else DIP_SET(ip, i_freelink, ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number))->di_freelink); /* * If the effective link count is not equal to the actual link * count, then we must track the difference in an inodedep while * the inode is (potentially) tossed out of the cache. Otherwise, * if there is no existing inodedep, then there are no dependencies * to track. */ ACQUIRE_LOCK(ump); again: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(ump); if (ip->i_effnlink != ip->i_nlink) panic("softdep_update_inodeblock: bad link count"); return; } if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); /* * If we're flushing all dependencies we must also move any waiting * for journal writes onto the bufwait list prior to I/O. */ if (waitfor) { TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { jwait(&inoref->if_list, MNT_WAIT); goto again; } } } /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. */ inodedep->id_state &= ~COMPLETE; if ((inodedep->id_state & ONWORKLIST) == 0) WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list); /* * Any new dependencies associated with the incore inode must * now be moved to the list associated with the buffer holding * the in-memory copy of the inode. Once merged process any * allocdirects that are completed by the merger. 
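 *
 * The list merge invoked just below is a plain merge of two queues
 * kept sorted by file offset; here is a self-contained userland sketch
 * of the same technique (the "ent" type is hypothetical, and the
 * duplicate-offset coalescing done by allocdirect_merge() is omitted):
 */
#include <sys/queue.h>

struct ent {
	int		 offset;
	TAILQ_ENTRY(ent) next;
};
TAILQ_HEAD(entlist, ent);

/* Move every element of "new" into "old", keeping "old" sorted. */
static void
merge_sorted(struct entlist *new, struct entlist *old)
{
	struct ent *le, *ne;

	ne = TAILQ_FIRST(new);
	for (le = TAILQ_FIRST(old); le != NULL && ne != NULL;) {
		if (le->offset < ne->offset) {
			le = TAILQ_NEXT(le, next);
			continue;
		}
		TAILQ_REMOVE(new, ne, next);
		TAILQ_INSERT_BEFORE(le, ne, next);
		ne = TAILQ_FIRST(new);
	}
	/* Anything left in "new" sorts after all of "old". */
	while ((ne = TAILQ_FIRST(new)) != NULL) {
		TAILQ_REMOVE(new, ne, next);
		TAILQ_INSERT_TAIL(old, ne, next);
	}
}

/*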
*/ merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); if (!TAILQ_EMPTY(&inodedep->id_inoupdt)) handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt), NULL); merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); if (!TAILQ_EMPTY(&inodedep->id_extupdt)) handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt), NULL); /* * Now that the inode has been pushed into the buffer, the * operations dependent on the inode being written to disk * can be moved to the id_bufwait so that they will be * processed when the buffer I/O completes. */ while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(&inodedep->id_bufwait, wk); } /* * Newly allocated inodes cannot be written until the bitmap * that allocates them has been written (indicated by * DEPCOMPLETE being set in id_state). If we are doing a * forced sync (e.g., an fsync on a file), we force the bitmap * to be written so that the update can be done. */ if (waitfor == 0) { FREE_LOCK(ump); return; } retry: if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) { FREE_LOCK(ump); return; } ibp = inodedep->id_bmsafemap->sm_buf; ibp = getdirtybuf(ibp, LOCK_PTR(ump), MNT_WAIT); if (ibp == NULL) { /* * If ibp came back as NULL, the dependency could have been * freed while we slept. Look it up again, and check to see * that it has completed. */ if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) goto retry; FREE_LOCK(ump); return; } FREE_LOCK(ump); if ((error = bwrite(ibp)) != 0) softdep_error("softdep_update_inodeblock: bwrite", error); } /* * Merge a new inode dependency list (such as id_newinoupdt) into an * old inode dependency list (such as id_inoupdt). This routine must be * called with splbio interrupts blocked. */ static void merge_inode_lists(newlisthead, oldlisthead) struct allocdirectlst *newlisthead; struct allocdirectlst *oldlisthead; { struct allocdirect *listadp, *newadp; newadp = TAILQ_FIRST(newlisthead); for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) { if (listadp->ad_offset < newadp->ad_offset) { listadp = TAILQ_NEXT(listadp, ad_next); continue; } TAILQ_REMOVE(newlisthead, newadp, ad_next); TAILQ_INSERT_BEFORE(listadp, newadp, ad_next); if (listadp->ad_offset == newadp->ad_offset) { allocdirect_merge(oldlisthead, newadp, listadp); listadp = newadp; } newadp = TAILQ_FIRST(newlisthead); } while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) { TAILQ_REMOVE(newlisthead, newadp, ad_next); TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next); } } /* * If we are doing an fsync, then we must ensure that any directory * entries for the inode have been written after the inode gets to disk.
*/ int softdep_fsync(vp) struct vnode *vp; /* the "in_core" copy of the inode */ { struct inodedep *inodedep; struct pagedep *pagedep; struct inoref *inoref; struct ufsmount *ump; struct worklist *wk; struct diradd *dap; struct mount *mp; struct vnode *pvp; struct inode *ip; struct buf *bp; struct fs *fs; struct thread *td = curthread; int error, flushparent, pagedep_new_block; ino_t parentino; ufs_lbn_t lbn; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); fs = ump->um_fs; if (MOUNTEDSOFTDEP(mp) == 0) return (0); ACQUIRE_LOCK(ump); restart: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(ump); return (0); } TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { jwait(&inoref->if_list, MNT_WAIT); goto restart; } } if (!LIST_EMPTY(&inodedep->id_inowait) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt)) panic("softdep_fsync: pending ops %p", inodedep); for (error = 0, flushparent = 0; ; ) { if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) break; if (wk->wk_type != D_DIRADD) panic("softdep_fsync: Unexpected type %s", TYPENAME(wk->wk_type)); dap = WK_DIRADD(wk); /* * Flush our parent if this directory entry has a MKDIR_PARENT * dependency or is contained in a newly allocated block. */ if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; parentino = pagedep->pd_ino; lbn = pagedep->pd_lbn; if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) panic("softdep_fsync: dirty"); if ((dap->da_state & MKDIR_PARENT) || (pagedep->pd_state & NEWBLOCK)) flushparent = 1; else flushparent = 0; /* * If we are being fsync'ed as part of vgone'ing this vnode, * then we will not be able to release and recover the * vnode below, so we just have to give up on writing its * directory entry out. It will eventually be written, just * not now, but then the user was not asking to have it * written, so we are not breaking any promises. */ if (vp->v_iflag & VI_DOOMED) break; /* * We prevent deadlock by always fetching inodes from the * root, moving down the directory tree. Thus, when fetching * our parent directory, we first try to get the lock. If * that fails, we must unlock ourselves before requesting * the lock on our parent. See the comment in ufs_lookup * for details on possible races. */ FREE_LOCK(ump); if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp, FFSV_FORCEINSMQ)) { error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vfs_rel(mp); if (error != 0) return (ENOENT); if (vp->v_iflag & VI_DOOMED) { vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE, &pvp, FFSV_FORCEINSMQ); vfs_unbusy(mp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { if (error == 0) vput(pvp); error = ENOENT; } if (error != 0) return (error); } /* * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps * that are contained in direct blocks will be resolved by * doing a ffs_update. Pagedeps contained in indirect blocks * may require a complete sync'ing of the directory. So, we * try the cheap and fast ffs_update first, and if that fails, * then we do the slower ffs_syncvnode of the directory. 
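 *
 * Earlier in this routine the parent vnode is acquired with a
 * try-lock, back-off, re-lock dance. A minimal userland rendition
 * with POSIX mutexes (lock_parent() is hypothetical; as in the real
 * code, callers must revalidate any state guarded by "child" after
 * it has been dropped):
 */
#include <pthread.h>

static void
lock_parent(pthread_mutex_t *child, pthread_mutex_t *parent)
{
	/* Cheap attempt first: a trylock can never deadlock. */
	if (pthread_mutex_trylock(parent) == 0)
		return;
	/* Back off, then take both in root-to-leaf order. */
	pthread_mutex_unlock(child);
	pthread_mutex_lock(parent);
	pthread_mutex_lock(child);
}

/*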
*/ if (flushparent) { int locked; if ((error = ffs_update(pvp, 1)) != 0) { vput(pvp); return (error); } ACQUIRE_LOCK(ump); locked = 1; if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) { if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) != NULL) { if (wk->wk_type != D_DIRADD) panic("softdep_fsync: Unexpected type %s", TYPENAME(wk->wk_type)); dap = WK_DIRADD(wk); if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; pagedep_new_block = pagedep->pd_state & NEWBLOCK; FREE_LOCK(ump); locked = 0; if (pagedep_new_block && (error = ffs_syncvnode(pvp, MNT_WAIT, 0))) { vput(pvp); return (error); } } } if (locked) FREE_LOCK(ump); } /* * Flush directory page containing the inode's name. */ error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), td->td_ucred, &bp); if (error == 0) error = bwrite(bp); else brelse(bp); vput(pvp); if (error != 0) return (error); ACQUIRE_LOCK(ump); if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) break; } FREE_LOCK(ump); return (0); } /* * Flush all the dirty bitmaps associated with the block device * before flushing the rest of the dirty blocks so as to reduce * the number of dependencies that will have to be rolled back. * * XXX Unused? */ void softdep_fsync_mountdev(vp) struct vnode *vp; { struct buf *bp, *nbp; struct worklist *wk; struct bufobj *bo; if (!vn_isdisk(vp, NULL)) panic("softdep_fsync_mountdev: vnode not a disk"); bo = &vp->v_bufobj; restart: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { /* * If it is already scheduled, skip to the next buffer. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("softdep_fsync_mountdev: not dirty"); /* * We are only interested in bitmaps with outstanding * dependencies. */ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || wk->wk_type != D_BMSAFEMAP || (bp->b_vflags & BV_BKGRDINPROG)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); (void) bawrite(bp); goto restart; } drain_output(vp); BO_UNLOCK(bo); } /* * Sync all cylinder groups that were dirty at the time this function is * called. Newly dirtied cgs will be inserted before the sentinel. This * is used to flush freedep activity that may be holding up writes to an * indirect block. */ static int sync_cgs(mp, waitfor) struct mount *mp; int waitfor; { struct bmsafemap *bmsafemap; struct bmsafemap *sentinel; struct ufsmount *ump; struct buf *bp; int error; sentinel = malloc(sizeof(*sentinel), M_BMSAFEMAP, M_ZERO | M_WAITOK); sentinel->sm_cg = -1; ump = VFSTOUFS(mp); error = 0; ACQUIRE_LOCK(ump); LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next); for (bmsafemap = LIST_NEXT(sentinel, sm_next); bmsafemap != NULL; bmsafemap = LIST_NEXT(sentinel, sm_next)) { /* Skip sentinels and cgs with no work to release. */ if (bmsafemap->sm_cg == -1 || (LIST_EMPTY(&bmsafemap->sm_freehd) && LIST_EMPTY(&bmsafemap->sm_freewr))) { LIST_REMOVE(sentinel, sm_next); LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next); continue; } /* * If we don't get the lock and we're waiting, try again; if * not, move on to the next buf and try to sync it.
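 *
 * The sentinel trick used by this loop stands on its own; a sketch
 * follows (hypothetical "node" type, locking elided). Because the
 * sentinel records our position in the list, the walk survives other
 * threads inserting or removing entries while the lock is dropped.
 */
#include <sys/queue.h>

struct node {
	int		 cg;	/* -1 marks a sentinel */
	LIST_ENTRY(node) next;
};
LIST_HEAD(nodehead, node);

static void
walk(struct node *sentinel)
{
	struct node *n;

	for (n = LIST_NEXT(sentinel, next); n != NULL;
	    n = LIST_NEXT(sentinel, next)) {
		/* Advance the sentinel past the element just visited. */
		LIST_REMOVE(sentinel, next);
		LIST_INSERT_AFTER(n, sentinel, next);
		if (n->cg == -1)
			continue;	/* skip other walkers' sentinels */
		/* ... drop the lock, sync n, re-take the lock ... */
	}
}

/*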
*/ bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), waitfor); if (bp == NULL && waitfor == MNT_WAIT) continue; LIST_REMOVE(sentinel, sm_next); LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next); if (bp == NULL) continue; FREE_LOCK(ump); if (waitfor == MNT_NOWAIT) bawrite(bp); else error = bwrite(bp); ACQUIRE_LOCK(ump); if (error) break; } LIST_REMOVE(sentinel, sm_next); FREE_LOCK(ump); free(sentinel, M_BMSAFEMAP); return (error); } /* * This routine is called when we are trying to synchronously flush a * file. This routine must eliminate any filesystem metadata dependencies * so that the syncing routine can succeed. */ int softdep_sync_metadata(struct vnode *vp) { struct inode *ip; int error; ip = VTOI(vp); KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, ("softdep_sync_metadata called on non-softdep filesystem")); /* * Ensure that any direct block dependencies have been cleared, * truncations are started, and inode references are journaled. */ ACQUIRE_LOCK(VFSTOUFS(vp->v_mount)); /* * Write all journal records to prevent rollbacks on devvp. */ if (vp->v_type == VCHR) softdep_flushjournal(vp->v_mount); error = flush_inodedep_deps(vp, vp->v_mount, ip->i_number); /* * Ensure that all truncates are written so we won't find deps on * indirect blocks. */ process_truncates(vp); FREE_LOCK(VFSTOUFS(vp->v_mount)); return (error); } /* * This routine is called when we are attempting to sync a buf with * dependencies. If waitfor is MNT_NOWAIT it attempts to schedule any * other IO it can but returns EBUSY if the buffer is not yet able to * be written. Dependencies which will not cause rollbacks will always * return 0. */ int softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor) { struct indirdep *indirdep; struct pagedep *pagedep; struct allocindir *aip; struct newblk *newblk; struct ufsmount *ump; struct buf *nbp; struct worklist *wk; int i, error; KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, ("softdep_sync_buf called on non-softdep filesystem")); /* * For VCHR we just don't want to force flush any dependencies that * will cause rollbacks. */ if (vp->v_type == VCHR) { if (waitfor == MNT_NOWAIT && softdep_count_dependencies(bp, 0)) return (EBUSY); return (0); } ump = VFSTOUFS(vp->v_mount); ACQUIRE_LOCK(ump); /* * As we hold the buffer locked, none of its dependencies * will disappear. 
*/ error = 0; top: LIST_FOREACH(wk, &bp->b_dep, wk_list) { switch (wk->wk_type) { case D_ALLOCDIRECT: case D_ALLOCINDIR: newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk != NULL) { if (waitfor == MNT_NOWAIT) { error = EBUSY; goto out_unlock; } jwait(&newblk->nb_jnewblk->jn_list, waitfor); goto top; } if (newblk->nb_state & DEPCOMPLETE || waitfor == MNT_NOWAIT) continue; nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor); if (nbp == NULL) goto top; FREE_LOCK(ump); if ((error = bwrite(nbp)) != 0) goto out; ACQUIRE_LOCK(ump); continue; case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (waitfor == MNT_NOWAIT) { if (!TAILQ_EMPTY(&indirdep->ir_trunc) || !LIST_EMPTY(&indirdep->ir_deplisthd)) { error = EBUSY; goto out_unlock; } } if (!TAILQ_EMPTY(&indirdep->ir_trunc)) panic("softdep_sync_buf: truncation pending."); restart: LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { newblk = (struct newblk *)aip; if (newblk->nb_jnewblk != NULL) { jwait(&newblk->nb_jnewblk->jn_list, waitfor); goto restart; } if (newblk->nb_state & DEPCOMPLETE) continue; nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor); if (nbp == NULL) goto restart; FREE_LOCK(ump); if ((error = bwrite(nbp)) != 0) goto out; ACQUIRE_LOCK(ump); goto restart; } continue; case D_PAGEDEP: /* * Only flush directory entries in synchronous passes. */ if (waitfor != MNT_WAIT) { error = EBUSY; goto out_unlock; } /* * While syncing snapshots, we must allow recursive * lookups. */ BUF_AREC(bp); /* * We are trying to sync a directory that may * have dependencies on both its own metadata * and/or dependencies on the inodes of any * recently allocated files. We walk its diradd * lists pushing out the associated inode. */ pagedep = WK_PAGEDEP(wk); for (i = 0; i < DAHASHSZ; i++) { if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) continue; if ((error = flush_pagedep_deps(vp, wk->wk_mp, &pagedep->pd_diraddhd[i]))) { BUF_NOREC(bp); goto out_unlock; } } BUF_NOREC(bp); continue; case D_FREEWORK: case D_FREEDEP: case D_JSEGDEP: case D_JNEWBLK: continue; default: panic("softdep_sync_buf: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } out_unlock: FREE_LOCK(ump); out: return (error); } /* * Flush the dependencies associated with an inodedep. * Called with splbio blocked. */ static int flush_inodedep_deps(vp, mp, ino) struct vnode *vp; struct mount *mp; ino_t ino; { struct inodedep *inodedep; struct inoref *inoref; struct ufsmount *ump; int error, waitfor; /* * This work is done in two passes. The first pass grabs most * of the buffers and begins asynchronously writing them. The * only way to wait for these asynchronous writes is to sleep * on the filesystem vnode which may stay busy for a long time * if the filesystem is active. So, instead, we make a second * pass over the dependencies blocking on each write. In the * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. * We give a brief window at the top of the loop to allow * any pending I/O to complete. 
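 *
 * The two-pass shape described above, reduced to a skeleton: the
 * flush_one() callback and the SKETCH_* constants are hypothetical
 * stand-ins for the real flush routines and MNT_NOWAIT/MNT_WAIT.
 */
#define SKETCH_NOWAIT	0
#define SKETCH_WAIT	1

static int
two_pass_flush(int (*flush_one)(int waitfor), int nitems)
{
	int error, i, waitfor;

	for (waitfor = SKETCH_NOWAIT;;) {
		/* Pass 1 starts writes; pass 2 blocks on each one. */
		for (i = 0; i < nitems; i++)
			if ((error = flush_one(waitfor)) != 0)
				return (error);
		if (waitfor == SKETCH_WAIT)
			return (0);	/* blocking pass completed */
		waitfor = SKETCH_WAIT;
	}
}

/*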
*/ ump = VFSTOUFS(mp); LOCK_OWNED(ump); for (error = 0, waitfor = MNT_NOWAIT; ; ) { if (error) return (error); FREE_LOCK(ump); ACQUIRE_LOCK(ump); restart: if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) return (0); TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { jwait(&inoref->if_list, MNT_WAIT); goto restart; } } if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || flush_deplist(&inodedep->id_extupdt, waitfor, &error) || flush_deplist(&inodedep->id_newextupdt, waitfor, &error)) continue; /* * If this was pass 2, we are done; otherwise begin pass 2. */ if (waitfor == MNT_WAIT) break; waitfor = MNT_WAIT; } /* * Try freeing inodedep in case all dependencies have been removed. */ if (inodedep_lookup(mp, ino, 0, &inodedep) != 0) (void) free_inodedep(inodedep); return (0); } /* * Flush an inode dependency list. * Called with splbio blocked. */ static int flush_deplist(listhead, waitfor, errorp) struct allocdirectlst *listhead; int waitfor; int *errorp; { struct allocdirect *adp; struct newblk *newblk; struct ufsmount *ump; struct buf *bp; if ((adp = TAILQ_FIRST(listhead)) == NULL) return (0); ump = VFSTOUFS(adp->ad_list.wk_mp); LOCK_OWNED(ump); TAILQ_FOREACH(adp, listhead, ad_next) { newblk = (struct newblk *)adp; if (newblk->nb_jnewblk != NULL) { jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); return (1); } if (newblk->nb_state & DEPCOMPLETE) continue; bp = newblk->nb_bmsafemap->sm_buf; bp = getdirtybuf(bp, LOCK_PTR(ump), waitfor); if (bp == NULL) { if (waitfor == MNT_NOWAIT) continue; return (1); } FREE_LOCK(ump); if (waitfor == MNT_NOWAIT) bawrite(bp); else *errorp = bwrite(bp); ACQUIRE_LOCK(ump); return (1); } return (0); } /* * Flush dependencies associated with an allocdirect block. */ static int flush_newblk_dep(vp, mp, lbn) struct vnode *vp; struct mount *mp; ufs_lbn_t lbn; { struct newblk *newblk; struct ufsmount *ump; struct bufobj *bo; struct inode *ip; struct buf *bp; ufs2_daddr_t blkno; int error; error = 0; bo = &vp->v_bufobj; ip = VTOI(vp); blkno = DIP(ip, i_db[lbn]); if (blkno == 0) panic("flush_newblk_dep: Missing block"); ump = VFSTOUFS(mp); ACQUIRE_LOCK(ump); /* * Loop until all dependencies related to this block are satisfied. * We must be careful to restart after each sleep in case a write * completes some part of this process for us. */ for (;;) { if (newblk_lookup(mp, blkno, 0, &newblk) == 0) { FREE_LOCK(ump); break; } if (newblk->nb_list.wk_type != D_ALLOCDIRECT) panic("flush_newblk_dep: Bad newblk %p", newblk); /* * Flush the journal. */ if (newblk->nb_jnewblk != NULL) { jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); continue; } /* * Write the bitmap dependency. */ if ((newblk->nb_state & DEPCOMPLETE) == 0) { bp = newblk->nb_bmsafemap->sm_buf; bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT); if (bp == NULL) continue; FREE_LOCK(ump); error = bwrite(bp); if (error) break; ACQUIRE_LOCK(ump); continue; } /* * Write the buffer. */ FREE_LOCK(ump); BO_LOCK(bo); bp = gbincore(bo, lbn); if (bp != NULL) { error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)); if (error == ENOLCK) { ACQUIRE_LOCK(ump); error = 0; continue; /* Slept, retry */ } if (error != 0) break; /* Failed */ if (bp->b_flags & B_DELWRI) { bremfree(bp); error = bwrite(bp); if (error) break; } else BUF_UNLOCK(bp); } else BO_UNLOCK(bo); /* * We have to wait for the direct pointers to * point at the newdirblk before the dependency * will go away.
*/ error = ffs_update(vp, 1); if (error) break; ACQUIRE_LOCK(ump); } return (error); } /* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. * Called with splbio blocked. */ static int flush_pagedep_deps(pvp, mp, diraddhdp) struct vnode *pvp; struct mount *mp; struct diraddhd *diraddhdp; { struct inodedep *inodedep; struct inoref *inoref; struct ufsmount *ump; struct diradd *dap; struct vnode *vp; int error = 0; struct buf *bp; ino_t inum; struct diraddhd unfinished; LIST_INIT(&unfinished); ump = VFSTOUFS(mp); LOCK_OWNED(ump); restart: while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* * Flush ourselves if this directory entry * has a MKDIR_PARENT dependency. */ if (dap->da_state & MKDIR_PARENT) { FREE_LOCK(ump); if ((error = ffs_update(pvp, 1)) != 0) break; ACQUIRE_LOCK(ump); /* * If that cleared dependencies, go on to next. */ if (dap != LIST_FIRST(diraddhdp)) continue; /* * All MKDIR_PARENT dependencies and all the * NEWBLOCK pagedeps that are contained in direct * blocks were resolved by doing the above ffs_update. * Pagedeps contained in indirect blocks may * require a complete sync'ing of the directory. * We are in the midst of doing a complete sync, * so if they are not resolved in this pass we * defer them for now as they will be sync'ed by * our caller shortly. */ LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&unfinished, dap, da_pdlist); continue; } /* * A newly allocated directory must have its "." and * ".." entries written out before its name can be * committed in its parent. */ inum = dap->da_newinum; if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) panic("flush_pagedep_deps: lost inode1"); /* * Wait for any pending journal adds to complete so we don't * cause rollbacks while syncing. */ TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { jwait(&inoref->if_list, MNT_WAIT); goto restart; } } if (dap->da_state & MKDIR_BODY) { FREE_LOCK(ump); if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ))) break; error = flush_newblk_dep(vp, mp, 0); /* * If we still have the dependency we might need to * update the vnode to sync the new link count to * disk. */ if (error == 0 && dap == LIST_FIRST(diraddhdp)) error = ffs_update(vp, 1); vput(vp); if (error != 0) break; ACQUIRE_LOCK(ump); /* * If that cleared dependencies, go on to next. */ if (dap != LIST_FIRST(diraddhdp)) continue; if (dap->da_state & MKDIR_BODY) { inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep); panic("flush_pagedep_deps: MKDIR_BODY " "inodedep %p dap %p vp %p", inodedep, dap, vp); } } /* * Flush the inode on which the directory entry depends. * Having accounted for MKDIR_PARENT and MKDIR_BODY above, * the only remaining dependency is that the updated inode * count must get pushed to disk. The inode has already * been pushed into its inode buffer (via VOP_UPDATE) at * the time of the reference count change. So we need only * locate that buffer, ensure that there will be no rollback * caused by a bitmap dependency, then write the inode buffer. */ retry: if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) panic("flush_pagedep_deps: lost inode"); /* * If the inode still has bitmap dependencies, * push them to disk.
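 *
 * The "retry:" label above is an instance of a rule used throughout
 * this file: after any potential sleep, re-run the lookup and re-test,
 * since the object may have been completed or freed while we were
 * blocked. A hypothetical sketch with caller-supplied helpers:
 */
struct dep;

static void
wait_until_clean(struct dep *(*lookup)(int key),
    int (*try_clean)(struct dep *), int key)
{
	struct dep *d;

	/* A NULL lookup means the dependency was freed while we slept. */
	while ((d = lookup(key)) != NULL) {
		if (try_clean(d) == 0)	/* nonzero means "slept, retry" */
			break;
		/* Slept: d may now be stale; look it up afresh. */
	}
}

/*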
*/ if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) { bp = inodedep->id_bmsafemap->sm_buf; bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT); if (bp == NULL) goto retry; FREE_LOCK(ump); if ((error = bwrite(bp)) != 0) break; ACQUIRE_LOCK(ump); if (dap != LIST_FIRST(diraddhdp)) continue; } /* * If the inode is still sitting in a buffer waiting * to be written or waiting for the link count to be * adjusted, update it here to flush it to disk. */ if (dap == LIST_FIRST(diraddhdp)) { FREE_LOCK(ump); if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ))) break; error = ffs_update(vp, 1); vput(vp); if (error) break; ACQUIRE_LOCK(ump); } /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ if (dap == LIST_FIRST(diraddhdp)) { inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep); panic("flush_pagedep_deps: failed to flush " "inodedep %p ino %ju dap %p", inodedep, (uintmax_t)inum, dap); } } if (error) ACQUIRE_LOCK(ump); while ((dap = LIST_FIRST(&unfinished)) != NULL) { LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist); } return (error); } /* * A large burst of file addition or deletion activity can drive the * memory load excessively high. First attempt to slow things down * using the techniques below. If that fails, this routine requests * the offending operations to fall back to running synchronously * until the memory load returns to a reasonable level. */ int softdep_slowdown(vp) struct vnode *vp; { struct ufsmount *ump; int jlow; int max_softdeps_hard; KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, ("softdep_slowdown called on non-softdep filesystem")); ump = VFSTOUFS(vp->v_mount); ACQUIRE_LOCK(ump); jlow = 0; /* * Check for journal space if needed. */ if (DOINGSUJ(vp)) { if (journal_space(ump, 0) == 0) jlow = 1; } /* * If the system is under its limits and our filesystem is * not responsible for more than our share of the usage and * we are not low on journal space, then no need to slow down. */ max_softdeps_hard = max_softdeps * 11 / 10; if (dep_current[D_DIRREM] < max_softdeps_hard / 2 && dep_current[D_INODEDEP] < max_softdeps_hard && dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 && dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 && ump->softdep_curdeps[D_DIRREM] < (max_softdeps_hard / 2) / stat_flush_threads && ump->softdep_curdeps[D_INODEDEP] < max_softdeps_hard / stat_flush_threads && ump->softdep_curdeps[D_INDIRDEP] < (max_softdeps_hard / 1000) / stat_flush_threads && ump->softdep_curdeps[D_FREEBLKS] < max_softdeps_hard / stat_flush_threads) { FREE_LOCK(ump); return (0); } /* * If the journal is low or our filesystem is over its limit * then speed up the cleanup. */ if (ump->softdep_curdeps[D_INDIRDEP] < (max_softdeps_hard / 1000) / stat_flush_threads || jlow) softdep_speedup(ump); stat_sync_limit_hit += 1; FREE_LOCK(ump); /* * We only slow down the rate at which new dependencies are * generated if we are not using journaling. With journaling, * the cleanup should always be sufficient to keep things * under control. */ if (DOINGSUJ(vp)) return (0); return (1); } /* * Called by the allocation routines when they are about to fail * in the hope that we can free up the requested resource (inodes * or disk space). * * First check to see if the work list has anything on it. If it has, * clean up entries until we successfully free the requested resource. * Because this process holds inodes locked, we cannot handle any remove * requests that might block on a locked inode as that could lead to * deadlock.
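 *
 * The share-based limit tests in softdep_slowdown() above reduce to a
 * small predicate; here is a sketch with hypothetical parameters (the
 * 10% slop matches the max_softdeps * 11 / 10 computation in the code):
 */
static int
over_limit(int global_cnt, int mount_cnt, int max_deps, int flush_threads)
{
	int hard;

	hard = max_deps * 11 / 10;	/* permit 10% overshoot */
	return (global_cnt >= hard ||
	    mount_cnt >= hard / flush_threads);	/* this mount's share */
}

/*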
If the worklist yields none of the requested resource, * start syncing out vnodes to free up the needed space. */ int softdep_request_cleanup(fs, vp, cred, resource) struct fs *fs; struct vnode *vp; struct ucred *cred; int resource; { struct ufsmount *ump; struct mount *mp; long starttime; ufs2_daddr_t needed; int error, failed_vnode; /* * If we are being called because of a process doing a * copy-on-write, then it is not safe to process any * worklist items as we will recurse into the copyonwrite * routine. This will result in an incoherent snapshot. * If the vnode that we hold is a snapshot, we must avoid * handling other resources that could cause deadlock. */ if ((curthread->td_pflags & TDP_COWINPROGRESS) || IS_SNAPSHOT(VTOI(vp))) return (0); if (resource == FLUSH_BLOCKS_WAIT) stat_cleanup_blkrequests += 1; else stat_cleanup_inorequests += 1; mp = vp->v_mount; ump = VFSTOUFS(mp); mtx_assert(UFS_MTX(ump), MA_OWNED); UFS_UNLOCK(ump); error = ffs_update(vp, 1); if (error != 0 || MOUNTEDSOFTDEP(mp) == 0) { UFS_LOCK(ump); return (0); } /* * If we are in need of resources, start by cleaning up * any block removals associated with our inode. */ ACQUIRE_LOCK(ump); process_removes(vp); process_truncates(vp); FREE_LOCK(ump); /* * Now clean up at least as many resources as we will need. * * When requested to clean up inodes, the number that are needed * is set by the number of simultaneous writers (mnt_writeopcount) * plus a bit of slop (2) in case some more writers show up while * we are cleaning. * * When requested to free up space, the amount of space that * we need is enough blocks to allocate a full-sized segment * (fs_contigsumsize). The number of such segments that will * be needed is set by the number of simultaneous writers * (mnt_writeopcount) plus a bit of slop (2) in case some more * writers show up while we are cleaning. * * Additionally, if we are unprivileged and allocating space, * we need to ensure that we clean up enough blocks to get the * needed number of blocks over the threshold of the minimum * number of blocks required to be kept free by the filesystem * (fs_minfree). */ if (resource == FLUSH_INODES_WAIT) { needed = vp->v_mount->mnt_writeopcount + 2; } else if (resource == FLUSH_BLOCKS_WAIT) { needed = (vp->v_mount->mnt_writeopcount + 2) * fs->fs_contigsumsize; if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0)) needed += fragstoblks(fs, roundup((fs->fs_dsize * fs->fs_minfree / 100) - fs->fs_cstotal.cs_nffree, fs->fs_frag)); } else { UFS_LOCK(ump); printf("softdep_request_cleanup: Unknown resource type %d\n", resource); return (0); } starttime = time_second; retry: if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && fs->fs_cstotal.cs_nbfree <= needed) || (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && fs->fs_cstotal.cs_nifree <= needed)) { ACQUIRE_LOCK(ump); if (ump->softdep_on_worklist > 0 && process_worklist_item(UFSTOVFS(ump), ump->softdep_on_worklist, LK_NOWAIT) != 0) stat_worklist_push += 1; FREE_LOCK(ump); } /* * If we still need resources and there are no more worklist * entries to process to obtain them, we have to start flushing * the dirty vnodes to force the release of additional requests * to the worklist that we can then process to reap additional * resources. We walk the vnodes associated with the mount point * until we get the needed worklist requests that we can reap. * * If there are several threads all needing to clean the same * mount point, only one is allowed to walk the mount list.
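 *
 * A sketch of that single-walker gate, using POSIX primitives in place
 * of the softdep lock and the FLUSH_RC_ACTIVE flag (do_scan() is a
 * hypothetical stand-in for softdep_request_cleanup_flush()):
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t scan_lock = PTHREAD_MUTEX_INITIALIZER;
static bool scan_active;

static int
single_walker(void (*do_scan)(void))
{
	pthread_mutex_lock(&scan_lock);
	if (scan_active) {
		pthread_mutex_unlock(&scan_lock);
		return (0);	/* another thread is already walking */
	}
	scan_active = true;
	pthread_mutex_unlock(&scan_lock);

	do_scan();		/* the expensive walk, lock not held */

	pthread_mutex_lock(&scan_lock);
	scan_active = false;
	pthread_mutex_unlock(&scan_lock);
	return (1);		/* we performed the walk */
}

/*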
* When several threads all try to walk the same mount list, * they end up competing with each other and often end up in * livelock. This approach ensures that forward progress is * made at the cost of occasional ENOSPC errors being returned * that might otherwise have been avoided. */ error = 1; if ((resource == FLUSH_BLOCKS_WAIT && fs->fs_cstotal.cs_nbfree <= needed) || (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && fs->fs_cstotal.cs_nifree <= needed)) { ACQUIRE_LOCK(ump); if ((ump->um_softdep->sd_flags & FLUSH_RC_ACTIVE) == 0) { ump->um_softdep->sd_flags |= FLUSH_RC_ACTIVE; FREE_LOCK(ump); failed_vnode = softdep_request_cleanup_flush(mp, ump); ACQUIRE_LOCK(ump); ump->um_softdep->sd_flags &= ~FLUSH_RC_ACTIVE; FREE_LOCK(ump); if (ump->softdep_on_worklist > 0) { stat_cleanup_retries += 1; if (!failed_vnode) goto retry; } } else { FREE_LOCK(ump); error = 0; } stat_cleanup_failures += 1; } if (time_second - starttime > stat_cleanup_high_delay) stat_cleanup_high_delay = time_second - starttime; UFS_LOCK(ump); return (error); } /* * Scan the vnodes for the specified mount point flushing out any * vnodes that can be locked without waiting. Finally, try to flush * the device associated with the mount point if it can be locked * without waiting. * * We return 0 if we were able to lock every vnode in our scan. * If we had to skip one or more vnodes, we return 1. */ static int softdep_request_cleanup_flush(mp, ump) struct mount *mp; struct ufsmount *ump; { struct thread *td; struct vnode *lvp, *mvp; int failed_vnode; failed_vnode = 0; td = curthread; MNT_VNODE_FOREACH_ALL(lvp, mp, mvp) { if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0) { VI_UNLOCK(lvp); continue; } if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, td) != 0) { failed_vnode = 1; continue; } if (lvp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(lvp); continue; } (void) ffs_syncvnode(lvp, MNT_NOWAIT, 0); vput(lvp); } lvp = ump->um_devvp; if (vn_lock(lvp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { VOP_FSYNC(lvp, MNT_NOWAIT, td); VOP_UNLOCK(lvp, 0); } return (failed_vnode); } static bool softdep_excess_items(struct ufsmount *ump, int item) { KASSERT(item >= 0 && item < D_LAST, ("item %d", item)); return (dep_current[item] > max_softdeps && ump->softdep_curdeps[item] > max_softdeps / stat_flush_threads); } static void schedule_cleanup(struct mount *mp) { struct ufsmount *ump; struct thread *td; ump = VFSTOUFS(mp); LOCK_OWNED(ump); FREE_LOCK(ump); td = curthread; if ((td->td_pflags & TDP_KTHREAD) != 0 && (td->td_proc->p_flag2 & P2_AST_SU) == 0) { /* * No AST is delivered to kernel threads, so nobody * would deref the mp. Some kernel threads * explicitly check for AST, e.g. NFS daemon does * this in the serving loop.
*/ return; } if (td->td_su != NULL) vfs_rel(td->td_su); vfs_ref(mp); td->td_su = mp; thread_lock(td); td->td_flags |= TDF_ASTPENDING; thread_unlock(td); } static void softdep_ast_cleanup_proc(struct thread *td) { struct mount *mp; struct ufsmount *ump; int error; bool req; while ((mp = td->td_su) != NULL) { td->td_su = NULL; error = vfs_busy(mp, MBF_NOWAIT); vfs_rel(mp); if (error != 0) return; if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) { ump = VFSTOUFS(mp); for (;;) { req = false; ACQUIRE_LOCK(ump); if (softdep_excess_items(ump, D_INODEDEP)) { req = true; request_cleanup(mp, FLUSH_INODES); } if (softdep_excess_items(ump, D_DIRREM)) { req = true; request_cleanup(mp, FLUSH_BLOCKS); } FREE_LOCK(ump); if (softdep_excess_items(ump, D_NEWBLK) || softdep_excess_items(ump, D_ALLOCDIRECT) || softdep_excess_items(ump, D_ALLOCINDIR)) { error = vn_start_write(NULL, &mp, V_WAIT); if (error == 0) { req = true; VFS_SYNC(mp, MNT_WAIT); vn_finished_write(mp); } } if ((td->td_pflags & TDP_KTHREAD) != 0 || !req) break; } } vfs_unbusy(mp); } if ((mp = td->td_su) != NULL) { td->td_su = NULL; vfs_rel(mp); } } /* * If memory utilization has gotten too high, deliberately slow things * down and speed up the I/O processing. */ static int request_cleanup(mp, resource) struct mount *mp; int resource; { struct thread *td = curthread; struct ufsmount *ump; ump = VFSTOUFS(mp); LOCK_OWNED(ump); /* * We never hold up the filesystem syncer or buf daemon. */ if (td->td_pflags & (TDP_SOFTDEP|TDP_NORUNNINGBUF)) return (0); /* * First check to see if the work list has gotten backlogged. * If it has, co-opt this process to help clean up two entries. * Because this process may hold inodes locked, we cannot * handle any remove requests that might block on a locked * inode as that could lead to deadlock. We set TDP_SOFTDEP * to avoid recursively processing the worklist. */ if (ump->softdep_on_worklist > max_softdeps / 10) { td->td_pflags |= TDP_SOFTDEP; process_worklist_item(mp, 2, LK_NOWAIT); td->td_pflags &= ~TDP_SOFTDEP; stat_worklist_push += 2; return(1); } /* * Next, we attempt to speed up the syncer process. If that * is successful, then we allow the process to continue. */ if (softdep_speedup(ump) && resource != FLUSH_BLOCKS_WAIT && resource != FLUSH_INODES_WAIT) return(0); /* * If we are resource constrained on inode dependencies, try * flushing some dirty inodes. Otherwise, we are constrained * by file deletions, so try accelerating flushes of directories * with removal dependencies. We would like to do the cleanup * here, but we probably hold an inode locked at this point and * that might deadlock against one that we try to clean. So, * the best that we can do is request the syncer daemon to do * the cleanup for us. */ switch (resource) { case FLUSH_INODES: case FLUSH_INODES_WAIT: ACQUIRE_GBLLOCK(&lk); stat_ino_limit_push += 1; req_clear_inodedeps += 1; FREE_GBLLOCK(&lk); stat_countp = &stat_ino_limit_hit; break; case FLUSH_BLOCKS: case FLUSH_BLOCKS_WAIT: ACQUIRE_GBLLOCK(&lk); stat_blk_limit_push += 1; req_clear_remove += 1; FREE_GBLLOCK(&lk); stat_countp = &stat_blk_limit_hit; break; default: panic("request_cleanup: unknown type"); } /* * Hopefully the syncer daemon will catch up and awaken us. * We wait at most tickdelay before proceeding in any case. */ ACQUIRE_GBLLOCK(&lk); FREE_LOCK(ump); proc_waiting += 1; if (callout_pending(&softdep_callout) == FALSE) callout_reset(&softdep_callout, tickdelay > 2 ? 
tickdelay : 2, pause_timer, 0); if ((td->td_pflags & TDP_KTHREAD) == 0) msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0); proc_waiting -= 1; FREE_GBLLOCK(&lk); ACQUIRE_LOCK(ump); return (1); } /* * Awaken processes pausing in request_cleanup and clear proc_waiting * to indicate that there is no longer a timer running. Pause_timer * will be called with the global softdep mutex (&lk) locked. */ static void pause_timer(arg) void *arg; { GBLLOCK_OWNED(&lk); /* * The callout_ API has acquired mtx and will hold it around this * function call. */ *stat_countp += proc_waiting; wakeup(&proc_waiting); } /* * If requested, try removing inode or removal dependencies. */ static void check_clear_deps(mp) struct mount *mp; { /* * If we are suspended, it may be because of our using * too many inodedeps, so help clear them out. */ if (MOUNTEDSUJ(mp) && VFSTOUFS(mp)->softdep_jblocks->jb_suspended) clear_inodedeps(mp); /* * General requests for cleanup of backed up dependencies */ ACQUIRE_GBLLOCK(&lk); if (req_clear_inodedeps) { req_clear_inodedeps -= 1; FREE_GBLLOCK(&lk); clear_inodedeps(mp); ACQUIRE_GBLLOCK(&lk); wakeup(&proc_waiting); } if (req_clear_remove) { req_clear_remove -= 1; FREE_GBLLOCK(&lk); clear_remove(mp); ACQUIRE_GBLLOCK(&lk); wakeup(&proc_waiting); } FREE_GBLLOCK(&lk); } /* * Flush out a directory with at least one removal dependency in an effort to * reduce the number of dirrem, freefile, and freeblks dependency structures. */ static void clear_remove(mp) struct mount *mp; { struct pagedep_hashhead *pagedephd; struct pagedep *pagedep; struct ufsmount *ump; struct vnode *vp; struct bufobj *bo; int error, cnt; ino_t ino; ump = VFSTOUFS(mp); LOCK_OWNED(ump); for (cnt = 0; cnt <= ump->pagedep_hash_size; cnt++) { pagedephd = &ump->pagedep_hashtbl[ump->pagedep_nextclean++]; if (ump->pagedep_nextclean > ump->pagedep_hash_size) ump->pagedep_nextclean = 0; LIST_FOREACH(pagedep, pagedephd, pd_hash) { if (LIST_EMPTY(&pagedep->pd_dirremhd)) continue; ino = pagedep->pd_ino; if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) continue; FREE_LOCK(ump); /* * Let unmount clear deps */ error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) goto finish_write; error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ); vfs_unbusy(mp); if (error != 0) { softdep_error("clear_remove: vget", error); goto finish_write; } if ((error = ffs_syncvnode(vp, MNT_NOWAIT, 0))) softdep_error("clear_remove: fsync", error); bo = &vp->v_bufobj; BO_LOCK(bo); drain_output(vp); BO_UNLOCK(bo); vput(vp); finish_write: vn_finished_write(mp); ACQUIRE_LOCK(ump); return; } } } /* * Clear out a block of dirty inodes in an effort to reduce * the number of inodedep dependency structures. */ static void clear_inodedeps(mp) struct mount *mp; { struct inodedep_hashhead *inodedephd; struct inodedep *inodedep; struct ufsmount *ump; struct vnode *vp; struct fs *fs; int error, cnt; ino_t firstino, lastino, ino; ump = VFSTOUFS(mp); fs = ump->um_fs; LOCK_OWNED(ump); /* * Pick a random inode dependency to be cleared. * We will then gather up all the inodes in its block * that have dependencies and flush them out. */ for (cnt = 0; cnt <= ump->inodedep_hash_size; cnt++) { inodedephd = &ump->inodedep_hashtbl[ump->inodedep_nextclean++]; if (ump->inodedep_nextclean > ump->inodedep_hash_size) ump->inodedep_nextclean = 0; if ((inodedep = LIST_FIRST(inodedephd)) != NULL) break; } if (inodedep == NULL) return; /* * Find the last inode in the block with dependencies. 
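 *
 * The block-range arithmetic used just below, in isolation: inodes are
 * grouped INOPB() to a filesystem block, so the block holding "ino"
 * covers [first, first + ipb - 1]. The mask form requires ipb to be a
 * power of two, which holds for fs_inopb since both the block size and
 * the inode size are powers of two (helper names are hypothetical):
 */
#include <stdint.h>

static void
inode_block_range(uint32_t ino, uint32_t ipb, uint32_t *first,
    uint32_t *last)
{
	*first = ino & ~(ipb - 1);	/* i.e., rounddown2(ino, ipb) */
	*last = *first + ipb - 1;
}

/*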
*/ firstino = rounddown2(inodedep->id_ino, INOPB(fs)); for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--) if (inodedep_lookup(mp, lastino, 0, &inodedep) != 0) break; /* * Asynchronously push all but the last inode with dependencies. * Synchronously push the last inode with dependencies to ensure * that the inode block gets written to free up the inodedeps. */ for (ino = firstino; ino <= lastino; ino++) { if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) continue; if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) continue; FREE_LOCK(ump); error = vfs_busy(mp, MBF_NOWAIT); /* Let unmount clear deps */ if (error != 0) { vn_finished_write(mp); ACQUIRE_LOCK(ump); return; } if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)) != 0) { softdep_error("clear_inodedeps: vget", error); vfs_unbusy(mp); vn_finished_write(mp); ACQUIRE_LOCK(ump); return; } vfs_unbusy(mp); if (ino == lastino) { if ((error = ffs_syncvnode(vp, MNT_WAIT, 0))) softdep_error("clear_inodedeps: fsync1", error); } else { if ((error = ffs_syncvnode(vp, MNT_NOWAIT, 0))) softdep_error("clear_inodedeps: fsync2", error); BO_LOCK(&vp->v_bufobj); drain_output(vp); BO_UNLOCK(&vp->v_bufobj); } vput(vp); vn_finished_write(mp); ACQUIRE_LOCK(ump); } } void softdep_buf_append(bp, wkhd) struct buf *bp; struct workhead *wkhd; { struct worklist *wk; struct ufsmount *ump; if ((wk = LIST_FIRST(wkhd)) == NULL) return; KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0, ("softdep_buf_append called on non-softdep filesystem")); ump = VFSTOUFS(wk->wk_mp); ACQUIRE_LOCK(ump); while ((wk = LIST_FIRST(wkhd)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(&bp->b_dep, wk); } FREE_LOCK(ump); } void softdep_inode_append(ip, cred, wkhd) struct inode *ip; struct ucred *cred; struct workhead *wkhd; { struct buf *bp; struct fs *fs; struct ufsmount *ump; int error; ump = ITOUMP(ip); KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, ("softdep_inode_append called on non-softdep filesystem")); fs = ump->um_fs; error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, cred, &bp); if (error) { bqrelse(bp); softdep_freework(wkhd); return; } softdep_buf_append(bp, wkhd); bqrelse(bp); } void softdep_freework(wkhd) struct workhead *wkhd; { struct worklist *wk; struct ufsmount *ump; if ((wk = LIST_FIRST(wkhd)) == NULL) return; KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0, ("softdep_freework called on non-softdep filesystem")); ump = VFSTOUFS(wk->wk_mp); ACQUIRE_LOCK(ump); handle_jwork(wkhd); FREE_LOCK(ump); } static struct ufsmount * softdep_bp_to_mp(bp) struct buf *bp; { struct mount *mp; struct vnode *vp; if (LIST_EMPTY(&bp->b_dep)) return (NULL); vp = bp->b_vp; /* * The ump mount point is stable after we get a correct * pointer, since bp is locked and this prevents unmount from * proceeding. But to get to it, we cannot dereference bp->b_dep * head wk_mp, because we do not yet own SU ump lock and * workitem might be freed while dereferenced. */ retry: if (vp->v_type == VCHR) { VI_LOCK(vp); mp = vp->v_type == VCHR ? vp->v_rdev->si_mountpt : NULL; VI_UNLOCK(vp); if (mp == NULL) goto retry; } else if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK) { mp = vp->v_mount; } else { return (NULL); } return (VFSTOUFS(mp)); } /* * Function to determine if the buffer has outstanding dependencies * that will cause a roll-back if the buffer is written. If wantcount * is set, return number of dependencies, otherwise just yes or no. 
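 *
 * The wantcount convention can be distilled into a few lines; in the
 * sketch below (hypothetical predicate over an integer array), every
 * hit bumps the count and the first hit suffices when the caller only
 * wants a yes-or-no answer, exactly like the "goto out" exits below.
 */
static int
count_or_test(int (*is_dep)(int), const int *items, int nitems,
    int wantcount)
{
	int i, retval;

	retval = 0;
	for (i = 0; i < nitems; i++) {
		if (!is_dep(items[i]))
			continue;
		retval++;
		if (!wantcount)
			break;	/* yes/no answer: stop at first hit */
	}
	return (retval);
}

/*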
*/ static int softdep_count_dependencies(bp, wantcount) struct buf *bp; int wantcount; { struct worklist *wk; struct ufsmount *ump; struct bmsafemap *bmsafemap; struct freework *freework; struct inodedep *inodedep; struct indirdep *indirdep; struct freeblks *freeblks; struct allocindir *aip; struct pagedep *pagedep; struct dirrem *dirrem; struct newblk *newblk; struct mkdir *mkdir; struct diradd *dap; int i, retval; ump = softdep_bp_to_mp(bp); if (ump == NULL) return (0); retval = 0; ACQUIRE_LOCK(ump); LIST_FOREACH(wk, &bp->b_dep, wk_list) { switch (wk->wk_type) { case D_INODEDEP: inodedep = WK_INODEDEP(wk); if ((inodedep->id_state & DEPCOMPLETE) == 0) { /* bitmap allocation dependency */ retval += 1; if (!wantcount) goto out; } if (TAILQ_FIRST(&inodedep->id_inoupdt)) { /* direct block pointer dependency */ retval += 1; if (!wantcount) goto out; } if (TAILQ_FIRST(&inodedep->id_extupdt)) { /* direct block pointer dependency */ retval += 1; if (!wantcount) goto out; } if (TAILQ_FIRST(&inodedep->id_inoreflst)) { /* Add reference dependency. */ retval += 1; if (!wantcount) goto out; } continue; case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); TAILQ_FOREACH(freework, &indirdep->ir_trunc, fw_next) { /* indirect truncation dependency */ retval += 1; if (!wantcount) goto out; } LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { /* indirect block pointer dependency */ retval += 1; if (!wantcount) goto out; } continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { if (LIST_FIRST(&dirrem->dm_jremrefhd)) { /* Journal remove ref dependency. */ retval += 1; if (!wantcount) goto out; } } for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { /* directory entry dependency */ retval += 1; if (!wantcount) goto out; } } continue; case D_BMSAFEMAP: bmsafemap = WK_BMSAFEMAP(wk); if (LIST_FIRST(&bmsafemap->sm_jaddrefhd)) { /* Add reference dependency. */ retval += 1; if (!wantcount) goto out; } if (LIST_FIRST(&bmsafemap->sm_jnewblkhd)) { /* Allocate block dependency. */ retval += 1; if (!wantcount) goto out; } continue; case D_FREEBLKS: freeblks = WK_FREEBLKS(wk); if (LIST_FIRST(&freeblks->fb_jblkdephd)) { /* Freeblk journal dependency. */ retval += 1; if (!wantcount) goto out; } continue; case D_ALLOCDIRECT: case D_ALLOCINDIR: newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk) { /* Journal allocate dependency. */ retval += 1; if (!wantcount) goto out; } continue; case D_MKDIR: mkdir = WK_MKDIR(wk); if (mkdir->md_jaddref) { /* Journal reference dependency. */ retval += 1; if (!wantcount) goto out; } continue; case D_FREEWORK: case D_FREEDEP: case D_JSEGDEP: case D_JSEG: case D_SBDEP: /* never a dependency on these blocks */ continue; default: panic("softdep_count_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } out: FREE_LOCK(ump); return (retval); } /* * Acquire exclusive access to a buffer. * Must be called with a locked mtx parameter. * Return acquired buffer or NULL on failure. */ static struct buf * getdirtybuf(bp, lock, waitfor) struct buf *bp; struct rwlock *lock; int waitfor; { int error; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) { if (waitfor != MNT_WAIT) return (NULL); error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, lock); /* * Even if we successfully acquire bp here, we have dropped * lock, which may violate our guarantee.
*/ if (error == 0) BUF_UNLOCK(bp); else if (error != ENOLCK) panic("getdirtybuf: inconsistent lock: %d", error); rw_wlock(lock); return (NULL); } if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { if (lock != BO_LOCKPTR(bp->b_bufobj) && waitfor == MNT_WAIT) { rw_wunlock(lock); BO_LOCK(bp->b_bufobj); BUF_UNLOCK(bp); if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { bp->b_vflags |= BV_BKGRDWAIT; msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO | PDROP, "getbuf", 0); } else BO_UNLOCK(bp->b_bufobj); rw_wlock(lock); return (NULL); } BUF_UNLOCK(bp); if (waitfor != MNT_WAIT) return (NULL); #ifdef DEBUG_VFS_LOCKS if (bp->b_vp->v_type != VCHR) ASSERT_BO_WLOCKED(bp->b_bufobj); #endif bp->b_vflags |= BV_BKGRDWAIT; rw_sleep(&bp->b_xflags, lock, PRIBIO, "getbuf", 0); return (NULL); } if ((bp->b_flags & B_DELWRI) == 0) { BUF_UNLOCK(bp); return (NULL); } bremfree(bp); return (bp); } /* * Check if it is safe to suspend the file system now. On entry, * the vnode interlock for devvp should be held. Return 0 with * the mount interlock held if the file system can be suspended now, * otherwise return EAGAIN with the mount interlock held. */ int softdep_check_suspend(struct mount *mp, struct vnode *devvp, int softdep_depcnt, int softdep_accdepcnt, int secondary_writes, int secondary_accwrites) { struct bufobj *bo; struct ufsmount *ump; struct inodedep *inodedep; int error, unlinked; bo = &devvp->v_bufobj; ASSERT_BO_WLOCKED(bo); /* * If we are not running with soft updates, then we need only * deal with secondary writes as we try to suspend. */ if (MOUNTEDSOFTDEP(mp) == 0) { MNT_ILOCK(mp); while (mp->mnt_secondary_writes != 0) { BO_UNLOCK(bo); msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), (PUSER - 1) | PDROP, "secwr", 0); BO_LOCK(bo); MNT_ILOCK(mp); } /* * Reasons for needing more work before suspend: * - Dirty buffers on devvp. * - Secondary writes occurred after start of vnode sync loop */ error = 0; if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0 || secondary_writes != 0 || mp->mnt_secondary_writes != 0 || secondary_accwrites != mp->mnt_secondary_accwrites) error = EAGAIN; BO_UNLOCK(bo); return (error); } /* * If we are running with soft updates, then we need to coordinate * with them as we try to suspend. */ ump = VFSTOUFS(mp); for (;;) { if (!TRY_ACQUIRE_LOCK(ump)) { BO_UNLOCK(bo); ACQUIRE_LOCK(ump); FREE_LOCK(ump); BO_LOCK(bo); continue; } MNT_ILOCK(mp); if (mp->mnt_secondary_writes != 0) { FREE_LOCK(ump); BO_UNLOCK(bo); msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), (PUSER - 1) | PDROP, "secwr", 0); BO_LOCK(bo); continue; } break; } unlinked = 0; if (MOUNTEDSUJ(mp)) { for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked); inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) { if ((inodedep->id_state & (UNLINKED | UNLINKLINKS | UNLINKONLIST)) != (UNLINKED | UNLINKLINKS | UNLINKONLIST) || !check_inodedep_free(inodedep)) continue; unlinked++; } } /* * Reasons for needing more work before suspend: * - Dirty buffers on devvp. 
* - Softdep activity occurred after start of vnode sync loop * - Secondary writes occurred after start of vnode sync loop */ error = 0; if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0 || softdep_depcnt != unlinked || ump->softdep_deps != unlinked || softdep_accdepcnt != ump->softdep_accdeps || secondary_writes != 0 || mp->mnt_secondary_writes != 0 || secondary_accwrites != mp->mnt_secondary_accwrites) error = EAGAIN; FREE_LOCK(ump); BO_UNLOCK(bo); return (error); } /* * Get the number of dependency structures for the file system, both * the current number and the total number allocated. These will * later be used to detect that softdep processing has occurred. */ void softdep_get_depcounts(struct mount *mp, int *softdep_depsp, int *softdep_accdepsp) { struct ufsmount *ump; if (MOUNTEDSOFTDEP(mp) == 0) { *softdep_depsp = 0; *softdep_accdepsp = 0; return; } ump = VFSTOUFS(mp); ACQUIRE_LOCK(ump); *softdep_depsp = ump->softdep_deps; *softdep_accdepsp = ump->softdep_accdeps; FREE_LOCK(ump); } /* * Wait for pending output on a vnode to complete. */ static void drain_output(vp) struct vnode *vp; { ASSERT_VOP_LOCKED(vp, "drain_output"); (void)bufobj_wwait(&vp->v_bufobj, 0, 0); } /* * Called whenever a buffer that is being invalidated or reallocated * contains dependencies. This should only happen if an I/O error has * occurred. The routine is called with the buffer locked. */ static void softdep_deallocate_dependencies(bp) struct buf *bp; { if ((bp->b_ioflags & BIO_ERROR) == 0) panic("softdep_deallocate_dependencies: dangling deps"); if (bp->b_vp != NULL && bp->b_vp->v_mount != NULL) softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); else printf("softdep_deallocate_dependencies: " "got error %d while accessing filesystem\n", bp->b_error); if (bp->b_error != ENXIO) panic("softdep_deallocate_dependencies: unrecovered I/O error"); } /* * Function to handle asynchronous write errors in the filesystem. */ static void softdep_error(func, error) char *func; int error; { /* XXX should do something better! 
*/ printf("%s: got error %d while accessing filesystem\n", func, error); } #ifdef DDB static void inodedep_print(struct inodedep *inodedep, int verbose) { db_printf("%p fs %p st %x ino %jd inoblk %jd delta %jd nlink %jd" " saveino %p\n", inodedep, inodedep->id_fs, inodedep->id_state, (intmax_t)inodedep->id_ino, (intmax_t)fsbtodb(inodedep->id_fs, ino_to_fsba(inodedep->id_fs, inodedep->id_ino)), (intmax_t)inodedep->id_nlinkdelta, (intmax_t)inodedep->id_savednlink, inodedep->id_savedino1); if (verbose == 0) return; db_printf("\tpendinghd %p, bufwait %p, inowait %p, inoreflst %p, " "mkdiradd %p\n", LIST_FIRST(&inodedep->id_pendinghd), LIST_FIRST(&inodedep->id_bufwait), LIST_FIRST(&inodedep->id_inowait), TAILQ_FIRST(&inodedep->id_inoreflst), inodedep->id_mkdiradd); db_printf("\tinoupdt %p, newinoupdt %p, extupdt %p, newextupdt %p\n", TAILQ_FIRST(&inodedep->id_inoupdt), TAILQ_FIRST(&inodedep->id_newinoupdt), TAILQ_FIRST(&inodedep->id_extupdt), TAILQ_FIRST(&inodedep->id_newextupdt)); } DB_SHOW_COMMAND(inodedep, db_show_inodedep) { if (have_addr == 0) { db_printf("Address required\n"); return; } inodedep_print((struct inodedep*)addr, 1); } DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) { struct inodedep_hashhead *inodedephd; struct inodedep *inodedep; struct ufsmount *ump; int cnt; if (have_addr == 0) { db_printf("Address required\n"); return; } ump = (struct ufsmount *)addr; for (cnt = 0; cnt < ump->inodedep_hash_size; cnt++) { inodedephd = &ump->inodedep_hashtbl[cnt]; LIST_FOREACH(inodedep, inodedephd, id_hash) { inodedep_print(inodedep, 0); } } } DB_SHOW_COMMAND(worklist, db_show_worklist) { struct worklist *wk; if (have_addr == 0) { db_printf("Address required\n"); return; } wk = (struct worklist *)addr; printf("worklist: %p type %s state 0x%X\n", wk, TYPENAME(wk->wk_type), wk->wk_state); } DB_SHOW_COMMAND(workhead, db_show_workhead) { struct workhead *wkhd; struct worklist *wk; int i; if (have_addr == 0) { db_printf("Address required\n"); return; } wkhd = (struct workhead *)addr; wk = LIST_FIRST(wkhd); for (i = 0; i < 100 && wk != NULL; i++, wk = LIST_NEXT(wk, wk_list)) db_printf("worklist: %p type %s state 0x%X", wk, TYPENAME(wk->wk_type), wk->wk_state); if (i == 100) db_printf("workhead overflow"); printf("\n"); } DB_SHOW_COMMAND(mkdirs, db_show_mkdirs) { struct mkdirlist *mkdirlisthd; struct jaddref *jaddref; struct diradd *diradd; struct mkdir *mkdir; if (have_addr == 0) { db_printf("Address required\n"); return; } mkdirlisthd = (struct mkdirlist *)addr; LIST_FOREACH(mkdir, mkdirlisthd, md_mkdirs) { diradd = mkdir->md_diradd; db_printf("mkdir: %p state 0x%X dap %p state 0x%X", mkdir, mkdir->md_state, diradd, diradd->da_state); if ((jaddref = mkdir->md_jaddref) != NULL) db_printf(" jaddref %p jaddref state 0x%X", jaddref, jaddref->ja_state); db_printf("\n"); } } /* exported to ffs_vfsops.c */ extern void db_print_ffs(struct ufsmount *ump); void db_print_ffs(struct ufsmount *ump) { db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n", ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname, ump->um_devvp, ump->um_fs, ump->softdep_on_worklist, ump->softdep_deps, ump->softdep_req); } #endif /* DDB */ #endif /* SOFTUPDATES */ Index: stable/11 =================================================================== --- stable/11 (revision 330445) +++ stable/11 (revision 330446) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r327231-327232