Index: head/sys/dev/ath/ath_rate/sample/sample.c
===================================================================
--- head/sys/dev/ath/ath_rate/sample/sample.c	(revision 283743)
+++ head/sys/dev/ath/ath_rate/sample/sample.c	(revision 283744)
@@ -1,1393 +1,1396 @@
 /*-
  * Copyright (c) 2005 John Bicket
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  * 3. Neither the names of the above-listed copyright holders nor the names
  *    of any contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * Alternatively, this software may be distributed under the terms of the
  * GNU General Public License ("GPL") version 2 as published by the Free
  * Software Foundation.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * John Bicket's SampleRate control algorithm.
  */
 #include "opt_ath.h"
 #include "opt_inet.h"
 #include "opt_wlan.h"
 #include "opt_ah.h"
 
 #include <sys/param.h>
 #include <sys/systm.h> 
 #include <sys/sysctl.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/errno.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/bus.h>
 
 #include <sys/socket.h>
  
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_media.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>		/* XXX for ether_sprintf */
 
 #include <net80211/ieee80211_var.h>
 
 #include <net/bpf.h>
 
 #ifdef INET
 #include <netinet/in.h> 
 #include <netinet/if_ether.h>
 #endif
 
 #include <dev/ath/if_athvar.h>
 #include <dev/ath/ath_rate/sample/sample.h>
 #include <dev/ath/ath_hal/ah_desc.h>
 #include <dev/ath/ath_rate/sample/tx_schedules.h>
 
 /*
  * This file is an implementation of the SampleRate algorithm
  * in "Bit-rate Selection in Wireless Networks"
  * (http://www.pdos.lcs.mit.edu/papers/jbicket-ms.ps)
  *
  * SampleRate chooses the bit-rate it predicts will provide the most
  * throughput based on estimates of the expected per-packet
  * transmission time for each bit-rate.  SampleRate periodically sends
  * packets at bit-rates other than the current one to estimate when
  * another bit-rate will provide better performance. SampleRate
  * switches to another bit-rate when its estimated per-packet
  * transmission time becomes smaller than the current bit-rate's.
  * SampleRate reduces the number of bit-rates it must sample by
  * eliminating those that could not perform better than the one
  * currently being used.  SampleRate also stops probing at a bit-rate
  * if it experiences several successive losses.
  *
  * The difference between the algorithm in the thesis and the one in this
  * file is that the one in this file uses an EWMA (exponentially weighted
  * moving average) instead of a window.
  *
  * Also, this implementation tracks the average transmission time for
  * a few different packet sizes independently for each link.
  */
 
 static void	ath_rate_ctl_reset(struct ath_softc *, struct ieee80211_node *);
 
 static __inline int
 size_to_bin(int size) 
 {
 #if NUM_PACKET_SIZE_BINS > 1
 	if (size <= packet_size_bins[0])
 		return 0;
 #endif
 #if NUM_PACKET_SIZE_BINS > 2
 	if (size <= packet_size_bins[1])
 		return 1;
 #endif
 #if NUM_PACKET_SIZE_BINS > 3
 	if (size <= packet_size_bins[2])
 		return 2;
 #endif
 #if NUM_PACKET_SIZE_BINS > 4
 #error "add support for more packet sizes"
 #endif
 	return NUM_PACKET_SIZE_BINS-1;
 }
 
 /*
  * Per-node initialisation entry point of the rate control module.
  * The body is intentionally empty: no field is set here, the node's
  * rate control state is expected to arrive already zeroed.
  */
 void
 ath_rate_node_init(struct ath_softc *sc, struct ath_node *an)
 {
 	/* NB: assumed to be zero'd by caller */
 }
 
 /*
  * Per-node teardown entry point of the rate control module.
  * The body is empty; nothing is released or reset here.
  */
 void
 ath_rate_node_cleanup(struct ath_softc *sc, struct ath_node *an)
 {
 }
 
 /*
  * Turn a rate table index into the number printed in debug messages.
  *
  * For HT entries the raw dot11Rate field is returned (printed with the
  * "MCS" label); for all other entries dot11Rate is masked with
  * IEEE80211_RATE_VAL and halved (printed with the "Mb " label).
  * A negative index yields -1.
  */
 static int
 dot11rate(const HAL_RATE_TABLE *rt, int rix)
 {
 	if (rix < 0)
 		return -1;
 	if (rt->info[rix].phy == IEEE80211_T_HT)
 		return rt->info[rix].dot11Rate;
 	return (rt->info[rix].dot11Rate & IEEE80211_RATE_VAL) / 2;
 }
 
 /*
  * Return the unit label that goes with the value from dot11rate():
  * "MCS" for HT entries, "Mb " for everything else, and an empty string
  * for a negative index.
  */
 static const char *
 dot11rate_label(const HAL_RATE_TABLE *rt, int rix)
 {
 	if (rix < 0)
 		return "";
 	if (rt->info[rix].phy == IEEE80211_T_HT)
 		return "MCS";
 	return "Mb ";
 }
 
 /*
  * Choose the transmit rate to use for frames in size bin 'size_bin'.
  *
  * Every rate index whose bit is set in sn->ratemask is visited in
  * ascending order.  A rate is skipped when:
  *  - the node is HT and the rate is not an HT rate;
  *  - its average_tx_time is <= 0;
  *  - 'require_acked_before' is non-zero and it has no acked packets;
  *  - it has more than 3 successive failures;
  *  - the node is HT and the best candidate so far has a success figure
  *    more than 50 above this rate's.
  * Of the remaining rates a non-HT node keeps the one with the lowest
  * average_tx_time (on a tie the higher index wins).  An HT node accepts
  * a later rate whenever tt * 8 <= best_tt * 10, i.e. when it is at most
  * 25% slower than the best so far.
  *
  * Returns the chosen rix, or -1 if no rate qualified.
  */
 static __inline int
 pick_best_rate(struct ath_node *an, const HAL_RATE_TABLE *rt,
     int size_bin, int require_acked_before)
 {
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
         int best_rate_rix, best_rate_tt, best_rate_pct;
 	uint64_t mask;
 	int rix, tt, pct;
 
         best_rate_rix = 0;
         best_rate_tt = 0;
 	best_rate_pct = 0;
 	/* Bit N of the mask corresponds to rate index N. */
 	for (mask = sn->ratemask, rix = 0; mask != 0; mask >>= 1, rix++) {
 		if ((mask & 1) == 0)		/* not a supported rate */
 			continue;
 
 		/* Don't pick a non-HT rate for a HT node */
 		if ((an->an_node.ni_flags & IEEE80211_NODE_HT) &&
 		    (rt->info[rix].phy != IEEE80211_T_HT)) {
 			continue;
 		}
 
 		tt = sn->stats[size_bin][rix].average_tx_time;
 		if (tt <= 0 ||
 		    (require_acked_before &&
 		     !sn->stats[size_bin][rix].packets_acked))
 			continue;
 
 		/* Calculate percentage if possible */
 		if (sn->stats[size_bin][rix].total_packets > 0) {
 			pct = sn->stats[size_bin][rix].ewma_pct;
 		} else {
 			/* XXX for now, assume 95% ok */
 			/*
 			 * NOTE(review): update_stats() in this file stores
 			 * ewma_pct scaled by 1000, so 95 here would read as
 			 * 9.5% rather than 95% - confirm the intended units.
 			 */
 			pct = 95;
 		}
 
 		/* don't use a bit-rate that has been failing */
 		if (sn->stats[size_bin][rix].successive_failures > 3)
 			continue;
 
 		/*
 		 * For HT, Don't use a bit rate that is much more
 		 * lossy than the best.
 		 *
 		 * XXX this isn't optimal; it's just designed to
 		 * eliminate rates that are going to be obviously
 		 * worse.
 		 */
 		if (an->an_node.ni_flags & IEEE80211_NODE_HT) {
 			if (best_rate_pct > (pct + 50))
 				continue;
 		}
 
 		/*
 		 * For non-MCS rates, use the current average txtime for
 		 * comparison.
 		 */
 		if (! (an->an_node.ni_flags & IEEE80211_NODE_HT)) {
 			if (best_rate_tt == 0 || tt <= best_rate_tt) {
 				best_rate_tt = tt;
 				best_rate_rix = rix;
 				best_rate_pct = pct;
 			}
 		}
 
 		/*
 		 * Since 2 stream rates have slightly higher TX times,
 		 * allow a little bit of leeway. This should later
 		 * be abstracted out and properly handled.
 		 */
 		if (an->an_node.ni_flags & IEEE80211_NODE_HT) {
 			if (best_rate_tt == 0 || (tt * 8 <= best_rate_tt * 10)) {
 				best_rate_tt = tt;
 				best_rate_rix = rix;
 				best_rate_pct = pct;
 			}
 		}
         }
 	/* best_rate_tt stays 0 only when every rate was skipped. */
         return (best_rate_tt ? best_rate_rix : -1);
 }
 
 /*
  * Pick a good "random" bit-rate to sample other than the current one.
  *
  * Candidates are visited round-robin, starting just after the rate last
  * sampled for this size bin.  'mask' starts as the node's rate mask with
  * the current rate removed; every rejected candidate has its bit cleared,
  * so the loop ends once nothing is left and the current rix is returned.
  * If no current rate exists yet (current_rix < 0) index 0 is returned.
  * When a candidate is accepted, last_sample_rix[size_bin] is updated and
  * the candidate's index is returned.
  */
 static __inline int
 pick_sample_rate(struct sample_softc *ssc , struct ath_node *an,
     const HAL_RATE_TABLE *rt, int size_bin)
 {
 #define	DOT11RATE(ix)	(rt->info[ix].dot11Rate & IEEE80211_RATE_VAL)
 #define	MCS(ix)		(rt->info[ix].dot11Rate | IEEE80211_RATE_MCS)
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	int current_rix, rix;
 	unsigned current_tt;
 	uint64_t mask;
 	
 	current_rix = sn->current_rix[size_bin];
 	if (current_rix < 0) {
 		/* no successes yet, send at the lowest bit-rate */
 		/* XXX should return MCS0 if HT */
 		return 0;
 	}
 
 	current_tt = sn->stats[size_bin][current_rix].average_tx_time;
 
 	rix = sn->last_sample_rix[size_bin]+1;	/* next sample rate */
 	mask = sn->ratemask &~ ((uint64_t) 1<<current_rix);/* don't sample current rate */
 	while (mask != 0) {
 		if ((mask & ((uint64_t) 1<<rix)) == 0) {	/* not a supported rate */
 	/*
 	 * Rejected candidates jump here, into the "not supported" branch,
 	 * to advance rix (wrapping to 0 at rateCount) and retry.
 	 */
 	nextrate:
 			if (++rix >= rt->rateCount)
 				rix = 0;
 			continue;
 		}
 
 		/*
 		 * The following code stops trying to sample
 		 * non-MCS rates when speaking to an MCS node.
 		 * However, at least for CCK rates in 2.4GHz mode,
 		 * the non-MCS rates MAY actually provide better
 		 * PER at the very far edge of reception.
 		 *
 		 * However! Until ath_rate_form_aggr() grows
 		 * some logic to not form aggregates if the
 		 * selected rate is non-MCS, this won't work.
 		 *
 		 * So don't disable this code until you've taught
 		 * ath_rate_form_aggr() to drop out if any of
 		 * the selected rates are non-MCS.
 		 */
 #if 1
 		/* if the node is HT and the rate isn't HT, don't bother sample */
 		if ((an->an_node.ni_flags & IEEE80211_NODE_HT) &&
 		    (rt->info[rix].phy != IEEE80211_T_HT)) {
 			mask &= ~((uint64_t) 1<<rix);
 			goto nextrate;
 		}
 #endif
 
 		/* this bit-rate is always worse than the current one */
 		if (sn->stats[size_bin][rix].perfect_tx_time > current_tt) {
 			mask &= ~((uint64_t) 1<<rix);
 			goto nextrate;
 		}
 
 		/* rarely sample bit-rates that fail a lot */
 		if (sn->stats[size_bin][rix].successive_failures > ssc->max_successive_failures &&
 		    ticks - sn->stats[size_bin][rix].last_tx < ssc->stale_failure_timeout) {
 			mask &= ~((uint64_t) 1<<rix);
 			goto nextrate;
 		}
 
 		/*
 		 * For HT, only sample a few rates on either side of the
 		 * current rix; there's quite likely a lot of them.
 		 */
 		if (an->an_node.ni_flags & IEEE80211_NODE_HT) {
 			if (rix < (current_rix - 3) ||
 			    rix > (current_rix + 3)) {
 				mask &= ~((uint64_t) 1<<rix);
 				goto nextrate;
 			}
 		}
 
 		/* Don't sample more than 2 rates higher for rates > 11M for non-HT rates */
 		if (! (an->an_node.ni_flags & IEEE80211_NODE_HT)) {
 			if (DOT11RATE(rix) > 2*11 && rix > current_rix + 2) {
 				mask &= ~((uint64_t) 1<<rix);
 				goto nextrate;
 			}
 		}
 
 		/* Candidate passed every filter: remember and use it. */
 		sn->last_sample_rix[size_bin] = rix;
 		return rix;
 	}
 	/* Every other rate was rejected; keep using the current one. */
 	return current_rix;
 #undef DOT11RATE
 #undef	MCS
 }
 
 /*
  * Look up the rate table index for the node's fixed unicast rate.
  *
  * tp->ucastrate is searched for first in the node's HT rate set (each
  * entry compared with IEEE80211_RATE_MCS or'd in) and then in the legacy
  * rate set (each entry masked with IEEE80211_RATE_VAL).  On a match the
  * index is taken from sc->sc_rixmap[]; otherwise -1 is returned.
  */
 static int
 ath_rate_get_static_rix(struct ath_softc *sc, const struct ieee80211_node *ni)
 {
 	const struct ieee80211_txparam *tp = ni->ni_txparms;
 	int matched = 0;
 	int i;
 
 	/* Check MCS rates */
 	i = ni->ni_htrates.rs_nrates;
 	while (!matched && --i >= 0) {
 		if ((ni->ni_htrates.rs_rates[i] | IEEE80211_RATE_MCS) ==
 		    tp->ucastrate)
 			matched = 1;
 	}
 
 	/* Check legacy rates */
 	i = ni->ni_rates.rs_nrates;
 	while (!matched && --i >= 0) {
 		if ((ni->ni_rates.rs_rates[i] & IEEE80211_RATE_VAL) ==
 		    tp->ucastrate)
 			matched = 1;
 	}
 
 	if (!matched)
 		return -1;
 	return sc->sc_rixmap[tp->ucastrate];
 }
 
 /*
  * Refresh sn->static_rix from the node's transmit parameters.
  *
  * Without transmit parameters, or when no fixed unicast rate is
  * configured (IEEE80211_FIXED_RATE_NONE), static_rix is set to -1.
  * Otherwise it receives the result of ath_rate_get_static_rix(), which
  * is itself -1 when the fixed rate is not in the node's rate sets.
  */
 static void
 ath_rate_update_static_rix(struct ath_softc *sc, struct ieee80211_node *ni)
 {
 	struct ath_node *an = ATH_NODE(ni);
 	const struct ieee80211_txparam *tp = ni->ni_txparms;
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 
 	if (tp == NULL || tp->ucastrate == IEEE80211_FIXED_RATE_NONE) {
 		sn->static_rix = -1;
 		return;
 	}
 
 	/*
 	 * A fixed rate is requested; ucastrate is the IEEE code for it
 	 * (sans basic bit).  It is checked against the rate set negotiated
 	 * for the node, and since the rate may be unavailable the static
 	 * index only becomes valid when that lookup succeeds.
 	 */
 	sn->static_rix = ath_rate_get_static_rix(sc, ni);
 }
 
 /*
  * Pick a non-HT rate to begin using, before any packet has been sent
  * successfully.
  *
  * Scans the rate table from the top index down to index 1 and returns
  * the first rate that is in the node's rate mask, is not an HT rate, has
  * a dot11Rate of at most 72 (36 Mbps) and no successive failures in the
  * size bin of frameLen.  If none matches, the loop's final index is
  * returned, which is 0 for any table with at least one entry.
  */
 static int
 ath_rate_pick_seed_rate_legacy(struct ath_softc *sc, struct ath_node *an,
     int frameLen)
 {
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	const int size_bin = size_to_bin(frameLen);
 	int rix;
 
 	for (rix = rt->rateCount - 1; rix > 0; rix--) {
 		if ((sn->ratemask & ((uint64_t) 1 << rix)) != 0 &&
 		    rt->info[rix].phy != IEEE80211_T_HT &&
 		    (rt->info[rix].dot11Rate & IEEE80211_RATE_VAL) <= 72 &&
 		    sn->stats[size_bin][rix].successive_failures == 0)
 			return rix;
 	}
 	return rix;
 }
 
 /*
  * Pick a HT rate to begin using, before any packet has been sent
  * successfully.  Non-HT rates are never considered.
  *
  * Scans the rate table from the top index down to index 1.  The first
  * usable HT rate whose MCS value has (MCS & 0x7) <= 4 and which has no
  * successive failures in the size bin of frameLen ends the scan and is
  * returned.  If the scan runs out, the lowest usable HT index seen is
  * returned instead, or the loop's final index when there was none.
  */
 static int
 ath_rate_pick_seed_rate_ht(struct ath_softc *sc, struct ath_node *an,
     int frameLen)
 {
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	const int size_bin = size_to_bin(frameLen);
 	int lowest_ht = -1;
 	int rix;
 
 	for (rix = rt->rateCount - 1; rix > 0; rix--) {
 		/* Only rates in the node's mask that are HT are of interest */
 		if ((sn->ratemask & ((uint64_t) 1 << rix)) == 0 ||
 		    rt->info[rix].phy != IEEE80211_T_HT)
 			continue;
 
 		/* Scanning downwards, so this is the lowest HT index so far */
 		lowest_ht = rix;
 
 		/*
 		 * Settle on a medium-speed rate, whatever its stream count,
 		 * that has not failed yet; faster ones get tried later.
 		 */
 		if ((((rt->info[rix].dot11Rate | IEEE80211_RATE_MCS) & 0x7) <= 4) &&
 		    sn->stats[size_bin][rix].successive_failures == 0)
 			break;
 	}
 
 	/*
 	 * After a break rix equals lowest_ht; after a full scan rix has
 	 * dropped to the loop's end value and lowest_ht carries the fallback.
 	 */
 	return MAX(rix, lowest_ht);
 }
 
 
 /*
  * Select the rate for the next frame to this node.
  *
  * Outputs:
  *   *rix0   - rate table index to transmit at;
  *   *try0   - number of tries for the first series (the schedule's t0
  *             when multi-rate retry is in use, ATH_TXMAXTRY otherwise);
  *   *txrate - hardware rate code, with the short preamble bit or'd in
  *             when shortPreamble is set.
  *
  * Decision order:
  *  1. If the node's rate table differs from the softc's, complain and
  *     use index 0.
  *  2. If a fixed rate is configured, use it.
  *  3. If the time spent sampling is below the sample_rate budget, pick
  *     a rate to sample with pick_sample_rate().
  *  4. Otherwise keep the current rate, switching to the best rate (or a
  *     seed rate when nothing has succeeded yet) when one of the
  *     change conditions below holds.
  * packets_sent[] for the frame's size bin is incremented on every call.
  */
 void
 ath_rate_findrate(struct ath_softc *sc, struct ath_node *an,
 		  int shortPreamble, size_t frameLen,
 		  u_int8_t *rix0, int *try0, u_int8_t *txrate)
 {
 #define	DOT11RATE(ix)	(rt->info[ix].dot11Rate & IEEE80211_RATE_VAL)
 #define	MCS(ix)		(rt->info[ix].dot11Rate | IEEE80211_RATE_MCS)
 #define	RATE(ix)	(DOT11RATE(ix) / 2)
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc);
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	const int size_bin = size_to_bin(frameLen);
 	int rix, mrr, best_rix, change_rates;
 	unsigned average_tx_time;
 
 	ath_rate_update_static_rix(sc, &an->an_node);
 
 	if (sn->currates != sc->sc_currates) {
 		device_printf(sc->sc_dev, "%s: currates != sc_currates!\n",
 		    __func__);
 		rix = 0;
 		*try0 = ATH_TXMAXTRY;
 		goto done;
 	}
 
 	if (sn->static_rix != -1) {
 		rix = sn->static_rix;
 		*try0 = ATH_TXMAXTRY;
 		goto done;
 	}
 
 	mrr = sc->sc_mrretry;
 	/* XXX check HT protmode too */
 	if (mrr && (ic->ic_flags & IEEE80211_F_USEPROT && !sc->sc_mrrprot))
 		mrr = 0;
 
 	/* Without multi-rate retry only rates that have been acked qualify. */
 	best_rix = pick_best_rate(an, rt, size_bin, !mrr);
 	if (best_rix >= 0) {
 		average_tx_time = sn->stats[size_bin][best_rix].average_tx_time;
 	} else {
 		average_tx_time = 0;
 	}
 	/*
 	 * Limit the time measuring the performance of other tx
 	 * rates to sample_rate% of the total transmission time.
 	 */
 	if (sn->sample_tt[size_bin] < average_tx_time * (sn->packets_since_sample[size_bin]*ssc->sample_rate/100)) {
 		rix = pick_sample_rate(ssc, an, rt, size_bin);
 		IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
 		     &an->an_node, "att %d sample_tt %d size %u sample rate %d %s current rate %d %s",
 		     average_tx_time,
 		     sn->sample_tt[size_bin],
 		     bin_to_size(size_bin),
 		     dot11rate(rt, rix),
 		     dot11rate_label(rt, rix),
 		     dot11rate(rt, sn->current_rix[size_bin]),
 		     dot11rate_label(rt, sn->current_rix[size_bin]));
 		/* Only a rate other than the current one counts as a sample. */
 		if (rix != sn->current_rix[size_bin]) {
 			sn->current_sample_rix[size_bin] = rix;
 		} else {
 			sn->current_sample_rix[size_bin] = -1;
 		}
 		sn->packets_since_sample[size_bin] = 0;
 	} else {
 		change_rates = 0;
 		if (!sn->packets_sent[size_bin] || best_rix == -1) {
 			/* no packet has been sent successfully yet */
 			change_rates = 1;
 			if (an->an_node.ni_flags & IEEE80211_NODE_HT)
 				best_rix =
 				    ath_rate_pick_seed_rate_ht(sc, an, frameLen);
 			else
 				best_rix =
 				    ath_rate_pick_seed_rate_legacy(sc, an, frameLen);
 		} else if (sn->packets_sent[size_bin] < 20) {
 			/* let the bit-rate switch quickly during the first few packets */
 			IEEE80211_NOTE(an->an_node.ni_vap,
 			    IEEE80211_MSG_RATECTL, &an->an_node,
 			    "%s: switching quickly..", __func__);
 			change_rates = 1;
 		} else if (ticks - ssc->min_switch > sn->ticks_since_switch[size_bin]) {
 			/* min_switch seconds have gone by */
 			IEEE80211_NOTE(an->an_node.ni_vap,
 			    IEEE80211_MSG_RATECTL, &an->an_node,
 			    "%s: min_switch %d > ticks_since_switch %d..",
 			    __func__, ticks - ssc->min_switch, sn->ticks_since_switch[size_bin]);
 			change_rates = 1;
 		} else if ((! (an->an_node.ni_flags & IEEE80211_NODE_HT)) &&
 		    (2*average_tx_time < sn->stats[size_bin][sn->current_rix[size_bin]].average_tx_time)) {
 			/* the current bit-rate is twice as slow as the best one */
 			IEEE80211_NOTE(an->an_node.ni_vap,
 			    IEEE80211_MSG_RATECTL, &an->an_node,
 			    "%s: 2x att (= %d) < cur_rix att %d",
 			    __func__,
 			    2 * average_tx_time, sn->stats[size_bin][sn->current_rix[size_bin]].average_tx_time);
 			change_rates = 1;
 		} else if ((an->an_node.ni_flags & IEEE80211_NODE_HT)) {
 			int cur_rix = sn->current_rix[size_bin];
 			int cur_att = sn->stats[size_bin][cur_rix].average_tx_time;
 			/*
 			 * If the node is HT, upgrade it if the MCS rate is
 			 * higher and the average tx time is within 20% of
 			 * the current rate. It can fail a little.
 			 *
 			 * This is likely not optimal!
 			 */
 #if 0
 			printf("cur rix/att %x/%d, best rix/att %x/%d\n",
 			    MCS(cur_rix), cur_att, MCS(best_rix), average_tx_time);
 #endif
 			if ((MCS(best_rix) > MCS(cur_rix)) &&
 			    (average_tx_time * 8) <= (cur_att * 10)) {
 				/* Both MCS values are printed in hex. */
 				IEEE80211_NOTE(an->an_node.ni_vap,
 				    IEEE80211_MSG_RATECTL, &an->an_node,
 				    "%s: HT: best_rix 0x%x > cur_rix 0x%x, average_tx_time %d, cur_att %d",
 				    __func__,
 				    MCS(best_rix), MCS(cur_rix), average_tx_time, cur_att);
 				change_rates = 1;
 			}
 		}
 
 		sn->packets_since_sample[size_bin]++;
 		
 		if (change_rates) {
 			if (best_rix != sn->current_rix[size_bin]) {
 				IEEE80211_NOTE(an->an_node.ni_vap,
 				    IEEE80211_MSG_RATECTL,
 				    &an->an_node,
 "%s: size %d switch rate %d (%d/%d) -> %d (%d/%d) after %d packets mrr %d",
 				    __func__,
 				    bin_to_size(size_bin),
 				    RATE(sn->current_rix[size_bin]),
 				    sn->stats[size_bin][sn->current_rix[size_bin]].average_tx_time,
 				    sn->stats[size_bin][sn->current_rix[size_bin]].perfect_tx_time,
 				    RATE(best_rix),
 				    sn->stats[size_bin][best_rix].average_tx_time,
 				    sn->stats[size_bin][best_rix].perfect_tx_time,
 				    sn->packets_since_switch[size_bin],
 				    mrr);
 			}
 			sn->packets_since_switch[size_bin] = 0;
 			sn->current_rix[size_bin] = best_rix;
 			sn->ticks_since_switch[size_bin] = ticks;
 			/* 
 			 * Set the visible txrate for this node.
 			 */
 			an->an_node.ni_txrate = (rt->info[best_rix].phy == IEEE80211_T_HT) ?  MCS(best_rix) : DOT11RATE(best_rix);
 		}
 		rix = sn->current_rix[size_bin];
 		sn->packets_since_switch[size_bin]++;
 	}
 	*try0 = mrr ? sn->sched[rix].t0 : ATH_TXMAXTRY;
 done:
 
 	/*
 	 * This bug totally sucks and should be fixed.
 	 *
 	 * For now though, let's not panic, so we can start to figure
 	 * out how to better reproduce it.
 	 */
 	if (rix < 0 || rix >= rt->rateCount) {
 		printf("%s: ERROR: rix %d out of bounds (rateCount=%d)\n",
 		    __func__,
 		    rix,
 		    rt->rateCount);
 		rix = 0;	/* XXX just default for now */
 	}
 	KASSERT(rix >= 0 && rix < rt->rateCount, ("rix is %d", rix));
 
 	*rix0 = rix;
 	*txrate = rt->info[rix].rateCode
 		| (shortPreamble ? rt->info[rix].shortPreamble : 0);
 	sn->packets_sent[size_bin]++;
 #undef DOT11RATE
 #undef MCS
 #undef RATE
 }
 
 /*
  * Fill in the four-entry rate series for a frame whose first rate is
  * rix0, copying rate indices and try counts from the node's schedule
  * entry for that rate and clearing all four flags fields.
  *
  * Short preamble flags are deliberately left alone; the caller can
  * apply them.
  */
 void
 ath_rate_getxtxrates(struct ath_softc *sc, struct ath_node *an,
     uint8_t rix0, struct ath_rc_series *rc)
 {
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	const struct txschedule *sched = &sn->sched[rix0];
 	int i;
 
 	KASSERT(rix0 == sched->r0, ("rix0 (%x) != sched->r0 (%x)!\n",
 	    rix0, sched->r0));
 
 	for (i = 0; i < 4; i++)
 		rc[i].flags = 0;
 
 	/* series 0 */
 	rc[0].rix = sched->r0;
 	rc[0].tries = sched->t0;
 	/* series 1 */
 	rc[1].rix = sched->r1;
 	rc[1].tries = sched->t1;
 	/* series 2 */
 	rc[2].rix = sched->r2;
 	rc[2].tries = sched->t2;
 	/* series 3 */
 	rc[3].rix = sched->r3;
 	rc[3].tries = sched->t3;
 }
 
 /*
  * Program retry series 1-3 of a transmit descriptor from the node's
  * schedule entry for 'rix'.  Each series gets the rate code of its
  * schedule rate, with that rate's short preamble bit or'd in when
  * shortPreamble is set, and the schedule's try count.
  */
 void
 ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an,
 		      struct ath_desc *ds, int shortPreamble, u_int8_t rix)
 {
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	const struct txschedule *sched = &sn->sched[rix];
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	uint8_t series_rix[3], code[3];
 	int i;
 
 	series_rix[0] = sched->r1;
 	series_rix[1] = sched->r2;
 	series_rix[2] = sched->r3;
 
 	/* XXX precalculate short preamble tables */
 	for (i = 0; i < 3; i++) {
 		code[i] = rt->info[series_rix[i]].rateCode;
 		if (shortPreamble)
 			code[i] |= rt->info[series_rix[i]].shortPreamble;
 	}
 
 	ath_hal_setupxtxdesc(sc->sc_ah, ds,
 	    code[0], sched->t1,		/* series 1 */
 	    code[1], sched->t2,		/* series 2 */
 	    code[2], sched->t3);	/* series 3 */
 }
 
 /*
  * Fold the result of one transmission into the statistics kept for rix0
  * in the size bin matching frame_size.
  *
  * rixN/triesN describe the programmed rate series, starting with the
  * series being accounted.  'tries' is the number of attempts to account
  * for and 'short_tries' is handed to calc_usecs_unicast_packet().  The
  * transmit time of every series that was reached is summed into 'tt',
  * which then updates average_tx_time: a running average while fewer than
  * smoothing_minpackets packets have been recorded, an EWMA weighted by
  * smoothing_rate afterwards.  The failure/ack counters, tries, last_tx,
  * total_packets and the ewma_pct success figure (scaled by 1000) are
  * updated from nframes and nbad.
  *
  * Returns early, before any statistic is modified, if a series that has
  * to be accounted uses a rate not defined for the node.  'status' is
  * only used in the debug message.
  */
 static void
 update_stats(struct ath_softc *sc, struct ath_node *an, 
 		  int frame_size,
 		  int rix0, int tries0,
 		  int rix1, int tries1,
 		  int rix2, int tries2,
 		  int rix3, int tries3,
 		  int short_tries, int tries, int status,
 		  int nframes, int nbad)
 {
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc);
 #ifdef IEEE80211_DEBUG
 	/* rt is only referenced from the IEEE80211_NOTE() call below. */
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 #endif
 	const int size_bin = size_to_bin(frame_size);
 	const int size = bin_to_size(size_bin);
 	int tt, tries_so_far;
 	int is_ht40 = (an->an_node.ni_chw == 40);
 	int pct;
 
 	if (!IS_RATE_DEFINED(sn, rix0))
 		return;
 	/* Time spent on series 0, capped at the attempts actually made. */
 	tt = calc_usecs_unicast_packet(sc, size, rix0, short_tries,
 		MIN(tries0, tries) - 1, is_ht40);
 	tries_so_far = tries0;
 
 	/* Add each later series only if the attempts reached it. */
 	if (tries1 && tries_so_far < tries) {
 		if (!IS_RATE_DEFINED(sn, rix1))
 			return;
 		tt += calc_usecs_unicast_packet(sc, size, rix1, short_tries,
 			MIN(tries1 + tries_so_far, tries) - tries_so_far - 1, is_ht40);
 		tries_so_far += tries1;
 	}
 
 	if (tries2 && tries_so_far < tries) {
 		if (!IS_RATE_DEFINED(sn, rix2))
 			return;
 		tt += calc_usecs_unicast_packet(sc, size, rix2, short_tries,
 			MIN(tries2 + tries_so_far, tries) - tries_so_far - 1, is_ht40);
 		tries_so_far += tries2;
 	}
 
 	if (tries3 && tries_so_far < tries) {
 		if (!IS_RATE_DEFINED(sn, rix3))
 			return;
 		tt += calc_usecs_unicast_packet(sc, size, rix3, short_tries,
 			MIN(tries3 + tries_so_far, tries) - tries_so_far - 1, is_ht40);
 	}
 
 	/* NB: total_packets is still the count from before this update. */
 	if (sn->stats[size_bin][rix0].total_packets < ssc->smoothing_minpackets) {
 		/* just average the first few packets */
 		int avg_tx = sn->stats[size_bin][rix0].average_tx_time;
 		int packets = sn->stats[size_bin][rix0].total_packets;
 		sn->stats[size_bin][rix0].average_tx_time = (tt+(avg_tx*packets))/(packets+nframes);
 	} else {
 		/* use a ewma */
 		sn->stats[size_bin][rix0].average_tx_time = 
 			((sn->stats[size_bin][rix0].average_tx_time * ssc->smoothing_rate) + 
 			 (tt * (100 - ssc->smoothing_rate))) / 100;
 	}
 	
 	/*
 	 * XXX Don't mark the higher bit rates as also having failed; as this
 	 * unfortunately stops those rates from being tasted when trying to
 	 * TX. This happens with 11n aggregation.
 	 */
 	/* Only a transmission in which every frame failed counts as a failure. */
 	if (nframes == nbad) {
 #if 0
 		int y;
 #endif
 		sn->stats[size_bin][rix0].successive_failures += nbad;
 #if 0
 		for (y = size_bin+1; y < NUM_PACKET_SIZE_BINS; y++) {
 			/*
 			 * Also say larger packets failed since we
 			 * assume if a small packet fails at a
 			 * bit-rate then a larger one will also.
 			 */
 			sn->stats[y][rix0].successive_failures += nbad;
 			sn->stats[y][rix0].last_tx = ticks;
 			sn->stats[y][rix0].tries += tries;
 			sn->stats[y][rix0].total_packets += nframes;
 		}
 #endif
 	} else {
 		sn->stats[size_bin][rix0].packets_acked += (nframes - nbad);
 		sn->stats[size_bin][rix0].successive_failures = 0;
 	}
 	sn->stats[size_bin][rix0].tries += tries;
 	sn->stats[size_bin][rix0].last_tx = ticks;
 	sn->stats[size_bin][rix0].total_packets += nframes;
 
 	/* update EWMA for this rix */
 
 	/* Calculate percentage based on current rate */
 	/* Guard the division; a zero-frame update is scored as a failure. */
 	if (nframes == 0)
 		nframes = nbad = 1;
 	/* Success ratio of this transmission, scaled by 1000. */
 	pct = ((nframes - nbad) * 1000) / nframes;
 
 	/* NB: total_packets now includes this transmission's frames. */
 	if (sn->stats[size_bin][rix0].total_packets <
 	    ssc->smoothing_minpackets) {
 		/* just average the first few packets */
 		int a_pct = (sn->stats[size_bin][rix0].packets_acked * 1000) /
 		    (sn->stats[size_bin][rix0].total_packets);
 		sn->stats[size_bin][rix0].ewma_pct = a_pct;
 	} else {
 		/* use a ewma */
 		sn->stats[size_bin][rix0].ewma_pct =
 			((sn->stats[size_bin][rix0].ewma_pct * ssc->smoothing_rate) +
 			 (pct * (100 - ssc->smoothing_rate))) / 100;
 	}
 
 
 	/* A pending sample at this rate is now complete: record its cost. */
 	if (rix0 == sn->current_sample_rix[size_bin]) {
 		IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
 		   &an->an_node,
 "%s: size %d %s sample rate %d %s tries (%d/%d) tt %d avg_tt (%d/%d) nfrm %d nbad %d", 
 		    __func__, 
 		    size,
 		    status ? "FAIL" : "OK",
 		    dot11rate(rt, rix0),
 		    dot11rate_label(rt, rix0),
 		    short_tries, tries, tt, 
 		    sn->stats[size_bin][rix0].average_tx_time,
 		    sn->stats[size_bin][rix0].perfect_tx_time,
 		    nframes, nbad);
 		sn->sample_tt[size_bin] = tt;
 		sn->current_sample_rix[size_bin] = -1;
 	}
 }
 
 /*
  * Log a diagnostic about a completed transmission that reported a rate
  * which is not defined for the node: prints the series number, the
  * hardware rate code, the try count and the tx status on the device's
  * console prefix.
  */
 static void
-badrate(struct ifnet *ifp, int series, int hwrate, int tries, int status)
+badrate(struct ath_softc *sc, int series, int hwrate, int tries, int status)
 {
-	if_printf(ifp, "bad series%d hwrate 0x%x, tries %u ts_status 0x%x\n",
+
+	device_printf(sc->sc_dev,
+	    "bad series%d hwrate 0x%x, tries %u ts_status 0x%x\n",
 	    series, hwrate, tries, status);
 }
 
 /*
  * Account for a completed transmission.
  *
  * 'rc' is the rate series that was programmed for the frame(s), 'ts' the
  * hardware tx status, 'frame_size' the frame length (1500 is assumed
  * when it is 0), 'nframes' the number of frames covered by this
  * completion and 'nbad' how many of them failed.
  *
  * Nothing is recorded when nframes is 0 or the node has no rate mask
  * yet.  When multi-rate retry is not in effect, or the frame completed
  * in series 0, a single update_stats() call is made for the final rate.
  * Otherwise update_stats() is called once per series up to and
  * including the final series index reported by the hardware, with
  * long_tries reduced by the tries consumed by each earlier series.
  */
 void
 ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
 	const struct ath_rc_series *rc, const struct ath_tx_status *ts,
 	int frame_size, int nframes, int nbad)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	int final_rix, short_tries, long_tries;
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	int status = ts->ts_status;
 	int mrr;
 
 	final_rix = rt->rateCodeToIndex[ts->ts_rate];
 	short_tries = ts->ts_shortretry;
 	/* ts_longretry counts retries; add the initial attempt. */
 	long_tries = ts->ts_longretry + 1;
 
 	if (nframes == 0) {
 		device_printf(sc->sc_dev, "%s: nframes=0?\n", __func__);
 		return;
 	}
 
 	if (frame_size == 0)		    /* NB: should not happen */
 		frame_size = 1500;
 
 	if (sn->ratemask == 0) {
 		IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
 		    &an->an_node,
 		    "%s: size %d %s rate/try %d/%d no rates yet", 
 		    __func__,
 		    bin_to_size(size_to_bin(frame_size)),
 		    status ? "FAIL" : "OK",
 		    short_tries, long_tries);
 		return;
 	}
 	mrr = sc->sc_mrretry;
 	/* XXX check HT protmode too */
 	if (mrr && (ic->ic_flags & IEEE80211_F_USEPROT && !sc->sc_mrrprot))
 		mrr = 0;
 
 	if (!mrr || ts->ts_finaltsi == 0) {
 		if (!IS_RATE_DEFINED(sn, final_rix)) {
-			device_printf(sc->sc_dev, "%s: ts_rate=%d ts_finaltsi=%d, final_rix=%d\n",
+			device_printf(sc->sc_dev,
+			    "%s: ts_rate=%d ts_finaltsi=%d, final_rix=%d\n",
 			    __func__, ts->ts_rate, ts->ts_finaltsi, final_rix);
-			badrate(ifp, 0, ts->ts_rate, long_tries, status);
+			badrate(sc, 0, ts->ts_rate, long_tries, status);
 			return;
 		}
 		/*
 		 * Only one rate was used; optimize work.
 		 */
 		IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
 		     &an->an_node, "%s: size %d (%d bytes) %s rate/short/long %d %s/%d/%d nframes/nbad [%d/%d]",
 		     __func__,
 		     bin_to_size(size_to_bin(frame_size)),
 		     frame_size,
 		     status ? "FAIL" : "OK",
 		     dot11rate(rt, final_rix), dot11rate_label(rt, final_rix),
 		     short_tries, long_tries, nframes, nbad);
 		update_stats(sc, an, frame_size, 
 			     final_rix, long_tries,
 			     0, 0,
 			     0, 0,
 			     0, 0,
 			     short_tries, long_tries, status,
 			     nframes, nbad);
 
 	} else {
 		int finalTSIdx = ts->ts_finaltsi;
 		int i;
 
 		/*
 		 * Process intermediate rates that failed.
 		 */
 
 		IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
 		    &an->an_node,
 "%s: size %d (%d bytes) finaltsidx %d short %d long %d %s rate/try [%d %s/%d %d %s/%d %d %s/%d %d %s/%d] nframes/nbad [%d/%d]", 
 		     __func__,
 		     bin_to_size(size_to_bin(frame_size)),
 		     frame_size,
 		     finalTSIdx,
 		     short_tries,
 		     long_tries,
 		     status ? "FAIL" : "OK",
 		     dot11rate(rt, rc[0].rix),
 		      dot11rate_label(rt, rc[0].rix), rc[0].tries,
 		     dot11rate(rt, rc[1].rix),
 		      dot11rate_label(rt, rc[1].rix), rc[1].tries,
 		     dot11rate(rt, rc[2].rix),
 		      dot11rate_label(rt, rc[2].rix), rc[2].tries,
 		     dot11rate(rt, rc[3].rix),
 		      dot11rate_label(rt, rc[3].rix), rc[3].tries,
 		     nframes, nbad);
 
 		/* Report every series in use whose rate the node lacks. */
 		for (i = 0; i < 4; i++) {
 			if (rc[i].tries && !IS_RATE_DEFINED(sn, rc[i].rix))
-				badrate(ifp, 0, rc[i].ratecode, rc[i].tries,
+				badrate(sc, i, rc[i].ratecode, rc[i].tries,
 				    status);
 		}
 
 		/*
 		 * NB: series > 0 are not penalized for failure
 		 * based on the try counts under the assumption
 		 * that losses are often bursty and since we
 		 * sample higher rates 1 try at a time doing so
 		 * may unfairly penalize them.
 		 */
 		if (rc[0].tries) {
 			/* Series 0 failed iff more tries were needed than it had. */
 			update_stats(sc, an, frame_size,
 				     rc[0].rix, rc[0].tries,
 				     rc[1].rix, rc[1].tries,
 				     rc[2].rix, rc[2].tries,
 				     rc[3].rix, rc[3].tries,
 				     short_tries, long_tries,
 				     long_tries > rc[0].tries,
 				     nframes, nbad);
 			long_tries -= rc[0].tries;
 		}
 		
 		if (rc[1].tries && finalTSIdx > 0) {
 			update_stats(sc, an, frame_size,
 				     rc[1].rix, rc[1].tries,
 				     rc[2].rix, rc[2].tries,
 				     rc[3].rix, rc[3].tries,
 				     0, 0,
 				     short_tries, long_tries,
 				     status,
 				     nframes, nbad);
 			long_tries -= rc[1].tries;
 		}
 
 		if (rc[2].tries && finalTSIdx > 1) {
 			update_stats(sc, an, frame_size,
 				     rc[2].rix, rc[2].tries,
 				     rc[3].rix, rc[3].tries,
 				     0, 0,
 				     0, 0,
 				     short_tries, long_tries,
 				     status,
 				     nframes, nbad);
 			long_tries -= rc[2].tries;
 		}
 
 		if (rc[3].tries && finalTSIdx > 2) {
 			update_stats(sc, an, frame_size,
 				     rc[3].rix, rc[3].tries,
 				     0, 0,
 				     0, 0,
 				     0, 0,
 				     short_tries, long_tries,
 				     status,
 				     nframes, nbad);
 		}
 	}
 }
 
 /*
  * Rebuild the rate-control state of a node, but only when the caller
  * flags it as new (isnew non-zero); otherwise nothing is touched.
  */
 void
 ath_rate_newassoc(struct ath_softc *sc, struct ath_node *an, int isnew)
 {
 	if (!isnew)
 		return;
 	ath_rate_ctl_reset(sc, &an->an_node);
 }
 
 /*
  * Multi-rate retry schedule for each operating mode.
  *
  * ath_rate_ctl_reset() indexes this table with sc->sc_curmode, so the
  * entries are positional; the trailing comment on each entry names the
  * mode that slot is meant for.  A NULL entry means no schedule exists
  * for that mode, which ath_rate_ctl_reset() rejects with a KASSERT.
  * Note that the 11b slot deliberately reuses the 11g schedule.
  */
 static const struct txschedule *mrr_schedules[IEEE80211_MODE_MAX+2] = {
 	NULL,		/* IEEE80211_MODE_AUTO */
 	series_11a,	/* IEEE80211_MODE_11A */
 	series_11g,	/* IEEE80211_MODE_11B */
 	series_11g,	/* IEEE80211_MODE_11G */
 	NULL,		/* IEEE80211_MODE_FH */
 	series_11a,	/* IEEE80211_MODE_TURBO_A */
 	series_11g,	/* IEEE80211_MODE_TURBO_G */
 	series_11a,	/* IEEE80211_MODE_STURBO_A */
 	series_11na,	/* IEEE80211_MODE_11NA */
 	series_11ng,	/* IEEE80211_MODE_11NG */
 	series_half,	/* IEEE80211_MODE_HALF */
 	series_quarter,	/* IEEE80211_MODE_QUARTER */
 };
 
 /*
  * Initialize the tables for a node.
  */
 static void
 ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni)
 {
 #define	RATE(_ix)	(ni->ni_rates.rs_rates[(_ix)] & IEEE80211_RATE_VAL)
 #define	DOT11RATE(_ix)	(rt->info[(_ix)].dot11Rate & IEEE80211_RATE_VAL)
 #define	MCS(_ix)	(ni->ni_htrates.rs_rates[_ix] | IEEE80211_RATE_MCS)
 	struct ath_node *an = ATH_NODE(ni);
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	int x, y, rix;
 
 	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
 
 	KASSERT(sc->sc_curmode < IEEE80211_MODE_MAX+2,
 	    ("curmode %u", sc->sc_curmode));
 
 	sn->sched = mrr_schedules[sc->sc_curmode];
 	KASSERT(sn->sched != NULL,
 	    ("no mrr schedule for mode %u", sc->sc_curmode));
 
         sn->static_rix = -1;
 	ath_rate_update_static_rix(sc, ni);
 
 	sn->currates = sc->sc_currates;
 
 	/*
 	 * Construct a bitmask of usable rates.  This has all
 	 * negotiated rates minus those marked by the hal as
 	 * to be ignored for doing rate control.
 	 */
 	sn->ratemask = 0;
 	/* MCS rates */
 	if (ni->ni_flags & IEEE80211_NODE_HT) {
 		for (x = 0; x < ni->ni_htrates.rs_nrates; x++) {
 			rix = sc->sc_rixmap[MCS(x)];
 			if (rix == 0xff)
 				continue;
 			/* skip rates marked broken by hal */
 			if (!rt->info[rix].valid)
 				continue;
 			KASSERT(rix < SAMPLE_MAXRATES,
 			    ("mcs %u has rix %d", MCS(x), rix));
 			sn->ratemask |= (uint64_t) 1<<rix;
 		}
 	}
 
 	/* Legacy rates */
 	for (x = 0; x < ni->ni_rates.rs_nrates; x++) {
 		rix = sc->sc_rixmap[RATE(x)];
 		if (rix == 0xff)
 			continue;
 		/* skip rates marked broken by hal */
 		if (!rt->info[rix].valid)
 			continue;
 		KASSERT(rix < SAMPLE_MAXRATES,
 		    ("rate %u has rix %d", RATE(x), rix));
 		sn->ratemask |= (uint64_t) 1<<rix;
 	}
 #ifdef IEEE80211_DEBUG
 	if (ieee80211_msg(ni->ni_vap, IEEE80211_MSG_RATECTL)) {
 		uint64_t mask;
 
 		ieee80211_note(ni->ni_vap, "[%6D] %s: size 1600 rate/tt",
 		    ni->ni_macaddr, ":", __func__);
 		for (mask = sn->ratemask, rix = 0; mask != 0; mask >>= 1, rix++) {
 			if ((mask & 1) == 0)
 				continue;
 			printf(" %d %s/%d", dot11rate(rt, rix), dot11rate_label(rt, rix),
 			    calc_usecs_unicast_packet(sc, 1600, rix, 0,0,
 			        (ni->ni_chw == 40)));
 		}
 		printf("\n");
 	}
 #endif
 	for (y = 0; y < NUM_PACKET_SIZE_BINS; y++) {
 		int size = bin_to_size(y);
 		uint64_t mask;
 
 		sn->packets_sent[y] = 0;
 		sn->current_sample_rix[y] = -1;
 		sn->last_sample_rix[y] = 0;
 		/* XXX start with first valid rate */
 		sn->current_rix[y] = ffs(sn->ratemask)-1;
 		
 		/*
 		 * Initialize the statistics buckets; these are
 		 * indexed by the rate code index.
 		 */
 		for (rix = 0, mask = sn->ratemask; mask != 0; rix++, mask >>= 1) {
 			if ((mask & 1) == 0)		/* not a valid rate */
 				continue;
 			sn->stats[y][rix].successive_failures = 0;
 			sn->stats[y][rix].tries = 0;
 			sn->stats[y][rix].total_packets = 0;
 			sn->stats[y][rix].packets_acked = 0;
 			sn->stats[y][rix].last_tx = 0;
 			sn->stats[y][rix].ewma_pct = 0;
 			
 			sn->stats[y][rix].perfect_tx_time =
 			    calc_usecs_unicast_packet(sc, size, rix, 0, 0,
 			    (ni->ni_chw == 40));
 			sn->stats[y][rix].average_tx_time =
 			    sn->stats[y][rix].perfect_tx_time;
 		}
 	}
 #if 0
 	/* XXX 0, num_rates-1 are wrong */
 	IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni,
 	    "%s: %d rates %d%sMbps (%dus)- %d%sMbps (%dus)", __func__, 
 	    sn->num_rates,
 	    DOT11RATE(0)/2, DOT11RATE(0) % 1 ? ".5" : "",
 	    sn->stats[1][0].perfect_tx_time,
 	    DOT11RATE(sn->num_rates-1)/2, DOT11RATE(sn->num_rates-1) % 1 ? ".5" : "",
 	    sn->stats[1][sn->num_rates-1].perfect_tx_time
 	);
 #endif
 	/* set the visible bit-rate */
 	if (sn->static_rix != -1)
 		ni->ni_txrate = DOT11RATE(sn->static_rix);
 	else
 		ni->ni_txrate = RATE(0);
 #undef RATE
 #undef DOT11RATE
 }
 
 /*
  * Fetch the statistics for the given node.
  *
  * The ieee80211 node must be referenced and unlocked, however the ath_node
  * must be locked.
  *
  * The main difference here is that we convert the rate indexes
  * to 802.11 rates, or the userland output won't make much sense
  * as it has no access to the rix table.
  */
 int
 ath_rate_fetch_node_stats(struct ath_softc *sc, struct ath_node *an,
     struct ath_rateioctl *rs)
 {
 	struct sample_node *sn = ATH_NODE_SAMPLE(an);
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	struct ath_rateioctl_tlv av;
 	struct ath_rateioctl_rt *tv;
 	int y;
 	int o = 0;
 
 	ATH_NODE_LOCK_ASSERT(an);
 
 	/*
 	 * Ensure there's enough space for the statistics.
 	 */
 	if (rs->len <
 	    sizeof(struct ath_rateioctl_tlv) +
 	    sizeof(struct ath_rateioctl_rt) +
 	    sizeof(struct ath_rateioctl_tlv) +
 	    sizeof(struct sample_node)) {
 		device_printf(sc->sc_dev, "%s: len=%d, too short\n",
 		    __func__,
 		    rs->len);
 		return (EINVAL);
 	}
 
 	/*
 	 * Take a temporary copy of the sample node state so we can
 	 * modify it before we copy it.
 	 */
 	tv = malloc(sizeof(struct ath_rateioctl_rt), M_TEMP,
 	    M_NOWAIT | M_ZERO);
 	if (tv == NULL) {
 		return (ENOMEM);
 	}
 
 	/*
 	 * Populate the rate table mapping TLV.
 	 */
 	tv->nentries = rt->rateCount;
 	for (y = 0; y < rt->rateCount; y++) {
 		tv->ratecode[y] = rt->info[y].dot11Rate & IEEE80211_RATE_VAL;
 		if (rt->info[y].phy == IEEE80211_T_HT)
 			tv->ratecode[y] |= IEEE80211_RATE_MCS;
 	}
 
 	o = 0;
 	/*
 	 * First TLV - rate code mapping
 	 */
 	av.tlv_id = ATH_RATE_TLV_RATETABLE;
 	av.tlv_len = sizeof(struct ath_rateioctl_rt);
 	copyout(&av, rs->buf + o, sizeof(struct ath_rateioctl_tlv));
 	o += sizeof(struct ath_rateioctl_tlv);
 	copyout(tv, rs->buf + o, sizeof(struct ath_rateioctl_rt));
 	o += sizeof(struct ath_rateioctl_rt);
 
 	/*
 	 * Second TLV - sample node statistics
 	 */
 	av.tlv_id = ATH_RATE_TLV_SAMPLENODE;
 	av.tlv_len = sizeof(struct sample_node);
 	copyout(&av, rs->buf + o, sizeof(struct ath_rateioctl_tlv));
 	o += sizeof(struct ath_rateioctl_tlv);
 
 	/*
 	 * Copy the statistics over to the provided buffer.
 	 */
 	copyout(sn, rs->buf + o, sizeof(struct sample_node));
 	o += sizeof(struct sample_node);
 
 	free(tv, M_TEMP);
 
 	return (0);
 }
 
 /*
  * Print the rate-control state of one node.
  *
  * Has the callback shape expected by ieee80211_iterate_nodes(); arg is
  * the owning ath_softc.  Output is a summary line, three lines per
  * packet size bin, and one line for every (rate, bin) pair that has
  * seen at least one packet.
  */
 static void
 sample_stats(void *arg, struct ieee80211_node *ni)
 {
 	struct ath_softc *sc = arg;
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	struct sample_node *sn = ATH_NODE_SAMPLE(ATH_NODE(ni));
 	uint64_t bits;
 	int bin, rix;
 
 	/* Node summary. */
 	printf("\n[%s] refcnt %d static_rix (%d %s) ratemask 0x%jx\n",
 	    ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni),
 	    dot11rate(rt, sn->static_rix),
 	    dot11rate_label(rt, sn->static_rix),
 	    (uintmax_t)sn->ratemask);
 
 	/* Current and sampling state of every packet size bin. */
 	for (bin = 0; bin < NUM_PACKET_SIZE_BINS; bin++) {
 		printf("[%4u] cur rix %d (%d %s) since switch: packets %d ticks %u\n",
 		    bin_to_size(bin), sn->current_rix[bin],
 		    dot11rate(rt, sn->current_rix[bin]),
 		    dot11rate_label(rt, sn->current_rix[bin]),
 		    sn->packets_since_switch[bin], sn->ticks_since_switch[bin]);
 		printf("[%4u] last sample (%d %s) cur sample (%d %s) packets sent %d\n",
 		    bin_to_size(bin),
 		    dot11rate(rt, sn->last_sample_rix[bin]),
 		    dot11rate_label(rt, sn->last_sample_rix[bin]),
 		    dot11rate(rt, sn->current_sample_rix[bin]),
 		    dot11rate_label(rt, sn->current_sample_rix[bin]),
 		    sn->packets_sent[bin]);
 		printf("[%4u] packets since sample %d sample tt %u\n",
 		    bin_to_size(bin), sn->packets_since_sample[bin],
 		    sn->sample_tt[bin]);
 	}
 
 	/* Counters of each rate in the mask; idle (rate, bin) pairs are omitted. */
 	rix = 0;
 	bits = sn->ratemask;
 	while (bits != 0) {
 		if ((bits & 1) != 0) {
 			for (bin = 0; bin < NUM_PACKET_SIZE_BINS; bin++) {
 				if (sn->stats[bin][rix].total_packets != 0) {
 					printf("[%2u %s:%4u] %8ju:%-8ju (%3d%%) (EWMA %3d.%1d%%) T %8ju F %4d avg %5u last %u\n",
 					    dot11rate(rt, rix), dot11rate_label(rt, rix),
 					    bin_to_size(bin),
 					    (uintmax_t) sn->stats[bin][rix].total_packets,
 					    (uintmax_t) sn->stats[bin][rix].packets_acked,
 					    (int) ((sn->stats[bin][rix].packets_acked * 100ULL) /
 					     sn->stats[bin][rix].total_packets),
 					    sn->stats[bin][rix].ewma_pct / 10,
 					    sn->stats[bin][rix].ewma_pct % 10,
 					    (uintmax_t) sn->stats[bin][rix].tries,
 					    sn->stats[bin][rix].successive_failures,
 					    sn->stats[bin][rix].average_tx_time,
 					    ticks - sn->stats[bin][rix].last_tx);
 				}
 			}
 		}
 		bits >>= 1;
 		rix++;
 	}
 }
 
 static int
 ath_rate_sysctl_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct ath_softc *sc = arg1;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	int error, v;
 
 	v = 0;
 	error = sysctl_handle_int(oidp, &v, 0, req);
 	if (error || !req->newptr)
 		return error;
 	ieee80211_iterate_nodes(&ic->ic_sta, sample_stats, sc);
 	return 0;
 }
 
 static int
 ath_rate_sysctl_smoothing_rate(SYSCTL_HANDLER_ARGS)
 {
 	struct sample_softc *ssc = arg1;
 	int rate, error;
 
 	rate = ssc->smoothing_rate;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
 	if (!(0 <= rate && rate < 100))
 		return EINVAL;
 	ssc->smoothing_rate = rate;
 	ssc->smoothing_minpackets = 100 / (100 - rate);
 	return 0;
 }
 
 static int
 ath_rate_sysctl_sample_rate(SYSCTL_HANDLER_ARGS)
 {
 	struct sample_softc *ssc = arg1;
 	int rate, error;
 
 	rate = ssc->sample_rate;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
 	if (!(2 <= rate && rate <= 100))
 		return EINVAL;
 	ssc->sample_rate = rate;
 	return 0;
 }
 
 /*
  * Register the sample rate-control sysctls under the device's sysctl
  * tree: smoothing_rate and sample_rate operate on the sample_softc,
  * sample_stats on the ath_softc.
  */
 static void
 ath_rate_sysctlattach(struct ath_softc *sc, struct sample_softc *ssc)
 {
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
 	struct sysctl_oid_list *parent =
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->sc_dev));
 
 	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO,
 	    "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0,
 	    ath_rate_sysctl_smoothing_rate, "I",
 	    "sample: smoothing rate for avg tx time (%%)");
 
 	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO,
 	    "sample_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0,
 	    ath_rate_sysctl_sample_rate, "I",
 	    "sample: percent air time devoted to sampling new rates (%%)");
 
 	/* XXX max_successive_failures, stale_failure_timeout, min_switch */
 
 	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO,
 	    "sample_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
 	    ath_rate_sysctl_stats, "I", "sample: print statistics");
 }
 
 /*
  * Allocate the sample rate-control state, fill in its defaults and
  * register its sysctls.
  *
  * Returns the embedded ath_ratectrl, or NULL if allocation fails.
  */
 struct ath_ratectrl *
 ath_rate_attach(struct ath_softc *sc)
 {
 	struct sample_softc *ssc =
 	    malloc(sizeof(*ssc), M_DEVBUF, M_NOWAIT | M_ZERO);
 
 	if (ssc == NULL)
 		return NULL;
 
 	/* Per-node space the driver must reserve for this module. */
 	ssc->arc.arc_space = sizeof(struct sample_node);
 
 	/* EWMA percentage, valid range [0..99]. */
 	ssc->smoothing_rate = 75;
 	ssc->smoothing_minpackets = 100 / (100 - ssc->smoothing_rate);
 
 	/* Percentage of time spent trying other tx rates. */
 	ssc->sample_rate = 10;
 
 	/* Threshold for rate sampling. */
 	ssc->max_successive_failures = 3;
 
 	/* Timeouts in ticks: 10 seconds and 1 second. */
 	ssc->stale_failure_timeout = 10 * hz;
 	ssc->min_switch = hz;
 
 	ath_rate_sysctlattach(sc, ssc);
 	return &ssc->arc;
 }
 
 /*
  * Release the rate-control state handed out by ath_rate_attach().
  * arc is treated as the enclosing sample_softc, which is what gets
  * freed.
  */
 void
 ath_rate_detach(struct ath_ratectrl *arc)
 {
 	free((struct sample_softc *)arc, M_DEVBUF);
 }
Index: head/sys/dev/ath/if_ath.c
===================================================================
--- head/sys/dev/ath/if_ath.c	(revision 283743)
+++ head/sys/dev/ath/if_ath.c	(revision 283744)
@@ -1,7260 +1,7271 @@
 /*-
  * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for the Atheros Wireless LAN controller.
  *
  * This software is derived from work of Atsushi Onoe; his contribution
  * is greatly appreciated.
  */
 
 #include "opt_inet.h"
 #include "opt_ath.h"
 /*
  * This is needed for register operations which are performed
  * by the driver - eg, calls to ath_hal_gettsf32().
  *
  * It's also required for any AH_DEBUG checks in here, eg the
  * module dependencies.
  */
 #include "opt_ah.h"
 #include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/callout.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kthread.h>
 #include <sys/taskqueue.h>
 #include <sys/priv.h>
 #include <sys/module.h>
 #include <sys/ktr.h>
 #include <sys/smp.h>	/* for mp_ncpus */
 
 #include <machine/bus.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_llc.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_regdomain.h>
 #ifdef IEEE80211_SUPPORT_SUPERG
 #include <net80211/ieee80211_superg.h>
 #endif
 #ifdef IEEE80211_SUPPORT_TDMA
 #include <net80211/ieee80211_tdma.h>
 #endif
 
 #include <net/bpf.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #endif
 
 #include <dev/ath/if_athvar.h>
 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
 #include <dev/ath/ath_hal/ah_diagcodes.h>
 
 #include <dev/ath/if_ath_debug.h>
 #include <dev/ath/if_ath_misc.h>
 #include <dev/ath/if_ath_tsf.h>
 #include <dev/ath/if_ath_tx.h>
 #include <dev/ath/if_ath_sysctl.h>
 #include <dev/ath/if_ath_led.h>
 #include <dev/ath/if_ath_keycache.h>
 #include <dev/ath/if_ath_rx.h>
 #include <dev/ath/if_ath_rx_edma.h>
 #include <dev/ath/if_ath_tx_edma.h>
 #include <dev/ath/if_ath_beacon.h>
 #include <dev/ath/if_ath_btcoex.h>
 #include <dev/ath/if_ath_spectral.h>
 #include <dev/ath/if_ath_lna_div.h>
 #include <dev/ath/if_athdfs.h>
 
 #ifdef ATH_TX99_DIAG
 #include <dev/ath/ath_tx99/ath_tx99.h>
 #endif
 
 #ifdef	ATH_DEBUG_ALQ
 #include <dev/ath/if_ath_alq.h>
 #endif
 
 /*
  * Only enable this if you're working on PS-POLL support.
  */
 #define	ATH_SW_PSQ
 
 /*
  * ATH_BCBUF determines the number of vap's that can transmit
  * beacons and also (currently) the number of vap's that can
  * have unique mac addresses/bssid.  When staggering beacons
  * 4 is probably a good max as otherwise the beacons become
  * very closely spaced and there is limited time for cab q traffic
  * to go out.  You can burst beacons instead but that is not good
  * for stations in power save and at some point you really want
  * another radio (and channel).
  *
  * The limit on the number of mac addresses is tied to our use of
  * the U/L bit and tracking addresses in a byte; it would be
  * worthwhile to allow more for applications like proxy sta.
  */
 CTASSERT(ATH_BCBUF <= 8);
 
 static struct ieee80211vap *ath_vap_create(struct ieee80211com *,
 		    const char [IFNAMSIZ], int, enum ieee80211_opmode, int,
 		    const uint8_t [IEEE80211_ADDR_LEN],
 		    const uint8_t [IEEE80211_ADDR_LEN]);
 static void	ath_vap_delete(struct ieee80211vap *);
 static void	ath_init(void *);
 static void	ath_stop_locked(struct ifnet *);
 static void	ath_stop(struct ifnet *);
 static int	ath_reset_vap(struct ieee80211vap *, u_long);
 static int	ath_transmit(struct ifnet *ifp, struct mbuf *m);
 static void	ath_qflush(struct ifnet *ifp);
 static int	ath_media_change(struct ifnet *);
 static void	ath_watchdog(void *);
 static int	ath_ioctl(struct ifnet *, u_long, caddr_t);
 static void	ath_fatal_proc(void *, int);
 static void	ath_bmiss_vap(struct ieee80211vap *);
 static void	ath_bmiss_proc(void *, int);
 static void	ath_key_update_begin(struct ieee80211vap *);
 static void	ath_key_update_end(struct ieee80211vap *);
 static void	ath_update_mcast_hw(struct ath_softc *);
 static void	ath_update_mcast(struct ieee80211com *);
 static void	ath_update_promisc(struct ieee80211com *);
 static void	ath_updateslot(struct ieee80211com *);
 static void	ath_bstuck_proc(void *, int);
 static void	ath_reset_proc(void *, int);
 static int	ath_desc_alloc(struct ath_softc *);
 static void	ath_desc_free(struct ath_softc *);
 static struct ieee80211_node *ath_node_alloc(struct ieee80211vap *,
 			const uint8_t [IEEE80211_ADDR_LEN]);
 static void	ath_node_cleanup(struct ieee80211_node *);
 static void	ath_node_free(struct ieee80211_node *);
 static void	ath_node_getsignal(const struct ieee80211_node *,
 			int8_t *, int8_t *);
 static void	ath_txq_init(struct ath_softc *sc, struct ath_txq *, int);
 static struct ath_txq *ath_txq_setup(struct ath_softc*, int qtype, int subtype);
 static int	ath_tx_setup(struct ath_softc *, int, int);
 static void	ath_tx_cleanupq(struct ath_softc *, struct ath_txq *);
 static void	ath_tx_cleanup(struct ath_softc *);
 static int	ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq,
 		    int dosched);
 static void	ath_tx_proc_q0(void *, int);
 static void	ath_tx_proc_q0123(void *, int);
 static void	ath_tx_proc(void *, int);
 static void	ath_txq_sched_tasklet(void *, int);
 static int	ath_chan_set(struct ath_softc *, struct ieee80211_channel *);
 static void	ath_chan_change(struct ath_softc *, struct ieee80211_channel *);
 static void	ath_scan_start(struct ieee80211com *);
 static void	ath_scan_end(struct ieee80211com *);
 static void	ath_set_channel(struct ieee80211com *);
 #ifdef	ATH_ENABLE_11N
 static void	ath_update_chw(struct ieee80211com *);
 #endif	/* ATH_ENABLE_11N */
 static void	ath_calibrate(void *);
 static int	ath_newstate(struct ieee80211vap *, enum ieee80211_state, int);
 static void	ath_setup_stationkey(struct ieee80211_node *);
 static void	ath_newassoc(struct ieee80211_node *, int);
 static int	ath_setregdomain(struct ieee80211com *,
 		    struct ieee80211_regdomain *, int,
 		    struct ieee80211_channel []);
 static void	ath_getradiocaps(struct ieee80211com *, int, int *,
 		    struct ieee80211_channel []);
 static int	ath_getchannels(struct ath_softc *);
 
 static int	ath_rate_setup(struct ath_softc *, u_int mode);
 static void	ath_setcurmode(struct ath_softc *, enum ieee80211_phymode);
 
 static void	ath_announce(struct ath_softc *);
 
 static void	ath_dfs_tasklet(void *, int);
 static void	ath_node_powersave(struct ieee80211_node *, int);
 static int	ath_node_set_tim(struct ieee80211_node *, int);
 static void	ath_node_recv_pspoll(struct ieee80211_node *, struct mbuf *);
 
 #ifdef IEEE80211_SUPPORT_TDMA
 #include <dev/ath/if_ath_tdma.h>
 #endif
 
 SYSCTL_DECL(_hw_ath);
 
 /* XXX validate sysctl values */
 static	int ath_longcalinterval = 30;		/* long cals every 30 secs */
 SYSCTL_INT(_hw_ath, OID_AUTO, longcal, CTLFLAG_RW, &ath_longcalinterval,
 	    0, "long chip calibration interval (secs)");
 static	int ath_shortcalinterval = 100;		/* short cals every 100 ms */
 SYSCTL_INT(_hw_ath, OID_AUTO, shortcal, CTLFLAG_RW, &ath_shortcalinterval,
 	    0, "short chip calibration interval (msecs)");
 static	int ath_resetcalinterval = 20*60;	/* reset cal state 20 mins */
 SYSCTL_INT(_hw_ath, OID_AUTO, resetcal, CTLFLAG_RW, &ath_resetcalinterval,
 	    0, "reset chip calibration results (secs)");
 static	int ath_anicalinterval = 100;		/* ANI calibration - 100 msec */
 SYSCTL_INT(_hw_ath, OID_AUTO, anical, CTLFLAG_RW, &ath_anicalinterval,
 	    0, "ANI calibration (msecs)");
 
 int ath_rxbuf = ATH_RXBUF;		/* # rx buffers to allocate */
 SYSCTL_INT(_hw_ath, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &ath_rxbuf,
 	    0, "rx buffers allocated");
 int ath_txbuf = ATH_TXBUF;		/* # tx buffers to allocate */
 SYSCTL_INT(_hw_ath, OID_AUTO, txbuf, CTLFLAG_RWTUN, &ath_txbuf,
 	    0, "tx buffers allocated");
 int ath_txbuf_mgmt = ATH_MGMT_TXBUF;	/* # mgmt tx buffers to allocate */
 SYSCTL_INT(_hw_ath, OID_AUTO, txbuf_mgmt, CTLFLAG_RWTUN, &ath_txbuf_mgmt,
 	    0, "tx (mgmt) buffers allocated");
 
 int ath_bstuck_threshold = 4;		/* max missed beacons */
 SYSCTL_INT(_hw_ath, OID_AUTO, bstuck, CTLFLAG_RW, &ath_bstuck_threshold,
 	    0, "max missed beacon xmits before chip reset");
 
 MALLOC_DEFINE(M_ATHDEV, "athdev", "ath driver dma buffers");
 
 /*
  * Choose the TX completion task handler according to which hardware
  * TX queues are set up, and initialise sc_txtask with it.
  */
 void
 ath_legacy_attach_comp_func(struct ath_softc *sc)
 {
 	void (*handler)(void *, int);
 
 	/*
 	 * Special case certain configurations.  The CAB queue is
 	 * handled specially by the handlers, so its bit is masked
 	 * out before the setup mask is examined.
 	 */
 	switch (sc->sc_txqsetup &~ (1<<sc->sc_cabq->axq_qnum)) {
 	case 0x01:
 		handler = ath_tx_proc_q0;
 		break;
 	case 0x0f:
 		handler = ath_tx_proc_q0123;
 		break;
 	default:
 		handler = ath_tx_proc;
 		break;
 	}
 
 	TASK_INIT(&sc->sc_txtask, 0, handler, sc);
 }
 
 /*
  * Set the target power mode.
  *
  * The requested state is always recorded as the target.  It is only
  * pushed to the hardware right away when nobody holds a powersave
  * reference and the hardware is not already in that state; otherwise
  * it is just stored for later.
  */
 void
 _ath_power_setpower(struct ath_softc *sc, int power_state, const char *file, int line)
 {
 	ATH_LOCK_ASSERT(sc);
 
 	sc->sc_target_powerstate = power_state;
 
 	DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n",
 	    __func__,
 	    file,
 	    line,
 	    power_state,
 	    sc->sc_powersave_refcnt);
 
 	/* Deferred: references outstanding, or nothing to change. */
 	if (sc->sc_powersave_refcnt != 0 ||
 	    power_state == sc->sc_cur_powerstate)
 		return;
 
 	sc->sc_cur_powerstate = power_state;
 	ath_hal_setpower(sc->sc_ah, power_state);
 
 	/*
 	 * Only a force-awake NIC gets its self-gen frame state
 	 * programmed here, and only when the wanted self-gen state
 	 * is something other than awake.  For network sleep and
 	 * full sleep the call above leaves self-gen as "sleep".
 	 */
 	if (sc->sc_cur_powerstate != HAL_PM_AWAKE)
 		return;
 	if (sc->sc_target_selfgen_state == HAL_PM_AWAKE)
 		return;
 	ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state);
 }
 
 /*
  * Set the current self-generated frames state.
  *
  * This is separate from the target power mode.  The chip may be
  * awake but the desired state is "sleep", so frames sent to the
  * destination has PWRMGT=1 in the 802.11 header.  The NIC also
  * needs to know to set PWRMGT=1 in self-generated frames.
  *
  * The new state is always recorded; the hardware is only programmed
  * when the NIC is currently force-awake.
  */
 void
 _ath_power_set_selfgen(struct ath_softc *sc, int power_state, const char *file, int line)
 {
 
 	ATH_LOCK_ASSERT(sc);
 
 	/*
 	 * The last value logged is the self-gen state being replaced,
 	 * not a reference count, so label it accordingly.
 	 */
 	DPRINTF(sc, ATH_DEBUG_PWRSAVE,
 	    "%s: (%s:%d) state=%d, prev selfgen state=%d\n",
 	    __func__,
 	    file,
 	    line,
 	    power_state,
 	    sc->sc_target_selfgen_state);
 
 	sc->sc_target_selfgen_state = power_state;
 
 	/*
 	 * If the NIC is force-awake, then set the power state.
 	 * Network-state and full-sleep will already transition it to
 	 * mark self-gen frames as sleeping - and we can't
 	 * guarantee the NIC is awake to program the self-gen frame
 	 * setting anyway.
 	 */
 	if (sc->sc_cur_powerstate == HAL_PM_AWAKE) {
 		ath_hal_setselfgenpower(sc->sc_ah, power_state);
 	}
 }
 
 /*
  * Set the hardware power mode and take a reference.
  *
  * The driver's target power mode is left alone; only the hardware
  * state and the reference count change.
  *
  * XXX it should only ever force the hardware awake; it should
  * never be called to set it asleep.
  */
 void
 _ath_power_set_power_state(struct ath_softc *sc, int power_state, const char *file, int line)
 {
 	ATH_LOCK_ASSERT(sc);
 
 	DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) state=%d, refcnt=%d\n",
 	    __func__,
 	    file,
 	    line,
 	    power_state,
 	    sc->sc_powersave_refcnt);
 
 	/* The reference is taken even if no state change follows. */
 	sc->sc_powersave_refcnt++;
 
 	if (power_state == sc->sc_cur_powerstate)
 		return;
 
 	ath_hal_setpower(sc->sc_ah, power_state);
 	sc->sc_cur_powerstate = power_state;
 
 	/*
 	 * Now awake but the wanted self-gen state is not "awake":
 	 * program the self-gen state as well.
 	 */
 	if (sc->sc_cur_powerstate != HAL_PM_AWAKE)
 		return;
 	if (sc->sc_target_selfgen_state == HAL_PM_AWAKE)
 		return;
 	ath_hal_setselfgenpower(sc->sc_ah, sc->sc_target_selfgen_state);
 }
 
 /*
  * Restore the power save mode to what it once was.
  *
  * Drops one reference; once none remain the hardware is moved to
  * the stored target power state.  Dropping a reference that was
  * never taken is reported and otherwise ignored.
  */
 void
 _ath_power_restore_power_state(struct ath_softc *sc, const char *file, int line)
 {
 
 	ATH_LOCK_ASSERT(sc);
 
 	DPRINTF(sc, ATH_DEBUG_PWRSAVE, "%s: (%s:%d) refcnt=%d, target state=%d\n",
 	    __func__,
 	    file,
 	    line,
 	    sc->sc_powersave_refcnt,
 	    sc->sc_target_powerstate);
 
 	if (sc->sc_powersave_refcnt != 0)
 		sc->sc_powersave_refcnt--;
 	else
 		device_printf(sc->sc_dev, "%s: refcnt=0?\n", __func__);
 
 	/* Last reference gone: catch up with the stored target state. */
 	if (sc->sc_powersave_refcnt == 0) {
 		if (sc->sc_target_powerstate != sc->sc_cur_powerstate) {
 			sc->sc_cur_powerstate = sc->sc_target_powerstate;
 			ath_hal_setpower(sc->sc_ah, sc->sc_target_powerstate);
 		}
 	}
 
 	/*
 	 * This check runs on every call, not just when the count
 	 * reaches zero: an awake NIC whose wanted self-gen state is
 	 * not "awake" gets that self-gen state programmed.
 	 */
 	if (sc->sc_cur_powerstate == HAL_PM_AWAKE) {
 		if (sc->sc_target_selfgen_state != HAL_PM_AWAKE) {
 			ath_hal_setselfgenpower(sc->sc_ah,
 			    sc->sc_target_selfgen_state);
 		}
 	}
 }
 
 /*
  * Configure the initial HAL configuration values based on bus
  * specific parameters.
  *
  * Some PCI IDs and other information may need tweaking.
  *
  * XXX TODO: ath9k and the Atheros HAL only program comm2g_switch_enable
  * if BT antenna diversity isn't enabled.
  *
  * So, let's also figure out how to enable BT diversity for AR9485.
  *
  * sc supplies the sc_pci_devinfo quirk bits that are tested below;
  * ah_config is the HAL configuration that gets filled in.  Only the
  * CUS198/CUS230 and D3/L1 cases actually write to ah_config; the
  * other cases just announce the detected card.
  */
 static void
 ath_setup_hal_config(struct ath_softc *sc, HAL_OPS_CONFIG *ah_config)
 {
 	/* XXX TODO: only for PCI devices? */
 
 	/* CUS198 and CUS230 share one set of HAL overrides. */
 	if (sc->sc_pci_devinfo & (ATH_PCI_CUS198 | ATH_PCI_CUS230)) {
 		ah_config->ath_hal_ext_lna_ctl_gpio = 0x200; /* bit 9 */
 		ah_config->ath_hal_ext_atten_margin_cfg = AH_TRUE;
 		ah_config->ath_hal_min_gainidx = AH_TRUE;
 		ah_config->ath_hal_ant_ctrl_comm2g_switch_enable = 0x000bbb88;
 		/* XXX low_rssi_thresh */
 		/* XXX fast_div_bias */
 		device_printf(sc->sc_dev, "configuring for %s\n",
 		    (sc->sc_pci_devinfo & ATH_PCI_CUS198) ?
 		    "CUS198" : "CUS230");
 	}
 
 	/* The following card types are only reported, not configured. */
 	if (sc->sc_pci_devinfo & ATH_PCI_CUS217)
 		device_printf(sc->sc_dev, "CUS217 card detected\n");
 
 	if (sc->sc_pci_devinfo & ATH_PCI_CUS252)
 		device_printf(sc->sc_dev, "CUS252 card detected\n");
 
 	if (sc->sc_pci_devinfo & ATH_PCI_AR9565_1ANT)
 		device_printf(sc->sc_dev, "WB335 1-ANT card detected\n");
 
 	if (sc->sc_pci_devinfo & ATH_PCI_AR9565_2ANT)
 		device_printf(sc->sc_dev, "WB335 2-ANT card detected\n");
 
 	if (sc->sc_pci_devinfo & ATH_PCI_KILLER)
 		device_printf(sc->sc_dev, "Killer Wireless card detected\n");
 
 	/* Disabled code: not compiled (antenna diversity capability handling). */
 #if 0
         /*
          * Some WB335 cards do not support antenna diversity. Since
          * we use a hardcoded value for AR9565 instead of using the
          * EEPROM/OTP data, remove the combining feature from
          * the HW capabilities bitmap.
          */
         if (sc->sc_pci_devinfo & (ATH9K_PCI_AR9565_1ANT | ATH9K_PCI_AR9565_2ANT)) {
                 if (!(sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV))
                         pCap->hw_caps &= ~ATH9K_HW_CAP_ANT_DIV_COMB;
         }
 
         if (sc->sc_pci_devinfo & ATH9K_PCI_BT_ANT_DIV) {
                 pCap->hw_caps |= ATH9K_HW_CAP_BT_ANT_DIV;
                 device_printf(sc->sc_dev, "Set BT/WLAN RX diversity capability\n");
         }
 #endif
 
         /* Workaround for ASPM D3/L1: override the HAL's PCIe WAEN value. */
         if (sc->sc_pci_devinfo & ATH_PCI_D3_L1_WAR) {
                 ah_config->ath_hal_pcie_waen = 0x0040473b;
                 device_printf(sc->sc_dev, "Enable WAR for ASPM D3/L1\n");
         }
 
 	/* Disabled code: not compiled (PLL powersave quirk). */
 #if 0
         if (sc->sc_pci_devinfo & ATH9K_PCI_NO_PLL_PWRSAVE) {
                 ah->config.no_pll_pwrsave = true;
                 device_printf(sc->sc_dev, "Disable PLL PowerSave\n");
         }
 #endif
 
 }
 
 /*
  * Attempt to fetch the MAC address from the kernel environment.
  *
  * The variable looked up is "hint.<name>.<unit>.macaddr" and must
  * hold six hexadecimal octets, each separated by one character.
  *
  * Returns 0, macaddr in macaddr if successful; -1 otherwise.  macaddr
  * is left untouched unless the whole address parsed and every octet
  * fits in a byte.
  */
 static int
 ath_fetch_mac_kenv(struct ath_softc *sc, uint8_t *macaddr)
 {
 	char devid_str[32];
 	char *local_macstr;
 	uint32_t tmpmac[ETHER_ADDR_LEN];
 	int count;
 	int i;
 
 	/*
 	 * Fetch from the kenv rather than using hints.
 	 *
 	 * Hints would be nice but the transition to dynamic
 	 * hints/kenv doesn't happen early enough for this
 	 * to work reliably (eg on anything embedded.)
 	 */
 	snprintf(devid_str, sizeof(devid_str), "hint.%s.%d.macaddr",
 	    device_get_name(sc->sc_dev),
 	    device_get_unit(sc->sc_dev));
 
 	local_macstr = kern_getenv(devid_str);
 	if (local_macstr == NULL)
 		return (-1);
 
 	/* Have a MAC address; should use it */
 	device_printf(sc->sc_dev,
 	    "Overriding MAC address from environment: '%s'\n",
 	    local_macstr);
 
 	/* Extract out the MAC address */
 	count = sscanf(local_macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x",
 	    &tmpmac[0], &tmpmac[1],
 	    &tmpmac[2], &tmpmac[3],
 	    &tmpmac[4], &tmpmac[5]);
 
 	/* The string is not needed once it has been parsed. */
 	freeenv(local_macstr);
 
 	/* All six octets must have been converted. */
 	if (count != 6)
 		return (-1);
 
 	/*
 	 * %x accepts values wider than a byte; reject those rather
 	 * than silently truncating them into the address.
 	 */
 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
 		if (tmpmac[i] > 0xff)
 			return (-1);
 	}
 
 	for (i = 0; i < ETHER_ADDR_LEN; i++)
 		macaddr[i] = tmpmac[i];
 
 	return (0);
 }
 
 #define	HAL_MODE_HT20 (HAL_MODE_11NG_HT20 | HAL_MODE_11NA_HT20)
 #define	HAL_MODE_HT40 \
 	(HAL_MODE_11NG_HT40PLUS | HAL_MODE_11NG_HT40MINUS | \
 	HAL_MODE_11NA_HT40PLUS | HAL_MODE_11NA_HT40MINUS)
 /*
  * Bus-independent attach routine.
  *
  * Allocates the ifnet, attaches the HAL for the given device id, sets up
  * descriptors, hardware transmit queues, the rate control module and the
  * DFS/spectral/btcoex/LNA-diversity modules, fills in the net80211
  * capability and method tables, calls ieee80211_ifattach() and finally
  * puts the chip into full sleep.
  *
  * devid: device id handed to ath_hal_attach() and used for the soft-LED
  *        auto-enable check.
  * sc:    driver softc; sc_dev, sc_st, sc_sh and sc_eepromdata are read here,
  *        so they are expected to be filled in by the caller.
  *
  * Returns 0 on success.  On failure returns an errno value (ENOSPC, ENXIO,
  * EIO, or whatever a setup helper returned) after detaching the HAL (if it
  * was attached), freeing the ifnet (if it was allocated) and setting
  * sc_invalid.
  */
 int
 ath_attach(u_int16_t devid, struct ath_softc *sc)
 {
 	struct ifnet *ifp;
 	struct ieee80211com *ic;
 	struct ath_hal *ah = NULL;
 	HAL_STATUS status;
 	int error = 0, i;
 	u_int wmodes;
 	uint8_t macaddr[IEEE80211_ADDR_LEN];
 	int rx_chainmask, tx_chainmask;
 	HAL_OPS_CONFIG ah_config;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid);
 
 	CURVNET_SET(vnet0);
 	ifp = sc->sc_ifp = if_alloc(IFT_IEEE80211);
 	if (ifp == NULL) {
 		device_printf(sc->sc_dev, "can not if_alloc()\n");
 		error = ENOSPC;
 		CURVNET_RESTORE();
 		goto bad;
 	}
 	ic = ifp->if_l2com;
 	ic->ic_softc = sc;
 	ic->ic_name = device_get_nameunit(sc->sc_dev);
 
-	/* set these up early for if_printf use */
 	if_initname(ifp, device_get_name(sc->sc_dev),
 		device_get_unit(sc->sc_dev));
 	CURVNET_RESTORE();
 
 	/*
 	 * Configure the initial configuration data.
 	 *
 	 * This is stuff that may be needed early during attach
 	 * rather than done via configuration calls later.
 	 */
 	bzero(&ah_config, sizeof(ah_config));
 	ath_setup_hal_config(sc, &ah_config);
 
 	ah = ath_hal_attach(devid, sc, sc->sc_st, sc->sc_sh,
 	    sc->sc_eepromdata, &ah_config, &status);
 	if (ah == NULL) {
-		if_printf(ifp, "unable to attach hardware; HAL status %u\n",
-			status);
+		device_printf(sc->sc_dev,
+		    "unable to attach hardware; HAL status %u\n", status);
 		error = ENXIO;
 		goto bad;
 	}
 	sc->sc_ah = ah;
 	sc->sc_invalid = 0;	/* ready to go, enable interrupt handling */
 #ifdef	ATH_DEBUG
 	sc->sc_debug = ath_debug;
 #endif
 
 	/*
 	 * Setup the DMA/EDMA functions based on the current
 	 * hardware support.
 	 *
 	 * This is required before the descriptors are allocated.
 	 */
 	if (ath_hal_hasedma(sc->sc_ah)) {
 		sc->sc_isedma = 1;
 		ath_recv_setup_edma(sc);
 		ath_xmit_setup_edma(sc);
 	} else {
 		ath_recv_setup_legacy(sc);
 		ath_xmit_setup_legacy(sc);
 	}
 
 	if (ath_hal_hasmybeacon(sc->sc_ah)) {
 		sc->sc_do_mybeacon = 1;
 	}
 
 	/*
 	 * Check if the MAC has multi-rate retry support.
 	 * We do this by trying to setup a fake extended
 	 * descriptor.  MAC's that don't have support will
 	 * return false w/o doing anything.  MAC's that do
 	 * support it will return true w/o doing anything.
 	 */
 	sc->sc_mrretry = ath_hal_setupxtxdesc(ah, NULL, 0,0, 0,0, 0,0);
 
 	/*
 	 * Check if the device has hardware counters for PHY
 	 * errors.  If so we need to enable the MIB interrupt
 	 * so we can act on stat triggers.
 	 */
 	if (ath_hal_hwphycounters(ah))
 		sc->sc_needmib = 1;
 
 	/*
 	 * Get the hardware key cache size.
 	 */
 	sc->sc_keymax = ath_hal_keycachesize(ah);
 	if (sc->sc_keymax > ATH_KEYMAX) {
-		if_printf(ifp, "Warning, using only %u of %u key cache slots\n",
-			ATH_KEYMAX, sc->sc_keymax);
+		device_printf(sc->sc_dev,
+		    "Warning, using only %u of %u key cache slots\n",
+		    ATH_KEYMAX, sc->sc_keymax);
 		sc->sc_keymax = ATH_KEYMAX;
 	}
 	/*
 	 * Reset the key cache since some parts do not
 	 * reset the contents on initial power up.
 	 */
 	for (i = 0; i < sc->sc_keymax; i++)
 		ath_hal_keyreset(ah, i);
 
 	/*
 	 * Collect the default channel list.
 	 */
 	error = ath_getchannels(sc);
 	if (error != 0)
 		goto bad;
 
 	/*
 	 * Setup rate tables for all potential media types.
 	 */
 	ath_rate_setup(sc, IEEE80211_MODE_11A);
 	ath_rate_setup(sc, IEEE80211_MODE_11B);
 	ath_rate_setup(sc, IEEE80211_MODE_11G);
 	ath_rate_setup(sc, IEEE80211_MODE_TURBO_A);
 	ath_rate_setup(sc, IEEE80211_MODE_TURBO_G);
 	ath_rate_setup(sc, IEEE80211_MODE_STURBO_A);
 	ath_rate_setup(sc, IEEE80211_MODE_11NA);
 	ath_rate_setup(sc, IEEE80211_MODE_11NG);
 	ath_rate_setup(sc, IEEE80211_MODE_HALF);
 	ath_rate_setup(sc, IEEE80211_MODE_QUARTER);
 
 	/* NB: setup here so ath_rate_update is happy */
 	ath_setcurmode(sc, IEEE80211_MODE_11A);
 
 	/*
 	 * Allocate TX descriptors and populate the lists.
 	 */
 	error = ath_desc_alloc(sc);
 	if (error != 0) {
-		if_printf(ifp, "failed to allocate TX descriptors: %d\n",
-		    error);
+		device_printf(sc->sc_dev,
+		    "failed to allocate TX descriptors: %d\n", error);
 		goto bad;
 	}
 	/*
 	 * NB: a failure here is reported with the same message text
 	 * as the ath_desc_alloc() failure above.
 	 */
 	error = ath_txdma_setup(sc);
 	if (error != 0) {
-		if_printf(ifp, "failed to allocate TX descriptors: %d\n",
-		    error);
+		device_printf(sc->sc_dev,
+		    "failed to allocate TX descriptors: %d\n", error);
 		goto bad;
 	}
 
 	/*
 	 * Allocate RX descriptors and populate the lists.
 	 */
 	error = ath_rxdma_setup(sc);
 	if (error != 0) {
-		if_printf(ifp, "failed to allocate RX descriptors: %d\n",
-		    error);
+		device_printf(sc->sc_dev,
+		     "failed to allocate RX descriptors: %d\n", error);
 		goto bad;
 	}
 
 	callout_init_mtx(&sc->sc_cal_ch, &sc->sc_mtx, 0);
 	callout_init_mtx(&sc->sc_wd_ch, &sc->sc_mtx, 0);
 
 	ATH_TXBUF_LOCK_INIT(sc);
 
 	sc->sc_tq = taskqueue_create("ath_taskq", M_NOWAIT,
 		taskqueue_thread_enqueue, &sc->sc_tq);
 	taskqueue_start_threads(&sc->sc_tq, 1, PI_NET,
 		"%s taskq", ifp->if_xname);
 
 	TASK_INIT(&sc->sc_rxtask, 0, sc->sc_rx.recv_tasklet, sc);
 	TASK_INIT(&sc->sc_bmisstask, 0, ath_bmiss_proc, sc);
 	TASK_INIT(&sc->sc_bstucktask,0, ath_bstuck_proc, sc);
 	TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc);
 	TASK_INIT(&sc->sc_txqtask, 0, ath_txq_sched_tasklet, sc);
 	TASK_INIT(&sc->sc_fataltask, 0, ath_fatal_proc, sc);
 
 	/*
 	 * Allocate hardware transmit queues: one queue for
 	 * beacon frames and one data queue for each QoS
 	 * priority.  Note that the hal handles resetting
 	 * these queues at the needed time.
 	 *
 	 * XXX PS-Poll
 	 */
 	sc->sc_bhalq = ath_beaconq_setup(sc);
 	if (sc->sc_bhalq == (u_int) -1) {
-		if_printf(ifp, "unable to setup a beacon xmit queue!\n");
+		device_printf(sc->sc_dev,
+		    "unable to setup a beacon xmit queue!\n");
 		error = EIO;
 		goto bad2;
 	}
 	sc->sc_cabq = ath_txq_setup(sc, HAL_TX_QUEUE_CAB, 0);
 	if (sc->sc_cabq == NULL) {
-		if_printf(ifp, "unable to setup CAB xmit queue!\n");
+		device_printf(sc->sc_dev, "unable to setup CAB xmit queue!\n");
 		error = EIO;
 		goto bad2;
 	}
 	/* NB: ensure BK queue is the lowest priority h/w queue */
 	if (!ath_tx_setup(sc, WME_AC_BK, HAL_WME_AC_BK)) {
-		if_printf(ifp, "unable to setup xmit queue for %s traffic!\n",
-			ieee80211_wme_acnames[WME_AC_BK]);
+		device_printf(sc->sc_dev,
+		    "unable to setup xmit queue for %s traffic!\n",
+		    ieee80211_wme_acnames[WME_AC_BK]);
 		error = EIO;
 		goto bad2;
 	}
 	if (!ath_tx_setup(sc, WME_AC_BE, HAL_WME_AC_BE) ||
 	    !ath_tx_setup(sc, WME_AC_VI, HAL_WME_AC_VI) ||
 	    !ath_tx_setup(sc, WME_AC_VO, HAL_WME_AC_VO)) {
 		/*
 		 * Not enough hardware tx queues to properly do WME;
 		 * just punt and assign them all to the same h/w queue.
 		 * We could do a better job of this if, for example,
 		 * we allocate queues when we switch from station to
 		 * AP mode.
 		 */
 		if (sc->sc_ac2q[WME_AC_VI] != NULL)
 			ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]);
 		if (sc->sc_ac2q[WME_AC_BE] != NULL)
 			ath_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]);
 		sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK];
 		sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK];
 		sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK];
 	}
 
 	/*
 	 * Attach the TX completion function.
 	 *
 	 * The non-EDMA chips may have some special case optimisations;
 	 * this method gives everyone a chance to attach cleanly.
 	 */
 	sc->sc_tx.xmit_attach_comp_func(sc);
 
 	/*
 	 * Setup rate control.  Some rate control modules
 	 * call back to change the antenna state so expose
 	 * the necessary entry points.
 	 * XXX maybe belongs in struct ath_ratectrl?
 	 */
 	sc->sc_setdefantenna = ath_setdefantenna;
 	sc->sc_rc = ath_rate_attach(sc);
 	if (sc->sc_rc == NULL) {
 		error = EIO;
 		goto bad2;
 	}
 
 	/* Attach DFS module */
 	if (! ath_dfs_attach(sc)) {
 		device_printf(sc->sc_dev,
 		    "%s: unable to attach DFS\n", __func__);
 		error = EIO;
 		goto bad2;
 	}
 
 	/* Attach spectral module */
 	if (ath_spectral_attach(sc) < 0) {
 		device_printf(sc->sc_dev,
 		    "%s: unable to attach spectral\n", __func__);
 		error = EIO;
 		goto bad2;
 	}
 
 	/* Attach bluetooth coexistence module */
 	if (ath_btcoex_attach(sc) < 0) {
 		device_printf(sc->sc_dev,
 		    "%s: unable to attach bluetooth coexistence\n", __func__);
 		error = EIO;
 		goto bad2;
 	}
 
 	/* Attach LNA diversity module */
 	if (ath_lna_div_attach(sc) < 0) {
 		device_printf(sc->sc_dev,
 		    "%s: unable to attach LNA diversity\n", __func__);
 		error = EIO;
 		goto bad2;
 	}
 
 	/* Start DFS processing tasklet */
 	TASK_INIT(&sc->sc_dfstask, 0, ath_dfs_tasklet, sc);
 
 	/* Configure LED state */
 	sc->sc_blinking = 0;
 	sc->sc_ledstate = 1;
 	sc->sc_ledon = 0;			/* low true */
 	sc->sc_ledidle = (2700*hz)/1000;	/* 2.7sec */
 	callout_init(&sc->sc_ledtimer, 1);
 
 	/*
 	 * Don't setup hardware-based blinking.
 	 *
 	 * Although some NICs may have this configured in the
 	 * default reset register values, the user may wish
 	 * to alter which pins have which function.
 	 *
 	 * The reference driver attaches the MAC network LED to GPIO1 and
 	 * the MAC power LED to GPIO2.  However, the DWA-552 cardbus
 	 * NIC has these reversed.
 	 */
 	sc->sc_hardled = (1 == 0);
 	sc->sc_led_net_pin = -1;
 	sc->sc_led_pwr_pin = -1;
 	/*
 	 * Auto-enable soft led processing for IBM cards and for
 	 * 5211 minipci cards.  Users can also manually enable/disable
 	 * support with a sysctl.
 	 */
 	sc->sc_softled = (devid == AR5212_DEVID_IBM || devid == AR5211_DEVID);
 	ath_led_config(sc);
 	ath_hal_setledstate(ah, HAL_LED_INIT);
 
 	ifp->if_softc = sc;
 	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
 	ifp->if_transmit = ath_transmit;
 	ifp->if_qflush = ath_qflush;
 	ifp->if_ioctl = ath_ioctl;
 	ifp->if_init = ath_init;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
 	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	ic->ic_ifp = ifp;
 	/* XXX not right but it's not used anywhere important */
 	ic->ic_phytype = IEEE80211_T_OFDM;
 	ic->ic_opmode = IEEE80211_M_STA;
 	ic->ic_caps =
 		  IEEE80211_C_STA		/* station mode */
 		| IEEE80211_C_IBSS		/* ibss, nee adhoc, mode */
 		| IEEE80211_C_HOSTAP		/* hostap mode */
 		| IEEE80211_C_MONITOR		/* monitor mode */
 		| IEEE80211_C_AHDEMO		/* adhoc demo mode */
 		| IEEE80211_C_WDS		/* 4-address traffic works */
 		| IEEE80211_C_MBSS		/* mesh point link mode */
 		| IEEE80211_C_SHPREAMBLE	/* short preamble supported */
 		| IEEE80211_C_SHSLOT		/* short slot time supported */
 		| IEEE80211_C_WPA		/* capable of WPA1+WPA2 */
 #ifndef	ATH_ENABLE_11N
 		| IEEE80211_C_BGSCAN		/* capable of bg scanning */
 #endif
 		| IEEE80211_C_TXFRAG		/* handle tx frags */
 #ifdef	ATH_ENABLE_DFS
 		| IEEE80211_C_DFS		/* Enable radar detection */
 #endif
 		| IEEE80211_C_PMGT		/* Station side power mgmt */
 		| IEEE80211_C_SWSLEEP
 		;
 	/*
 	 * Query the hal to figure out h/w crypto support.
 	 */
 	if (ath_hal_ciphersupported(ah, HAL_CIPHER_WEP))
 		ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP;
 	if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_OCB))
 		ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_OCB;
 	if (ath_hal_ciphersupported(ah, HAL_CIPHER_AES_CCM))
 		ic->ic_cryptocaps |= IEEE80211_CRYPTO_AES_CCM;
 	if (ath_hal_ciphersupported(ah, HAL_CIPHER_CKIP))
 		ic->ic_cryptocaps |= IEEE80211_CRYPTO_CKIP;
 	if (ath_hal_ciphersupported(ah, HAL_CIPHER_TKIP)) {
 		ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIP;
 		/*
 		 * Check if h/w does the MIC and/or whether the
 		 * separate key cache entries are required to
 		 * handle both tx+rx MIC keys.
 		 */
 		if (ath_hal_ciphersupported(ah, HAL_CIPHER_MIC))
 			ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC;
 		/*
 		 * If the h/w supports storing tx+rx MIC keys
 		 * in one cache slot automatically enable use.
 		 */
 		if (ath_hal_hastkipsplit(ah) ||
 		    !ath_hal_settkipsplit(ah, AH_FALSE))
 			sc->sc_splitmic = 1;
 		/*
 		 * If the h/w can do TKIP MIC together with WME then
 		 * we use it; otherwise we force the MIC to be done
 		 * in software by the net80211 layer.
 		 */
 		if (ath_hal_haswmetkipmic(ah))
 			sc->sc_wmetkipmic = 1;
 	}
 	sc->sc_hasclrkey = ath_hal_ciphersupported(ah, HAL_CIPHER_CLR);
 	/*
 	 * Check for multicast key search support.
 	 */
 	if (ath_hal_hasmcastkeysearch(sc->sc_ah) &&
 	    !ath_hal_getmcastkeysearch(sc->sc_ah)) {
 		ath_hal_setmcastkeysearch(sc->sc_ah, 1);
 	}
 	sc->sc_mcastkey = ath_hal_getmcastkeysearch(ah);
 	/*
 	 * Mark key cache slots associated with global keys
 	 * as in use.  If we knew TKIP was not to be used we
 	 * could leave the +32, +64, and +32+64 slots free.
 	 */
 	for (i = 0; i < IEEE80211_WEP_NKID; i++) {
 		setbit(sc->sc_keymap, i);
 		setbit(sc->sc_keymap, i+64);
 		if (sc->sc_splitmic) {
 			setbit(sc->sc_keymap, i+32);
 			setbit(sc->sc_keymap, i+32+64);
 		}
 	}
 	/*
 	 * TPC support can be done either with a global cap or
 	 * per-packet support.  The latter is not available on
 	 * all parts.  We're a bit pedantic here as all parts
 	 * support a global cap.
 	 */
 	if (ath_hal_hastpc(ah) || ath_hal_hastxpowlimit(ah))
 		ic->ic_caps |= IEEE80211_C_TXPMGT;
 
 	/*
 	 * Mark WME capability only if we have sufficient
 	 * hardware queues to do proper priority scheduling.
 	 */
 	if (sc->sc_ac2q[WME_AC_BE] != sc->sc_ac2q[WME_AC_BK])
 		ic->ic_caps |= IEEE80211_C_WME;
 	/*
 	 * Check for misc other capabilities.
 	 */
 	if (ath_hal_hasbursting(ah))
 		ic->ic_caps |= IEEE80211_C_BURST;
 	sc->sc_hasbmask = ath_hal_hasbssidmask(ah);
 	sc->sc_hasbmatch = ath_hal_hasbssidmatch(ah);
 	sc->sc_hastsfadd = ath_hal_hastsfadjust(ah);
 	sc->sc_rxslink = ath_hal_self_linked_final_rxdesc(ah);
 	sc->sc_rxtsf32 = ath_hal_has_long_rxdesc_tsf(ah);
 	sc->sc_hasenforcetxop = ath_hal_hasenforcetxop(ah);
 	sc->sc_rx_lnamixer = ath_hal_hasrxlnamixer(ah);
 	sc->sc_hasdivcomb = ath_hal_hasdivantcomb(ah);
 
 	if (ath_hal_hasfastframes(ah))
 		ic->ic_caps |= IEEE80211_C_FF;
 	wmodes = ath_hal_getwirelessmodes(ah);
 	if (wmodes & (HAL_MODE_108G|HAL_MODE_TURBO))
 		ic->ic_caps |= IEEE80211_C_TURBOP;
 #ifdef IEEE80211_SUPPORT_TDMA
 	if (ath_hal_macversion(ah) > 0x78) {
 		ic->ic_caps |= IEEE80211_C_TDMA; /* capable of TDMA */
 		ic->ic_tdma_update = ath_tdma_update;
 	}
 #endif
 
 	/*
 	 * TODO: enforce that at least this many frames are available
 	 * in the txbuf list before allowing data frames (raw or
 	 * otherwise) to be transmitted.
 	 */
 	sc->sc_txq_data_minfree = 10;
 	/*
 	 * Leave this as default to maintain legacy behaviour.
 	 * Shortening the cabq/mcastq may end up causing some
 	 * undesirable behaviour.
 	 */
 	sc->sc_txq_mcastq_maxdepth = ath_txbuf;
 
 	/*
 	 * How deep can the node software TX queue get whilst it's asleep.
 	 */
 	sc->sc_txq_node_psq_maxdepth = 16;
 
 	/*
 	 * Default the maximum queue depth for a given node
 	 * to 1/4'th the TX buffers, or 64, whichever
 	 * is larger.
 	 */
 	sc->sc_txq_node_maxdepth = MAX(64, ath_txbuf / 4);
 
 	/* Enable CABQ by default */
 	sc->sc_cabq_enable = 1;
 
 	/*
 	 * Allow the TX and RX chainmasks to be overridden by
 	 * environment variables and/or device.hints.
 	 *
 	 * This must be done early - before the hardware is
 	 * calibrated or before the 802.11n stream calculation
 	 * is done.
 	 */
 	if (resource_int_value(device_get_name(sc->sc_dev),
 	    device_get_unit(sc->sc_dev), "rx_chainmask",
 	    &rx_chainmask) == 0) {
 		device_printf(sc->sc_dev, "Setting RX chainmask to 0x%x\n",
 		    rx_chainmask);
 		(void) ath_hal_setrxchainmask(sc->sc_ah, rx_chainmask);
 	}
 	if (resource_int_value(device_get_name(sc->sc_dev),
 	    device_get_unit(sc->sc_dev), "tx_chainmask",
 	    &tx_chainmask) == 0) {
 		device_printf(sc->sc_dev, "Setting TX chainmask to 0x%x\n",
 		    tx_chainmask);
 		(void) ath_hal_settxchainmask(sc->sc_ah, tx_chainmask);
 	}
 
 	/*
 	 * Query the TX/RX chainmask configuration.
 	 *
 	 * This is only relevant for 11n devices.
 	 */
 	ath_hal_getrxchainmask(ah, &sc->sc_rxchainmask);
 	ath_hal_gettxchainmask(ah, &sc->sc_txchainmask);
 
 	/*
 	 * Disable MRR with protected frames by default.
 	 * Only 802.11n series NICs can handle this.
 	 */
 	sc->sc_mrrprot = 0;	/* XXX should be a capability */
 
 	/*
 	 * Query the enterprise mode information from the HAL.
 	 */
 	if (ath_hal_getcapability(ah, HAL_CAP_ENTERPRISE_MODE, 0,
 	    &sc->sc_ent_cfg) == HAL_OK)
 		sc->sc_use_ent = 1;
 
 #ifdef	ATH_ENABLE_11N
 	/*
 	 * Query HT capabilities
 	 */
 	if (ath_hal_getcapability(ah, HAL_CAP_HT, 0, NULL) == HAL_OK &&
 	    (wmodes & (HAL_MODE_HT20 | HAL_MODE_HT40))) {
 		uint32_t rxs, txs;
 
 		device_printf(sc->sc_dev, "[HT] enabling HT modes\n");
 
 		sc->sc_mrrprot = 1;	/* XXX should be a capability */
 
 		ic->ic_htcaps = IEEE80211_HTC_HT	/* HT operation */
 			    | IEEE80211_HTC_AMPDU	/* A-MPDU tx/rx */
 			    | IEEE80211_HTC_AMSDU	/* A-MSDU tx/rx */
 			    | IEEE80211_HTCAP_MAXAMSDU_3839
 			    				/* max A-MSDU length */
 			    | IEEE80211_HTCAP_SMPS_OFF;	/* SM power save off */
 			;
 
 		/*
 		 * Enable short-GI for HT20 only if the hardware
 		 * advertises support.
 		 * Notably, anything earlier than the AR9287 doesn't.
 		 */
 		if ((ath_hal_getcapability(ah,
 		    HAL_CAP_HT20_SGI, 0, NULL) == HAL_OK) &&
 		    (wmodes & HAL_MODE_HT20)) {
 			device_printf(sc->sc_dev,
 			    "[HT] enabling short-GI in 20MHz mode\n");
 			ic->ic_htcaps |= IEEE80211_HTCAP_SHORTGI20;
 		}
 
 		if (wmodes & HAL_MODE_HT40)
 			ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40
 			    |  IEEE80211_HTCAP_SHORTGI40;
 
 		/*
 		 * TX/RX streams need to be taken into account when
 		 * negotiating which MCS rates it'll receive and
 		 * what MCS rates are available for TX.
 		 */
 		(void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 0, &txs);
 		(void) ath_hal_getcapability(ah, HAL_CAP_STREAMS, 1, &rxs);
 		ic->ic_txstream = txs;
 		ic->ic_rxstream = rxs;
 
 		/*
 		 * Setup TX and RX STBC based on what the HAL allows and
 		 * the currently configured chainmask set.
 		 * Ie - don't enable STBC TX if only one chain is enabled.
 		 * STBC RX is fine on a single RX chain; it just won't
 		 * provide any real benefit.
 		 */
 		if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0,
 		    NULL) == HAL_OK) {
 			sc->sc_rx_stbc = 1;
 			device_printf(sc->sc_dev,
 			    "[HT] 1 stream STBC receive enabled\n");
 			ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM;
 		}
 		if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0,
 		    NULL) == HAL_OK) {
 			sc->sc_tx_stbc = 1;
 			device_printf(sc->sc_dev,
 			    "[HT] 1 stream STBC transmit enabled\n");
 			ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC;
 		}
 
 		(void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1,
 		    &sc->sc_rts_aggr_limit);
 		if (sc->sc_rts_aggr_limit != (64 * 1024))
 			device_printf(sc->sc_dev,
 			    "[HT] RTS aggregates limited to %d KiB\n",
 			    sc->sc_rts_aggr_limit / 1024);
 
 		device_printf(sc->sc_dev,
 		    "[HT] %d RX streams; %d TX streams\n", rxs, txs);
 	}
 #endif
 
 	/*
 	 * Initial aggregation settings.
 	 */
 	sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH;
 	sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH;
 	sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW;
 	sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH;
 	sc->sc_aggr_limit = ATH_AGGR_MAXSIZE;
 	sc->sc_delim_min_pad = 0;
 
 	/*
 	 * Check if the hardware requires PCI register serialisation.
 	 * Some of the Owl based MACs require this.
 	 */
 	if (mp_ncpus > 1 &&
 	    ath_hal_getcapability(ah, HAL_CAP_SERIALISE_WAR,
 	     0, NULL) == HAL_OK) {
 		sc->sc_ah->ah_config.ah_serialise_reg_war = 1;
 		device_printf(sc->sc_dev,
 		    "Enabling register serialisation\n");
 	}
 
 	/*
 	 * Initialise the deferred completed RX buffer list.
 	 */
 	TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_HP]);
 	TAILQ_INIT(&sc->sc_rx_rxlist[HAL_RX_QUEUE_LP]);
 
 	/*
 	 * Indicate we need the 802.11 header padded to a
 	 * 32-bit boundary for 4-address and QoS frames.
 	 */
 	ic->ic_flags |= IEEE80211_F_DATAPAD;
 
 	/*
 	 * Query the hal about antenna support.
 	 */
 	sc->sc_defant = ath_hal_getdefantenna(ah);
 
 	/*
 	 * Not all chips have the VEOL support we want to
 	 * use with IBSS beacons; check here for it.
 	 */
 	sc->sc_hasveol = ath_hal_hasveol(ah);
 
 	/* get mac address from kenv first, then hardware */
 	if (ath_fetch_mac_kenv(sc, macaddr) == 0) {
 		/* Tell the HAL now about the new MAC */
 		ath_hal_setmac(ah, macaddr);
 	} else {
 		ath_hal_getmac(ah, macaddr);
 	}
 
 	if (sc->sc_hasbmask)
 		ath_hal_getbssidmask(ah, sc->sc_hwbssidmask);
 
 	/* NB: used to size node table key mapping array */
 	ic->ic_max_keyix = sc->sc_keymax;
 	/* call MI attach routine. */
 	ieee80211_ifattach(ic, macaddr);
 	ic->ic_setregdomain = ath_setregdomain;
 	ic->ic_getradiocaps = ath_getradiocaps;
 	sc->sc_opmode = HAL_M_STA;
 
 	/* override default methods */
 	ic->ic_newassoc = ath_newassoc;
 	ic->ic_updateslot = ath_updateslot;
 	ic->ic_wme.wme_update = ath_wme_update;
 	ic->ic_vap_create = ath_vap_create;
 	ic->ic_vap_delete = ath_vap_delete;
 	ic->ic_raw_xmit = ath_raw_xmit;
 	ic->ic_update_mcast = ath_update_mcast;
 	ic->ic_update_promisc = ath_update_promisc;
 	ic->ic_node_alloc = ath_node_alloc;
 	sc->sc_node_free = ic->ic_node_free;
 	ic->ic_node_free = ath_node_free;
 	sc->sc_node_cleanup = ic->ic_node_cleanup;
 	ic->ic_node_cleanup = ath_node_cleanup;
 	ic->ic_node_getsignal = ath_node_getsignal;
 	ic->ic_scan_start = ath_scan_start;
 	ic->ic_scan_end = ath_scan_end;
 	ic->ic_set_channel = ath_set_channel;
 #ifdef	ATH_ENABLE_11N
 	/* 802.11n specific - but just override anyway */
 	sc->sc_addba_request = ic->ic_addba_request;
 	sc->sc_addba_response = ic->ic_addba_response;
 	sc->sc_addba_stop = ic->ic_addba_stop;
 	sc->sc_bar_response = ic->ic_bar_response;
 	sc->sc_addba_response_timeout = ic->ic_addba_response_timeout;
 
 	ic->ic_addba_request = ath_addba_request;
 	ic->ic_addba_response = ath_addba_response;
 	ic->ic_addba_response_timeout = ath_addba_response_timeout;
 	ic->ic_addba_stop = ath_addba_stop;
 	ic->ic_bar_response = ath_bar_response;
 
 	ic->ic_update_chw = ath_update_chw;
 #endif	/* ATH_ENABLE_11N */
 
 #ifdef	ATH_ENABLE_RADIOTAP_VENDOR_EXT
 	/*
 	 * There's one vendor bitmap entry in the RX radiotap
 	 * header; make sure that's taken into account.
 	 */
 	ieee80211_radiotap_attachv(ic,
 	    &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), 0,
 		ATH_TX_RADIOTAP_PRESENT,
 	    &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), 1,
 		ATH_RX_RADIOTAP_PRESENT);
 #else
 	/*
 	 * No vendor bitmap/extensions are present.
 	 */
 	ieee80211_radiotap_attach(ic,
 	    &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th),
 		ATH_TX_RADIOTAP_PRESENT,
 	    &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th),
 		ATH_RX_RADIOTAP_PRESENT);
 #endif	/* ATH_ENABLE_RADIOTAP_VENDOR_EXT */
 
 	/*
 	 * Setup the ALQ logging if required
 	 */
 #ifdef	ATH_DEBUG_ALQ
 	if_ath_alq_init(&sc->sc_alq, device_get_nameunit(sc->sc_dev));
 	if_ath_alq_setcfg(&sc->sc_alq,
 	    sc->sc_ah->ah_macVersion,
 	    sc->sc_ah->ah_macRev,
 	    sc->sc_ah->ah_phyRev,
 	    sc->sc_ah->ah_magic);
 #endif
 
 	/*
 	 * Setup dynamic sysctl's now that country code and
 	 * regdomain are available from the hal.
 	 */
 	ath_sysctlattach(sc);
 	ath_sysctl_stats_attach(sc);
 	ath_sysctl_hal_attach(sc);
 
 	if (bootverbose)
 		ieee80211_announce(ic);
 	ath_announce(sc);
 
 	/*
 	 * Put it to sleep for now.
 	 */
 	ATH_LOCK(sc);
 	ath_power_setpower(sc, HAL_PM_FULL_SLEEP);
 	ATH_UNLOCK(sc);
 
 	return 0;
 	/*
 	 * Error unwind.  bad2 is used once the TX/RX DMA state has been
 	 * set up and tears that down before falling into bad; bad only
 	 * detaches the HAL and releases the ifnet.
 	 */
 bad2:
 	ath_tx_cleanup(sc);
 	ath_desc_free(sc);
 	ath_txdma_teardown(sc);
 	ath_rxdma_teardown(sc);
 bad:
 	if (ah)
 		ath_hal_detach(ah);
 
 	/*
 	 * To work around scoping issues with CURVNET_SET/CURVNET_RESTORE..
 	 */
 	if (ifp != NULL && ifp->if_vnet) {
 		CURVNET_SET(ifp->if_vnet);
 		if_free(ifp);
 		CURVNET_RESTORE();
 	} else if (ifp != NULL)
 		if_free(ifp);
 	/* mark the softc unusable so nothing touches the detached hardware */
 	sc->sc_invalid = 1;
 	return error;
 }
 
 /*
  * Bus-independent detach routine: wake the chip, stop it, detach from
  * net80211, then release driver resources and finally the HAL and ifnet.
  * Always returns 0.
  */
 int
 ath_detach(struct ath_softc *sc)
 {
 	struct ifnet *netif = sc->sc_ifp;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: if_flags %x\n",
 	    __func__, netif->if_flags);
 
 	/*
 	 * NB: the teardown sequence below is order-sensitive:
 	 *  1. stop the chip, so no further interrupts fire;
 	 *  2. detach from the 802.11 layer while the hal is still
 	 *     attached, since it calls back into the driver to delete
 	 *     global key cache entries;
 	 *  3. free the taskqueue, which drains pending tasks;
 	 *  4. reclaim the tx queue state only after the 802.11 layer
 	 *     is gone, because its callbacks to reclaim node state may
 	 *     still want to use it;
 	 *  5. detach the hal last, as tx queue cleanup calls into it.
 	 */
 
 	/*
 	 * XXX Force the hardware awake up front.  ath_stop() would
 	 * wake it anyway, but doing it here makes sure of it.
 	 */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ath_power_setpower(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	/* Quiesce the interface. */
 	ath_stop(netif);
 
 	/* 802.11 layer first, then the task queue. */
 	ieee80211_ifdetach(netif->if_l2com);
 	taskqueue_free(sc->sc_tq);
 #ifdef ATH_TX99_DIAG
 	if (sc->sc_tx99 != NULL)
 		sc->sc_tx99->detach(sc->sc_tx99);
 #endif
 	ath_rate_detach(sc->sc_rc);
 #ifdef	ATH_DEBUG_ALQ
 	if_ath_alq_tidyup(&sc->sc_alq);
 #endif
 	/* Optional modules, in the reverse of their attach order. */
 	ath_lna_div_detach(sc);
 	ath_btcoex_detach(sc);
 	ath_spectral_detach(sc);
 	ath_dfs_detach(sc);
 	/* Descriptor/DMA state and tx queues. */
 	ath_desc_free(sc);
 	ath_txdma_teardown(sc);
 	ath_rxdma_teardown(sc);
 	ath_tx_cleanup(sc);
 	/* NB: detaching the hal puts the chip in full sleep */
 	ath_hal_detach(sc->sc_ah);
 
 	/* Release the ifnet within its own vnet context. */
 	CURVNET_SET(netif->if_vnet);
 	if_free(netif);
 	CURVNET_RESTORE();
 
 	return 0;
 }
 
 /*
  * Choose the MAC address for a new vap when several BSS share one radio.
  *
  * Index 0 is the unmodified address passed in.  When a distinct address is
  * requested (clone != 0) and the hardware has a bssid mask, the lowest
  * index not yet marked in sc_bssidmask is taken; for a non-zero index the
  * U/L bit (0x2) is set in mac[0] and the index is OR-ed in starting at
  * bit 2.  The chosen index is recorded in sc_bssidmask, the bits of mac[0]
  * are cleared from sc_hwbssidmask[0], and users of index 0 are counted in
  * sc_nbssid0.
  */
 static void
 assign_address(struct ath_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone)
 {
 	int idx = 0;
 
 	if (clone && sc->sc_hasbmask) {
 		/* NB: we only do this if h/w supports multiple bssid */
 		while (idx < 8 && (sc->sc_bssidmask & (1<<idx)) != 0)
 			idx++;
 		if (idx != 0)
 			mac[0] |= (idx << 2)|0x2;
 	}
 	sc->sc_bssidmask |= 1<<idx;
 	sc->sc_hwbssidmask[0] &= ~mac[0];
 	if (idx == 0)
 		sc->sc_nbssid0++;
 }
 
 /*
  * Release the address index encoded in mac[0] (taken as mac[0] >> 2).
  *
  * Index 0 is reference-counted through sc_nbssid0 and is only released
  * when the last user goes away; any other index is released immediately.
  * On release the index is cleared from sc_bssidmask and the bits not needed
  * by the remaining non-zero indices are OR-ed back into sc_hwbssidmask[0].
  *
  * NOTE(review): mac[0] >> 2 is used directly as a shift count, so this
  * appears to assume the upper six bits of mac[0] hold only the index
  * written by assign_address() - confirm against callers.
  */
 static void
 reclaim_address(struct ath_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN])
 {
 	int idx = mac[0] >> 2;
 	uint8_t freebits;
 	int n;
 
 	/* index 0 still has other users: nothing to release yet */
 	if (idx == 0 && --sc->sc_nbssid0 != 0)
 		return;
 
 	sc->sc_bssidmask &= ~(1<<idx);
 	/* recalculate bssid mask from remaining addresses */
 	freebits = 0xff;
 	for (n = 1; n < 8; n++) {
 		if ((sc->sc_bssidmask & (1<<n)) != 0)
 			freebits &= ~((n<<2)|0x2);
 	}
 	sc->sc_hwbssidmask[0] |= freebits;
 }
 
 /*
  * Assign a beacon xmit slot.  We try to space out
  * assignments so when beacons are staggered the
  * traffic coming out of the cab q has maximal time
  * to go out before the next beacon is scheduled.
  *
  * A free slot whose two neighbours (modulo ATH_BCBUF) are also free is
  * returned as soon as one is found.  Otherwise the highest-numbered free
  * slot is returned.  If no slot is free at all the result is 0, so the
  * caller must already know that a slot is available.
  */
 static int
 assign_bslot(struct ath_softc *sc)
 {
 	u_int slot, free;
 
 	free = 0;
 	for (slot = 0; slot < ATH_BCBUF; slot++)
 		if (sc->sc_bslot[slot] == NULL) {
 			/*
 			 * NB: slot is unsigned, so the previous index is
 			 * computed as slot + ATH_BCBUF - 1.  A plain
 			 * slot - 1 wraps to UINT_MAX for slot 0 and only
 			 * reduces to the last slot when ATH_BCBUF is a
 			 * power of two.
 			 */
 			if (sc->sc_bslot[(slot+1)%ATH_BCBUF] == NULL &&
 			    sc->sc_bslot[(slot+ATH_BCBUF-1)%ATH_BCBUF] == NULL)
 				return slot;
 			free = slot;
 			/* NB: keep looking for a double slot */
 		}
 	return free;
 }
 
 static struct ieee80211vap *
 ath_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit,
     enum ieee80211_opmode opmode, int flags,
     const uint8_t bssid[IEEE80211_ADDR_LEN],
     const uint8_t mac0[IEEE80211_ADDR_LEN])
 {
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ath_vap *avp;
 	struct ieee80211vap *vap;
 	uint8_t mac[IEEE80211_ADDR_LEN];
 	int needbeacon, error;
 	enum ieee80211_opmode ic_opmode;
 
 	avp = (struct ath_vap *) malloc(sizeof(struct ath_vap),
 	    M_80211_VAP, M_WAITOK | M_ZERO);
 	needbeacon = 0;
 	IEEE80211_ADDR_COPY(mac, mac0);
 
 	ATH_LOCK(sc);
 	ic_opmode = opmode;		/* default to opmode of new vap */
 	switch (opmode) {
 	case IEEE80211_M_STA:
 		if (sc->sc_nstavaps != 0) {	/* XXX only 1 for now */
 			device_printf(sc->sc_dev, "only 1 sta vap supported\n");
 			goto bad;
 		}
 		if (sc->sc_nvaps) {
 			/*
 			 * With multiple vaps we must fall back
 			 * to s/w beacon miss handling.
 			 */
 			flags |= IEEE80211_CLONE_NOBEACONS;
 		}
 		if (flags & IEEE80211_CLONE_NOBEACONS) {
 			/*
 			 * Station mode w/o beacons are implemented w/ AP mode.
 			 */
 			ic_opmode = IEEE80211_M_HOSTAP;
 		}
 		break;
 	case IEEE80211_M_IBSS:
 		if (sc->sc_nvaps != 0) {	/* XXX only 1 for now */
 			device_printf(sc->sc_dev,
 			    "only 1 ibss vap supported\n");
 			goto bad;
 		}
 		needbeacon = 1;
 		break;
 	case IEEE80211_M_AHDEMO:
 #ifdef IEEE80211_SUPPORT_TDMA
 		if (flags & IEEE80211_CLONE_TDMA) {
 			if (sc->sc_nvaps != 0) {
 				device_printf(sc->sc_dev,
 				    "only 1 tdma vap supported\n");
 				goto bad;
 			}
 			needbeacon = 1;
 			flags |= IEEE80211_CLONE_NOBEACONS;
 		}
 		/* fall thru... */
 #endif
 	case IEEE80211_M_MONITOR:
 		if (sc->sc_nvaps != 0 && ic->ic_opmode != opmode) {
 			/*
 			 * Adopt existing mode.  Adding a monitor or ahdemo
 			 * vap to an existing configuration is of dubious
 			 * value but should be ok.
 			 */
 			/* XXX not right for monitor mode */
 			ic_opmode = ic->ic_opmode;
 		}
 		break;
 	case IEEE80211_M_HOSTAP:
 	case IEEE80211_M_MBSS:
 		needbeacon = 1;
 		break;
 	case IEEE80211_M_WDS:
 		if (sc->sc_nvaps != 0 && ic->ic_opmode == IEEE80211_M_STA) {
 			device_printf(sc->sc_dev,
 			    "wds not supported in sta mode\n");
 			goto bad;
 		}
 		/*
 		 * Silently remove any request for a unique
 		 * bssid; WDS vap's always share the local
 		 * mac address.
 		 */
 		flags &= ~IEEE80211_CLONE_BSSID;
 		if (sc->sc_nvaps == 0)
 			ic_opmode = IEEE80211_M_HOSTAP;
 		else
 			ic_opmode = ic->ic_opmode;
 		break;
 	default:
 		device_printf(sc->sc_dev, "unknown opmode %d\n", opmode);
 		goto bad;
 	}
 	/*
 	 * Check that a beacon buffer is available; the code below assumes it.
 	 */
 	if (needbeacon & TAILQ_EMPTY(&sc->sc_bbuf)) {
 		device_printf(sc->sc_dev, "no beacon buffer available\n");
 		goto bad;
 	}
 
 	/* STA, AHDEMO? */
 	if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) {
 		assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID);
 		ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask);
 	}
 
 	vap = &avp->av_vap;
 	/* XXX can't hold mutex across if_alloc */
 	ATH_UNLOCK(sc);
 	error = ieee80211_vap_setup(ic, vap, name, unit, opmode, flags,
 	    bssid, mac);
 	ATH_LOCK(sc);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "%s: error %d creating vap\n",
 		    __func__, error);
 		goto bad2;
 	}
 
 	/* h/w crypto support */
 	vap->iv_key_alloc = ath_key_alloc;
 	vap->iv_key_delete = ath_key_delete;
 	vap->iv_key_set = ath_key_set;
 	vap->iv_key_update_begin = ath_key_update_begin;
 	vap->iv_key_update_end = ath_key_update_end;
 
 	/* override various methods */
 	avp->av_recv_mgmt = vap->iv_recv_mgmt;
 	vap->iv_recv_mgmt = ath_recv_mgmt;
 	vap->iv_reset = ath_reset_vap;
 	vap->iv_update_beacon = ath_beacon_update;
 	avp->av_newstate = vap->iv_newstate;
 	vap->iv_newstate = ath_newstate;
 	avp->av_bmiss = vap->iv_bmiss;
 	vap->iv_bmiss = ath_bmiss_vap;
 
 	avp->av_node_ps = vap->iv_node_ps;
 	vap->iv_node_ps = ath_node_powersave;
 
 	avp->av_set_tim = vap->iv_set_tim;
 	vap->iv_set_tim = ath_node_set_tim;
 
 	avp->av_recv_pspoll = vap->iv_recv_pspoll;
 	vap->iv_recv_pspoll = ath_node_recv_pspoll;
 
 	/* Set default parameters */
 
 	/*
 	 * Anything earlier than some AR9300 series MACs don't
 	 * support a smaller MPDU density.
 	 */
 	vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_8;
 	/*
 	 * All NICs can handle the maximum size, however
 	 * AR5416 based MACs can only TX aggregates w/ RTS
 	 * protection when the total aggregate size is <= 8k.
 	 * However, for now that's enforced by the TX path.
 	 */
 	vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K;
 
 	avp->av_bslot = -1;
 	if (needbeacon) {
 		/*
 		 * Allocate beacon state and setup the q for buffered
 		 * multicast frames.  We know a beacon buffer is
 		 * available because we checked above.
 		 */
 		avp->av_bcbuf = TAILQ_FIRST(&sc->sc_bbuf);
 		TAILQ_REMOVE(&sc->sc_bbuf, avp->av_bcbuf, bf_list);
 		if (opmode != IEEE80211_M_IBSS || !sc->sc_hasveol) {
 			/*
 			 * Assign the vap to a beacon xmit slot.  As above
 			 * this cannot fail to find a free one.
 			 */
 			avp->av_bslot = assign_bslot(sc);
 			KASSERT(sc->sc_bslot[avp->av_bslot] == NULL,
 			    ("beacon slot %u not empty", avp->av_bslot));
 			sc->sc_bslot[avp->av_bslot] = vap;
 			sc->sc_nbcnvaps++;
 		}
 		if (sc->sc_hastsfadd && sc->sc_nbcnvaps > 0) {
 			/*
 			 * Multple vaps are to transmit beacons and we
 			 * have h/w support for TSF adjusting; enable
 			 * use of staggered beacons.
 			 */
 			sc->sc_stagbeacons = 1;
 		}
 		ath_txq_init(sc, &avp->av_mcastq, ATH_TXQ_SWQ);
 	}
 
 	ic->ic_opmode = ic_opmode;
 	if (opmode != IEEE80211_M_WDS) {
 		sc->sc_nvaps++;
 		if (opmode == IEEE80211_M_STA)
 			sc->sc_nstavaps++;
 		if (opmode == IEEE80211_M_MBSS)
 			sc->sc_nmeshvaps++;
 	}
 	switch (ic_opmode) {
 	case IEEE80211_M_IBSS:
 		sc->sc_opmode = HAL_M_IBSS;
 		break;
 	case IEEE80211_M_STA:
 		sc->sc_opmode = HAL_M_STA;
 		break;
 	case IEEE80211_M_AHDEMO:
 #ifdef IEEE80211_SUPPORT_TDMA
 		if (vap->iv_caps & IEEE80211_C_TDMA) {
 			sc->sc_tdma = 1;
 			/* NB: disable tsf adjust */
 			sc->sc_stagbeacons = 0;
 		}
 		/*
 		 * NB: adhoc demo mode is a pseudo mode; to the hal it's
 		 * just ap mode.
 		 */
 		/* fall thru... */
 #endif
 	case IEEE80211_M_HOSTAP:
 	case IEEE80211_M_MBSS:
 		sc->sc_opmode = HAL_M_HOSTAP;
 		break;
 	case IEEE80211_M_MONITOR:
 		sc->sc_opmode = HAL_M_MONITOR;
 		break;
 	default:
 		/* XXX should not happen */
 		break;
 	}
 	if (sc->sc_hastsfadd) {
 		/*
 		 * Configure whether or not TSF adjust should be done.
 		 */
 		ath_hal_settsfadjust(sc->sc_ah, sc->sc_stagbeacons);
 	}
 	if (flags & IEEE80211_CLONE_NOBEACONS) {
 		/*
 		 * Enable s/w beacon miss handling.
 		 */
 		sc->sc_swbmiss = 1;
 	}
 	ATH_UNLOCK(sc);
 
 	/* complete setup */
 	ieee80211_vap_attach(vap, ath_media_change, ieee80211_media_status);
 	return vap;
 bad2:
 	reclaim_address(sc, mac);
 	ath_hal_setbssidmask(sc->sc_ah, sc->sc_hwbssidmask);
 bad:
 	free(avp, M_80211_VAP);
 	ATH_UNLOCK(sc);
 	return NULL;
 }
 
 /*
  * Destroy a vap.
  *
  * If the interface is running the hardware is quiesced first (interrupts
  * off, receive stopped, transmit queues drained).  The vap is then detached
  * from net80211, the transmit queues are drained a second time, the vap's
  * beacon buffer/slot and pending multicast frames are reclaimed, the
  * per-softc vap counters are updated and the ath_vap is freed.  Finally,
  * if the interface is still running, receive, beacons and interrupts are
  * restarted.
  */
 static void
 ath_vap_delete(struct ieee80211vap *vap)
 {
 	struct ieee80211com *ic = vap->iv_ic;
 	struct ifnet *ifp = ic->ic_ifp;
 	struct ath_softc *sc = ifp->if_softc;
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_vap *avp = ATH_VAP(vap);
 
 	/* Wake the hardware; the matching restore is at the end. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		/*
 		 * Quiesce the hardware while we remove the vap.  In
 		 * particular we need to reclaim all references to
 		 * the vap state by any frames pending on the tx queues.
 		 */
 		ath_hal_intrset(ah, 0);		/* disable interrupts */
 		/* XXX Do all frames from all vaps/nodes need draining here? */
 		ath_stoprecv(sc, 1);		/* stop recv side */
 		ath_draintxq(sc, ATH_RESET_DEFAULT);		/* stop hw xmit side */
 	}
 
 	/* .. leave the hardware awake for now. */
 
 	ieee80211_vap_detach(vap);
 
 	/*
 	 * XXX Danger Will Robinson! Danger!
 	 *
 	 * Because ieee80211_vap_detach() can queue a frame (the station
 	 * disassociate message?) after we've drained the TXQ and
 	 * flushed the software TXQ, we will end up with a frame queued
 	 * to a node whose vap is about to be freed.
 	 *
 	 * To work around this, flush the hardware/software again.
 	 * This may be racy - the ath task may be running and the packet
 	 * may be being scheduled between sw->hw txq. Tsk.
 	 *
 	 * TODO: figure out why a new node gets allocated somewhere around
 	 * here (after the ath_tx_swq() call; and after an ath_stop_locked()
 	 * call!)
 	 */
 
 	ath_draintxq(sc, ATH_RESET_DEFAULT);
 
 	ATH_LOCK(sc);
 	/*
 	 * Reclaim beacon state.  Note this must be done before
 	 * the vap instance is reclaimed as we may have a reference
 	 * to it in the buffer for the beacon frame.
 	 */
 	if (avp->av_bcbuf != NULL) {
 		if (avp->av_bslot != -1) {
 			sc->sc_bslot[avp->av_bslot] = NULL;
 			sc->sc_nbcnvaps--;
 		}
 		ath_beacon_return(sc, avp->av_bcbuf);
 		avp->av_bcbuf = NULL;
 		/* No beaconing vaps left: turn staggered beacons/TSF adjust off. */
 		if (sc->sc_nbcnvaps == 0) {
 			sc->sc_stagbeacons = 0;
 			if (sc->sc_hastsfadd)
 				ath_hal_settsfadjust(sc->sc_ah, 0);
 		}
 		/*
 		 * Reclaim any pending mcast frames for the vap.
 		 */
 		ath_tx_draintxq(sc, &avp->av_mcastq);
 	}
 	/*
 	 * Update bookkeeping.
 	 */
 	if (vap->iv_opmode == IEEE80211_M_STA) {
 		sc->sc_nstavaps--;
 		if (sc->sc_nstavaps == 0 && sc->sc_swbmiss)
 			sc->sc_swbmiss = 0;
 	} else if (vap->iv_opmode == IEEE80211_M_HOSTAP ||
 	    vap->iv_opmode == IEEE80211_M_MBSS) {
 		reclaim_address(sc, vap->iv_myaddr);
 		ath_hal_setbssidmask(ah, sc->sc_hwbssidmask);
 		if (vap->iv_opmode == IEEE80211_M_MBSS)
 			sc->sc_nmeshvaps--;
 	}
 	if (vap->iv_opmode != IEEE80211_M_WDS)
 		sc->sc_nvaps--;
 #ifdef IEEE80211_SUPPORT_TDMA
 	/* TDMA operation ceases when the last vap is destroyed */
 	if (sc->sc_tdma && sc->sc_nvaps == 0) {
 		sc->sc_tdma = 0;
 		sc->sc_swbmiss = 0;
 	}
 #endif
 	/* NB: neither avp nor vap is referenced past this point. */
 	free(avp, M_80211_VAP);
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		/*
 		 * Restart rx+tx machines if still running (RUNNING will
 		 * be reset if we just destroyed the last vap).
 		 */
 		if (ath_startrecv(sc) != 0)
-			if_printf(ifp, "%s: unable to restart recv logic\n",
-			    __func__);
+			device_printf(sc->sc_dev,
+			    "%s: unable to restart recv logic\n", __func__);
 		if (sc->sc_beacons) {		/* restart beacons */
 #ifdef IEEE80211_SUPPORT_TDMA
 			if (sc->sc_tdma)
 				ath_tdma_config(sc, NULL);
 			else
 #endif
 				ath_beacon_config(sc, NULL);
 		}
 		ath_hal_intrset(ah, sc->sc_imask);
 	}
 
 	/* OK, let the hardware go back to sleep. */
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 }
 
 /*
  * Suspend the device: remember whether the interface was up, suspend
  * all vaps, mask interrupts, block the driver taskqueue, stop the
  * calibration callout and finally disable the PCIe PHY.
  */
 void
 ath_suspend(struct ath_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: if_flags %x\n",
 		__func__, ifp->if_flags);
 
 	/* Recorded so that ath_resume() knows whether to resume the vaps. */
 	if ((ifp->if_flags & IFF_UP) != 0)
 		sc->sc_resume_up = 1;
 	else
 		sc->sc_resume_up = 0;
 
 	ieee80211_suspend_all(ic);
 
 	/*
 	 * NB: the chip is not put into low power mode here; pci powers
 	 * off our socket on suspend and CardBus detaches the device.
 	 *
 	 * XXX TODO: that does not cover non-cardbus devices!
 	 */
 
 	/*
 	 * XXX This doesn't wait for pending taskqueue items or
 	 * parallel transmit/receive/other threads to finish.
 	 */
 	ath_hal_intrset(sc->sc_ah, 0);
 	taskqueue_block(sc->sc_tq);
 
 	/* The calibration callout is stopped under the softc lock. */
 	ATH_LOCK(sc);
 	callout_stop(&sc->sc_cal_ch);
 	ATH_UNLOCK(sc);
 
 	/*
 	 * XXX ensure sc_invalid is 1
 	 */
 
 	/* Disable the PCIe PHY, complete with workarounds */
 	ath_hal_enablepcie(sc->sc_ah, 1, 1);
 }
 
 /*
  * Some parts do not reset the key cache contents on resume.  Clear
  * every hardware key slot (with the chip held awake), then ask the
  * 802.11 layer to re-load the keys it assumes are set up in h/w.
  */
 static void
 ath_reset_keycache(struct ath_softc *sc)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ieee80211com *ic = sc->sc_ifp->if_l2com;
 	int slot;
 
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	slot = 0;
 	while (slot < sc->sc_keymax) {
 		ath_hal_keyreset(ah, slot);
 		slot++;
 	}
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	/* NB: done after the softc lock has been dropped. */
 	ieee80211_crypto_reload_keys(ic);
 }
 
 /*
  * Recompute the "current" TX/RX chainmasks for the given channel.
  *
  * RX always uses the configured RX chainmask.  TX uses the configured
  * TX chainmask on HT channels and 0x1 on everything else.
  */
 static void
 ath_update_chainmasks(struct ath_softc *sc, struct ieee80211_channel *chan)
 {
 
 	sc->sc_cur_rxchainmask = sc->sc_rxchainmask;
 
 	/* The TX chainmask depends upon the current operating mode. */
 	sc->sc_cur_txchainmask =
 	    IEEE80211_IS_CHAN_HT(chan) ? sc->sc_txchainmask : 1;
 
 	DPRINTF(sc, ATH_DEBUG_RESET,
 	    "%s: TX chainmask is now 0x%x, RX is now 0x%x\n",
 	    __func__, sc->sc_cur_txchainmask, sc->sc_cur_rxchainmask);
 }
 
 /*
  * Resume the device after a suspend: re-enable PCIe, reprogram the
  * chainmasks, force the chip awake, reset it, reload the key cache,
  * re-arm DFS/spectral/btcoex/TXOP/LED state and, if the interface was
  * up at suspend time, resume all vaps.
  */
 void
 ath_resume(struct ath_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_STATUS status;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: if_flags %x\n",
 		__func__, ifp->if_flags);
 
 	/* Re-enable PCIe, re-enable the PCIe bus */
 	ath_hal_enablepcie(ah, 0, 0);
 
 	/*
 	 * The chip was powered down on suspend, so it must be reset
 	 * before the keycache is reloaded.  Use the driver's current
 	 * channel when there is one, else net80211's.
 	 */
 	ath_update_chainmasks(sc,
 	    sc->sc_curchan == NULL ? ic->ic_curchan : sc->sc_curchan);
 	ath_hal_setchainmasks(ah, sc->sc_cur_txchainmask,
 	    sc->sc_cur_rxchainmask);
 
 	/* Ensure we set the current power state to on */
 	ATH_LOCK(sc);
 	ath_power_setselfgen(sc, HAL_PM_AWAKE);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ath_power_setpower(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	/* NB: the result of the reset is not checked here. */
 	ath_hal_reset(ah, sc->sc_opmode,
 	    sc->sc_curchan == NULL ? ic->ic_curchan : sc->sc_curchan,
 	    AH_FALSE, &status);
 	ath_reset_keycache(sc);
 
 	/* Flag the receive path as stopped and freshly reset. */
 	ATH_RX_LOCK(sc);
 	sc->sc_rx_stopped = 1;
 	sc->sc_rx_resetted = 1;
 	ATH_RX_UNLOCK(sc);
 
 	/* Let DFS at it in case it's a DFS channel */
 	ath_dfs_radar_enable(sc, ic->ic_curchan);
 
 	/* Let spectral at in case spectral is enabled */
 	ath_spectral_enable(sc, ic->ic_curchan);
 
 	/* Let bluetooth coexistence at in case it's needed for this channel */
 	ath_btcoex_enable(sc, ic->ic_curchan);
 
 	/*
 	 * Enforce the TXOP limitation only when doing TDMA on a chip
 	 * that supports it.
 	 */
 	ath_hal_setenforcetxop(ah,
 	    (sc->sc_hasenforcetxop && sc->sc_tdma) ? 1 : 0);
 
 	/* Restore the LED configuration */
 	ath_led_config(sc);
 	ath_hal_setledstate(ah, HAL_LED_INIT);
 
 	if (sc->sc_resume_up)
 		ieee80211_resume_all(ic);
 
 	/* Balance the ath_power_set_power_state() call above. */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	/* XXX beacons ? */
 }
 
 /*
  * Shutdown hook: stop the interface.  The chip is not powered down
  * since we are about to reboot anyway.
  */
 void
 ath_shutdown(struct ath_softc *sc)
 {
 	struct ifnet *ifp;
 
 	ifp = sc->sc_ifp;
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: if_flags %x\n",
 		__func__, ifp->if_flags);
 
 	ath_stop(ifp);
 }
 
 /*
  * Interrupt handler.  Most of the actual processing is deferred.
  *
  * The handler bails out early (after clearing/masking the ISR where
  * appropriate) if a reset is in progress, the hardware is marked
  * invalid, the interrupt is not ours, or the interface is not up and
  * running.  Otherwise it reads the interrupt status, masks it with
  * sc_imask and dispatches each cause: beacons are sent directly, while
  * rx, tx, fatal and beacon-miss work is handed to the taskqueue / rx
  * scheduler.  sc_intr_cnt is held non-zero for the duration of the
  * dispatch so the reset paths know to wait.
  *
  * The hardware is held awake from the first register access until the
  * matching ath_power_restore_power_state() on each exit path.
  */
 void
 ath_intr(void *arg)
 {
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_INT status = 0;
 	uint32_t txqs;
 
 	/*
 	 * If we're inside a reset path, just print a warning and
 	 * clear the ISR. The reset routine will finish it for us.
 	 */
 	ATH_PCU_LOCK(sc);
 	if (sc->sc_inreset_cnt) {
 		HAL_INT status;
 		ath_hal_getisr(ah, &status);	/* clear ISR */
 		ath_hal_intrset(ah, 0);		/* disable further intr's */
 		DPRINTF(sc, ATH_DEBUG_ANY,
 		    "%s: in reset, ignoring: status=0x%x\n",
 		    __func__, status);
 		ATH_PCU_UNLOCK(sc);
 		return;
 	}
 
 	if (sc->sc_invalid) {
 		/*
 		 * The hardware is not ready/present, don't touch anything.
 		 * Note this can happen early on if the IRQ is shared.
 		 */
 		DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid; ignored\n", __func__);
 		ATH_PCU_UNLOCK(sc);
 		return;
 	}
 	if (!ath_hal_intrpend(ah)) {		/* shared irq, not for us */
 		ATH_PCU_UNLOCK(sc);
 		return;
 	}
 
 	/* From here on every exit path must restore the power state. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	if ((ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		HAL_INT status;
 
 		DPRINTF(sc, ATH_DEBUG_ANY, "%s: if_flags 0x%x\n",
 			__func__, ifp->if_flags);
 		ath_hal_getisr(ah, &status);	/* clear ISR */
 		ath_hal_intrset(ah, 0);		/* disable further intr's */
 		ATH_PCU_UNLOCK(sc);
 
 		ATH_LOCK(sc);
 		ath_power_restore_power_state(sc);
 		ATH_UNLOCK(sc);
 		return;
 	}
 
 	/*
 	 * Figure out the reason(s) for the interrupt.  Note
 	 * that the hal returns a pseudo-ISR that may include
 	 * bits we haven't explicitly enabled so we mask the
 	 * value to insure we only process bits we requested.
 	 */
 	ath_hal_getisr(ah, &status);		/* NB: clears ISR too */
 	DPRINTF(sc, ATH_DEBUG_INTR, "%s: status 0x%x\n", __func__, status);
 	ATH_KTR(sc, ATH_KTR_INTERRUPTS, 1, "ath_intr: mask=0x%.8x", status);
 #ifdef	ATH_DEBUG_ALQ
 	if_ath_alq_post_intr(&sc->sc_alq, status, ah->ah_intrstate,
 	    ah->ah_syncstate);
 #endif	/* ATH_DEBUG_ALQ */
 #ifdef	ATH_KTR_INTR_DEBUG
 	ATH_KTR(sc, ATH_KTR_INTERRUPTS, 5,
 	    "ath_intr: ISR=0x%.8x, ISR_S0=0x%.8x, ISR_S1=0x%.8x, ISR_S2=0x%.8x, ISR_S5=0x%.8x",
 	    ah->ah_intrstate[0],
 	    ah->ah_intrstate[1],
 	    ah->ah_intrstate[2],
 	    ah->ah_intrstate[3],
 	    ah->ah_intrstate[6]);
 #endif
 
 	/*
 	 * Squirrel away SYNC interrupt debugging: bump the per-bit
 	 * counter for every bit set in ah_syncstate.  The mask must be
 	 * (1U << i); an unsigned constant also keeps the shift into
 	 * bit 31 well defined.
 	 */
 	if (ah->ah_syncstate != 0) {
 		int i;
 		for (i = 0; i < 32; i++)
 			if (ah->ah_syncstate & (1U << i))
 				sc->sc_intr_stats.sync_intr[i]++;
 	}
 
 	status &= sc->sc_imask;			/* discard unasked for bits */
 
 	/* Short-circuit un-handled interrupts */
 	if (status == 0x0) {
 		ATH_PCU_UNLOCK(sc);
 
 		ATH_LOCK(sc);
 		ath_power_restore_power_state(sc);
 		ATH_UNLOCK(sc);
 
 		return;
 	}
 
 	/*
 	 * Take a note that we're inside the interrupt handler, so
 	 * the reset routines know to wait.
 	 */
 	sc->sc_intr_cnt++;
 	ATH_PCU_UNLOCK(sc);
 
 	/*
 	 * Handle the interrupt. We won't run concurrent with the reset
 	 * or channel change routines as they'll wait for sc_intr_cnt
 	 * to be 0 before continuing.
 	 */
 	if (status & HAL_INT_FATAL) {
 		sc->sc_stats.ast_hardware++;
 		ath_hal_intrset(ah, 0);		/* disable intr's until reset */
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_fataltask);
 	} else {
 		if (status & HAL_INT_SWBA) {
 			/*
 			 * Software beacon alert--time to send a beacon.
 			 * Handle beacon transmission directly; deferring
 			 * this is too slow to meet timing constraints
 			 * under load.
 			 */
 #ifdef IEEE80211_SUPPORT_TDMA
 			if (sc->sc_tdma) {
 				if (sc->sc_tdmaswba == 0) {
 					struct ieee80211com *ic = ifp->if_l2com;
 					struct ieee80211vap *vap =
 					    TAILQ_FIRST(&ic->ic_vaps);
 					ath_tdma_beacon_send(sc, vap);
 					sc->sc_tdmaswba =
 					    vap->iv_tdma->tdma_bintval;
 				} else
 					sc->sc_tdmaswba--;
 			} else
 #endif
 			{
 				ath_beacon_proc(sc, 0);
 #ifdef IEEE80211_SUPPORT_SUPERG
 				/*
 				 * Schedule the rx taskq in case there's no
 				 * traffic so any frames held on the staging
 				 * queue are aged and potentially flushed.
 				 */
 				sc->sc_rx.recv_sched(sc, 1);
 #endif
 			}
 		}
 		if (status & HAL_INT_RXEOL) {
 			int imask;
 			ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXEOL");
 			if (! sc->sc_isedma) {
 				ATH_PCU_LOCK(sc);
 				/*
 				 * NB: the hardware should re-read the link when
 				 *     RXE bit is written, but it doesn't work at
 				 *     least on older hardware revs.
 				 */
 				sc->sc_stats.ast_rxeol++;
 				/*
 				 * Disable RXEOL/RXORN - prevent an interrupt
 				 * storm until the PCU logic can be reset.
 				 * In case the interface is reset some other
 				 * way before "sc_kickpcu" is called, don't
 				 * modify sc_imask - that way if it is reset
 				 * by a call to ath_reset() somehow, the
 				 * interrupt mask will be correctly reprogrammed.
 				 */
 				imask = sc->sc_imask;
 				imask &= ~(HAL_INT_RXEOL | HAL_INT_RXORN);
 				ath_hal_intrset(ah, imask);
 				/*
 				 * Only blank sc_rxlink if we've not yet kicked
 				 * the PCU.
 				 *
 				 * This isn't entirely correct - the correct solution
 				 * would be to have a PCU lock and engage that for
 				 * the duration of the PCU fiddling; which would include
 				 * running the RX process. Otherwise we could end up
 				 * messing up the RX descriptor chain and making the
 				 * RX desc list much shorter.
 				 */
 				if (! sc->sc_kickpcu)
 					sc->sc_rxlink = NULL;
 				sc->sc_kickpcu = 1;
 				ATH_PCU_UNLOCK(sc);
 			}
 			/*
 			 * Enqueue an RX proc to handle whatever
 			 * is in the RX queue.
 			 * This will then kick the PCU if required.
 			 */
 			sc->sc_rx.recv_sched(sc, 1);
 		}
 		if (status & HAL_INT_TXURN) {
 			sc->sc_stats.ast_txurn++;
 			/* bump tx trigger level */
 			ath_hal_updatetxtriglevel(ah, AH_TRUE);
 		}
 		/*
 		 * Handle both the legacy and RX EDMA interrupt bits.
 		 * Note that HAL_INT_RXLP is also HAL_INT_RXDESC.
 		 */
 		if (status & (HAL_INT_RX | HAL_INT_RXHP | HAL_INT_RXLP)) {
 			sc->sc_stats.ast_rx_intr++;
 			sc->sc_rx.recv_sched(sc, 1);
 		}
 		if (status & HAL_INT_TX) {
 			sc->sc_stats.ast_tx_intr++;
 			/*
 			 * Grab all the currently set bits in the HAL txq bitmap
 			 * and blank them. This is the only place we should be
 			 * doing this.
 			 */
 			if (! sc->sc_isedma) {
 				ATH_PCU_LOCK(sc);
 				txqs = 0xffffffff;
 				ath_hal_gettxintrtxqs(sc->sc_ah, &txqs);
 				ATH_KTR(sc, ATH_KTR_INTERRUPTS, 3,
 				    "ath_intr: TX; txqs=0x%08x, txq_active was 0x%08x, now 0x%08x",
 				    txqs,
 				    sc->sc_txq_active,
 				    sc->sc_txq_active | txqs);
 				sc->sc_txq_active |= txqs;
 				ATH_PCU_UNLOCK(sc);
 			}
 			taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask);
 		}
 		if (status & HAL_INT_BMISS) {
 			sc->sc_stats.ast_bmiss++;
 			taskqueue_enqueue(sc->sc_tq, &sc->sc_bmisstask);
 		}
 		if (status & HAL_INT_GTT)
 			sc->sc_stats.ast_tx_timeout++;
 		if (status & HAL_INT_CST)
 			sc->sc_stats.ast_tx_cst++;
 		if (status & HAL_INT_MIB) {
 			sc->sc_stats.ast_mib++;
 			ATH_PCU_LOCK(sc);
 			/*
 			 * Disable interrupts until we service the MIB
 			 * interrupt; otherwise it will continue to fire.
 			 */
 			ath_hal_intrset(ah, 0);
 			/*
 			 * Let the hal handle the event.  We assume it will
 			 * clear whatever condition caused the interrupt.
 			 */
 			ath_hal_mibevent(ah, &sc->sc_halstats);
 			/*
 			 * Don't reset the interrupt if we've just
 			 * kicked the PCU, or we may get a nested
 			 * RXEOL before the rxproc has had a chance
 			 * to run.
 			 */
 			if (sc->sc_kickpcu == 0)
 				ath_hal_intrset(ah, sc->sc_imask);
 			ATH_PCU_UNLOCK(sc);
 		}
 		if (status & HAL_INT_RXORN) {
 			/* NB: hal marks HAL_INT_FATAL when RXORN is fatal */
 			ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_intr: RXORN");
 			sc->sc_stats.ast_rxorn++;
 		}
 		if (status & HAL_INT_TSFOOR) {
 			device_printf(sc->sc_dev, "%s: TSFOOR\n", __func__);
 			sc->sc_syncbeacon = 1;
 		}
 	}
 	/* Done dispatching; let any waiting reset path proceed. */
 	ATH_PCU_LOCK(sc);
 	sc->sc_intr_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 }
 
 /*
  * Taskqueue handler for fatal hardware errors (enqueued from ath_intr()
  * on HAL_INT_FATAL).  Logs the error, dumps the six words of fatal state
  * reported by the hal when available, then resets the chip.
  *
  * arg is the softc; pending is unused.
  */
 static void
 ath_fatal_proc(void *arg, int pending)
 {
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	u_int32_t *state;
 	u_int32_t len;
 	void *sp;
 
-	if_printf(ifp, "hardware error; resetting\n");
+	device_printf(sc->sc_dev, "hardware error; resetting\n");
 	/*
 	 * Fatal errors are unrecoverable.  Typically these
 	 * are caused by DMA errors.  Collect h/w state from
 	 * the hal so we can diagnose what's going on.
 	 */
 	if (ath_hal_getfatalstate(sc->sc_ah, &sp, &len)) {
 		/* The dump below reads six 32-bit words from the buffer. */
 		KASSERT(len >= 6*sizeof(u_int32_t), ("len %u bytes", len));
 		state = sp;
-		if_printf(ifp, "0x%08x 0x%08x 0x%08x, 0x%08x 0x%08x 0x%08x\n",
-		    state[0], state[1] , state[2], state[3],
-		    state[4], state[5]);
+		device_printf(sc->sc_dev,
+		    "0x%08x 0x%08x 0x%08x, 0x%08x 0x%08x 0x%08x\n", state[0],
+		    state[1] , state[2], state[3], state[4], state[5]);
 	}
 	ath_reset(ifp, ATH_RESET_NOLOSS);
 }
 
 /*
  * Per-vap beacon miss handler.
  *
  * For hardware beacon miss events (the vap is not using s/w bmiss) the
  * event is sanity-checked against the time of the last received frame:
  * if that frame arrived within the beacon miss interval the event is
  * counted as a phantom and dropped.  Otherwise a beacon resync is
  * requested and the event is passed on to the saved av_bmiss method.
  */
 static void
 ath_bmiss_vap(struct ieee80211vap *vap)
 {
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 	int phantom = 0;
 
 	/*
 	 * XXX TODO: Just read the TSF during the interrupt path;
 	 * that way we don't have to wake up again just to read it
 	 * again.
 	 */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	/*
 	 * Work around phantom bmiss interrupts.  If this really is a
 	 * bmiss we'll get another interrupt soon and that one will be
 	 * dispatched up for processing.
 	 */
 	if (!(vap->iv_flags_ext & IEEE80211_FEXT_SWBMISS)) {
 		u_int64_t lastrx = sc->sc_lastrx;
 		u_int64_t tsf = ath_hal_gettsf64(sc->sc_ah);
 		/* XXX should take a locked ref to iv_bss */
 		u_int bmisstimeout =
 			vap->iv_bmissthreshold * vap->iv_bss->ni_intval * 1024;
 
 		DPRINTF(sc, ATH_DEBUG_BEACON,
 		    "%s: tsf %llu lastrx %lld (%llu) bmiss %u\n",
 		    __func__, (unsigned long long) tsf,
 		    (unsigned long long)(tsf - lastrx),
 		    (unsigned long long) lastrx, bmisstimeout);
 
 		if (tsf - lastrx <= bmisstimeout) {
 			sc->sc_stats.ast_bmiss_phantom++;
 			phantom = 1;
 		}
 	}
 
 	/*
 	 * The hardware need not stay awake past this point, whether
 	 * the event is dropped or handed to av_bmiss().
 	 */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	if (phantom)
 		return;
 
 	/* Attempt to force a beacon resync. */
 	sc->sc_syncbeacon = 1;
 
 	ATH_VAP(vap)->av_bmiss(vap);
 }
 
 /*
  * Query the hal for baseband/MAC hang state.
  *
  * mask selects which hangs to check; on success the detected hang bits
  * are stored in *hangs and 1 is returned.  Returns 0 (leaving *hangs
  * untouched) if the diagnostic query fails.
  *
  * XXX this needs a force wakeup!
  */
 int
 ath_hal_gethangstate(struct ath_hal *ah, uint32_t mask, uint32_t *hangs)
 {
 	void *result;
 	uint32_t resultsize;
 
 	if (ath_hal_getdiagstate(ah, HAL_DIAG_CHECK_HANGS, &mask, sizeof(mask),
 	    &result, &resultsize)) {
 		KASSERT(resultsize == sizeof(uint32_t),
 		    ("resultsize %u", resultsize));
 		*hangs = *(uint32_t *)result;
 		return 1;
 	}
 	return 0;
 }
 
 /*
  * Taskqueue handler for hardware beacon miss interrupts (enqueued from
  * ath_intr() on HAL_INT_BMISS).  The chip is reset in every case; if no
  * hang is reported the beacon miss is also passed up to net80211.
  *
  * arg is the softc; pending is only used for debug output.
  */
 static void
 ath_bmiss_proc(void *arg, int pending)
 {
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t hangs;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: pending %u\n", __func__, pending);
 
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ath_beacon_miss(sc);
 
 	/*
 	 * Do a reset upon any beacon miss event.
 	 *
 	 * It may be a non-recognised RX clear hang which needs a reset
 	 * to clear.
 	 */
 	if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0) {
 		ath_reset(ifp, ATH_RESET_NOLOSS);
-		if_printf(ifp, "bb hang detected (0x%x), resetting\n", hangs);
+		device_printf(sc->sc_dev,
+		    "bb hang detected (0x%x), resetting\n", hangs);
 	} else {
 		ath_reset(ifp, ATH_RESET_NOLOSS);
 		ieee80211_beacon_miss(ifp->if_l2com);
 	}
 
 	/* Force a beacon resync, in case they've drifted */
 	sc->sc_syncbeacon = 1;
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 }
 
 /*
  * Configure TKIP MIC handling for hardware that cannot do MIC
  * calculations together with WME.  On such parts, when WME is enabled
  * the h/w TKIP MIC is turned off and the capability is cleared so the
  * 802.11 layer does the MIC in software; otherwise h/w MIC is enabled
  * and the capability is advertised.
  */
 static void
 ath_settkipmic(struct ath_softc *sc)
 {
 	struct ieee80211com *ic = sc->sc_ifp->if_l2com;
 
 	/* Nothing to do without TKIP, or if h/w handles MIC with WME. */
 	if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP) || sc->sc_wmetkipmic)
 		return;
 
 	if ((ic->ic_flags & IEEE80211_F_WME) == 0) {
 		ath_hal_settkipmic(sc->sc_ah, AH_TRUE);
 		ic->ic_cryptocaps |= IEEE80211_CRYPTO_TKIPMIC;
 	} else {
 		ath_hal_settkipmic(sc->sc_ah, AH_FALSE);
 		ic->ic_cryptocaps &= ~IEEE80211_CRYPTO_TKIPMIC;
 	}
 }
 
 /*
  * (Re)initialise the interface.
  *
  * Forces the chip awake, stops anything previously set up, resets the
  * hardware on the current channel, restarts receive, builds the
  * interrupt mask, marks the interface running, enables interrupts and
  * finally starts all vaps (or the tx99 diagnostic when configured).
  *
  * arg is the softc.
  */
 static void
 ath_init(void *arg)
 {
 	struct ath_softc *sc = (struct ath_softc *) arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_STATUS status;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: if_flags 0x%x\n",
 		__func__, ifp->if_flags);
 
 	ATH_LOCK(sc);
 	/*
 	 * Force the sleep state awake.
 	 */
 	ath_power_setselfgen(sc, HAL_PM_AWAKE);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ath_power_setpower(sc, HAL_PM_AWAKE);
 
 	/*
 	 * Stop anything previously setup.  This is safe
 	 * whether this is the first time through or not.
 	 */
 	ath_stop_locked(ifp);
 
 	/*
 	 * The basic interface to setting the hardware in a good
 	 * state is ``reset''.  On return the hardware is known to
 	 * be powered up and with interrupts disabled.  This must
 	 * be followed by initialization of the appropriate bits
 	 * and then setup of the interrupt mask.
 	 */
 	ath_settkipmic(sc);
 	ath_update_chainmasks(sc, ic->ic_curchan);
 	ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask,
 	    sc->sc_cur_rxchainmask);
 
-	if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_FALSE, &status)) {
-		if_printf(ifp, "unable to reset hardware; hal status %u\n",
-			status);
+	if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_FALSE,
+	    &status)) {
+		device_printf(sc->sc_dev,
+		    "unable to reset hardware; hal status %u\n", status);
 		/*
 		 * NOTE(review): unlike the ath_startrecv() failure path
 		 * below, this return does not call
 		 * ath_power_restore_power_state() - confirm whether that
 		 * is intentional.
 		 */
 		ATH_UNLOCK(sc);
 		return;
 	}
 
 	/* Flag the receive path as stopped and freshly reset. */
 	ATH_RX_LOCK(sc);
 	sc->sc_rx_stopped = 1;
 	sc->sc_rx_resetted = 1;
 	ATH_RX_UNLOCK(sc);
 
 	ath_chan_change(sc, ic->ic_curchan);
 
 	/* Let DFS at it in case it's a DFS channel */
 	ath_dfs_radar_enable(sc, ic->ic_curchan);
 
 	/* Let spectral at in case spectral is enabled */
 	ath_spectral_enable(sc, ic->ic_curchan);
 
 	/*
 	 * Let bluetooth coexistence at in case it's needed for this channel
 	 */
 	ath_btcoex_enable(sc, ic->ic_curchan);
 
 	/*
 	 * If we're doing TDMA, enforce the TXOP limitation for chips that
 	 * support it.
 	 */
 	if (sc->sc_hasenforcetxop && sc->sc_tdma)
 		ath_hal_setenforcetxop(sc->sc_ah, 1);
 	else
 		ath_hal_setenforcetxop(sc->sc_ah, 0);
 
 	/*
 	 * Likewise this is set during reset so update
 	 * state cached in the driver.
 	 */
 	sc->sc_diversity = ath_hal_getdiversity(ah);
 	sc->sc_lastlongcal = ticks;
 	sc->sc_resetcal = 1;
 	sc->sc_lastcalreset = 0;
 	sc->sc_lastani = ticks;
 	sc->sc_lastshortcal = ticks;
 	sc->sc_doresetcal = AH_FALSE;
 	/*
 	 * Beacon timers were cleared here; give ath_newstate()
 	 * a hint that the beacon timers should be poked when
 	 * things transition to the RUN state.
 	 */
 	sc->sc_beacons = 0;
 
 	/*
 	 * Setup the hardware after reset: the key cache
 	 * is filled as needed and the receive engine is
 	 * set going.  Frame transmit is handled entirely
 	 * in the frame output path; there's nothing to do
 	 * here except setup the interrupt mask.
 	 */
 	if (ath_startrecv(sc) != 0) {
-		if_printf(ifp, "unable to start recv logic\n");
+		device_printf(sc->sc_dev, "unable to start recv logic\n");
 		ath_power_restore_power_state(sc);
 		ATH_UNLOCK(sc);
 		return;
 	}
 
 	/*
 	 * Enable interrupts.
 	 */
 	sc->sc_imask = HAL_INT_RX | HAL_INT_TX
 		  | HAL_INT_RXORN | HAL_INT_TXURN
 		  | HAL_INT_FATAL | HAL_INT_GLOBAL;
 
 	/*
 	 * Enable RX EDMA bits.  Note these overlap with
 	 * HAL_INT_RX and HAL_INT_RXDESC respectively.
 	 */
 	if (sc->sc_isedma)
 		sc->sc_imask |= (HAL_INT_RXHP | HAL_INT_RXLP);
 
 	/*
 	 * If we're an EDMA NIC, we don't care about RXEOL.
 	 * Writing a new descriptor in will simply restart
 	 * RX DMA.
 	 */
 	if (! sc->sc_isedma)
 		sc->sc_imask |= HAL_INT_RXEOL;
 
 	/*
 	 * Enable MIB interrupts when there are hardware phy counters.
 	 * Note we only do this (at the moment) for station mode.
 	 */
 	if (sc->sc_needmib && ic->ic_opmode == IEEE80211_M_STA)
 		sc->sc_imask |= HAL_INT_MIB;
 
 	/*
 	 * XXX add capability for this.
 	 *
 	 * If we're in STA mode (and maybe IBSS?) then register for
 	 * TSFOOR interrupts.
 	 */
 	if (ic->ic_opmode == IEEE80211_M_STA)
 		sc->sc_imask |= HAL_INT_TSFOOR;
 
 	/* Enable global TX timeout and carrier sense timeout if available */
 	if (ath_hal_gtxto_supported(ah))
 		sc->sc_imask |= HAL_INT_GTT;
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: imask=0x%x\n",
 		__func__, sc->sc_imask);
 
 	/* Mark running and arm the watchdog before unmasking interrupts. */
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	callout_reset(&sc->sc_wd_ch, hz, ath_watchdog, sc);
 	ath_hal_intrset(ah, sc->sc_imask);
 
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 #ifdef ATH_TX99_DIAG
 	if (sc->sc_tx99 != NULL)
 		sc->sc_tx99->start(sc->sc_tx99);
 	else
 #endif
 	ieee80211_start_all(ic);		/* start all vap's */
 }
 
 /*
  * Stop the interface.  Must be called with the softc lock held.
  *
  * If the interface is running this shuts down the hardware and driver:
  *    turn off timers
  *    disable interrupts
  *    turn off the radio
  *    clear receive machinery
  *    drain and release tx queues
  *    reclaim beacon resources
  *
  * The steps that touch the chip are skipped when the hardware is gone
  * (sc_invalid).  The chip is held awake for the duration and the
  * previous power state is restored on the way out.
  */
 static void
 ath_stop_locked(struct ifnet *ifp)
 {
 	struct ath_softc *sc = ifp->if_softc;
 	struct ath_hal *ah = sc->sc_ah;
 
 	DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid %u if_flags 0x%x\n",
 		__func__, sc->sc_invalid, ifp->if_flags);
 
 	ATH_LOCK_ASSERT(sc);
 
 	/* Wake the hardware up before fiddling with it. */
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 #ifdef ATH_TX99_DIAG
 		if (sc->sc_tx99 != NULL)
 			sc->sc_tx99->stop(sc->sc_tx99);
 #endif
 		/* Watchdog off; interface no longer running. */
 		callout_stop(&sc->sc_wd_ch);
 		sc->sc_wd_timer = 0;
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 		if (sc->sc_invalid) {
 			/* XXX we should stop RX regardless of whether it's valid */
 			sc->sc_rxlink = NULL;
 		} else {
 			if (sc->sc_softled) {
 				callout_stop(&sc->sc_ledtimer);
 				ath_hal_gpioset(ah, sc->sc_ledpin,
 					!sc->sc_ledon);
 				sc->sc_blinking = 0;
 			}
 			ath_hal_intrset(ah, 0);
 			ath_stoprecv(sc, 1);
 			ath_hal_phydisable(ah);
 		}
 
 		ath_draintxq(sc, ATH_RESET_DEFAULT);
 		ath_beacon_free(sc);	/* XXX not needed */
 	}
 
 	/* And now, restore the current power state */
 	ath_power_restore_power_state(sc);
 }
 
 /*
  * Wait until all pending TX/RX has completed.
  *
  * Sleeps, in 10ms steps on the PCU mutex, until the rx-proc, tx-proc,
  * tx-start and interrupt counters have all dropped to zero, giving up
  * (with a console message) once the iteration budget is spent.
  *
  * Called with the PCU lock held and the softc lock not held.  It's
  * assumed that the caller has first grabbed the reset lock so it
  * doesn't try to do overlapping chip resets; otherwise new operations
  * may keep being queued.
  */
 #define	MAX_TXRX_ITERATIONS	100
 static void
 ath_txrx_stop_locked(struct ath_softc *sc)
 {
 	int remaining = MAX_TXRX_ITERATIONS;
 
 	ATH_UNLOCK_ASSERT(sc);
 	ATH_PCU_LOCK_ASSERT(sc);
 
 	while ((sc->sc_rxproc_cnt || sc->sc_txproc_cnt ||
 	    sc->sc_txstart_cnt || sc->sc_intr_cnt) && remaining > 0) {
 		msleep(sc, &sc->sc_pcu_mtx, 0, "ath_txrx_stop",
 		    msecs_to_ticks(10));
 		remaining--;
 	}
 
 	/* Budget exhausted. */
 	if (remaining <= 0)
 		device_printf(sc->sc_dev,
 		    "%s: didn't finish after %d iterations\n",
 		    __func__, MAX_TXRX_ITERATIONS);
 }
 #undef	MAX_TXRX_ITERATIONS
 
 #if 0
 /*
  * Unlocked wrapper around ath_txrx_stop_locked(): takes the PCU lock,
  * waits for pending TX/RX to finish and drops the lock again.
  * Currently compiled out.
  */
 static void
 ath_txrx_stop(struct ath_softc *sc)
 {
 	ATH_UNLOCK_ASSERT(sc);
 	ATH_PCU_UNLOCK_ASSERT(sc);
 
 	ATH_PCU_LOCK(sc);
 	ath_txrx_stop_locked(sc);
 	ATH_PCU_UNLOCK(sc);
 }
 #endif
 
 /*
  * Unblock the driver taskqueue (sc_tq), undoing a previous
  * taskqueue_block() on it.
  */
 static void
 ath_txrx_start(struct ath_softc *sc)
 {
 
 	taskqueue_unblock(sc->sc_tq);
 }
 
 /*
  * Grab the reset lock, and wait around until no one else
  * is trying to do anything with it.
  *
  * This is totally horrible but we can't hold this lock for
  * long enough to do TX/RX or we end up with net80211/ip stack
  * LORs and eventual deadlock.
  *
  * "dowait" signals whether to wait (dropping the PCU lock and
  * pausing between polls) for the reset count to reach 0.  This
  * should (for now) only be used during the reset path, as the rest
  * of the code may not be locking-reentrant enough to behave correctly.
  *
  * Called with the PCU lock held.  The reset count is always bumped;
  * the return value is 1 if it was zero when we took it (exclusive)
  * and 0 otherwise.
  *
  * Another, cleaner way should be found to serialise all of
  * these operations.
  */
 #define	MAX_RESET_ITERATIONS	25
 static int
 ath_reset_grablock(struct ath_softc *sc, int dowait)
 {
 	int exclusive = 0;
 	int tries = MAX_RESET_ITERATIONS;
 
 	ATH_PCU_LOCK_ASSERT(sc);
 	for (;;) {
 		if (sc->sc_inreset_cnt == 0) {
 			exclusive = 1;
 			break;
 		}
 		/* Somebody else is resetting and we may not wait. */
 		if (dowait == 0)
 			break;
 
 		ATH_PCU_UNLOCK(sc);
 		/*
 		 * 1 tick is likely not enough time for long calibrations
 		 * to complete.  So we should wait quite a while.
 		 */
 		pause("ath_reset_grablock", msecs_to_ticks(100));
 		tries--;
 		ATH_PCU_LOCK(sc);
 
 		if (tries <= 0)
 			break;
 	}
 
 	/*
 	 * We always increment the refcounter, regardless
 	 * of whether we succeeded to get it in an exclusive
 	 * way.
 	 */
 	sc->sc_inreset_cnt++;
 
 	if (tries <= 0)
 		device_printf(sc->sc_dev,
 		    "%s: didn't finish after %d iterations\n",
 		    __func__, MAX_RESET_ITERATIONS);
 
 	if (!exclusive)
 		device_printf(sc->sc_dev,
 		    "%s: warning, recursive reset path!\n",
 		    __func__);
 
 	return exclusive;
 }
 #undef MAX_RESET_ITERATIONS
 
 /*
  * XXX TODO: write ath_reset_releaselock
  */
 
 /*
  * Stop the interface: ath_stop_locked() wrapped in the softc lock.
  */
 static void
 ath_stop(struct ifnet *ifp)
 {
 	struct ath_softc *sc;
 
 	sc = ifp->if_softc;
 
 	ATH_LOCK(sc);
 	ath_stop_locked(ifp);
 	ATH_UNLOCK(sc);
 }
 
 /*
  * Reset the hardware w/o losing operational state.  This is
  * basically a more efficient way of doing ath_stop, ath_init,
  * followed by state transitions to the current 802.11
  * operational state.  Used to recover from various errors and
  * to reset or reload hardware state.
  */
 int
 ath_reset(struct ifnet *ifp, ATH_RESET_TYPE reset_type)
 {
 	struct ath_softc *sc = ifp->if_softc;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_STATUS status;
 	int i;
 
 	/*
 	 * reset_type is handed to ath_draintxq(), decides the second
 	 * argument of ath_stoprecv(), and only ATH_RESET_NOLOSS restarts
 	 * TX DMA on the existing queues further down.
 	 *
 	 * Always returns 0: a failed ath_hal_reset() or ath_startrecv()
 	 * is only logged.
 	 */
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
 
 	/* Ensure ATH_LOCK isn't held; ath_rx_proc can't be locked */
 	ATH_PCU_UNLOCK_ASSERT(sc);
 	ATH_UNLOCK_ASSERT(sc);
 
 	/* Try to stop any further TX/RX from occurring */
 	taskqueue_block(sc->sc_tq);
 
 	/*
 	 * Wake the hardware up.
 	 */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_PCU_LOCK(sc);
 
 	/*
 	 * Grab the reset lock before TX/RX is stopped.
 	 *
 	 * This is needed to ensure that when the TX/RX actually does finish,
 	 * no further TX/RX/reset runs in parallel with this.
 	 */
 	if (ath_reset_grablock(sc, 1) == 0) {
 		device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n",
 		    __func__);
 	}
 
 	/* disable interrupts */
 	ath_hal_intrset(ah, 0);
 
 	/*
 	 * Now, ensure that any in progress TX/RX completes before we
 	 * continue.
 	 */
 	ath_txrx_stop_locked(sc);
 
 	ATH_PCU_UNLOCK(sc);
 
 	/*
 	 * Regardless of whether we're doing a no-loss flush or
 	 * not, stop the PCU and handle what's in the RX queue.
 	 * That way frames aren't dropped which shouldn't be.
 	 */
 	ath_stoprecv(sc, (reset_type != ATH_RESET_NOLOSS));
 	ath_rx_flush(sc);
 
 	/*
 	 * Should now wait for pending TX/RX to complete
 	 * and block future ones from occurring. This needs to be
 	 * done before the TX queue is drained.
 	 */
 	ath_draintxq(sc, reset_type);	/* stop xmit side */
 
 	ath_settkipmic(sc);		/* configure TKIP MIC handling */
 	/* NB: indicate channel change so we do a full reset */
 	ath_update_chainmasks(sc, ic->ic_curchan);
 	ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask,
 	    sc->sc_cur_rxchainmask);
 	if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_TRUE, &status))
-		if_printf(ifp, "%s: unable to reset hardware; hal status %u\n",
-			__func__, status);
+		device_printf(sc->sc_dev,
+		    "%s: unable to reset hardware; hal status %u\n",
+		    __func__, status);
 	sc->sc_diversity = ath_hal_getdiversity(ah);
 
 	/* Flag the RX side as stopped and reset, under the RX lock. */
 	ATH_RX_LOCK(sc);
 	sc->sc_rx_stopped = 1;
 	sc->sc_rx_resetted = 1;
 	ATH_RX_UNLOCK(sc);
 
 	/* Let DFS at it in case it's a DFS channel */
 	ath_dfs_radar_enable(sc, ic->ic_curchan);
 
 	/* Let spectral at in case spectral is enabled */
 	ath_spectral_enable(sc, ic->ic_curchan);
 
 	/*
 	 * Let bluetooth coexistence at in case it's needed for this channel
 	 */
 	ath_btcoex_enable(sc, ic->ic_curchan);
 
 	/*
 	 * If we're doing TDMA, enforce the TXOP limitation for chips that
 	 * support it.
 	 */
 	if (sc->sc_hasenforcetxop && sc->sc_tdma)
 		ath_hal_setenforcetxop(sc->sc_ah, 1);
 	else
 		ath_hal_setenforcetxop(sc->sc_ah, 0);
 
 	if (ath_startrecv(sc) != 0)	/* restart recv */
-		if_printf(ifp, "%s: unable to start recv logic\n", __func__);
+		device_printf(sc->sc_dev,
+		    "%s: unable to start recv logic\n", __func__);
 	/*
 	 * We may be doing a reset in response to an ioctl
 	 * that changes the channel so update any state that
 	 * might change as a result.
 	 */
 	ath_chan_change(sc, ic->ic_curchan);
 	if (sc->sc_beacons) {		/* restart beacons */
 #ifdef IEEE80211_SUPPORT_TDMA
 		if (sc->sc_tdma)
 			ath_tdma_config(sc, NULL);
 		else
 #endif
 			ath_beacon_config(sc, NULL);
 	}
 
 	/*
 	 * Release the reset lock and re-enable interrupts here.
 	 * If an interrupt was being processed in ath_intr(),
 	 * it would disable interrupts at this point. So we have
 	 * to atomically enable interrupts and decrement the
 	 * reset counter - this way ath_intr() doesn't end up
 	 * disabling interrupts without a corresponding enable
 	 * in the reset or channel change path.
 	 *
 	 * Grab the TX reference in case we need to transmit.
 	 * That way a parallel transmit doesn't.
 	 */
 	ATH_PCU_LOCK(sc);
 	sc->sc_inreset_cnt--;
 	sc->sc_txstart_cnt++;
 	/* XXX only do this if sc_inreset_cnt == 0? */
 	ath_hal_intrset(ah, sc->sc_imask);
 	ATH_PCU_UNLOCK(sc);
 
 	/*
 	 * TX and RX can be started here. If it were started with
 	 * sc_inreset_cnt > 0, the TX and RX path would abort.
 	 * Thus if this is a nested call through the reset or
 	 * channel change code, TX completion will occur but
 	 * RX completion and ath_start / ath_tx_start will not
 	 * run.
 	 */
 
 	/* Restart TX/RX as needed */
 	ath_txrx_start(sc);
 
 	/* XXX TODO: we need to hold the tx refcount here! */
 
 	/* Restart TX completion and pending TX */
 	if (reset_type == ATH_RESET_NOLOSS) {
 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
 			if (ATH_TXQ_SETUP(sc, i)) {
 				/* DMA restart runs under the per-queue lock. */
 				ATH_TXQ_LOCK(&sc->sc_txq[i]);
 				ath_txq_restart_dma(sc, &sc->sc_txq[i]);
 				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
 
 				/* Scheduling runs under the global TX lock. */
 				ATH_TX_LOCK(sc);
 				ath_txq_sched(sc, &sc->sc_txq[i]);
 				ATH_TX_UNLOCK(sc);
 			}
 		}
 	}
 
 	/*
 	 * This may have been set during an ath_start() call which
 	 * set this once it detected a concurrent TX was going on.
 	 * So, clear it.
 	 */
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	/* Drop the TX reference taken together with the interrupt enable. */
 	ATH_PCU_LOCK(sc);
 	sc->sc_txstart_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Handle any frames in the TX queue */
 	/*
 	 * XXX should this be done by the caller, rather than
 	 * ath_reset() ?
 	 */
 	ath_tx_kick(sc);		/* restart xmit */
 	return 0;
 }
 
 static int
 ath_reset_vap(struct ieee80211vap *vap, u_long cmd)
 {
 	struct ieee80211com *ic = vap->iv_ic;
 	struct ifnet *ifp = ic->ic_ifp;
 	struct ath_softc *sc = ifp->if_softc;
 	struct ath_hal *ah = sc->sc_ah;
 
 	/* Anything other than a TX power change takes a full reset. */
 	if (cmd != IEEE80211_IOC_TXPOWER) {
 		/* XXX? Full or NOLOSS? */
 		return ath_reset(ifp, ATH_RESET_FULL);
 	}
 
 	/*
 	 * With per-packet TPC enabled there is nothing to do;
 	 * otherwise force the global limit.  Either way this is
 	 * handled directly, without resetting the hardware.
 	 */
 	if (!ath_hal_gettpc(ah))
 		ath_hal_settxpowlimit(ah, ic->ic_txpowlimit);
 	return 0;
 }
 
 struct ath_buf *
 _ath_getbuf_locked(struct ath_softc *sc, ath_buf_type_t btype)
 {
 	const int want_mgmt = (btype == ATH_BUFTYPE_MGMT);
 	struct ath_buf *bf;
 
 	ATH_TXBUF_LOCK_ASSERT(sc);
 
 	/* Peek at the head of whichever free list was asked for. */
 	bf = want_mgmt ? TAILQ_FIRST(&sc->sc_txbuf_mgmt) :
 	    TAILQ_FIRST(&sc->sc_txbuf);
 
 	/* Count the reason when there is nothing usable to hand out. */
 	if (bf == NULL) {
 		sc->sc_stats.ast_tx_getnobuf++;
 	} else if (bf->bf_flags & ATH_BUF_BUSY) {
 		sc->sc_stats.ast_tx_getbusybuf++;
 		bf = NULL;
 	}
 
 	if (bf == NULL) {
 		/* XXX should check which list, mgmt or otherwise */
 		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: %s\n", __func__,
 		    TAILQ_FIRST(&sc->sc_txbuf) == NULL ?
 			"out of xmit buffers" : "xmit buffer busy");
 		return NULL;
 	}
 
 	/* Unlink it; only the normal list keeps a free-buffer count. */
 	if (want_mgmt) {
 		TAILQ_REMOVE(&sc->sc_txbuf_mgmt, bf, bf_list);
 	} else {
 		TAILQ_REMOVE(&sc->sc_txbuf, bf, bf_list);
 		sc->sc_txbuf_cnt--;
 
 		/*
 		 * A negative count shouldn't happen; if it does,
 		 * warn and clamp the count back to zero.
 		 */
 		if (sc->sc_txbuf_cnt < 0) {
 			device_printf(sc->sc_dev,
 			    "%s: sc_txbuf_cnt < 0?\n",
 			    __func__);
 			sc->sc_txbuf_cnt = 0;
 		}
 	}
 
 	/* XXX TODO: should do this at buffer list initialisation */
 	/* XXX (then, ensure the buffer has the right flag set) */
 	bf->bf_flags = want_mgmt ? ATH_BUF_MGMT : 0;
 
 	/* Start the caller off with clean per-frame fields. */
 	bf->bf_next = NULL;
 	bf->bf_last = NULL;
 	bf->bf_comp = NULL;
 	bzero(&bf->bf_state, sizeof(bf->bf_state));
 
 	/* The descriptor ID is only tracked when doing EDMA. */
 	if (sc->sc_isedma) {
 		bf->bf_descid = sc->sc_txbuf_descid;
 		sc->sc_txbuf_descid++;
 	}
 
 	return bf;
 }
 
 /*
  * When retrying a software frame, buffers marked ATH_BUF_BUSY
  * can't be thrown back on the queue as they could still be
  * in use by the hardware.
  *
  * This duplicates the buffer, or returns NULL.
  *
  * The descriptor is also copied but the link pointers and
  * the DMA segments aren't copied; this frame should thus
  * be again passed through the descriptor setup/chain routines
  * so the link is correct.
  *
  * The caller must free the buffer using ath_freebuf().
  */
 struct ath_buf *
 ath_buf_clone(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ath_buf *tbf;
 
 	tbf = ath_getbuf(sc,
 	    (bf->bf_flags & ATH_BUF_MGMT) ?
 	     ATH_BUFTYPE_MGMT : ATH_BUFTYPE_NORMAL);
 	if (tbf == NULL)
 		return NULL;	/* XXX failure? Why? */
 
 	/* Copy basics */
 	tbf->bf_next = NULL;
 	tbf->bf_nseg = bf->bf_nseg;
 	tbf->bf_flags = bf->bf_flags & ATH_BUF_FLAGS_CLONE;
 	tbf->bf_status = bf->bf_status;
 	tbf->bf_m = bf->bf_m;
 	tbf->bf_node = bf->bf_node;
 	KASSERT((bf->bf_node != NULL), ("%s: bf_node=NULL!", __func__));
 	/* will be setup by the chain/setup function */
 	tbf->bf_lastds = NULL;
 	/* for now, last == self */
 	tbf->bf_last = tbf;
 	tbf->bf_comp = bf->bf_comp;
 
 	/* NOTE: DMA segments will be setup by the setup/chain functions */
 
 	/* The caller has to re-init the descriptor + links */
 
 	/*
 	 * Free the DMA mapping here, before we NULL the mbuf.
 	 * We must only call bus_dmamap_unload() once per mbuf chain
 	 * or behaviour is undefined.
 	 */
 	if (bf->bf_m != NULL) {
 		/*
 		 * XXX is this POSTWRITE call required?
 		 */
 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
 		    BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
 	}
 
 	bf->bf_m = NULL;
 	bf->bf_node = NULL;
 
 	/* Copy state */
 	memcpy(&tbf->bf_state, &bf->bf_state, sizeof(bf->bf_state));
 
 	return tbf;
 }
 
 struct ath_buf *
 ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype)
 {
 	struct ath_buf *bf;
 	struct ifnet *ifp;
 
 	/*
 	 * Try the requested pool first; a management request that
 	 * finds its pool empty falls back to a normal buffer.
 	 */
 	ATH_TXBUF_LOCK(sc);
 	bf = _ath_getbuf_locked(sc, btype);
 	if (bf == NULL && btype == ATH_BUFTYPE_MGMT)
 		bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL);
 	ATH_TXBUF_UNLOCK(sc);
 
 	if (bf != NULL)
 		return bf;
 
 	/* Nothing available: count it and mark the interface OACTIVE. */
 	ifp = sc->sc_ifp;
 	DPRINTF(sc, ATH_DEBUG_XMIT, "%s: stop queue\n", __func__);
 	sc->sc_stats.ast_tx_qstop++;
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 
 	return bf;
 }
 
 /*
  * Queue flush hook.  The body is empty: nothing is flushed and
  * ifp is not touched.
  */
 static void
 ath_qflush(struct ifnet *ifp)
 {
 
 	/* XXX TODO */
 }
 
 /*
  * Transmit a single frame.
  *
  * net80211 will free the node reference if the transmit
  * fails, so don't free the node reference here.
  */
 static int
 ath_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ieee80211_node *ni;
 	struct mbuf *next;
 	struct ath_buf *bf;
 	ath_bufhead frags;
 	int retval = 0;
 
 	/*
 	 * Returns 0 once the frame (and all of its fragments) has been
 	 * handed to ath_tx_start(), or ENOBUFS on every failure path in
 	 * this function.  The node is taken from m->m_pkthdr.rcvif.
 	 */
 
 	/*
 	 * Tell the reset path that we're currently transmitting.
 	 */
 	ATH_PCU_LOCK(sc);
 	if (sc->sc_inreset_cnt > 0) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: sc_inreset_cnt > 0; bailing\n", __func__);
 		ATH_PCU_UNLOCK(sc);
 		IF_LOCK(&ifp->if_snd);
 		sc->sc_stats.ast_tx_qstop++;
 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 		IF_UNLOCK(&ifp->if_snd);
 		ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: OACTIVE, finish");
 		return (ENOBUFS);	/* XXX should be EINVAL or? */
 	}
 	sc->sc_txstart_cnt++;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Wake the hardware up already */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: start");
 	/*
 	 * Grab the TX lock - it's ok to do this here; we haven't
 	 * yet started transmitting.
 	 */
 	ATH_TX_LOCK(sc);
 
 	/*
 	 * Node reference, if there's one.
 	 */
 	ni = (struct ieee80211_node *) m->m_pkthdr.rcvif;
 
 	/*
 	 * Enforce how deep a node queue can get.
 	 *
 	 * XXX it would be nicer if we kept an mbuf queue per
 	 * node and only whacked them into ath_bufs when we
 	 * are ready to schedule some traffic from them.
 	 * .. that may come later.
 	 *
 	 * XXX we should also track the per-node hardware queue
 	 * depth so it is easy to limit the _SUM_ of the swq and
 	 * hwq frames.  Since we only schedule two HWQ frames
 	 * at a time, this should be OK for now.
 	 */
 	if ((!(m->m_flags & M_EAPOL)) &&
 	    (ATH_NODE(ni)->an_swq_depth > sc->sc_txq_node_maxdepth)) {
 		sc->sc_stats.ast_tx_nodeq_overflow++;
 		m_freem(m);
 		m = NULL;
 		retval = ENOBUFS;
 		goto finish;
 	}
 
 	/*
 	 * Check how many TX buffers are available.
 	 *
 	 * If this is for non-EAPOL traffic, just leave some
 	 * space free in order for buffer cloning and raw
 	 * frame transmission to occur.
 	 *
 	 * If it's for EAPOL traffic, ignore this for now.
 	 * Management traffic will be sent via the raw transmit
 	 * method which bypasses this check.
 	 *
 	 * This is needed to ensure that EAPOL frames during
 	 * (re) keying have a chance to go out.
 	 *
 	 * See kern/138379 for more information.
 	 */
 	if ((!(m->m_flags & M_EAPOL)) &&
 	    (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree)) {
 		sc->sc_stats.ast_tx_nobuf++;
 		m_freem(m);
 		m = NULL;
 		retval = ENOBUFS;
 		goto finish;
 	}
 
 	/*
 	 * Grab a TX buffer and associated resources.
 	 *
 	 * If it's an EAPOL frame, allocate a MGMT ath_buf.
 	 * That way temporary buffer exhaustion due to the data
 	 * path doesn't leave us without the ability to transmit
 	 * management frames.
 	 *
 	 * Otherwise allocate a normal buffer.
 	 */
 	if (m->m_flags & M_EAPOL)
 		bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT);
 	else
 		bf = ath_getbuf(sc, ATH_BUFTYPE_NORMAL);
 
 	if (bf == NULL) {
 		/*
 		 * If we failed to allocate a buffer, fail.
 		 *
 		 * We shouldn't fail normally, due to the check
 		 * above.
 		 */
 		sc->sc_stats.ast_tx_nobuf++;
 		IF_LOCK(&ifp->if_snd);
 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 		IF_UNLOCK(&ifp->if_snd);
 		m_freem(m);
 		m = NULL;
 		retval = ENOBUFS;
 		goto finish;
 	}
 
 	/*
 	 * At this point we have a buffer; so we need to free it
 	 * if we hit any error conditions.
 	 */
 
 	/*
 	 * Check for fragmentation.  If this frame
 	 * has been broken up verify we have enough
 	 * buffers to send all the fragments so all
 	 * go out or none...
 	 */
 	TAILQ_INIT(&frags);
 	if ((m->m_flags & M_FRAG) &&
 	    !ath_txfrag_setup(sc, &frags, m, ni)) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: out of txfrag buffers\n", __func__);
 		sc->sc_stats.ast_tx_nofrag++;
 		/*
 		 * NOTE(review): the "bad" label below bumps
 		 * IFCOUNTER_OERRORS again, so this path counts the
 		 * same failure twice - confirm whether that is intended.
 		 */
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		ath_freetx(m);
 		goto bad;
 	}
 
 	/*
 	 * At this point if we have any TX fragments, then we will
 	 * have bumped the node reference once for each of those.
 	 */
 
 	/*
 	 * XXX Is there anything actually _enforcing_ that the
 	 * fragments are being transmitted in one hit, rather than
 	 * being interleaved with other transmissions on that
 	 * hardware queue?
 	 *
 	 * The ATH TX output lock is the only thing serialising this
 	 * right now.
 	 */
 
 	/*
 	 * Calculate the "next fragment" length field in ath_buf
 	 * in order to let the transmit path know enough about
 	 * what to next write to the hardware.
 	 */
 	if (m->m_flags & M_FRAG) {
 		struct ath_buf *fbf = bf;
 		struct ath_buf *n_fbf = NULL;
 		struct mbuf *fm = m->m_nextpkt;
 
 		/*
 		 * We need to walk the list of fragments and set
 		 * the next size to the following buffer.
 		 * However, the first buffer isn't in the frag
 		 * list, so we have to do some gymnastics here.
 		 */
 		TAILQ_FOREACH(n_fbf, &frags, bf_list) {
 			fbf->bf_nextfraglen = fm->m_pkthdr.len;
 			fbf = n_fbf;
 			fm = fm->m_nextpkt;
 		}
 	}
 
 	/*
 	 * Bump the ifp output counter.
 	 *
 	 * XXX should use atomics?
 	 */
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 nextfrag:
 	/*
 	 * Pass the frame to the h/w for transmission.
 	 * Fragmented frames have each frag chained together
 	 * with m_nextpkt.  We know there are sufficient ath_buf's
 	 * to send all the frags because of work done by
 	 * ath_txfrag_setup.  We leave m_nextpkt set while
 	 * calling ath_tx_start so it can use it to extend
 	 * the tx duration to cover the subsequent frag and
 	 * so it can reclaim all the mbufs in case of an error;
 	 * ath_tx_start clears m_nextpkt once it commits to
 	 * handing the frame to the hardware.
 	 *
 	 * Note: if this fails, then the mbufs are freed but
 	 * not the node reference.
 	 */
 	next = m->m_nextpkt;
 	if (ath_tx_start(sc, ni, bf, m)) {
 		/*
 		 * "bad" and "reclaim" are also entered by goto from
 		 * outside this if-body: "bad" from the txfrag setup
 		 * failure above, "reclaim" from the state check below.
 		 */
 bad:
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 reclaim:
 		/* Detach mbuf/node before returning the buffer. */
 		bf->bf_m = NULL;
 		bf->bf_node = NULL;
 		ATH_TXBUF_LOCK(sc);
 		ath_returnbuf_head(sc, bf);
 		/*
 		 * Free the rest of the node references and
 		 * buffers for the fragment list.
 		 */
 		ath_txfrag_cleanup(sc, &frags, ni);
 		ATH_TXBUF_UNLOCK(sc);
 		retval = ENOBUFS;
 		goto finish;
 	}
 
 	/*
 	 * Check here if the node is in power save state.
 	 */
 	ath_tx_update_tim(sc, ni, 1);
 
 	if (next != NULL) {
 		/*
 		 * Beware of state changing between frags.
 		 * XXX check sta power-save state?
 		 */
 		if (ni->ni_vap->iv_state != IEEE80211_S_RUN) {
 			DPRINTF(sc, ATH_DEBUG_XMIT,
 			    "%s: flush fragmented packet, state %s\n",
 			    __func__,
 			    ieee80211_state_name[ni->ni_vap->iv_state]);
 			/* XXX dmamap */
 			ath_freetx(next);
 			goto reclaim;
 		}
 		/* Move on to the next fragment and its reserved buffer. */
 		m = next;
 		bf = TAILQ_FIRST(&frags);
 		KASSERT(bf != NULL, ("no buf for txfrag"));
 		TAILQ_REMOVE(&frags, bf, bf_list);
 		goto nextfrag;
 	}
 
 	/*
 	 * Bump watchdog timer.
 	 */
 	sc->sc_wd_timer = 5;
 
 finish:
 	ATH_TX_UNLOCK(sc);
 
 	/*
 	 * Finished transmitting!
 	 */
 	ATH_PCU_LOCK(sc);
 	sc->sc_txstart_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Sleep the hardware if required */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	ATH_KTR(sc, ATH_KTR_TX, 0, "ath_transmit: finished");
 	
 	return (retval);
 }
 
 static int
 ath_media_change(struct ifnet *ifp)
 {
 	int error = ieee80211_media_change(ifp);
 	/* NB: only the fixed rate can change and that doesn't need a reset */
 	return (error == ENETRESET ? 0 : error);
 }
 
 /*
  * Block/unblock tx+rx processing while a key change is done.
  * We assume the caller serializes key management operations
  * so we only need to worry about synchronization with other
  * uses that originate in the driver.
  */
 static void
 ath_key_update_begin(struct ieee80211vap *vap)
 {
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 
 	DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__);
 
 	/* Hold off the driver taskqueue while keys are being changed. */
 	taskqueue_block(sc->sc_tq);
 }
 
 static void
 ath_key_update_end(struct ieee80211vap *vap)
 {
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 
 	DPRINTF(sc, ATH_DEBUG_KEYCACHE, "%s:\n", __func__);
 
 	/* Let the driver taskqueue run again. */
 	taskqueue_unblock(sc->sc_tq);
 }
 
 static void
 ath_update_promisc(struct ieee80211com *ic)
 {
 	struct ath_softc *sc;
 	u_int32_t rx_filter;
 
 	sc = ic->ic_softc;
 
 	/*
 	 * Recompute and install the RX filter with the hardware
 	 * forced awake, all under ATH_LOCK.
 	 */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	rx_filter = ath_calcrxfilter(sc);
 	ath_hal_setrxfilter(sc->sc_ah, rx_filter);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x\n", __func__,
 	    rx_filter);
 }
 
 /*
  * Driver-internal mcast update call.
  *
  * Assumes the hardware is already awake.
  */
 static void
 ath_update_mcast_hw(struct ath_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	u_int32_t mfilt[2];
 
 	/*
 	 * Build the 64-bit multicast hash filter (two 32-bit words)
 	 * and hand it to the HAL.  With IFF_ALLMULTI set every bit is
 	 * turned on; otherwise one bit is set per multicast address
 	 * on the interface.
 	 */
 	if ((ifp->if_flags & IFF_ALLMULTI) == 0) {
 		struct ifmultiaddr *ifma;
 		/*
 		 * Merge multicast addresses to form the hardware filter.
 		 */
 		mfilt[0] = mfilt[1] = 0;
 		if_maddr_rlock(ifp);	/* XXX need some fiddling to remove? */
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			caddr_t dl;
 			u_int32_t val;
 			u_int8_t pos;
 
 			/* calculate XOR of eight 6bit values */
 			dl = LLADDR((struct sockaddr_dl *) ifma->ifma_addr);
 			val = LE_READ_4(dl + 0);
 			pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
 			val = LE_READ_4(dl + 3);
 			pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
 			pos &= 0x3f;
 			/*
 			 * pos is 0..63 here.  Shift an unsigned 1: with a
 			 * plain int, pos % 32 == 31 would shift into the
 			 * sign bit, which is undefined behaviour.
 			 */
 			mfilt[pos / 32] |= (1U << (pos % 32));
 		}
 		if_maddr_runlock(ifp);
 	} else
 		mfilt[0] = mfilt[1] = ~0;
 
 	ath_hal_setmcastfilter(sc->sc_ah, mfilt[0], mfilt[1]);
 
 	DPRINTF(sc, ATH_DEBUG_MODE, "%s: MC filter %08x:%08x\n",
 		__func__, mfilt[0], mfilt[1]);
 }
 
 /*
  * Called from the net80211 layer - force the hardware
  * awake before operating.
  */
 static void
 ath_update_mcast(struct ieee80211com *ic)
 {
 	struct ath_softc *softc;
 
 	softc = ic->ic_softc;
 
 	/* Force the hardware awake before touching the filter. */
 	ATH_LOCK(softc);
 	ath_power_set_power_state(softc, HAL_PM_AWAKE);
 	ATH_UNLOCK(softc);
 
 	/* The filter update itself runs without ATH_LOCK held. */
 	ath_update_mcast_hw(softc);
 
 	/* Put the previous power state back. */
 	ATH_LOCK(softc);
 	ath_power_restore_power_state(softc);
 	ATH_UNLOCK(softc);
 }
 
 void
 ath_mode_init(struct ath_softc *sc)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ifnet *ifp = sc->sc_ifp;
 	u_int32_t rx_filter;
 
 	/* Program the RX filter, then the operational mode. */
 	rx_filter = ath_calcrxfilter(sc);
 	ath_hal_setrxfilter(ah, rx_filter);
 	ath_hal_setopmode(ah);
 
 	DPRINTF(sc, ATH_DEBUG_STATE | ATH_DEBUG_MODE,
 	    "%s: ah=%p, ifp=%p, if_addr=%p\n",
 	    __func__, ah, ifp, (ifp == NULL) ? NULL : ifp->if_addr);
 
 	/* Pick up any link-level address change. */
 	ath_hal_setmac(ah, IF_LLADDR(ifp));
 
 	/* Finally compute and install the multicast filter. */
 	ath_update_mcast_hw(sc);
 }
 
 /*
  * Set the slot time based on the current setting.
  */
 void
 ath_setslottime(struct ath_softc *sc)
 {
 	struct ieee80211com *ic = sc->sc_ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	u_int usec = HAL_SLOT_TIME_9;	/* default when nothing below matches */
 
 	/*
 	 * Half and quarter rate channels use fixed values.  On 11g
 	 * channels the long slot is used unless short slot is enabled.
 	 * XXX shouldn't honor on pure g or turbo g channel
 	 */
 	if (IEEE80211_IS_CHAN_HALF(ic->ic_curchan))
 		usec = 13;
 	else if (IEEE80211_IS_CHAN_QUARTER(ic->ic_curchan))
 		usec = 21;
 	else if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan) &&
 	    (ic->ic_flags & IEEE80211_F_SHSLOT) == 0)
 		usec = HAL_SLOT_TIME_20;
 
 	DPRINTF(sc, ATH_DEBUG_RESET,
 	    "%s: chan %u MHz flags 0x%x %s slot, %u usec\n",
 	    __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags,
 	    ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", usec);
 
 	/*
 	 * The hardware has to be awake for the update; the pending
 	 * flag is cleared under the same lock.
 	 */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ath_hal_setslottime(ah, usec);
 	ath_power_restore_power_state(sc);
 	sc->sc_updateslot = OK;
 	ATH_UNLOCK(sc);
 }
 
 /*
  * Callback from the 802.11 layer to update the
  * slot time based on the current setting.
  */
 static void
 ath_updateslot(struct ieee80211com *ic)
 {
 	struct ath_softc *sc = ic->ic_softc;
 
 	/*
 	 * In hostap and mesh modes the change is only flagged here,
 	 * deferring it until beacon updates have propagated to the
 	 * stations.  Every other mode programs the hardware right away.
 	 *
 	 * XXX sc_updateslot isn't changed behind a lock?
 	 */
 	switch (ic->ic_opmode) {
 	case IEEE80211_M_HOSTAP:
 	case IEEE80211_M_MBSS:
 		sc->sc_updateslot = UPDATE;
 		break;
 	default:
 		ath_setslottime(sc);
 		break;
 	}
 }
 
 /*
  * Append the contents of src to dst; both queues
  * are assumed to be locked.
  */
 void
 ath_txqmove(struct ath_txq *dst, struct ath_txq *src)
 {
 
 	ATH_TXQ_LOCK_ASSERT(src);
 	ATH_TXQ_LOCK_ASSERT(dst);
 
 	/* Splice src's buffers onto dst and hand over axq_link. */
 	TAILQ_CONCAT(&dst->axq_q, &src->axq_q, bf_list);
 	dst->axq_link = src->axq_link;
 	src->axq_link = NULL;
 
 	/* Fold src's depth counters into dst, leaving src at zero. */
 	dst->axq_depth += src->axq_depth;
 	src->axq_depth = 0;
 	dst->axq_aggr_depth += src->axq_aggr_depth;
 	src->axq_aggr_depth = 0;
 }
 
 /*
  * Reset the hardware, with no loss.
  *
  * This can't be used for a general case reset.
  */
 static void
 ath_reset_proc(void *arg, int pending)
 {
 	/* arg is the softc; pending is not used. */
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 
 #if 0
-	if_printf(ifp, "%s: resetting\n", __func__);
+	device_printf(sc->sc_dev, "%s: resetting\n", __func__);
 #endif
 	/* The return value of ath_reset() is discarded. */
 	ath_reset(ifp, ATH_RESET_NOLOSS);
 }
 
 /*
  * Reset the hardware after detecting beacons have stopped.
  */
 static void
 ath_bstuck_proc(void *arg, int pending)
 {
 	/* arg is the softc; pending is not used. */
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t hangs = 0;
 
 	/* Log the hang mask if the HAL reports a non-zero one. */
 	if (ath_hal_gethangstate(sc->sc_ah, 0xff, &hangs) && hangs != 0)
-		if_printf(ifp, "bb hang detected (0x%x)\n", hangs);
+		device_printf(sc->sc_dev, "bb hang detected (0x%x)\n", hangs);
 
 #ifdef	ATH_DEBUG_ALQ
 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_STUCK_BEACON))
 		if_ath_alq_post(&sc->sc_alq, ATH_ALQ_STUCK_BEACON, 0, NULL);
 #endif
 
-	if_printf(ifp, "stuck beacon; resetting (bmiss count %u)\n",
-		sc->sc_bmisscount);
+	device_printf(sc->sc_dev, "stuck beacon; resetting (bmiss count %u)\n",
+	    sc->sc_bmisscount);
 	sc->sc_stats.ast_bstuck++;
 	/*
 	 * This assumes that there's no simultaneous channel mode change
 	 * occurring.
 	 */
 	ath_reset(ifp, ATH_RESET_NOLOSS);
 }
 
 static void
 ath_load_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	bus_addr_t *paddr;
 
 	KASSERT(error == 0, ("error %u on bus_dma callback", error));
 
 	/* Store the address of the first segment through arg. */
 	paddr = (bus_addr_t *) arg;
 	*paddr = segs[0].ds_addr;
 }
 
 /*
  * Allocate the descriptors and appropriate DMA tag/setup.
  *
  * For some situations (eg EDMA TX completion), there isn't a requirement
  * for the ath_buf entries to be allocated.
  */
 int
 ath_descdma_alloc_desc(struct ath_softc *sc,
 	struct ath_descdma *dd, ath_bufhead *head,
 	const char *name, int ds_size, int ndesc)
 {
 /* NB: neither helper macro below is referenced in this function. */
 #define	DS2PHYS(_dd, _ds) \
 	((_dd)->dd_desc_paddr + ((caddr_t)(_ds) - (caddr_t)(_dd)->dd_desc))
 #define	ATH_DESC_4KB_BOUND_CHECK(_daddr, _len) \
 	((((u_int32_t)(_daddr) & 0xFFF) > (0x1000 - (_len))) ? 1 : 0)
-	struct ifnet *ifp = sc->sc_ifp;
 	int error;
 
 	/*
 	 * Creates the DMA tag, allocates the descriptor memory and loads
 	 * the map for ndesc descriptors of ds_size bytes each, recording
 	 * everything in dd.  head is not used here.
 	 *
 	 * Returns 0 on success or the bus_dma error; on failure whatever
 	 * was set up is released again and dd is zeroed.
 	 */
 	dd->dd_descsize = ds_size;
 
 	DPRINTF(sc, ATH_DEBUG_RESET,
 	    "%s: %s DMA: %u desc, %d bytes per descriptor\n",
 	    __func__, name, ndesc, dd->dd_descsize);
 
 	dd->dd_name = name;
 	dd->dd_desc_len = dd->dd_descsize * ndesc;
 
 	/*
 	 * Merlin work-around:
 	 * Descriptors that cross the 4KB boundary can't be used.
 	 * Assume one skipped descriptor per 4KB page.
 	 */
 	if (! ath_hal_split4ktrans(sc->sc_ah)) {
 		int numpages = dd->dd_desc_len / 4096;
 		dd->dd_desc_len += ds_size * numpages;
 	}
 
 	/*
 	 * Setup DMA descriptor area.
 	 *
 	 * BUS_DMA_ALLOCNOW is not used; we never use bounce
 	 * buffers for the descriptors themselves.
 	 */
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev),	/* parent */
 		       PAGE_SIZE, 0,		/* alignment, bounds */
 		       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
 		       BUS_SPACE_MAXADDR,	/* highaddr */
 		       NULL, NULL,		/* filter, filterarg */
 		       dd->dd_desc_len,		/* maxsize */
 		       1,			/* nsegments */
 		       dd->dd_desc_len,		/* maxsegsize */
 		       0,			/* flags */
 		       NULL,			/* lockfunc */
 		       NULL,			/* lockarg */
 		       &dd->dd_dmat);
 	if (error != 0) {
-		if_printf(ifp, "cannot allocate %s DMA tag\n", dd->dd_name);
+		device_printf(sc->sc_dev,
+		    "cannot allocate %s DMA tag\n", dd->dd_name);
 		return error;
 	}
 
 	/* allocate descriptors */
 	error = bus_dmamem_alloc(dd->dd_dmat, (void**) &dd->dd_desc,
 				 BUS_DMA_NOWAIT | BUS_DMA_COHERENT,
 				 &dd->dd_dmamap);
 	if (error != 0) {
-		if_printf(ifp, "unable to alloc memory for %u %s descriptors, "
-			"error %u\n", ndesc, dd->dd_name, error);
+		device_printf(sc->sc_dev,
+		    "unable to alloc memory for %u %s descriptors, error %u\n",
+		    ndesc, dd->dd_name, error);
 		goto fail1;
 	}
 
 	/* ath_load_cb stores the loaded address in dd->dd_desc_paddr. */
 	error = bus_dmamap_load(dd->dd_dmat, dd->dd_dmamap,
 				dd->dd_desc, dd->dd_desc_len,
 				ath_load_cb, &dd->dd_desc_paddr,
 				BUS_DMA_NOWAIT);
 	if (error != 0) {
-		if_printf(ifp, "unable to map %s descriptors, error %u\n",
-			dd->dd_name, error);
+		device_printf(sc->sc_dev,
+		    "unable to map %s descriptors, error %u\n",
+		    dd->dd_name, error);
 		goto fail2;
 	}
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: %s DMA map: %p (%lu) -> %p (%lu)\n",
 	    __func__, dd->dd_name, (uint8_t *) dd->dd_desc,
 	    (u_long) dd->dd_desc_len, (caddr_t) dd->dd_desc_paddr,
 	    /*XXX*/ (u_long) dd->dd_desc_len);
 
 	return (0);
 
 	/* Unwind in reverse order of setup. */
 fail2:
 	bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap);
 fail1:
 	bus_dma_tag_destroy(dd->dd_dmat);
 	memset(dd, 0, sizeof(*dd));
 	return error;
 #undef DS2PHYS
 #undef ATH_DESC_4KB_BOUND_CHECK
 }
 
 int
 ath_descdma_setup(struct ath_softc *sc,
 	struct ath_descdma *dd, ath_bufhead *head,
 	const char *name, int ds_size, int nbuf, int ndesc)
 {
 #define	DS2PHYS(_dd, _ds) \
 	((_dd)->dd_desc_paddr + ((caddr_t)(_ds) - (caddr_t)(_dd)->dd_desc))
 #define	ATH_DESC_4KB_BOUND_CHECK(_daddr, _len) \
 	((((u_int32_t)(_daddr) & 0xFFF) > (0x1000 - (_len))) ? 1 : 0)
-	struct ifnet *ifp = sc->sc_ifp;
 	uint8_t *ds;
 	struct ath_buf *bf;
 	int i, bsize, error;
 
 	/*
 	 * Allocates nbuf * ndesc descriptors of ds_size bytes plus an
 	 * array of nbuf ath_bufs, points each ath_buf at its run of
 	 * ndesc descriptors, creates a DMA map per buffer and queues
 	 * the buffers on head.
 	 *
 	 * Returns 0 on success.  On failure returns the error from
 	 * ath_descdma_alloc_desc(), ENOMEM if the ath_buf array cannot
 	 * be allocated, or the bus_dmamap_create() error.
 	 */
 
 	/* Allocate descriptors */
 	error = ath_descdma_alloc_desc(sc, dd, head, name, ds_size,
 	    nbuf * ndesc);
 
 	/* Assume any errors during allocation were dealt with */
 	if (error != 0) {
 		return (error);
 	}
 
 	ds = (uint8_t *) dd->dd_desc;
 
 	/* allocate rx buffers */
 	bsize = sizeof(struct ath_buf) * nbuf;
 	bf = malloc(bsize, M_ATHDEV, M_NOWAIT | M_ZERO);
 	if (bf == NULL) {
-		if_printf(ifp, "malloc of %s buffers failed, size %u\n",
-			dd->dd_name, bsize);
+		device_printf(sc->sc_dev,
+		    "malloc of %s buffers failed, size %u\n",
+		    dd->dd_name, bsize);
 		/*
 		 * error is still 0 from the successful descriptor
 		 * allocation above; without this the fail3 path would
 		 * tear everything down and then report success.
 		 */
 		error = ENOMEM;
 		goto fail3;
 	}
 	dd->dd_bufptr = bf;
 
 	TAILQ_INIT(head);
 	for (i = 0; i < nbuf; i++, bf++, ds += (ndesc * dd->dd_descsize)) {
 		bf->bf_desc = (struct ath_desc *) ds;
 		bf->bf_daddr = DS2PHYS(dd, ds);
 		if (! ath_hal_split4ktrans(sc->sc_ah)) {
 			/*
 			 * Merlin WAR: Skip descriptor addresses which
 			 * cause 4KB boundary crossing along any point
 			 * in the descriptor.
 			 */
 			 if (ATH_DESC_4KB_BOUND_CHECK(bf->bf_daddr,
 			     dd->dd_descsize)) {
 				/* Start at the next page */
 				ds += 0x1000 - (bf->bf_daddr & 0xFFF);
 				bf->bf_desc = (struct ath_desc *) ds;
 				bf->bf_daddr = DS2PHYS(dd, ds);
 			}
 		}
 		error = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT,
 				&bf->bf_dmamap);
 		if (error != 0) {
-			if_printf(ifp, "unable to create dmamap for %s "
-				"buffer %u, error %u\n", dd->dd_name, i, error);
+			device_printf(sc->sc_dev, "unable to create dmamap "
+			    "for %s buffer %u, error %u\n",
+			    dd->dd_name, i, error);
 			ath_descdma_cleanup(sc, dd, head);
 			return error;
 		}
 		bf->bf_lastds = bf->bf_desc;	/* Just an initial value */
 		TAILQ_INSERT_TAIL(head, bf, bf_list);
 	}
 
 	/*
 	 * XXX TODO: ensure that ds doesn't overflow the descriptor
 	 * allocation otherwise weird stuff will occur and crash your
 	 * machine.
 	 */
 	return 0;
 	/* XXX this should likely just call ath_descdma_cleanup() */
 fail3:
 	/* Undo what ath_descdma_alloc_desc() set up and wipe dd. */
 	bus_dmamap_unload(dd->dd_dmat, dd->dd_dmamap);
 	bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap);
 	bus_dma_tag_destroy(dd->dd_dmat);
 	memset(dd, 0, sizeof(*dd));
 	return error;
 #undef DS2PHYS
 #undef ATH_DESC_4KB_BOUND_CHECK
 }
 
 /*
  * Allocate ath_buf entries but no descriptor contents.
  *
  * This is for RX EDMA where the descriptors are the header part of
  * the RX buffer.
  */
 int
 ath_descdma_setup_rx_edma(struct ath_softc *sc,
 	struct ath_descdma *dd, ath_bufhead *head,
 	const char *name, int nbuf, int rx_status_len)
 {
-	struct ifnet *ifp = sc->sc_ifp;
 	struct ath_buf *bf;
 	int i, bsize, error;
 
 	/*
 	 * Allocates nbuf ath_bufs with a DMA map each and queues them on
 	 * head; no descriptor memory is allocated.
 	 *
 	 * Returns 0 on success, ENOMEM if the ath_buf array cannot be
 	 * allocated, or the bus_dmamap_create() error.
 	 */
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: %s DMA: %u buffers\n",
 	    __func__, name, nbuf);
 
 	dd->dd_name = name;
 	/*
 	 * This is (mostly) purely for show.  We're not allocating any actual
 	 * descriptors here as EDMA RX has the descriptor be part
 	 * of the RX buffer.
 	 *
 	 * However, dd_desc_len is used by ath_descdma_free() to determine
 	 * whether we have already freed this DMA mapping.
 	 */
 	dd->dd_desc_len = rx_status_len * nbuf;
 	dd->dd_descsize = rx_status_len;
 
 	/* allocate rx buffers */
 	bsize = sizeof(struct ath_buf) * nbuf;
 	bf = malloc(bsize, M_ATHDEV, M_NOWAIT | M_ZERO);
 	if (bf == NULL) {
-		if_printf(ifp, "malloc of %s buffers failed, size %u\n",
-			dd->dd_name, bsize);
+		device_printf(sc->sc_dev,
+		    "malloc of %s buffers failed, size %u\n",
+		    dd->dd_name, bsize);
 		error = ENOMEM;
 		goto fail3;
 	}
 	dd->dd_bufptr = bf;
 
 	TAILQ_INIT(head);
 	for (i = 0; i < nbuf; i++, bf++) {
 		/* No descriptor is attached to the buffer at this point. */
 		bf->bf_desc = NULL;
 		bf->bf_daddr = 0;
 		bf->bf_lastds = NULL;	/* Just an initial value */
 
 		error = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT,
 				&bf->bf_dmamap);
 		if (error != 0) {
-			if_printf(ifp, "unable to create dmamap for %s "
-				"buffer %u, error %u\n", dd->dd_name, i, error);
+			device_printf(sc->sc_dev, "unable to create dmamap "
+			    "for %s buffer %u, error %u\n",
+			    dd->dd_name, i, error);
 			/* Hand everything set up so far to the cleanup routine. */
 			ath_descdma_cleanup(sc, dd, head);
 			return error;
 		}
 		TAILQ_INSERT_TAIL(head, bf, bf_list);
 	}
 	return 0;
 fail3:
 	/* Nothing else was allocated on this path; just wipe dd. */
 	memset(dd, 0, sizeof(*dd));
 	return error;
 }
 
 /*
  * Tear down a descriptor DMA area and the ath_buf list built on it.
  *
  * Releases the descriptor DMA memory and tag when dd_dmamap is set.
  * When 'head' is non-NULL every buffer on it has any still-attached
  * mbuf freed, its DMA map destroyed and its node reference dropped,
  * and the list is re-initialised.  Finally the ath_buf array is freed
  * and 'dd' is zeroed.
  */
 void
 ath_descdma_cleanup(struct ath_softc *sc,
 	struct ath_descdma *dd, ath_bufhead *head)
 {
 	struct ath_buf *bf;
 	struct ieee80211_node *node;
 	int warned = 0;
 
 	/* Descriptor memory (absent for the RX EDMA flavour). */
 	if (dd->dd_dmamap != NULL) {
 		bus_dmamap_unload(dd->dd_dmat, dd->dd_dmamap);
 		bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap);
 		bus_dma_tag_destroy(dd->dd_dmat);
 	}
 
 	if (head != NULL) {
 		TAILQ_FOREACH(bf, head, bf_list) {
 			if (bf->bf_m != NULL) {
 				/*
 				 * XXX the owner should have released this
 				 * mbuf already; complain once per call and
 				 * reclaim it here.
 				 */
 				if (!warned) {
 					warned = 1;
 					device_printf(sc->sc_dev,
 					    "%s: %s: mbuf should've been"
 					    " unmapped/freed!\n",
 					    __func__,
 					    dd->dd_name);
 				}
 				bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
 				m_freem(bf->bf_m);
 				bf->bf_m = NULL;
 			}
 
 			if (bf->bf_dmamap != NULL) {
 				bus_dmamap_destroy(sc->sc_dmat, bf->bf_dmamap);
 				bf->bf_dmamap = NULL;
 			}
 
 			/* Detach the node first, then reclaim the reference. */
 			node = bf->bf_node;
 			bf->bf_node = NULL;
 			if (node != NULL)
 				ieee80211_free_node(node);
 		}
 
 		/* Every entry lives in dd_bufptr; just empty the list. */
 		TAILQ_INIT(head);
 	}
 
 	if (dd->dd_bufptr != NULL)
 		free(dd->dd_bufptr, M_ATHDEV);
 	memset(dd, 0, sizeof(*dd));
 }
 
 /*
  * Allocate the three TX-side descriptor pools: data ("tx"), management
  * ("tx_mgmt") and beacon.  On success sc_txbuf_cnt reflects the size
  * of the data pool.  If a later pool fails, the pools set up before it
  * are torn down again and the error is returned.
  */
 static int
 ath_desc_alloc(struct ath_softc *sc)
 {
 	int rc;
 
 	/* Data frames. */
 	rc = ath_descdma_setup(sc, &sc->sc_txdma, &sc->sc_txbuf,
 	    "tx", sc->sc_tx_desclen, ath_txbuf, ATH_MAX_SCATTER);
 	if (rc != 0)
 		return (rc);
 	sc->sc_txbuf_cnt = ath_txbuf;
 
 	/* Management frames. */
 	rc = ath_descdma_setup(sc, &sc->sc_txdma_mgmt, &sc->sc_txbuf_mgmt,
 	    "tx_mgmt", sc->sc_tx_desclen, ath_txbuf_mgmt, ATH_TXDESC);
 	if (rc != 0) {
 		ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf);
 		return (rc);
 	}
 
 	/*
 	 * XXX mark txbuf_mgmt frames with ATH_BUF_MGMT, so the
 	 * flag doesn't have to be set in ath_getbuf_locked().
 	 */
 
 	/* Beacons. */
 	rc = ath_descdma_setup(sc, &sc->sc_bdma, &sc->sc_bbuf,
 	    "beacon", sc->sc_tx_desclen, ATH_BCBUF, 1);
 	if (rc != 0) {
 		ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf);
 		ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt,
 		    &sc->sc_txbuf_mgmt);
 		return (rc);
 	}
 
 	return (0);
 }
 
 static void
 ath_desc_free(struct ath_softc *sc)
 {
 
 	if (sc->sc_bdma.dd_desc_len != 0)
 		ath_descdma_cleanup(sc, &sc->sc_bdma, &sc->sc_bbuf);
 	if (sc->sc_txdma.dd_desc_len != 0)
 		ath_descdma_cleanup(sc, &sc->sc_txdma, &sc->sc_txbuf);
 	if (sc->sc_txdma_mgmt.dd_desc_len != 0)
 		ath_descdma_cleanup(sc, &sc->sc_txdma_mgmt,
 		    &sc->sc_txbuf_mgmt);
 }
 
 /*
  * Allocate a driver node: a zeroed struct ath_node followed by the
  * rate control module's per-node space (arc_space).  The rate control
  * state, the per-node mutex and the TID state are initialised before
  * the embedded net80211 node is returned.  Returns NULL if the
  * allocation fails.
  */
 static struct ieee80211_node *
 ath_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN])
 {
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 	const size_t nbytes = sizeof(struct ath_node) + sc->sc_rc->arc_space;
 	struct ath_node *an = malloc(nbytes, M_80211_NODE, M_NOWAIT|M_ZERO);
 
 	if (an == NULL) {
 		/* XXX stat+msg */
 		return NULL;
 	}
 
 	ath_rate_node_init(sc, an);
 
 	/*
 	 * Setup the mutex - there's no associd yet, so name it after
 	 * the device and the node pointer.
 	 */
 	snprintf(an->an_name, sizeof(an->an_name), "%s: node %p",
 	    device_get_nameunit(sc->sc_dev), an);
 	mtx_init(&an->an_mtx, an->an_name, NULL, MTX_DEF);
 
 	/* XXX setup ath_tid */
 	ath_tx_tid_init(sc, an);
 
 	DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__, mac, ":", an);
 
 	return &an->an_node;
 }
 
 /*
  * net80211 node cleanup hook: flush the node's driver TX state and
  * its rate control state, then chain to the saved net80211 handler.
  */
 static void
 ath_node_cleanup(struct ieee80211_node *ni)
 {
 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 
 	DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__,
 	    ni->ni_macaddr, ":", an);
 
 	/* Cleanup ath_tid, free unused bufs, unlink bufs in TXQ */
 	ath_tx_node_flush(sc, an);
 	ath_rate_node_cleanup(sc, an);
 
 	/* Hand over to the original net80211 method. */
 	sc->sc_node_cleanup(ni);
 }
 
 static void
 ath_node_free(struct ieee80211_node *ni)
 {
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 
 	DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: an %p\n", __func__,
 	    ni->ni_macaddr, ":", ATH_NODE(ni));
 	mtx_destroy(&ATH_NODE(ni)->an_mtx);
 	sc->sc_node_free(ni);
 }
 
 /*
  * Report the RSSI and noise floor for a node.  The noise value comes
  * from the HAL for the node's channel, or defaults to -95 when the
  * node has no specific channel (IEEE80211_CHAN_ANYC).
  */
 static void
 ath_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise)
 {
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ath_hal *ah = sc->sc_ah;
 
 	*rssi = ic->ic_node_getrssi(ni);
 
 	if (ni->ni_chan == IEEE80211_CHAN_ANYC) {
 		*noise = -95;		/* nominally correct */
 	} else {
 		*noise = ath_hal_getchannoise(ah, ni->ni_chan);
 	}
 }
 
 /*
  * Set the default antenna.
  *
  * Programs the HAL, counts the change in ast_ant_defswitch when the
  * antenna actually differs from the recorded one, and resets the
  * sc_rxotherant counter.
  */
 void
 ath_setdefantenna(struct ath_softc *sc, u_int antenna)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	const int changed = (sc->sc_defant != antenna);
 
 	/* XXX block beacon interrupts */
 	ath_hal_setdefantenna(ah, antenna);
 
 	if (changed)
 		sc->sc_stats.ast_ant_defswitch++;
 
 	sc->sc_defant = antenna;
 	sc->sc_rxotherant = 0;
 }
 
 /*
  * Reset the software state of a transmit queue and bind it to
  * hardware queue 'qnum' of 'sc'.
  */
 static void
 ath_txq_init(struct ath_softc *sc, struct ath_txq *txq, int qnum)
 {
 
 	/* Identity. */
 	txq->axq_softc = sc;
 	txq->axq_qnum = qnum;
 	txq->axq_ac = 0;
 
 	/* Counters and hardware link pointer. */
 	txq->axq_depth = 0;
 	txq->axq_aggr_depth = 0;
 	txq->axq_intrcnt = 0;
 	txq->axq_link = NULL;
 
 	/* Frame, TID and FIFO lists all start out empty. */
 	TAILQ_INIT(&txq->axq_q);
 	TAILQ_INIT(&txq->axq_tidq);
 	TAILQ_INIT(&txq->fifo.axq_q);
 
 	ATH_TXQ_LOCK_INIT(sc, txq);
 }
 
 /*
  * Setup a h/w transmit queue.
  *
  * Asks the HAL for a queue of type 'qtype' / subtype 'subtype' and
  * returns the matching sc_txq entry, initialising it on first use.
  * Returns NULL if the HAL has no queue to offer or hands back a queue
  * number outside sc_txq (the queue is released again in that case).
  */
 static struct ath_txq *
 ath_txq_setup(struct ath_softc *sc, int qtype, int subtype)
 {
 #define	N(a)	(sizeof(a)/sizeof(a[0]))
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_txq *txq;
 	HAL_TXQ_INFO qinfo;
 	int qnum;
 
 	memset(&qinfo, 0, sizeof(qinfo));
 	qinfo.tqi_subtype = subtype;
 	qinfo.tqi_aifs = HAL_TXQ_USEDEFAULT;
 	qinfo.tqi_cwmin = HAL_TXQ_USEDEFAULT;
 	qinfo.tqi_cwmax = HAL_TXQ_USEDEFAULT;
 
 	/*
 	 * Enable interrupts only for EOL and DESC conditions.
 	 * We mark tx descriptors to receive a DESC interrupt
 	 * when a tx queue gets deep; otherwise waiting for the
 	 * EOL to reap descriptors.  Note that this is done to
 	 * reduce interrupt load and this only defers reaping
 	 * descriptors, never transmitting frames.  Aside from
 	 * reducing interrupts this also permits more concurrency.
 	 * The only potential downside is if the tx queue backs
 	 * up in which case the top half of the kernel may backup
 	 * due to a lack of tx descriptors.
 	 *
 	 * EDMA parts get the TXOK interrupt in place of DESC.
 	 */
 	qinfo.tqi_qflags = HAL_TXQ_TXEOLINT_ENABLE |
 	    (sc->sc_isedma ? HAL_TXQ_TXOKINT_ENABLE :
 	    HAL_TXQ_TXDESCINT_ENABLE);
 
 	qnum = ath_hal_setuptxqueue(ah, qtype, &qinfo);
 	if (qnum == -1) {
 		/*
 		 * NB: don't print a message, this happens
 		 * normally on parts with too few tx queues
 		 */
 		return NULL;
 	}
 	if (qnum >= N(sc->sc_txq)) {
 		device_printf(sc->sc_dev,
 			"hal qnum %u out of range, max %zu!\n",
 			qnum, N(sc->sc_txq));
 		ath_hal_releasetxqueue(ah, qnum);
 		return NULL;
 	}
 
 	/* The HAL may hand back a queue we already set up. */
 	txq = &sc->sc_txq[qnum];
 	if (!ATH_TXQ_SETUP(sc, qnum)) {
 		ath_txq_init(sc, txq, qnum);
 		sc->sc_txqsetup |= 1<<qnum;
 	}
 	return txq;
 #undef N
 }
 
 /*
  * Setup a hardware data transmit queue for the specified
  * access control.  The hal may not support all requested
  * queues in which case it will return a reference to a
  * previously setup queue.  We record the mapping from ac's
  * to h/w queues for use by ath_tx_start and also track
  * the set of h/w queues being used to optimize work in the
  * transmit interrupt handler and related routines.
  *
  * Returns 1 when a queue was obtained and recorded for 'ac',
  * 0 when 'ac' is out of range or no queue could be set up.
  */
 static int
 ath_tx_setup(struct ath_softc *sc, int ac, int haltype)
 {
 #define	N(a)	(sizeof(a)/sizeof(a[0]))
 	struct ath_txq *q;
 
 	if (ac >= N(sc->sc_ac2q)) {
 		device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n",
 			ac, N(sc->sc_ac2q));
 		return 0;
 	}
 
 	q = ath_txq_setup(sc, HAL_TX_QUEUE_DATA, haltype);
 	if (q == NULL)
 		return 0;
 
 	/* Record the AC <-> hardware queue mapping in both directions. */
 	q->axq_ac = ac;
 	sc->sc_ac2q[ac] = q;
 	return 1;
 #undef N
 }
 
 /*
  * Update WME parameters for a transmit queue.
  *
  * Reads the current properties of the hardware queue mapped to 'ac',
  * overrides the queue flags, AIFS and ready/burst times (plus cwmin
  * and cwmax in the non-TDMA case) and writes the result back.  With
  * IEEE80211_SUPPORT_TDMA compiled in and sc_tdma set the values come
  * from the TDMA slot configuration; otherwise they come from the
  * net80211 channel WME parameters for 'ac'.
  *
  * Returns 1 if the HAL accepted the properties (the queue is then
  * reset to push them to the hardware), 0 otherwise.
  */
 static int
 ath_txq_update(struct ath_softc *sc, int ac)
 {
 /* Contention windows are stored as exponents: value = 2^v - 1. */
 #define	ATH_EXPONENT_TO_VALUE(v)	((1<<v)-1)
 /* Scales the TXOP limit by 32 (v << 5). */
 #define	ATH_TXOP_TO_US(v)		(v<<5)
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_txq *txq = sc->sc_ac2q[ac];
 	struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac];
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_TXQ_INFO qi;
 
 	/* Start from what the hardware queue currently has. */
 	ath_hal_gettxqueueprops(ah, txq->axq_qnum, &qi);
 #ifdef IEEE80211_SUPPORT_TDMA
 	if (sc->sc_tdma) {
 		/*
 		 * AIFS is zero so there's no pre-transmit wait.  The
 		 * burst time defines the slot duration and is configured
 		 * through net80211.  The QCU is setup to not do post-xmit
 		 * back off, lockout all lower-priority QCU's, and fire
 		 * off the DMA beacon alert timer which is setup based
 		 * on the slot configuration.
 		 */
 		qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE
 			      | HAL_TXQ_TXERRINT_ENABLE
 			      | HAL_TXQ_TXURNINT_ENABLE
 			      | HAL_TXQ_TXEOLINT_ENABLE
 			      | HAL_TXQ_DBA_GATED
 			      | HAL_TXQ_BACKOFF_DISABLE
 			      | HAL_TXQ_ARB_LOCKOUT_GLOBAL
 			      ;
 		qi.tqi_aifs = 0;
 		/* XXX +dbaprep? */
 		qi.tqi_readyTime = sc->sc_tdmaslotlen;
 		qi.tqi_burstTime = qi.tqi_readyTime;
 	} else {
 #endif
 		/*
 		 * XXX shouldn't this just use the default flags
 		 * used in the previous queue setup?
 		 */
 		qi.tqi_qflags = HAL_TXQ_TXOKINT_ENABLE
 			      | HAL_TXQ_TXERRINT_ENABLE
 			      | HAL_TXQ_TXDESCINT_ENABLE
 			      | HAL_TXQ_TXURNINT_ENABLE
 			      | HAL_TXQ_TXEOLINT_ENABLE
 			      ;
 		qi.tqi_aifs = wmep->wmep_aifsn;
 		qi.tqi_cwmin = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmin);
 		qi.tqi_cwmax = ATH_EXPONENT_TO_VALUE(wmep->wmep_logcwmax);
 		qi.tqi_readyTime = 0;
 		qi.tqi_burstTime = ATH_TXOP_TO_US(wmep->wmep_txopLimit);
 #ifdef IEEE80211_SUPPORT_TDMA
 	}
 #endif
 
 	DPRINTF(sc, ATH_DEBUG_RESET,
 	    "%s: Q%u qflags 0x%x aifs %u cwmin %u cwmax %u burstTime %u\n",
 	    __func__, txq->axq_qnum, qi.tqi_qflags,
 	    qi.tqi_aifs, qi.tqi_cwmin, qi.tqi_cwmax, qi.tqi_burstTime);
 
 	if (!ath_hal_settxqueueprops(ah, txq->axq_qnum, &qi)) {
-		if_printf(ifp, "unable to update hardware queue "
-			"parameters for %s traffic!\n",
-			ieee80211_wme_acnames[ac]);
+		device_printf(sc->sc_dev, "unable to update hardware queue "
+		    "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]);
 		return 0;
 	} else {
 		ath_hal_resettxqueue(ah, txq->axq_qnum); /* push to h/w */
 		return 1;
 	}
 #undef ATH_TXOP_TO_US
 #undef ATH_EXPONENT_TO_VALUE
 }
 
 /*
  * Callback from the 802.11 layer to update WME parameters.
  *
  * Updates the BE, BK, VI and VO queues in that order and stops at the
  * first failure.  Returns 0 if every update succeeded, EIO otherwise.
  */
 int
 ath_wme_update(struct ieee80211com *ic)
 {
 	static const int acs[] = {
 		WME_AC_BE, WME_AC_BK, WME_AC_VI, WME_AC_VO
 	};
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	unsigned int i;
 
 	for (i = 0; i < sizeof(acs) / sizeof(acs[0]); i++) {
 		if (!ath_txq_update(sc, acs[i]))
 			return EIO;
 	}
 	return 0;
 }
 
 /*
  * Reclaim resources for a setup queue.
  *
  * Releases the hardware queue, clears its bit in sc_txqsetup and
  * destroys the queue lock.
  */
 static void
 ath_tx_cleanupq(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_hal *ah = sc->sc_ah;
 
 	ath_hal_releasetxqueue(ah, txq->axq_qnum);
 
 	/* The queue no longer counts as set up. */
 	sc->sc_txqsetup &= ~(1<<txq->axq_qnum);
 
 	ATH_TXQ_LOCK_DESTROY(txq);
 }
 
 /*
  * Reclaim all tx queue resources.
  *
  * Destroys the TX buffer lock, then cleans up every hardware queue
  * that is marked as set up.
  */
 static void
 ath_tx_cleanup(struct ath_softc *sc)
 {
 	int qnum;
 
 	ATH_TXBUF_LOCK_DESTROY(sc);
 
 	for (qnum = 0; qnum < HAL_NUM_TX_QUEUES; qnum++) {
 		if (!ATH_TXQ_SETUP(sc, qnum))
 			continue;
 		ath_tx_cleanupq(sc, &sc->sc_txq[qnum]);
 	}
 }
 
 /*
  * Return h/w rate index for an IEEE rate (w/o basic rate bit)
  * using the current rates in sc_rixmap.
  *
  * An sc_rixmap entry of 0xff marks an invalid rate; index 0 is
  * returned for those.
  */
 int
 ath_tx_findrix(const struct ath_softc *sc, uint8_t rate)
 {
 	const int rix = sc->sc_rixmap[rate];
 
 	/* NB: return lowest rix for invalid rate */
 	if (rix == 0xff)
 		return (0);
 	return (rix);
 }
 
 static void
 ath_tx_update_stats(struct ath_softc *sc, struct ath_tx_status *ts,
     struct ath_buf *bf)
 {
 	struct ieee80211_node *ni = bf->bf_node;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	int sr, lr, pri;
 
 	if (ts->ts_status == 0) {
 		u_int8_t txant = ts->ts_antenna;
 		sc->sc_stats.ast_ant_tx[txant]++;
 		sc->sc_ant_tx[txant]++;
 		if (ts->ts_finaltsi != 0)
 			sc->sc_stats.ast_tx_altrate++;
 		pri = M_WME_GETAC(bf->bf_m);
 		if (pri >= WME_AC_VO)
 			ic->ic_wme.wme_hipri_traffic++;
 		if ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)
 			ni->ni_inact = ni->ni_inact_reload;
 	} else {
 		if (ts->ts_status & HAL_TXERR_XRETRY)
 			sc->sc_stats.ast_tx_xretries++;
 		if (ts->ts_status & HAL_TXERR_FIFO)
 			sc->sc_stats.ast_tx_fifoerr++;
 		if (ts->ts_status & HAL_TXERR_FILT)
 			sc->sc_stats.ast_tx_filtered++;
 		if (ts->ts_status & HAL_TXERR_XTXOP)
 			sc->sc_stats.ast_tx_xtxop++;
 		if (ts->ts_status & HAL_TXERR_TIMER_EXPIRED)
 			sc->sc_stats.ast_tx_timerexpired++;
 
 		if (bf->bf_m->m_flags & M_FF)
 			sc->sc_stats.ast_ff_txerr++;
 	}
 	/* XXX when is this valid? */
 	if (ts->ts_flags & HAL_TX_DESC_CFG_ERR)
 		sc->sc_stats.ast_tx_desccfgerr++;
 	/*
 	 * This can be valid for successful frame transmission!
 	 * If there's a TX FIFO underrun during aggregate transmission,
 	 * the MAC will pad the rest of the aggregate with delimiters.
 	 * If a BA is returned, the frame is marked as "OK" and it's up
 	 * to the TX completion code to notice which frames weren't
 	 * successfully transmitted.
 	 */
 	if (ts->ts_flags & HAL_TX_DATA_UNDERRUN)
 		sc->sc_stats.ast_tx_data_underrun++;
 	if (ts->ts_flags & HAL_TX_DELIM_UNDERRUN)
 		sc->sc_stats.ast_tx_delim_underrun++;
 
 	sr = ts->ts_shortretry;
 	lr = ts->ts_longretry;
 	sc->sc_stats.ast_tx_shortretry += sr;
 	sc->sc_stats.ast_tx_longretry += lr;
 
 }
 
 /*
  * The default completion.  If fail is 1, this means
  * "please don't retry the frame, and just return -1 status
  * to the net80211 stack".
  *
  * Otherwise the status handed to net80211 is the hardware TX status,
  * or HAL_TXERR_XRETRY for frames that were sent with NOACK set.
  */
 void
 ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
 {
 	struct ath_tx_status *ts = &bf->bf_status.ds_txstat;
 	int st;
 
 	if (fail == 1) {
 		st = -1;
 	} else if ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) != 0) {
 		st = HAL_TXERR_XRETRY;
 	} else {
 		st = ts->ts_status;
 	}
 
 #if 0
 	if (bf->bf_state.bfs_dobaw)
 		device_printf(sc->sc_dev,
 		    "%s: bf %p: seqno %d: dobaw should've been cleared!\n",
 		    __func__,
 		    bf,
 		    SEQNO(bf->bf_state.bfs_seqno));
 #endif
 	/* A buffer completed here must not be chained to another one. */
 	if (bf->bf_next != NULL) {
 		device_printf(sc->sc_dev,
 		    "%s: bf %p: seqno %d: bf_next not NULL!\n",
 		    __func__,
 		    bf,
 		    SEQNO(bf->bf_state.bfs_seqno));
 	}
 
 	/*
 	 * Check if the node software queue is empty; if so
 	 * then clear the TIM.
 	 *
 	 * This needs to be done before the buffer is freed as
 	 * otherwise the node reference will have been released
 	 * and the node may not actually exist any longer.
 	 *
 	 * XXX I don't like this belonging here, but it's cleaner
 	 * to do it here right now than in all the other places
 	 * where ath_tx_default_comp() is called.
 	 *
 	 * XXX TODO: during drain, ensure that the callback is
 	 * being called so we get a chance to update the TIM.
 	 */
 	if (bf->bf_node != NULL) {
 		ATH_TX_LOCK(sc);
 		ath_tx_update_tim(sc, bf->bf_node, 0);
 		ATH_TX_UNLOCK(sc);
 	}
 
 	/*
 	 * Do any tx complete callback.  Note this must
 	 * be done before releasing the node reference.
 	 * This will free the mbuf, release the net80211
 	 * node and recycle the ath_buf.
 	 */
 	ath_tx_freebuf(sc, bf, st);
 }
 
 /*
  * Update rate control with the given completion status.
  *
  * Nothing is done when there is no node (non-unicast frames) or when
  * the frame was filtered (HAL_TXERR_FILT).  The node lock must not be
  * held by the caller; it is taken around the rate control call.
  */
 void
 ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni,
     struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen,
     int nframes, int nbad)
 {
 	struct ath_node *an;
 
 	/* Only for unicast frames */
 	if (ni == NULL)
 		return;
 
 	an = ATH_NODE(ni);
 	ATH_NODE_UNLOCK_ASSERT(an);
 
 	/* Filtered frames carry no rate control information. */
 	if ((ts->ts_status & HAL_TXERR_FILT) != 0)
 		return;
 
 	ATH_NODE_LOCK(an);
 	ath_rate_tx_complete(sc, an, rc, ts, frmlen, nframes, nbad);
 	ATH_NODE_UNLOCK(an);
 }
 
 /*
  * Process the completion of the given buffer.
  *
  * This calls the rate control update and then the buffer completion.
  * This will either free the buffer or requeue it.  In any case, the
  * bf pointer should be treated as invalid after this function is called.
  *
  * Neither the TX lock nor the queue lock may be held by the caller.
  */
 void
 ath_tx_process_buf_completion(struct ath_softc *sc, struct ath_txq *txq,
     struct ath_tx_status *ts, struct ath_buf *bf)
 {
 	struct ieee80211_node *ni = bf->bf_node;
 	int acked_frame;
 
 	ATH_TX_UNLOCK_ASSERT(sc);
 	ATH_TXQ_UNLOCK_ASSERT(txq);
 
 	/* If unicast frame, update general statistics */
 	if (ni != NULL)
 		ath_tx_update_stats(sc, ts, bf);
 
 	/*
 	 * Call the completion handler.
 	 * The completion handler is responsible for
 	 * calling the rate control code.
 	 */
 	if (bf->bf_comp != NULL) {
 		bf->bf_comp(sc, bf, 0);
 		return;
 	}
 
 	/*
 	 * Frames with no completion handler get the
 	 * rate control code called here, unless they were
 	 * filtered or sent without expecting an ACK.
 	 */
 	acked_frame = (ts->ts_status & HAL_TXERR_FILT) == 0 &&
 	    (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0;
 	if (acked_frame) {
 		/*
 		 * XXX assume this isn't an aggregate
 		 * frame.
 		 */
 		ath_tx_update_ratectrl(sc, ni,
 		    bf->bf_state.bfs_rc, ts,
 		    bf->bf_state.bfs_pktlen, 1,
 		    (ts->ts_status != 0));
 	}
 	ath_tx_default_comp(sc, bf, 0);
 }
 
 
 
 /*
  * Process completed xmit descriptors from the specified queue.
  * Kick the packet scheduler if needed. This can occur from this
  * particular task.
  *
  * Buffers are taken off the head of the queue one at a time, under the
  * queue lock, until the queue is empty or the HAL reports that the
  * head descriptor is still in progress.  Each completed buffer is then
  * handed to ath_tx_process_buf_completion() with the queue lock
  * dropped.
  *
  * If 'dosched' is non-zero the software TX scheduler is run for this
  * queue afterwards.
  *
  * Returns the number of frames that had a node, completed with status
  * 0 and were not sent NOACK.
  */
 static int
 ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_buf *bf;
 	struct ath_desc *ds;
 	struct ath_tx_status *ts;
 	struct ieee80211_node *ni;
 #ifdef	IEEE80211_SUPPORT_SUPERG
 	struct ieee80211com *ic = sc->sc_ifp->if_l2com;
 #endif	/* IEEE80211_SUPPORT_SUPERG */
 	int nacked;
 	HAL_STATUS status;
 
 	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: tx queue %u head %p link %p\n",
 		__func__, txq->axq_qnum,
 		(caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum),
 		txq->axq_link);
 
 	/* NB: axq_depth is a counter, not a pointer; trace it as a number. */
 	ATH_KTR(sc, ATH_KTR_TXCOMP, 4,
 	    "ath_tx_processq: txq=%u head %p link %p depth %d",
 	    txq->axq_qnum,
 	    (caddr_t)(uintptr_t) ath_hal_gettxbuf(sc->sc_ah, txq->axq_qnum),
 	    txq->axq_link,
 	    txq->axq_depth);
 
 	nacked = 0;
 	for (;;) {
 		ATH_TXQ_LOCK(txq);
 		txq->axq_intrcnt = 0;	/* reset periodic desc intr count */
 		bf = TAILQ_FIRST(&txq->axq_q);
 		if (bf == NULL) {
 			ATH_TXQ_UNLOCK(txq);
 			break;
 		}
 		ds = bf->bf_lastds;	/* XXX must be setup correctly! */
 		ts = &bf->bf_status.ds_txstat;
 
 		status = ath_hal_txprocdesc(ah, ds, ts);
 #ifdef ATH_DEBUG
 		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
 			ath_printtxbuf(sc, bf, txq->axq_qnum, 0,
 			    status == HAL_OK);
 		else if ((sc->sc_debug & ATH_DEBUG_RESET) && (dosched == 0))
 			ath_printtxbuf(sc, bf, txq->axq_qnum, 0,
 			    status == HAL_OK);
 #endif
 #ifdef	ATH_DEBUG_ALQ
 		if (if_ath_alq_checkdebug(&sc->sc_alq,
 		    ATH_ALQ_EDMA_TXSTATUS)) {
 			if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
 			sc->sc_tx_statuslen,
 			(char *) ds);
 		}
 #endif
 
 		/* The head of the queue isn't done yet; stop here. */
 		if (status == HAL_EINPROGRESS) {
 			ATH_KTR(sc, ATH_KTR_TXCOMP, 3,
 			    "ath_tx_processq: txq=%u, bf=%p ds=%p, HAL_EINPROGRESS",
 			    txq->axq_qnum, bf, ds);
 			ATH_TXQ_UNLOCK(txq);
 			break;
 		}
 		ATH_TXQ_REMOVE(txq, bf, bf_list);
 
 		/*
 		 * Sanity check: both the first and the last buffer of
 		 * the frame should be tagged with this queue's number.
 		 */
 		if (txq->axq_qnum != bf->bf_state.bfs_tx_queue) {
 			device_printf(sc->sc_dev,
 			    "%s: TXQ=%d: bf=%p, bfs_tx_queue=%d\n",
 			    __func__,
 			    txq->axq_qnum,
 			    bf,
 			    bf->bf_state.bfs_tx_queue);
 		}
 		if (txq->axq_qnum != bf->bf_last->bf_state.bfs_tx_queue) {
 			device_printf(sc->sc_dev,
 			    "%s: TXQ=%d: bf_last=%p, bfs_tx_queue=%d\n",
 			    __func__,
 			    txq->axq_qnum,
 			    bf->bf_last,
 			    bf->bf_last->bf_state.bfs_tx_queue);
 		}
 
 #if 0
 		if (txq->axq_depth > 0) {
 			/*
 			 * More frames follow.  Mark the buffer busy
 			 * so it's not re-used while the hardware may
 			 * still re-read the link field in the descriptor.
 			 *
 			 * Use the last buffer in an aggregate as that
 			 * is where the hardware may be - intermediate
 			 * descriptors won't be "busy".
 			 */
 			bf->bf_last->bf_flags |= ATH_BUF_BUSY;
 		} else
 			txq->axq_link = NULL;
 #else
 		/* Always mark the last buffer busy. */
 		bf->bf_last->bf_flags |= ATH_BUF_BUSY;
 #endif
 		if (bf->bf_state.bfs_aggr)
 			txq->axq_aggr_depth--;
 
 		ni = bf->bf_node;
 
 		ATH_KTR(sc, ATH_KTR_TXCOMP, 5,
 		    "ath_tx_processq: txq=%u, bf=%p, ds=%p, ni=%p, ts_status=0x%08x",
 		    txq->axq_qnum, bf, ds, ni, ts->ts_status);
 		/*
 		 * If unicast frame was ack'd update RSSI,
 		 * including the last rx time used to
 		 * workaround phantom bmiss interrupts.
 		 */
 		if (ni != NULL && ts->ts_status == 0 &&
 		    ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
 			nacked++;
 			sc->sc_stats.ast_tx_rssi = ts->ts_rssi;
 			ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
 				ts->ts_rssi);
 		}
 		ATH_TXQ_UNLOCK(txq);
 
 		/*
 		 * Update statistics and call completion
 		 */
 		ath_tx_process_buf_completion(sc, txq, ts, bf);
 
 		/* XXX at this point, bf and ni may be totally invalid */
 	}
 #ifdef IEEE80211_SUPPORT_SUPERG
 	/*
 	 * Flush fast-frame staging queue when traffic slows.
 	 */
 	if (txq->axq_depth <= 1)
 		ieee80211_ff_flush(ic, txq->axq_ac);
 #endif
 
 	/* Kick the software TXQ scheduler */
 	if (dosched) {
 		ATH_TX_LOCK(sc);
 		ath_txq_sched(sc, txq);
 		ATH_TX_UNLOCK(sc);
 	}
 
 	ATH_KTR(sc, ATH_KTR_TXCOMP, 1,
 	    "ath_tx_processq: txq=%u: done",
 	    txq->axq_qnum);
 
 	return nacked;
 }
 
 /* Non-zero when bit 'q' is set in the active-queue mask 't'. */
 #define	TXQACTIVE(t, q)		( (t) & (1 << (q)))
 
 /*
  * Deferred processing of transmit interrupt; special-cased
  * for a single hardware transmit queue (e.g. 5210 and 5211).
  *
  * Handles queue 0 and the CAB queue, then clears IFF_DRV_OACTIVE and
  * the watchdog timer and kicks transmission again.
  */
 static void
 ath_tx_proc_q0(void *arg, int npending)
 {
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t active;
 
 	/* Note we are running and claim the pending queue mask. */
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt++;
 	active = sc->sc_txq_active;
 	sc->sc_txq_active &= ~active;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Keep the chip awake while we work. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_KTR(sc, ATH_KTR_TXCOMP, 1,
 	    "ath_tx_proc_q0: txqs=0x%08x", active);
 
 	if (TXQACTIVE(active, 0)) {
 		if (ath_tx_processq(sc, &sc->sc_txq[0], 1)) {
 			/* XXX why is lastrx updated in tx code? */
 			sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah);
 		}
 	}
 	if (TXQACTIVE(active, sc->sc_cabq->axq_qnum)) {
 		ath_tx_processq(sc, sc->sc_cabq, 1);
 	}
 
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 	sc->sc_wd_timer = 0;
 
 	if (sc->sc_softled) {
 		ath_led_event(sc, sc->sc_txrix);
 	}
 
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	ath_tx_kick(sc);
 }
 
 /*
  * Deferred processing of transmit interrupt; special-cased
  * for four hardware queues, 0-3 (e.g. 5212 w/ WME support).
  *
  * The CAB queue is processed as well, but its completions do not
  * count towards the "acked" total that refreshes sc_lastrx.
  */
 static void
 ath_tx_proc_q0123(void *arg, int npending)
 {
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t active;
 	int acked, q;
 
 	/* Note we are running and claim the pending queue mask. */
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt++;
 	active = sc->sc_txq_active;
 	sc->sc_txq_active &= ~active;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Keep the chip awake while we work. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_KTR(sc, ATH_KTR_TXCOMP, 1,
 	    "ath_tx_proc_q0123: txqs=0x%08x", active);
 
 	/*
 	 * Process each active queue.
 	 */
 	acked = 0;
 	for (q = 0; q <= 3; q++) {
 		if (TXQACTIVE(active, q))
 			acked += ath_tx_processq(sc, &sc->sc_txq[q], 1);
 	}
 	if (TXQACTIVE(active, sc->sc_cabq->axq_qnum)) {
 		ath_tx_processq(sc, sc->sc_cabq, 1);
 	}
 	if (acked != 0) {
 		sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah);
 	}
 
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 	sc->sc_wd_timer = 0;
 
 	if (sc->sc_softled) {
 		ath_led_event(sc, sc->sc_txrix);
 	}
 
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	ath_tx_kick(sc);
 }
 
 /*
  * Deferred processing of transmit interrupt.
  *
  * General form: every hardware queue that is both set up and flagged
  * active is processed.
  */
 static void
 ath_tx_proc(void *arg, int npending)
 {
 	struct ath_softc *sc = arg;
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t active;
 	int acked, q;
 
 	/* Note we are running and claim the pending queue mask. */
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt++;
 	active = sc->sc_txq_active;
 	sc->sc_txq_active &= ~active;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Keep the chip awake while we work. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_KTR(sc, ATH_KTR_TXCOMP, 1, "ath_tx_proc: txqs=0x%08x", active);
 
 	/*
 	 * Process each active queue.
 	 */
 	acked = 0;
 	for (q = 0; q < HAL_NUM_TX_QUEUES; q++) {
 		if (!ATH_TXQ_SETUP(sc, q))
 			continue;
 		if (!TXQACTIVE(active, q))
 			continue;
 		acked += ath_tx_processq(sc, &sc->sc_txq[q], 1);
 	}
 	if (acked != 0) {
 		sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah);
 	}
 
 	/* XXX check this inside of IF_LOCK? */
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 	sc->sc_wd_timer = 0;
 
 	if (sc->sc_softled) {
 		ath_led_event(sc, sc->sc_txrix);
 	}
 
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	ath_tx_kick(sc);
 }
 #undef	TXQACTIVE
 
 /*
  * Deferred processing of TXQ rescheduling.
  *
  * Runs the software TX scheduler over every set-up hardware queue
  * with the TX lock held and the chip forced awake.
  */
 static void
 ath_txq_sched_tasklet(void *arg, int npending)
 {
 	struct ath_softc *sc = arg;
 	int q;
 
 	/* XXX is skipping ok? */
 	ATH_PCU_LOCK(sc);
 #if 0
 	if (sc->sc_inreset_cnt > 0) {
 		device_printf(sc->sc_dev,
 		    "%s: sc_inreset_cnt > 0; skipping\n", __func__);
 		ATH_PCU_UNLOCK(sc);
 		return;
 	}
 #endif
 	sc->sc_txproc_cnt++;
 	ATH_PCU_UNLOCK(sc);
 
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_TX_LOCK(sc);
 	for (q = 0; q < HAL_NUM_TX_QUEUES; q++) {
 		if (!ATH_TXQ_SETUP(sc, q))
 			continue;
 		ath_txq_sched(sc, &sc->sc_txq[q]);
 	}
 	ATH_TX_UNLOCK(sc);
 
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	ATH_PCU_LOCK(sc);
 	sc->sc_txproc_cnt--;
 	ATH_PCU_UNLOCK(sc);
 }
 
 /*
  * Return a buffer to the tail of its free list.
  *
  * Management buffers (ATH_BUF_MGMT) go back to sc_txbuf_mgmt; all
  * others go back to sc_txbuf and are counted in sc_txbuf_cnt, which
  * is clamped to ath_txbuf with a warning if it ever exceeds it.
  *
  * The TX buffer lock must be held.
  */
 void
 ath_returnbuf_tail(struct ath_softc *sc, struct ath_buf *bf)
 {
 
 	ATH_TXBUF_LOCK_ASSERT(sc);
 
 	if (bf->bf_flags & ATH_BUF_MGMT) {
 		TAILQ_INSERT_TAIL(&sc->sc_txbuf_mgmt, bf, bf_list);
 		return;
 	}
 
 	TAILQ_INSERT_TAIL(&sc->sc_txbuf, bf, bf_list);
 	sc->sc_txbuf_cnt++;
 	if (sc->sc_txbuf_cnt <= ath_txbuf)
 		return;
 
 	/* More buffers free than exist: accounting went wrong. */
 	device_printf(sc->sc_dev,
 	    "%s: sc_txbuf_cnt > %d?\n",
 	    __func__,
 	    ath_txbuf);
 	sc->sc_txbuf_cnt = ath_txbuf;
 }
 
 void
 ath_returnbuf_head(struct ath_softc *sc, struct ath_buf *bf)
 {
 
 	ATH_TXBUF_LOCK_ASSERT(sc);
 
 	if (bf->bf_flags & ATH_BUF_MGMT)
 		TAILQ_INSERT_HEAD(&sc->sc_txbuf_mgmt, bf, bf_list);
 	else {
 		TAILQ_INSERT_HEAD(&sc->sc_txbuf, bf, bf_list);
 		sc->sc_txbuf_cnt++;
 		if (sc->sc_txbuf_cnt > ATH_TXBUF) {
 			device_printf(sc->sc_dev,
 			    "%s: sc_txbuf_cnt > %d?\n",
 			    __func__,
 			    ATH_TXBUF);
 			sc->sc_txbuf_cnt = ATH_TXBUF;
 		}
 	}
 }
 
 /*
  * Free the holding buffer if it exists
  */
 void
 ath_txq_freeholdingbuf(struct ath_softc *sc, struct ath_txq *txq)
 {
 	ATH_TXBUF_UNLOCK_ASSERT(sc);
 	ATH_TXQ_LOCK_ASSERT(txq);
 
 	if (txq->axq_holdingbf == NULL)
 		return;
 
 	txq->axq_holdingbf->bf_flags &= ~ATH_BUF_BUSY;
 
 	ATH_TXBUF_LOCK(sc);
 	ath_returnbuf_tail(sc, txq->axq_holdingbf);
 	ATH_TXBUF_UNLOCK(sc);
 
 	txq->axq_holdingbf = NULL;
 }
 
 /*
  * Add this buffer to the holding queue, freeing the previous
  * one if it exists.
  *
  * The queue is selected by bf->bf_state.bfs_tx_queue.  If that value
  * does not name a valid sc_txq entry the buffer is not held at all:
  * its BUSY flag is cleared and it goes straight back to the free list.
  *
  * Called with the TX buffer lock not held and the lock of the target
  * queue held.
  */
 static void
 ath_txq_addholdingbuf(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ath_txq *txq;
 
 	ATH_TXBUF_UNLOCK_ASSERT(sc);
 
 	/* XXX assert ATH_BUF_BUSY is set */
 
 	/*
 	 * Validate the queue number before using it as an index;
 	 * valid entries are 0 .. HAL_NUM_TX_QUEUES - 1.
 	 */
 	if (bf->bf_state.bfs_tx_queue >= HAL_NUM_TX_QUEUES) {
 		device_printf(sc->sc_dev, "%s: bf=%p: invalid tx queue (%d)\n",
 		    __func__,
 		    bf,
 		    bf->bf_state.bfs_tx_queue);
 		bf->bf_flags &= ~ATH_BUF_BUSY;
 		/* ath_returnbuf_tail() requires the TX buffer lock. */
 		ATH_TXBUF_LOCK(sc);
 		ath_returnbuf_tail(sc, bf);
 		ATH_TXBUF_UNLOCK(sc);
 		return;
 	}
 
 	txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue];
 	ATH_TXQ_LOCK_ASSERT(txq);
 
 	/* Release whatever was held before and take its place. */
 	ath_txq_freeholdingbuf(sc, txq);
 	txq->axq_holdingbf = bf;
 }
 
 /*
  * Return a buffer to the pool and update the 'busy' flag on the
  * previous 'tail' entry.
  *
  * This _must_ only be called when the buffer is involved in a completed
  * TX. The logic is that if it was part of an active TX, the previous
  * buffer on the list is now not involved in a halted TX DMA queue, waiting
  * for restart (eg for TDMA.)
  *
  * The caller must free the mbuf and recycle the node reference.
  *
  * XXX This method of handling busy / holding buffers is insanely stupid.
  * It requires bf_state.bfs_tx_queue to be correctly assigned.  It would
  * be much nicer if buffers in the processq() methods would instead be
  * always completed there (pushed onto a txq or ath_bufhead) so we knew
  * exactly what hardware queue they came from in the first place.
  */
 void
 ath_freebuf(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ath_txq *txq;
 
 	KASSERT((bf->bf_node == NULL), ("%s: bf->bf_node != NULL\n", __func__));
 	KASSERT((bf->bf_m == NULL), ("%s: bf->bf_m != NULL\n", __func__));
 
 	/*
 	 * Not a busy buffer, so free normally
 	 */
 	if ((bf->bf_flags & ATH_BUF_BUSY) == 0) {
 		ATH_TXBUF_LOCK(sc);
 		ath_returnbuf_tail(sc, bf);
 		ATH_TXBUF_UNLOCK(sc);
 		return;
 	}
 
 	/*
 	 * This buffer is busy, so push it onto the holding queue of
 	 * the hardware queue it was transmitted on.
 	 */
 	txq = &sc->sc_txq[bf->bf_state.bfs_tx_queue];
 	ATH_TXQ_LOCK(txq);
 	ath_txq_addholdingbuf(sc, bf);
 	ATH_TXQ_UNLOCK(txq);
 }
 
 /*
  * This is currently used by ath_tx_draintxq() and
  * ath_tx_tid_free_pkts().
  *
  * It recycles a single ath_buf: the DMA mapping is torn down, the
  * ath_buf is released, and the mbuf and node reference it carried are
  * handed to net80211 together with 'status'.
  */
 void
 ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status)
 {
 	struct mbuf *m = bf->bf_m;
 	struct ieee80211_node *ni = bf->bf_node;
 
 	/*
 	 * Make sure that we only sync/unload if there's an mbuf.
 	 * If not (eg we cloned a buffer), the unload will have already
 	 * occurred.
 	 */
 	if (m != NULL) {
 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
 		    BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
 	}
 
 	/* Detach both from the buffer; ath_freebuf() insists on it. */
 	bf->bf_m = NULL;
 	bf->bf_node = NULL;
 
 	/* Free the buffer, it's not needed any longer */
 	ath_freebuf(sc, bf);
 
 	/* Pass the buffer back to net80211 - completing it */
 	ieee80211_tx_complete(ni, m, status);
 }
 
 /*
  * Dequeue the next buffer to drain from 'txq', or return NULL when
  * nothing is left.  Buffers on the FIFO list are handed out before
  * those on the normal frame list.  The queue lock must be held.
  */
 static struct ath_buf *
 ath_tx_draintxq_get_one(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_buf *bf;
 
 	ATH_TXQ_LOCK_ASSERT(txq);
 
 	/*
 	 * Drain the FIFO queue first, then if it's
 	 * empty, move to the normal frame queue.
 	 */
 	bf = TAILQ_FIRST(&txq->fifo.axq_q);
 	if (bf == NULL) {
 		/*
 		 * Debugging!  The FIFO list is empty, so both FIFO
 		 * counters ought to be zero too.
 		 */
 		if (txq->axq_fifo_depth != 0 || txq->fifo.axq_depth != 0) {
 			device_printf(sc->sc_dev,
 			    "%s: Q%d: fifo_depth=%d, fifo.axq_depth=%d\n",
 			    __func__,
 			    txq->axq_qnum,
 			    txq->axq_fifo_depth,
 			    txq->fifo.axq_depth);
 		}
 
 		/*
 		 * Now drain the pending queue.
 		 */
 		bf = TAILQ_FIRST(&txq->axq_q);
 		if (bf != NULL) {
 			ATH_TXQ_REMOVE(txq, bf, bf_list);
 			return (bf);
 		}
 
 		/* Completely empty: forget the hardware link pointer. */
 		txq->axq_link = NULL;
 		return (NULL);
 	}
 
 	/*
 	 * Is it the last buffer in this set?
 	 * Decrement the FIFO counter.
 	 */
 	if (bf->bf_flags & ATH_BUF_FIFOEND) {
 		if (txq->axq_fifo_depth != 0) {
 			txq->axq_fifo_depth--;
 		} else {
 			device_printf(sc->sc_dev,
 			    "%s: Q%d: fifo_depth=0, fifo.axq_depth=%d?\n",
 			    __func__,
 			    txq->axq_qnum,
 			    txq->fifo.axq_depth);
 		}
 	}
 	ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
 	return (bf);
 }
 
 /*
  * Drain every buffer from a hardware TX queue.
  *
  * Buffers are pulled one at a time with ath_tx_draintxq_get_one()
  * under the TXQ lock and then completed with the lock dropped, via
  * the buffer's own bf_comp handler when set, or ath_tx_default_comp()
  * otherwise.  Afterwards the holding buffer is freed and
  * ath_tx_txq_drain() is called for the software-queued frames.
  */
 void
 ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq)
 {
 #ifdef ATH_DEBUG
 	struct ath_hal *ah = sc->sc_ah;
 #endif
 	struct ath_buf *bf;
 	u_int ix;
 
 	/*
 	 * NB: this assumes output has been stopped and
 	 *     we do not need to block ath_tx_proc
 	 */
 	for (ix = 0;; ix++) {
 		ATH_TXQ_LOCK(txq);
 		bf = ath_tx_draintxq_get_one(sc, txq);
 		if (bf == NULL) {
 			/* Both lists are empty; nothing left to drain. */
 			ATH_TXQ_UNLOCK(txq);
 			break;
 		}
 		/* Keep the aggregate counter in step with the removal. */
 		if (bf->bf_state.bfs_aggr)
 			txq->axq_aggr_depth--;
 #ifdef ATH_DEBUG
 		if (sc->sc_debug & ATH_DEBUG_RESET) {
 			struct ieee80211com *ic = sc->sc_ifp->if_l2com;
 			int status = 0;
 
 			/*
 			 * EDMA operation has a TX completion FIFO
 			 * separate from the TX descriptor, so this
 			 * method of checking the "completion" status
 			 * is wrong.
 			 */
 			if (! sc->sc_isedma) {
 				status = (ath_hal_txprocdesc(ah,
 				    bf->bf_lastds,
 				    &bf->bf_status.ds_txstat) == HAL_OK);
 			}
 			ath_printtxbuf(sc, bf, txq->axq_qnum, ix, status);
 			/* NOTE(review): bf->bf_m is dereferenced unchecked here. */
 			ieee80211_dump_pkt(ic, mtod(bf->bf_m, const uint8_t *),
 			    bf->bf_m->m_len, 0, -1);
 		}
 #endif /* ATH_DEBUG */
 		/*
 		 * Since we're now doing magic in the completion
 		 * functions, we -must- call it for aggregation
 		 * destinations or BAW tracking will get upset.
 		 */
 		/*
 		 * Clear ATH_BUF_BUSY; the completion handler
 		 * will free the buffer.
 		 */
 		ATH_TXQ_UNLOCK(txq);
 		bf->bf_flags &= ~ATH_BUF_BUSY;
 		/* The completion handler runs without the TXQ lock held. */
 		if (bf->bf_comp)
 			bf->bf_comp(sc, bf, 1);
 		else
 			ath_tx_default_comp(sc, bf, 1);
 	}
 
 	/*
 	 * Free the holding buffer if it exists
 	 */
 	ATH_TXQ_LOCK(txq);
 	ath_txq_freeholdingbuf(sc, txq);
 	ATH_TXQ_UNLOCK(txq);
 
 	/*
 	 * Drain software queued frames which are on
 	 * active TIDs.
 	 */
 	ath_tx_txq_drain(sc, txq);
 }
 
 /*
  * Stop TX DMA on a single hardware queue.
  *
  * The caller must hold the TXQ lock (asserted below).  After the HAL
  * has been asked to stop DMA, ATH_TXQ_PUTRUNNING is cleared from
  * axq_flags.  The HAL's return value is ignored.
  */
 static void
 ath_tx_stopdma(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_hal *ah;
 
 	ATH_TXQ_LOCK_ASSERT(txq);
 
 	ah = sc->sc_ah;
 
 	/* Log the queue state as seen just before DMA is stopped. */
 	DPRINTF(sc, ATH_DEBUG_RESET,
 	    "%s: tx queue [%u] %p, active=%d, hwpending=%d, flags 0x%08x, "
 	    "link %p, holdingbf=%p\n",
 	    __func__,
 	    txq->axq_qnum,
 	    (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, txq->axq_qnum),
 	    (int) (!! ath_hal_txqenabled(ah, txq->axq_qnum)),
 	    (int) ath_hal_numtxpending(ah, txq->axq_qnum),
 	    txq->axq_flags,
 	    txq->axq_link,
 	    txq->axq_holdingbf);
 
 	(void) ath_hal_stoptxdma(ah, txq->axq_qnum);
 
 	/* TX DMA is stopped now, so the queue is no longer "running". */
 	txq->axq_flags &= ~ATH_TXQ_PUTRUNNING;
 
 #ifdef	ATH_DEBUG
 	/* With reset debugging on, also dump the holding buffer if any. */
 	if (txq->axq_holdingbf != NULL) {
 		if (sc->sc_debug & ATH_DEBUG_RESET) {
 			ath_printtxbuf(sc, txq->axq_holdingbf,
 			    txq->axq_qnum, 0, 0);
 		}
 	}
 #endif
 }
 
 /*
  * Stop TX DMA on the beacon queue and on every configured data queue.
  *
  * Returns 0 without touching the hardware when the device is marked
  * invalid, and 1 otherwise.  The HAL's own stop results are discarded.
  */
 int
 ath_stoptxdma(struct ath_softc *sc)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_txq *txq;
 	int qnum;
 
 	/* XXX return value */
 	/* don't touch the hardware if marked invalid */
 	if (sc->sc_invalid)
 		return 0;
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: tx queue [%u] %p, link %p\n",
 	    __func__, sc->sc_bhalq,
 	    (caddr_t)(uintptr_t) ath_hal_gettxbuf(ah, sc->sc_bhalq),
 	    NULL);
 
 	/* Beacon queue first ... */
 	(void) ath_hal_stoptxdma(ah, sc->sc_bhalq);
 
 	/* ... then each data queue that has been set up. */
 	for (qnum = 0; qnum < HAL_NUM_TX_QUEUES; qnum++) {
 		if (!ATH_TXQ_SETUP(sc, qnum))
 			continue;
 		txq = &sc->sc_txq[qnum];
 		ATH_TXQ_LOCK(txq);
 		ath_tx_stopdma(sc, txq);
 		ATH_TXQ_UNLOCK(txq);
 	}
 
 	return 1;
 }
 
 #ifdef	ATH_DEBUG
 /*
  * Log every buffer on a TX queue's pending list (axq_q), bracketed
  * by "begin"/"end" lines.  Does nothing unless ATH_DEBUG_RESET is
  * set in sc_debug.
  */
 void
 ath_tx_dump(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_buf *bf;
 	int idx, done;
 
 	if ((sc->sc_debug & ATH_DEBUG_RESET) == 0)
 		return;
 
 	device_printf(sc->sc_dev, "%s: Q%d: begin\n",
 	    __func__, txq->axq_qnum);
 
 	idx = 0;
 	TAILQ_FOREACH(bf, &txq->axq_q, bf_list) {
 		/* Non-zero when the HAL reports the descriptor as done. */
 		done = (ath_hal_txprocdesc(ah, bf->bf_lastds,
 		    &bf->bf_status.ds_txstat) == HAL_OK);
 		ath_printtxbuf(sc, bf, txq->axq_qnum, idx, done);
 		idx++;
 	}
 
 	device_printf(sc->sc_dev, "%s: Q%d: end\n",
 	    __func__, txq->axq_qnum);
 }
 #endif /* ATH_DEBUG */
 
 /*
  * Drain the transmit queues and reclaim resources.
  *
  * TX DMA is stopped first.  For each configured queue:
  *  - ATH_RESET_NOLOSS: ath_tx_processq() is run, the holding buffer
  *    is freed and axq_link is re-pointed at the last queued
  *    descriptor (or NULL if the queue is empty);
  *  - any other reset type: the queue is drained through
  *    ath_tx_draintxq().
  * Finally IFF_DRV_OACTIVE is cleared and the watchdog timer is
  * zeroed.
  */
 void
 ath_legacy_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ifnet *ifp = sc->sc_ifp;
 	int i;
 	struct ath_buf *bf_last;
 
 	(void) ath_stoptxdma(sc);
 
 	/*
 	 * Dump the queue contents
 	 */
 	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
 		/*
 		 * XXX TODO: should we just handle the completed TX frames
 		 * here, whether or not the reset is a full one or not?
 		 */
 		if (ATH_TXQ_SETUP(sc, i)) {
 #ifdef	ATH_DEBUG
 			if (sc->sc_debug & ATH_DEBUG_RESET)
 				ath_tx_dump(sc, &sc->sc_txq[i]);
 #endif	/* ATH_DEBUG */
 			if (reset_type == ATH_RESET_NOLOSS) {
 				/* Process the queue rather than draining it. */
 				ath_tx_processq(sc, &sc->sc_txq[i], 0);
 				ATH_TXQ_LOCK(&sc->sc_txq[i]);
 				/*
 				 * Free the holding buffer; DMA is now
 				 * stopped.
 				 */
 				ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
 				/*
 				 * Setup the link pointer to be the
 				 * _last_ buffer/descriptor in the list.
 				 * If there's nothing in the list, set it
 				 * to NULL.
 				 */
 				bf_last = ATH_TXQ_LAST(&sc->sc_txq[i],
 				    axq_q_s);
 				if (bf_last != NULL) {
 					ath_hal_gettxdesclinkptr(ah,
 					    bf_last->bf_lastds,
 					    &sc->sc_txq[i].axq_link);
 				} else {
 					sc->sc_txq[i].axq_link = NULL;
 				}
 				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
 			} else
 				ath_tx_draintxq(sc, &sc->sc_txq[i]);
 		}
 	}
 #ifdef ATH_DEBUG
 	/* With reset debugging on, also dump the first beacon buffer. */
 	if (sc->sc_debug & ATH_DEBUG_RESET) {
 		struct ath_buf *bf = TAILQ_FIRST(&sc->sc_bbuf);
 		if (bf != NULL && bf->bf_m != NULL) {
 			ath_printtxbuf(sc, bf, sc->sc_bhalq, 0,
 				ath_hal_txprocdesc(ah, bf->bf_lastds,
 				    &bf->bf_status.ds_txstat) == HAL_OK);
 			ieee80211_dump_pkt(ifp->if_l2com,
 			    mtod(bf->bf_m, const uint8_t *), bf->bf_m->m_len,
 			    0, -1);
 		}
 	}
 #endif /* ATH_DEBUG */
 	/* Clear the "output active" flag under the send-queue lock. */
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 	sc->sc_wd_timer = 0;
 }
 
 /*
  * Update internal state after a channel change.
  */
 static void
 ath_chan_change(struct ath_softc *sc, struct ieee80211_channel *chan)
 {
 	enum ieee80211_phymode mode;
 
 	/*
 	 * Change channels and update the h/w rate map
 	 * if we're switching; e.g. 11a to 11b/g.
 	 */
 	mode = ieee80211_chan2mode(chan);
 	if (mode != sc->sc_curmode)
 		ath_setcurmode(sc, mode);
 	sc->sc_curchan = chan;
 }
 
 /*
  * Set/change channels.  If the channel is really being changed,
  * it's done by resetting the chip.  To accomplish this we must
  * first cleanup any pending DMA, then restart stuff after a la
  * ath_init.
  *
  * Must be called with neither the PCU lock nor the softc lock held
  * (both are asserted unlocked below).
  *
  * Returns 0 on success, or EIO when the HAL reset or the restart of
  * the receive logic fails.  The "finish" path runs either way.
  */
 static int
 ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	int ret = 0;
 
 	/* Treat this as an interface reset */
 	ATH_PCU_UNLOCK_ASSERT(sc);
 	ATH_UNLOCK_ASSERT(sc);
 
 	/* (Try to) stop TX/RX from occurring */
 	taskqueue_block(sc->sc_tq);
 
 	ATH_PCU_LOCK(sc);
 
 	/* Disable interrupts */
 	ath_hal_intrset(ah, 0);
 
 	/* Stop new RX/TX/interrupt completion */
 	if (ath_reset_grablock(sc, 1) == 0) {
 		device_printf(sc->sc_dev, "%s: concurrent reset! Danger!\n",
 		    __func__);
 	}
 
 	/* Stop pending RX/TX completion */
 	ath_txrx_stop_locked(sc);
 
 	ATH_PCU_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: %u (%u MHz, flags 0x%x)\n",
 	    __func__, ieee80211_chan2ieee(ic, chan),
 	    chan->ic_freq, chan->ic_flags);
 	/* Only reset the hardware when the channel actually differs. */
 	if (chan != sc->sc_curchan) {
 		HAL_STATUS status;
 		/*
 		 * To switch channels clear any pending DMA operations;
 		 * wait long enough for the RX fifo to drain, reset the
 		 * hardware at the new frequency, and then re-enable
 		 * the relevant bits of the h/w.
 		 */
 #if 0
 		ath_hal_intrset(ah, 0);		/* disable interrupts */
 #endif
 		ath_stoprecv(sc, 1);		/* turn off frame recv */
 		/*
 		 * First, handle completed TX/RX frames.
 		 */
 		ath_rx_flush(sc);
 		ath_draintxq(sc, ATH_RESET_NOLOSS);
 		/*
 		 * Next, flush the non-scheduled frames.
 		 */
 		ath_draintxq(sc, ATH_RESET_FULL);	/* clear pending tx frames */
 
 		ath_update_chainmasks(sc, chan);
 		ath_hal_setchainmasks(sc->sc_ah, sc->sc_cur_txchainmask,
 		    sc->sc_cur_rxchainmask);
 		if (!ath_hal_reset(ah, sc->sc_opmode, chan, AH_TRUE, &status)) {
-			if_printf(ifp, "%s: unable to reset "
+			device_printf(sc->sc_dev, "%s: unable to reset "
 			    "channel %u (%u MHz, flags 0x%x), hal status %u\n",
 			    __func__, ieee80211_chan2ieee(ic, chan),
 			    chan->ic_freq, chan->ic_flags, status);
 			ret = EIO;
 			goto finish;
 		}
 		sc->sc_diversity = ath_hal_getdiversity(ah);
 
 		ATH_RX_LOCK(sc);
 		sc->sc_rx_stopped = 1;
 		sc->sc_rx_resetted = 1;
 		ATH_RX_UNLOCK(sc);
 
 		/* Let DFS at it in case it's a DFS channel */
 		ath_dfs_radar_enable(sc, chan);
 
 		/* Let spectral at in case spectral is enabled */
 		ath_spectral_enable(sc, chan);
 
 		/*
 		 * Let bluetooth coexistence at in case it's needed for this
 		 * channel
 		 *
 		 * NOTE(review): this passes ic->ic_curchan rather than
 		 * "chan" like the DFS/spectral calls above - confirm that
 		 * the two are always the same here.
 		 */
 		ath_btcoex_enable(sc, ic->ic_curchan);
 
 		/*
 		 * If we're doing TDMA, enforce the TXOP limitation for chips
 		 * that support it.
 		 */
 		if (sc->sc_hasenforcetxop && sc->sc_tdma)
 			ath_hal_setenforcetxop(sc->sc_ah, 1);
 		else
 			ath_hal_setenforcetxop(sc->sc_ah, 0);
 
 		/*
 		 * Re-enable rx framework.
 		 */
 		if (ath_startrecv(sc) != 0) {
-			if_printf(ifp, "%s: unable to restart recv logic\n",
-			    __func__);
+			device_printf(sc->sc_dev,
+			    "%s: unable to restart recv logic\n", __func__);
 			ret = EIO;
 			goto finish;
 		}
 
 		/*
 		 * Change channels and update the h/w rate map
 		 * if we're switching; e.g. 11a to 11b/g.
 		 */
 		ath_chan_change(sc, chan);
 
 		/*
 		 * Reset clears the beacon timers; reset them
 		 * here if needed.
 		 */
 		if (sc->sc_beacons) {		/* restart beacons */
 #ifdef IEEE80211_SUPPORT_TDMA
 			if (sc->sc_tdma)
 				ath_tdma_config(sc, NULL);
 			else
 #endif
 			ath_beacon_config(sc, NULL);
 		}
 
 		/*
 		 * Re-enable interrupts.
 		 */
 #if 0
 		ath_hal_intrset(ah, sc->sc_imask);
 #endif
 	}
 
 finish:
 	/*
 	 * Common exit for success and failure: drop the in-reset
 	 * count, restore the interrupt mask, clear IFF_DRV_OACTIVE
 	 * and restart TX/RX.
 	 */
 	ATH_PCU_LOCK(sc);
 	sc->sc_inreset_cnt--;
 	/* XXX only do this if sc_inreset_cnt == 0? */
 	ath_hal_intrset(ah, sc->sc_imask);
 	ATH_PCU_UNLOCK(sc);
 
 	IF_LOCK(&ifp->if_snd);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	IF_UNLOCK(&ifp->if_snd);
 	ath_txrx_start(sc);
 	/* XXX ath_start? */
 
 	return ret;
 }
 
 /*
  * Periodically recalibrate the PHY to account
  * for temperature/environment changes.
  *
  * "arg" is the ath_softc.  The softc lock must be held on entry
  * (asserted below).  The function re-arms sc_cal_ch for its next run
  * unless the computed interval is zero.
  *
  * Three kinds of work are scheduled from tick deltas:
  *  - ANI polling (ath_anicalinterval, in ms);
  *  - long calibration (ath_longcalinterval, in seconds);
  *  - short calibration (ath_shortcalinterval, in ms), only while
  *    sc_doresetcal is set.
  */
 static void
 ath_calibrate(void *arg)
 {
 	struct ath_softc *sc = arg;
 	struct ath_hal *ah = sc->sc_ah;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	HAL_BOOL longCal, isCalDone = AH_TRUE;
 	HAL_BOOL aniCal, shortCal = AH_FALSE;
 	int nextcal;
 
 	ATH_LOCK_ASSERT(sc);
 
 	/*
 	 * Force the hardware awake for ANI work.
 	 */
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 
 	/* Skip trying to do this if we're in reset */
 	if (sc->sc_inreset_cnt)
 		goto restart;
 
 	if (ic->ic_flags & IEEE80211_F_SCAN)	/* defer, off channel */
 		goto restart;
 	/* Work out which calibrations are due on this pass. */
 	longCal = (ticks - sc->sc_lastlongcal >= ath_longcalinterval*hz);
 	aniCal = (ticks - sc->sc_lastani >= ath_anicalinterval*hz/1000);
 	if (sc->sc_doresetcal)
 		shortCal = (ticks - sc->sc_lastshortcal >= ath_shortcalinterval*hz/1000);
 
 	DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: shortCal=%d; longCal=%d; aniCal=%d\n", __func__, shortCal, longCal, aniCal);
 	if (aniCal) {
 		sc->sc_stats.ast_ani_cal++;
 		sc->sc_lastani = ticks;
 		ath_hal_ani_poll(ah, sc->sc_curchan);
 	}
 
 	if (longCal) {
 		sc->sc_stats.ast_per_cal++;
 		sc->sc_lastlongcal = ticks;
 		if (ath_hal_getrfgain(ah) == HAL_RFGAIN_NEED_CHANGE) {
 			/*
 			 * Rfgain is out of bounds, reset the chip
 			 * to load new gain values.
 			 */
 			DPRINTF(sc, ATH_DEBUG_CALIBRATE,
 				"%s: rfgain change\n", __func__);
 			sc->sc_stats.ast_per_rfgain++;
 			sc->sc_resetcal = 0;
 			sc->sc_doresetcal = AH_TRUE;
 			/* Queue the reset task and re-run after one tick. */
 			taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask);
 			callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc);
 			ath_power_restore_power_state(sc);
 			return;
 		}
 		/*
 		 * If this long cal is after an idle period, then
 		 * reset the data collection state so we start fresh.
 		 */
 		if (sc->sc_resetcal) {
 			(void) ath_hal_calreset(ah, sc->sc_curchan);
 			sc->sc_lastcalreset = ticks;
 			sc->sc_lastshortcal = ticks;
 			sc->sc_resetcal = 0;
 			sc->sc_doresetcal = AH_TRUE;
 		}
 	}
 
 	/* Only call if we're doing a short/long cal, not for ANI calibration */
 	if (shortCal || longCal) {
 		isCalDone = AH_FALSE;
 		if (ath_hal_calibrateN(ah, sc->sc_curchan, longCal, &isCalDone)) {
 			if (longCal) {
 				/*
 				 * Calibrate noise floor data again in case of change.
 				 */
 				ath_hal_process_noisefloor(ah);
 			}
 		} else {
 			DPRINTF(sc, ATH_DEBUG_ANY,
 				"%s: calibration of channel %u failed\n",
 				__func__, sc->sc_curchan->ic_freq);
 			sc->sc_stats.ast_per_calfail++;
 		}
 		if (shortCal)
 			sc->sc_lastshortcal = ticks;
 	}
 	if (!isCalDone) {
 		/*
 		 * NB: the "restart" label sits inside this block, so the
 		 * in-reset and scanning early-outs above jump straight
 		 * here regardless of isCalDone.
 		 */
 restart:
 		/*
 		 * Use a shorter interval to potentially collect multiple
 		 * data samples required to complete calibration.  Once
 		 * we're told the work is done we drop back to a longer
 		 * interval between requests.  We're more aggressive doing
 		 * work when operating as an AP to improve operation right
 		 * after startup.
 		 */
 		sc->sc_lastshortcal = ticks;
 		nextcal = ath_shortcalinterval*hz/1000;
 		if (sc->sc_opmode != HAL_M_HOSTAP)
 			nextcal *= 10;
 		sc->sc_doresetcal = AH_TRUE;
 	} else {
 		/* nextcal should be the shortest time for next event */
 		nextcal = ath_longcalinterval*hz;
 		if (sc->sc_lastcalreset == 0)
 			sc->sc_lastcalreset = sc->sc_lastlongcal;
 		else if (ticks - sc->sc_lastcalreset >= ath_resetcalinterval*hz)
 			sc->sc_resetcal = 1;	/* setup reset next trip */
 		sc->sc_doresetcal = AH_FALSE;
 	}
 	/* ANI calibration may occur more often than short/long/resetcal */
 	if (ath_anicalinterval > 0)
 		nextcal = MIN(nextcal, ath_anicalinterval*hz/1000);
 
 	if (nextcal != 0) {
 		DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: next +%u (%sisCalDone)\n",
 		    __func__, nextcal, isCalDone ? "" : "!");
 		callout_reset(&sc->sc_cal_ch, nextcal, ath_calibrate, sc);
 	} else {
 		DPRINTF(sc, ATH_DEBUG_CALIBRATE, "%s: calibration disabled\n",
 		    __func__);
 		/* NB: don't rearm timer */
 	}
 	/*
 	 * Restore power state now that we're done.
 	 */
 	ath_power_restore_power_state(sc);
 }
 
 /*
  * Scan-start hook: mark the softc as scanning, clear the beacon-sync
  * flag, then program the recalculated RX filter and set the
  * association id to the interface broadcast address with aid 0.
  */
 static void
 ath_scan_start(struct ieee80211com *ic)
 {
 	struct ifnet *ifp;
 	struct ath_softc *sc;
 	struct ath_hal *ah;
 	u_int32_t rxfilter;
 
 	ifp = ic->ic_ifp;
 	sc = ifp->if_softc;
 	ah = sc->sc_ah;
 
 	/* XXX calibration timer? */
 
 	/* Software state is updated under the softc lock ... */
 	ATH_LOCK(sc);
 	sc->sc_syncbeacon = 0;
 	sc->sc_scanning = 1;
 	rxfilter = ath_calcrxfilter(sc);
 	ATH_UNLOCK(sc);
 
 	/* ... and the hardware is programmed under the PCU lock. */
 	ATH_PCU_LOCK(sc);
 	ath_hal_setrxfilter(ah, rxfilter);
 	ath_hal_setassocid(ah, ifp->if_broadcastaddr, 0);
 	ATH_PCU_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0\n",
 		 __func__, rxfilter, ether_sprintf(ifp->if_broadcastaddr));
 }
 
 /*
  * Scan-end hook: clear the scanning flag, then program the
  * recalculated RX filter, restore the current BSSID/AID in the
  * hardware and have the HAL process its noise floor data.
  */
 static void
 ath_scan_end(struct ieee80211com *ic)
 {
 	struct ifnet *ifp;
 	struct ath_softc *sc;
 	struct ath_hal *ah;
 	u_int32_t rxfilter;
 
 	ifp = ic->ic_ifp;
 	sc = ifp->if_softc;
 	ah = sc->sc_ah;
 
 	/* Software state is updated under the softc lock ... */
 	ATH_LOCK(sc);
 	sc->sc_scanning = 0;
 	rxfilter = ath_calcrxfilter(sc);
 	ATH_UNLOCK(sc);
 
 	/* ... and the hardware is programmed under the PCU lock. */
 	ATH_PCU_LOCK(sc);
 	ath_hal_setrxfilter(ah, rxfilter);
 	ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid);
 
 	ath_hal_process_noisefloor(ah);
 	ATH_PCU_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n",
 		 __func__, rxfilter, ether_sprintf(sc->sc_curbssid),
 		 sc->sc_curaid);
 }
 
 #ifdef	ATH_ENABLE_11N
 /*
  * Channel width update hook.
  *
  * For now, just do a channel change.
  *
  * Later, we'll go through the hard slog of suspending tx/rx, changing rate
  * control state and resetting the hardware without dropping frames out
  * of the queue.
  *
  * The unfortunate trouble here is making absolutely sure that the
  * channel width change has propagated enough so the hardware
  * absolutely isn't handed bogus frames for its current operating
  * mode. (Eg, 40MHz frames in 20MHz mode.) Since TX and RX can and
  * do occur in parallel, we need to make certain we've blocked
  * any further ongoing TX (and RX, that can cause raw TX)
  * before we do this.
  */
 static void
 ath_update_chw(struct ieee80211com *ic)
 {
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 
 	DPRINTF(sc, ATH_DEBUG_STATE, "%s: called\n", __func__);
 
 	/* Delegate to the ordinary channel-set path. */
 	ath_set_channel(ic);
 }
 #endif	/* ATH_ENABLE_11N */
 
 /*
  * Channel-change hook: wake the hardware, switch to ic->ic_curchan
  * via ath_chan_set() (its return value is ignored), then restore the
  * previous power state.
  */
 static void
 ath_set_channel(struct ieee80211com *ic)
 {
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 
 	/* Keep the hardware awake across the channel change. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	(void) ath_chan_set(sc, ic->ic_curchan);
 
 	ATH_LOCK(sc);
 	/*
 	 * If we are returning to our bss channel then mark state
 	 * so the next recv'd beacon's tsf will be used to sync the
 	 * beacon timers.  Note that since we only hear beacons in
 	 * sta/ibss mode this has no effect in other operating modes.
 	 */
 	if (ic->ic_curchan == ic->ic_bsschan && !sc->sc_scanning)
 		sc->sc_syncbeacon = 1;
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 }
 
 /*
  * Walk the vap list and report whether any vap other than "this"
  * has reached at least RUN state.  Returns 1 if one is found and
  * 0 otherwise.  The net80211 com lock must be held.
  */
 static int
 ath_isanyrunningvaps(struct ieee80211vap *this)
 {
 	struct ieee80211com *ic = this->iv_ic;
 	struct ieee80211vap *other;
 	int running = 0;
 
 	IEEE80211_LOCK_ASSERT(ic);
 
 	TAILQ_FOREACH(other, &ic->ic_vaps, iv_next) {
 		/* The vap being transitioned does not count. */
 		if (other == this)
 			continue;
 		if (other->iv_state >= IEEE80211_S_RUN) {
 			running = 1;
 			break;
 		}
 	}
 	return running;
 }
 
 /*
  * Driver hook for net80211 vap state transitions.
  *
  * In order, this:
  *  - wakes the hardware and stops the calibration callout;
  *  - sets the LED for the new state;
  *  - for SCAN, masks the SWBA/BMISS interrupts and marks beacons
  *    as stopped;
  *  - programs the RX filter (and BSSID/AID for station-like modes
  *    entering RUN);
  *  - calls the saved parent method, avp->av_newstate();
  *  - does per-state work for RUN, INIT and SLEEP.
  *
  * The net80211 com lock is asserted on entry and again after the
  * parent method returns.
  *
  * Returns the parent method's error, the error from
  * ath_beacon_alloc(), or 0.
  */
 static int
 ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
 {
 	struct ieee80211com *ic = vap->iv_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ath_vap *avp = ATH_VAP(vap);
 	struct ath_hal *ah = sc->sc_ah;
 	struct ieee80211_node *ni = NULL;
 	int i, error, stamode;
 	u_int32_t rfilt;
 	int csa_run_transition = 0;
 	enum ieee80211_state ostate = vap->iv_state;
 
 	/* LED state to show for each net80211 state; indexed by nstate. */
 	static const HAL_LED_STATE leds[] = {
 	    HAL_LED_INIT,	/* IEEE80211_S_INIT */
 	    HAL_LED_SCAN,	/* IEEE80211_S_SCAN */
 	    HAL_LED_AUTH,	/* IEEE80211_S_AUTH */
 	    HAL_LED_ASSOC, 	/* IEEE80211_S_ASSOC */
 	    HAL_LED_RUN, 	/* IEEE80211_S_CAC */
 	    HAL_LED_RUN, 	/* IEEE80211_S_RUN */
 	    HAL_LED_RUN, 	/* IEEE80211_S_CSA */
 	    HAL_LED_RUN, 	/* IEEE80211_S_SLEEP */
 	};
 
 	DPRINTF(sc, ATH_DEBUG_STATE, "%s: %s -> %s\n", __func__,
 		ieee80211_state_name[ostate],
 		ieee80211_state_name[nstate]);
 
 	/*
 	 * net80211 _should_ have the comlock asserted at this point.
 	 * There are some comments around the calls to vap->iv_newstate
 	 * which indicate that it (newstate) may end up dropping the
 	 * lock.  This and the subsequent lock assert check after newstate
 	 * are an attempt to catch these and figure out how/why.
 	 */
 	IEEE80211_LOCK_ASSERT(ic);
 
 	/* Before we touch the hardware - wake it up */
 	ATH_LOCK(sc);
 	/*
 	 * If the NIC is in anything other than SLEEP state,
 	 * we need to ensure that self-generated frames are
 	 * set for PWRMGT=0.  Otherwise we may end up with
 	 * strange situations.
 	 *
 	 * XXX TODO: is this actually the case? :-)
 	 */
 	if (nstate != IEEE80211_S_SLEEP)
 		ath_power_setselfgen(sc, HAL_PM_AWAKE);
 
 	/*
 	 * Now, wake the thing up.
 	 */
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 
 	/*
 	 * And stop the calibration callout whilst we have
 	 * ATH_LOCK held.
 	 */
 	callout_stop(&sc->sc_cal_ch);
 	ATH_UNLOCK(sc);
 
 	/* Remember CSA -> RUN; the STA case below uses it. */
 	if (ostate == IEEE80211_S_CSA && nstate == IEEE80211_S_RUN)
 		csa_run_transition = 1;
 
 	ath_hal_setledstate(ah, leds[nstate]);	/* set LED */
 
 	if (nstate == IEEE80211_S_SCAN) {
 		/*
 		 * Scanning: turn off beacon miss and don't beacon.
 		 * Mark beacon state so when we reach RUN state we'll
 		 * [re]setup beacons.  Unblock the task q thread so
 		 * deferred interrupt processing is done.
 		 */
 
 		/* Ensure we stay awake during scan */
 		ATH_LOCK(sc);
 		ath_power_setselfgen(sc, HAL_PM_AWAKE);
 		ath_power_setpower(sc, HAL_PM_AWAKE);
 		ATH_UNLOCK(sc);
 
 		ath_hal_intrset(ah,
 		    sc->sc_imask &~ (HAL_INT_SWBA | HAL_INT_BMISS));
 		sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS);
 		sc->sc_beacons = 0;
 		taskqueue_unblock(sc->sc_tq);
 	}
 
 	/* Hold a reference on the bss node; released at "bad" below. */
 	ni = ieee80211_ref_node(vap->iv_bss);
 	rfilt = ath_calcrxfilter(sc);
 	stamode = (vap->iv_opmode == IEEE80211_M_STA ||
 		   vap->iv_opmode == IEEE80211_M_AHDEMO ||
 		   vap->iv_opmode == IEEE80211_M_IBSS);
 
 	/*
 	 * XXX Don't need to do this (and others) if we've transitioned
 	 * from SLEEP->RUN.
 	 */
 	if (stamode && nstate == IEEE80211_S_RUN) {
 		sc->sc_curaid = ni->ni_associd;
 		IEEE80211_ADDR_COPY(sc->sc_curbssid, ni->ni_bssid);
 		ath_hal_setassocid(ah, sc->sc_curbssid, sc->sc_curaid);
 	}
 	DPRINTF(sc, ATH_DEBUG_STATE, "%s: RX filter 0x%x bssid %s aid 0x%x\n",
 	   __func__, rfilt, ether_sprintf(sc->sc_curbssid), sc->sc_curaid);
 	ath_hal_setrxfilter(ah, rfilt);
 
 	/* XXX is this to restore keycache on resume? */
 	if (vap->iv_opmode != IEEE80211_M_STA &&
 	    (vap->iv_flags & IEEE80211_F_PRIVACY)) {
 		for (i = 0; i < IEEE80211_WEP_NKID; i++)
 			if (ath_hal_keyisvalid(ah, i))
 				ath_hal_keysetmac(ah, i, ni->ni_bssid);
 	}
 
 	/*
 	 * Invoke the parent method to do net80211 work.
 	 */
 	error = avp->av_newstate(vap, nstate, arg);
 	if (error != 0)
 		goto bad;
 
 	/*
 	 * See above: ensure av_newstate() doesn't drop the lock
 	 * on us.
 	 */
 	IEEE80211_LOCK_ASSERT(ic);
 
 	if (nstate == IEEE80211_S_RUN) {
 		/* NB: collect bss node again, it may have changed */
 		ieee80211_free_node(ni);
 		ni = ieee80211_ref_node(vap->iv_bss);
 
 		DPRINTF(sc, ATH_DEBUG_STATE,
 		    "%s(RUN): iv_flags 0x%08x bintvl %d bssid %s "
 		    "capinfo 0x%04x chan %d\n", __func__,
 		    vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid),
 		    ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan));
 
 		switch (vap->iv_opmode) {
 #ifdef IEEE80211_SUPPORT_TDMA
 		case IEEE80211_M_AHDEMO:
 			if ((vap->iv_caps & IEEE80211_C_TDMA) == 0)
 				break;
 			/* fall thru... */
 #endif
 		case IEEE80211_M_HOSTAP:
 		case IEEE80211_M_IBSS:
 		case IEEE80211_M_MBSS:
 			/*
 			 * Allocate and setup the beacon frame.
 			 *
 			 * Stop any previous beacon DMA.  This may be
 			 * necessary, for example, when an ibss merge
 			 * causes reconfiguration; there will be a state
 			 * transition from RUN->RUN that means we may
 			 * be called with beacon transmission active.
 			 */
 			ath_hal_stoptxdma(ah, sc->sc_bhalq);
 
 			error = ath_beacon_alloc(sc, ni);
 			if (error != 0)
 				goto bad;
 			/*
 			 * If joining an adhoc network defer beacon timer
 			 * configuration to the next beacon frame so we
 			 * have a current TSF to use.  Otherwise we're
 			 * starting an ibss/bss so there's no need to delay;
 			 * if this is the first vap moving to RUN state, then
 			 * beacon state needs to be [re]configured.
 			 */
 			if (vap->iv_opmode == IEEE80211_M_IBSS &&
 			    ni->ni_tstamp.tsf != 0) {
 				sc->sc_syncbeacon = 1;
 			} else if (!sc->sc_beacons) {
 #ifdef IEEE80211_SUPPORT_TDMA
 				if (vap->iv_caps & IEEE80211_C_TDMA)
 					ath_tdma_config(sc, vap);
 				else
 #endif
 					ath_beacon_config(sc, vap);
 				sc->sc_beacons = 1;
 			}
 			break;
 		case IEEE80211_M_STA:
 			/*
 			 * Defer beacon timer configuration to the next
 			 * beacon frame so we have a current TSF to use
 			 * (any TSF collected when scanning is likely old).
 			 * However if it's due to a CSA -> RUN transition,
 			 * force a beacon update so we pick up a lack of
 			 * beacons from an AP in CAC and thus force a
 			 * scan.
 			 *
 			 * And, there's also corner cases here where
 			 * after a scan, the AP may have disappeared.
 			 * In that case, we may not receive an actual
 			 * beacon to update the beacon timer and thus we
 			 * won't get notified of the missing beacons.
 			 */
 			if (ostate != IEEE80211_S_RUN &&
 			    ostate != IEEE80211_S_SLEEP) {
 				DPRINTF(sc, ATH_DEBUG_BEACON,
 				    "%s: STA; syncbeacon=1\n", __func__);
 				sc->sc_syncbeacon = 1;
 
 				if (csa_run_transition)
 					ath_beacon_config(sc, vap);
 
 				/*
 				 * PR: kern/175227
 				 *
 				 * Reconfigure beacons during reset; as
 				 * otherwise we won't get the beacon timers
 				 * reprogrammed after a reset and thus we
 				 * won't pick up a beacon miss interrupt.
 				 *
 				 * Hopefully we'll see a beacon before the
 				 * BMISS timer fires (too often), leading to
 				 * a STA disassociation.
 				 */
 				sc->sc_beacons = 1;
 			}
 			break;
 		case IEEE80211_M_MONITOR:
 			/*
 			 * Monitor mode vaps have only INIT->RUN and RUN->RUN
 			 * transitions so we must re-enable interrupts here to
 			 * handle the case of a single monitor mode vap.
 			 */
 			ath_hal_intrset(ah, sc->sc_imask);
 			break;
 		case IEEE80211_M_WDS:
 			break;
 		default:
 			break;
 		}
 		/*
 		 * Let the hal process statistics collected during a
 		 * scan so it can provide calibrated noise floor data.
 		 */
 		ath_hal_process_noisefloor(ah);
 		/*
 		 * Reset rssi stats; maybe not the best place...
 		 */
 		sc->sc_halstats.ns_avgbrssi = ATH_RSSI_DUMMY_MARKER;
 		sc->sc_halstats.ns_avgrssi = ATH_RSSI_DUMMY_MARKER;
 		sc->sc_halstats.ns_avgtxrssi = ATH_RSSI_DUMMY_MARKER;
 
 		/*
 		 * Force awake for RUN mode.
 		 */
 		ATH_LOCK(sc);
 		ath_power_setselfgen(sc, HAL_PM_AWAKE);
 		ath_power_setpower(sc, HAL_PM_AWAKE);
 
 		/*
 		 * Finally, start any timers and the task q thread
 		 * (in case we didn't go through SCAN state).
 		 */
 		if (ath_longcalinterval != 0) {
 			/* start periodic recalibration timer */
 			callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc);
 		} else {
 			DPRINTF(sc, ATH_DEBUG_CALIBRATE,
 			    "%s: calibration disabled\n", __func__);
 		}
 		ATH_UNLOCK(sc);
 
 		taskqueue_unblock(sc->sc_tq);
 	} else if (nstate == IEEE80211_S_INIT) {
 		/*
 		 * If there are no vaps left in RUN state then
 		 * shutdown host/driver operation:
 		 * o disable interrupts
 		 * o disable the task queue thread
 		 * o mark beacon processing as stopped
 		 */
 		if (!ath_isanyrunningvaps(vap)) {
 			sc->sc_imask &= ~(HAL_INT_SWBA | HAL_INT_BMISS);
 			/* disable interrupts  */
 			ath_hal_intrset(ah, sc->sc_imask &~ HAL_INT_GLOBAL);
 			taskqueue_block(sc->sc_tq);
 			sc->sc_beacons = 0;
 		}
 #ifdef IEEE80211_SUPPORT_TDMA
 		ath_hal_setcca(ah, AH_TRUE);
 #endif
 	} else if (nstate == IEEE80211_S_SLEEP) {
 		/* We're going to sleep, so transition appropriately */
 		/* For now, only do this if we're a single STA vap */
 		if (sc->sc_nvaps == 1 &&
 		    vap->iv_opmode == IEEE80211_M_STA) {
 			DPRINTF(sc, ATH_DEBUG_BEACON, "%s: syncbeacon=%d\n", __func__, sc->sc_syncbeacon);
 			ATH_LOCK(sc);
 			/*
 			 * Always at least set the self-generated
 			 * frame config to set PWRMGT=1.
 			 */
 			ath_power_setselfgen(sc, HAL_PM_NETWORK_SLEEP);
 
 			/*
 			 * If we're not syncing beacons, transition
 			 * to NETWORK_SLEEP.
 			 *
 			 * We stay awake if syncbeacon > 0 in case
 			 * we need to listen for some beacons otherwise
 			 * our beacon timer config may be wrong.
 			 */
 			if (sc->sc_syncbeacon == 0) {
 				ath_power_setpower(sc, HAL_PM_NETWORK_SLEEP);
 			}
 			ATH_UNLOCK(sc);
 		}
 	}
 bad:
 	/* Common exit, reached on success as well as on error. */
 	ieee80211_free_node(ni);
 
 	/*
 	 * Restore the power state - either to what it was, or
 	 * to network_sleep if it's alright.
 	 */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 	return error;
 }
 
 /*
  * Allocate a key cache slot to the station so we can
  * setup a mapping from key index to node. The key cache
  * slot is needed for managing antenna state and for
  * compression when stations do not use crypto.  We do
  * it uniliaterally here; if crypto is employed this slot
  * will be reassigned.
  */
 static void
 ath_setup_stationkey(struct ieee80211_node *ni)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 	ieee80211_keyix keyix, rxkeyix;
 
 	/* XXX should take a locked ref to vap->iv_bss */
 	if (!ath_key_alloc(vap, &ni->ni_ucastkey, &keyix, &rxkeyix)) {
 		/*
 		 * Key cache is full; we'll fall back to doing
 		 * the more expensive lookup in software.  Note
 		 * this also means no h/w compression.
 		 */
 		/* XXX msg+statistic */
 	} else {
 		/* XXX locking? */
 		ni->ni_ucastkey.wk_keyix = keyix;
 		ni->ni_ucastkey.wk_rxkeyix = rxkeyix;
 		/* NB: must mark device key to get called back on delete */
 		ni->ni_ucastkey.wk_flags |= IEEE80211_KEY_DEVKEY;
 		IEEE80211_ADDR_COPY(ni->ni_ucastkey.wk_macaddr, ni->ni_macaddr);
 		/* NB: this will create a pass-thru key entry */
 		ath_keyset(sc, vap, &ni->ni_ucastkey, vap->iv_bss);
 	}
 }
 
 /*
  * Setup driver-specific state for a newly associated node.
  * Note that we're called also on a re-associate, the isnew
  * param tells us if this is the first time or not.
  *
  * Both paths refresh the multicast/management rate indices and
  * notify the rate control module.  A first association may also get
  * a station key slot; a re-association goes through the TX reassoc
  * cleanup and wakes the node if it was in power save.
  */
 static void
 ath_newassoc(struct ieee80211_node *ni, int isnew)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 	const struct ieee80211_txparam *tp = ni->ni_txparms;
 
 	an->an_mcastrix = ath_tx_findrix(sc, tp->mcastrate);
 	an->an_mgmtrix = ath_tx_findrix(sc, tp->mgmtrate);
 
 	DPRINTF(sc, ATH_DEBUG_NODE, "%s: %6D: reassoc; isnew=%d, is_powersave=%d\n",
 	    __func__,
 	    ni->ni_macaddr,
 	    ":",
 	    isnew,
 	    an->an_is_powersave);
 
 	/* Rate control is told under the node lock. */
 	ATH_NODE_LOCK(an);
 	ath_rate_newassoc(sc, an, isnew);
 	ATH_NODE_UNLOCK(an);
 
 	if (isnew) {
 		/*
 		 * First association: grab a key cache slot when privacy
 		 * is off, the hardware supports clear keys and the node
 		 * has no unicast key index yet.
 		 */
 		if ((vap->iv_flags & IEEE80211_F_PRIVACY) == 0 &&
 		    sc->sc_hasclrkey &&
 		    ni->ni_ucastkey.wk_keyix == IEEE80211_KEYIX_NONE)
 			ath_setup_stationkey(ni);
 		return;
 	}
 
 	/*
 	 * If we're reassociating, make sure that any paused queues
 	 * get unpaused.
 	 *
 	 * Now, we may have frames in the hardware queue for this node.
 	 * So if we are reassociating and there are frames in the queue,
 	 * we need to go through the cleanup path to ensure that they're
 	 * marked as non-aggregate.
 	 */
 	DPRINTF(sc, ATH_DEBUG_NODE,
 	    "%s: %6D: reassoc; is_powersave=%d\n",
 	    __func__,
 	    ni->ni_macaddr,
 	    ":",
 	    an->an_is_powersave);
 
 	/* XXX for now, we can't hold the lock across assoc */
 	ath_tx_node_reassoc(sc, an);
 
 	/* XXX for now, we can't hold the lock across wakeup */
 	if (an->an_is_powersave)
 		ath_tx_node_wakeup(sc, an);
 }
 
 /*
  * Push a regulatory domain and its channel list down to the HAL.
  *
  * Returns 0 when the HAL accepts the channel set and EINVAL
  * otherwise.
  */
 static int
 ath_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *reg,
 	int nchans, struct ieee80211_channel chans[])
 {
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	HAL_STATUS status;
 
 	DPRINTF(sc, ATH_DEBUG_REGDOMAIN,
 	    "%s: rd %u cc %u location %c%s\n",
 	    __func__, reg->regdomain, reg->country, reg->location,
 	    reg->ecm ? " ecm" : "");
 
 	status = ath_hal_set_channels(sc->sc_ah, chans, nchans,
 	    reg->country, reg->regdomain);
 	if (status == HAL_OK)
 		return 0;
 
 	DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: failed, status %u\n",
 	    __func__, status);
 	return EINVAL;		/* XXX */
 }
 
 /*
  * Fill "chans"/"nchans" with the channels the HAL reports for
  * HAL_MODE_ALL, using the SKU_DEBUG regdomain and CTRY_DEFAULT.
  * The HAL's return status is not checked.
  */
 static void
 ath_getradiocaps(struct ieee80211com *ic,
 	int maxchans, int *nchans, struct ieee80211_channel chans[])
 {
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 
 	DPRINTF(sc, ATH_DEBUG_REGDOMAIN, "%s: use rd %u cc %d\n",
 	    __func__, SKU_DEBUG, CTRY_DEFAULT);
 
 	/* XXX check return */
 	(void) ath_hal_getchannels(sc->sc_ah, chans, maxchans, nchans,
 	    HAL_MODE_ALL, CTRY_DEFAULT, SKU_DEBUG, AH_TRUE);
 }
 
 /*
  * Initialise the net80211 channel list and regulatory state from the HAL.
  *
  * Fills ic->ic_channels / ic->ic_nchans via ath_hal_init_channels(),
  * caches the regdomain and country code reported by the HAL in
  * sc->sc_eerd / sc->sc_eecc, and seeds ic->ic_regdomain from them.
  *
  * Returns 0 on success, or EINVAL if the HAL cannot supply a channel
  * list (in which case ic->ic_regdomain is left untouched).
  */
 static int
 ath_getchannels(struct ath_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_STATUS status;
 
 	/*
 	 * Collect channel set based on EEPROM contents.
 	 */
 	status = ath_hal_init_channels(ah, ic->ic_channels, IEEE80211_CHAN_MAX,
 	    &ic->ic_nchans, HAL_MODE_ALL, CTRY_DEFAULT, SKU_NONE, AH_TRUE);
 	if (status != HAL_OK) {
-		if_printf(ifp, "%s: unable to collect channel list from hal, "
-		    "status %d\n", __func__, status);
+		device_printf(sc->sc_dev,
+		    "%s: unable to collect channel list from hal, status %d\n",
+		    __func__, status);
 		return EINVAL;
 	}
 	/* The regdomain lookup result is ignored; only the value is kept. */
 	(void) ath_hal_getregdomain(ah, &sc->sc_eerd);
 	ath_hal_getcountrycode(ah, &sc->sc_eecc);	/* NB: cannot fail */
 	/* XXX map Atheros sku's to net80211 SKU's */
 	/* XXX net80211 types too small */
 	ic->ic_regdomain.regdomain = (uint16_t) sc->sc_eerd;
 	ic->ic_regdomain.country = (uint16_t) sc->sc_eecc;
 	/* The ISO country string is unknown here, so it is blanked out. */
 	ic->ic_regdomain.isocc[0] = ' ';	/* XXX don't know */
 	ic->ic_regdomain.isocc[1] = ' ';
 
 	/* Hard-coded defaults: ecm enabled, location code 'I'. */
 	ic->ic_regdomain.ecm = 1;
 	ic->ic_regdomain.location = 'I';
 
 	DPRINTF(sc, ATH_DEBUG_REGDOMAIN,
 	    "%s: eeprom rd %u cc %u (mapped rd %u cc %u) location %c%s\n",
 	    __func__, sc->sc_eerd, sc->sc_eecc,
 	    ic->ic_regdomain.regdomain, ic->ic_regdomain.country,
 	    ic->ic_regdomain.location, ic->ic_regdomain.ecm ? " ecm" : "");
 	return 0;
 }
 
 /*
  * Look up the HAL rate table for a net80211 phy mode and cache it.
  *
  * sc   - driver state; the table pointer is stored in sc->sc_rates[mode].
  * mode - one of the IEEE80211_MODE_* values handled below.
  *
  * Returns non-zero if the HAL supplied a rate table for the mode and 0
  * otherwise.  For an unrecognised mode sc->sc_rates[] is not touched;
  * for a recognised mode with no table, NULL is stored.
  */
 static int
 ath_rate_setup(struct ath_softc *sc, u_int mode)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	const HAL_RATE_TABLE *rt;
 
 	/* Translate the net80211 mode into the matching HAL_MODE_* query. */
 	switch (mode) {
 	case IEEE80211_MODE_11A:
 		rt = ath_hal_getratetable(ah, HAL_MODE_11A);
 		break;
 	case IEEE80211_MODE_HALF:
 		rt = ath_hal_getratetable(ah, HAL_MODE_11A_HALF_RATE);
 		break;
 	case IEEE80211_MODE_QUARTER:
 		rt = ath_hal_getratetable(ah, HAL_MODE_11A_QUARTER_RATE);
 		break;
 	case IEEE80211_MODE_11B:
 		rt = ath_hal_getratetable(ah, HAL_MODE_11B);
 		break;
 	case IEEE80211_MODE_11G:
 		rt = ath_hal_getratetable(ah, HAL_MODE_11G);
 		break;
 	case IEEE80211_MODE_TURBO_A:
 		rt = ath_hal_getratetable(ah, HAL_MODE_108A);
 		break;
 	case IEEE80211_MODE_TURBO_G:
 		rt = ath_hal_getratetable(ah, HAL_MODE_108G);
 		break;
 	case IEEE80211_MODE_STURBO_A:
 		rt = ath_hal_getratetable(ah, HAL_MODE_TURBO);
 		break;
 	case IEEE80211_MODE_11NA:
 		/* Both 11n modes use the HT20 flavour of the table. */
 		rt = ath_hal_getratetable(ah, HAL_MODE_11NA_HT20);
 		break;
 	case IEEE80211_MODE_11NG:
 		rt = ath_hal_getratetable(ah, HAL_MODE_11NG_HT20);
 		break;
 	default:
 		DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid mode %u\n",
 			__func__, mode);
 		return 0;
 	}
 	sc->sc_rates[mode] = rt;
 	return (rt != NULL);
 }
 
 /*
  * Switch the driver's notion of the current phy mode.
  *
  * Rebuilds the per-mode lookup tables from sc->sc_rates[mode]:
  *   - sc->sc_rixmap: 802.11 rate code -> hardware rate table index;
  *   - sc->sc_hwmap:  hardware rate index -> 802.11 rate, radiotap
  *                    flags and LED blink timings.
  * It then records the table/mode in sc->sc_currates / sc->sc_curmode and
  * picks the rate index used for protection frames (sc->sc_protrix).
  *
  * A rate table for the mode must already have been installed (see the
  * KASSERT below).  Rate control state is not reset here.
  */
 static void
 ath_setcurmode(struct ath_softc *sc, enum ieee80211_phymode mode)
 {
 #define	N(a)	(sizeof(a)/sizeof(a[0]))
 	/* NB: on/off times from the Atheros NDIS driver, w/ permission */
 	static const struct {
 		u_int		rate;		/* tx/rx 802.11 rate */
 		u_int16_t	timeOn;		/* LED on time (ms) */
 		u_int16_t	timeOff;	/* LED off time (ms) */
 	} blinkrates[] = {
 		{ 108,  40,  10 },
 		{  96,  44,  11 },
 		{  72,  50,  13 },
 		{  48,  57,  14 },
 		{  36,  67,  16 },
 		{  24,  80,  20 },
 		{  22, 100,  25 },
 		{  18, 133,  34 },
 		{  12, 160,  40 },
 		{  10, 200,  50 },
 		{   6, 240,  58 },
 		{   4, 267,  66 },
 		{   2, 400, 100 },
 		{   0, 500, 130 },
 		/* XXX half/quarter rates */
 	};
 	const HAL_RATE_TABLE *rt;
 	int i, j;
 
 	/* 0xff in every slot marks "no hardware rate for this rate code". */
 	memset(sc->sc_rixmap, 0xff, sizeof(sc->sc_rixmap));
 	rt = sc->sc_rates[mode];
 	KASSERT(rt != NULL, ("no h/w rate set for phy mode %u", mode));
 	for (i = 0; i < rt->rateCount; i++) {
 		uint8_t ieeerate = rt->info[i].dot11Rate & IEEE80211_RATE_VAL;
 		/* HT entries are keyed with the MCS flag set in the index. */
 		if (rt->info[i].phy != IEEE80211_T_HT)
 			sc->sc_rixmap[ieeerate] = i;
 		else
 			sc->sc_rixmap[ieeerate | IEEE80211_RATE_MCS] = i;
 	}
 	memset(sc->sc_hwmap, 0, sizeof(sc->sc_hwmap));
 	for (i = 0; i < N(sc->sc_hwmap); i++) {
 		if (i >= rt->rateCount) {
 			/*
 			 * Slots beyond the rate table only get the slowest
 			 * blink timings (same values as the last blinkrates
 			 * entry); everything else stays zeroed.
 			 */
 			sc->sc_hwmap[i].ledon = (500 * hz) / 1000;
 			sc->sc_hwmap[i].ledoff = (130 * hz) / 1000;
 			continue;
 		}
 		sc->sc_hwmap[i].ieeerate =
 			rt->info[i].dot11Rate & IEEE80211_RATE_VAL;
 		if (rt->info[i].phy == IEEE80211_T_HT)
 			sc->sc_hwmap[i].ieeerate |= IEEE80211_RATE_MCS;
 		sc->sc_hwmap[i].txflags = IEEE80211_RADIOTAP_F_DATAPAD;
 		if (rt->info[i].shortPreamble ||
 		    rt->info[i].phy == IEEE80211_T_OFDM)
 			sc->sc_hwmap[i].txflags |= IEEE80211_RADIOTAP_F_SHORTPRE;
 		sc->sc_hwmap[i].rxflags = sc->sc_hwmap[i].txflags;
 		/*
 		 * Search all but the final entry; if nothing matches, j is
 		 * left pointing at the final (catch-all) entry.
 		 */
 		for (j = 0; j < N(blinkrates)-1; j++)
 			if (blinkrates[j].rate == sc->sc_hwmap[i].ieeerate)
 				break;
 		/* NB: this uses the last entry if the rate isn't found */
 		/* XXX beware of overflow */
 		sc->sc_hwmap[i].ledon = (blinkrates[j].timeOn * hz) / 1000;
 		sc->sc_hwmap[i].ledoff = (blinkrates[j].timeOff * hz) / 1000;
 	}
 	sc->sc_currates = rt;
 	sc->sc_curmode = mode;
 	/*
 	 * All protection frames are transmitted at 2Mb/s for
 	 * 11g, otherwise at 1Mb/s.
 	 */
 	/* Rates are passed to ath_tx_findrix() in units of 500kb/s. */
 	if (mode == IEEE80211_MODE_11G)
 		sc->sc_protrix = ath_tx_findrix(sc, 2*2);
 	else
 		sc->sc_protrix = ath_tx_findrix(sc, 2*1);
 	/* NB: caller is responsible for resetting rate control state */
 #undef N
 }
 
 /*
  * Periodic watchdog callout handler.
  *
  * arg is the driver softc.  Must be entered with the softc lock held
  * (ATH_LOCK_ASSERT).  While sc->sc_wd_timer is non-zero it is counted
  * down once per invocation; when it reaches zero the device is treated
  * as hung: the hang state is queried and logged, the output error and
  * watchdog counters are bumped, and a reset is queued on sc->sc_tq.
  *
  * The callout always re-arms itself to fire again after hz ticks.
  */
 static void
 ath_watchdog(void *arg)
 {
 	struct ath_softc *sc = arg;
 	int do_reset = 0;
 
 	ATH_LOCK_ASSERT(sc);
 
 	if (sc->sc_wd_timer != 0 && --sc->sc_wd_timer == 0) {
 		struct ifnet *ifp = sc->sc_ifp;
 		uint32_t hangs;
 
 		/* Wake the chip before asking it about hang state. */
 		ath_power_set_power_state(sc, HAL_PM_AWAKE);
 
 		if (ath_hal_gethangstate(sc->sc_ah, 0xffff, &hangs) &&
 		    hangs != 0) {
 			/* Low byte set is reported as "bb", otherwise "mac". */
-			if_printf(ifp, "%s hang detected (0x%x)\n",
+			device_printf(sc->sc_dev, "%s hang detected (0x%x)\n",
 			    hangs & 0xff ? "bb" : "mac", hangs);
 		} else
-			if_printf(ifp, "device timeout\n");
+			device_printf(sc->sc_dev, "device timeout\n");
 		do_reset = 1;
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		sc->sc_stats.ast_watchdog++;
 
 		ath_power_restore_power_state(sc);
 	}
 
 	/*
 	 * We can't hold the lock across the ath_reset() call.
 	 *
 	 * And since this routine can't hold a lock and sleep,
 	 * do the reset deferred.
 	 */
 	if (do_reset) {
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask);
 	}
 
 	callout_schedule(&sc->sc_wd_ch, hz);
 }
 
 /*
  * Fetch the rate control statistics for the given node.
  *
  * sc - driver state.
  * rs - request block; rs->is_u.macaddr selects the node in the station
  *      table (ic->ic_sta) and rs is passed on to
  *      ath_rate_fetch_node_stats() for the results.
  *
  * Returns EINVAL if no node matches the MAC address, otherwise whatever
  * ath_rate_fetch_node_stats() returns.  The per-node lock is held only
  * around the fetch.
  */
 static int
 ath_ioctl_ratestats(struct ath_softc *sc, struct ath_rateioctl *rs)
 {
 	struct ath_node *an;
 	struct ieee80211com *ic = sc->sc_ifp->if_l2com;
 	struct ieee80211_node *ni;
 	int error = 0;
 
 	/* Perform a lookup on the given node */
 	/*
 	 * NOTE(review): the lookup presumably returns the node with a
 	 * reference held, which is what the decref below releases -
 	 * confirm against net80211.
 	 */
 	ni = ieee80211_find_node(&ic->ic_sta, rs->is_u.macaddr);
 	if (ni == NULL) {
 		error = EINVAL;
 		goto bad;
 	}
 
 	/* Lock the ath_node */
 	an = ATH_NODE(ni);
 	ATH_NODE_LOCK(an);
 
 	/* Fetch the rate control stats for this node */
 	error = ath_rate_fetch_node_stats(sc, an, rs);
 
 	/* No matter what happens here, just drop through */
 
 	/* Unlock the ath_node */
 	ATH_NODE_UNLOCK(an);
 
 	/* Unref the node */
 	ieee80211_node_decref(ni);
 
 bad:
 	return (error);
 }
 
 #ifdef ATH_DIAGAPI
 /*
  * Diagnostic interface to the HAL.  This is used by various
  * tools to do things like retrieve register contents for
  * debugging.  The mechanism is intentionally opaque so that
  * it can change frequently w/o concern for compatibility.
  *
  * sc - driver state.
  * ad - request block.  ad->ad_id carries the diagnostic id (masked with
  *      ATH_DIAG_ID) plus the ATH_DIAG_IN / ATH_DIAG_DYN flags;
  *      ad_in_data/ad_in_size describe the input buffer and
  *      ad_out_data/ad_out_size the output buffer.  ad->ad_out_size is
  *      shrunk if the HAL produced less data than requested.
  *
  * Returns 0 on success, ENOMEM if a temporary buffer cannot be
  * allocated, EINVAL if the HAL rejects the request, or the error from
  * copyin()/copyout().
  */
 static int
 ath_ioctl_diag(struct ath_softc *sc, struct ath_diag *ad)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	u_int id = ad->ad_id & ATH_DIAG_ID;
 	void *indata = NULL;
 	void *outdata = NULL;
 	u_int32_t insize = ad->ad_in_size;
 	u_int32_t outsize = ad->ad_out_size;
 	int error = 0;
 
 	/*
 	 * NOTE(review): insize and outsize are taken from the request
 	 * block as-is and used as malloc() sizes below without an upper
 	 * bound - confirm the caller is trusted or add a limit.
 	 */
 	if (ad->ad_id & ATH_DIAG_IN) {
 		/*
 		 * Copy in data.
 		 */
 		indata = malloc(insize, M_TEMP, M_NOWAIT);
 		if (indata == NULL) {
 			error = ENOMEM;
 			goto bad;
 		}
 		error = copyin(ad->ad_in_data, indata, insize);
 		if (error)
 			goto bad;
 	}
 	if (ad->ad_id & ATH_DIAG_DYN) {
 		/*
 		 * Allocate a buffer for the results (otherwise the HAL
 		 * returns a pointer to a buffer where we can read the
 		 * results).  Note that we depend on the HAL leaving this
 		 * pointer for us to use below in reclaiming the buffer;
 		 * may want to be more defensive.
 		 */
 		outdata = malloc(outsize, M_TEMP, M_NOWAIT);
 		if (outdata == NULL) {
 			error = ENOMEM;
 			goto bad;
 		}
 	}
 
 
 	/*
 	 * Force the chip awake for everything except HAL_DIAG_REGS; the
 	 * softc lock is dropped again before calling into the HAL.
 	 */
 	ATH_LOCK(sc);
 	if (id != HAL_DIAG_REGS)
 		ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	if (ath_hal_getdiagstate(ah, id, indata, insize, &outdata, &outsize)) {
 		/* Never copy out more than the HAL actually produced. */
 		if (outsize < ad->ad_out_size)
 			ad->ad_out_size = outsize;
 		if (outdata != NULL)
 			error = copyout(outdata, ad->ad_out_data,
 					ad->ad_out_size);
 	} else {
 		error = EINVAL;
 	}
 
 	/* Undo the power state change made above. */
 	ATH_LOCK(sc);
 	if (id != HAL_DIAG_REGS)
 		ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 bad:
 	/*
 	 * Only buffers allocated here are freed: outdata is released only
 	 * in the ATH_DIAG_DYN case, since otherwise it points at memory
 	 * supplied by the HAL.
 	 */
 	if ((ad->ad_id & ATH_DIAG_IN) && indata != NULL)
 		free(indata, M_TEMP);
 	if ((ad->ad_id & ATH_DIAG_DYN) && outdata != NULL)
 		free(outdata, M_TEMP);
 	return error;
 }
 #endif /* ATH_DIAGAPI */
 
 /*
  * Interface ioctl handler.
  *
  * ifp  - the interface; the softc and net80211 state hang off it.
  * cmd  - ioctl command; see the switch below for the supported set.
  * data - command argument, interpreted as a struct ifreq (or as one of
  *        the ath request structures overlaying it).
  *
  * Returns 0 on success or an errno value; unsupported commands yield
  * EINVAL.
  */
 static int
 ath_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 #define	IS_RUNNING(ifp) \
 	((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))
 	struct ath_softc *sc = ifp->if_softc;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ifreq *ifr = (struct ifreq *)data;
 	const HAL_RATE_TABLE *rt;
 	int error = 0;
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:
 		if (IS_RUNNING(ifp)) {
 			/*
 			 * To avoid rescanning another access point,
 			 * do not call ath_init() here.  Instead,
 			 * only reflect promisc mode settings.
 			 */
 			ATH_LOCK(sc);
 			ath_power_set_power_state(sc, HAL_PM_AWAKE);
 			ath_mode_init(sc);
 			ath_power_restore_power_state(sc);
 			ATH_UNLOCK(sc);
 		} else if (ifp->if_flags & IFF_UP) {
 			/*
 			 * Beware of being called during attach/detach
 			 * to reset promiscuous mode.  In that case we
 			 * will still be marked UP but not RUNNING.
 			 * However trying to re-init the interface
 			 * is the wrong thing to do as we've already
 			 * torn down much of our state.  There's
 			 * probably a better way to deal with this.
 			 */
 			if (!sc->sc_invalid)
 				ath_init(sc);	/* XXX lose error */
 		} else {
 			/* Interface marked down: stop and put the chip to sleep. */
 			ATH_LOCK(sc);
 			ath_stop_locked(ifp);
 			if (!sc->sc_invalid)
 				ath_power_setpower(sc, HAL_PM_FULL_SLEEP);
 			ATH_UNLOCK(sc);
 		}
 		break;
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &ic->ic_media, cmd);
 		break;
 	case SIOCGATHSTATS:
 		/* NB: embed these numbers to get a consistent view */
 		sc->sc_stats.ast_tx_packets = ifp->if_get_counter(ifp,
 		    IFCOUNTER_OPACKETS);
 		sc->sc_stats.ast_rx_packets = ifp->if_get_counter(ifp,
 		    IFCOUNTER_IPACKETS);
 		sc->sc_stats.ast_tx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgtxrssi);
 		sc->sc_stats.ast_rx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgrssi);
 #ifdef IEEE80211_SUPPORT_TDMA
 		sc->sc_stats.ast_tdma_tsfadjp = TDMA_AVG(sc->sc_avgtsfdeltap);
 		sc->sc_stats.ast_tdma_tsfadjm = TDMA_AVG(sc->sc_avgtsfdeltam);
 #endif
 		rt = sc->sc_currates;
 		sc->sc_stats.ast_tx_rate =
 		    rt->info[sc->sc_txrix].dot11Rate &~ IEEE80211_RATE_BASIC;
 		/*
 		 * The phy field is an enumerated PHY type, not a bitmask
 		 * (ath_setcurmode() compares it with ==), so test it for
 		 * equality.  A bitwise test would also match any other
 		 * PHY type whose value shares a bit with IEEE80211_T_HT
 		 * and wrongly tag that rate as an MCS rate.
 		 */
 		if (rt->info[sc->sc_txrix].phy == IEEE80211_T_HT)
 			sc->sc_stats.ast_tx_rate |= IEEE80211_RATE_MCS;
 		return copyout(&sc->sc_stats,
 		    ifr->ifr_data, sizeof (sc->sc_stats));
 	case SIOCGATHAGSTATS:
 		return copyout(&sc->sc_aggr_stats,
 		    ifr->ifr_data, sizeof (sc->sc_aggr_stats));
 	case SIOCZATHSTATS:
 		/* Clearing the statistics requires driver privilege. */
 		error = priv_check(curthread, PRIV_DRIVER);
 		if (error == 0) {
 			memset(&sc->sc_stats, 0, sizeof(sc->sc_stats));
 			memset(&sc->sc_aggr_stats, 0,
 			    sizeof(sc->sc_aggr_stats));
 			memset(&sc->sc_intr_stats, 0,
 			    sizeof(sc->sc_intr_stats));
 		}
 		break;
 #ifdef ATH_DIAGAPI
 	case SIOCGATHDIAG:
 		error = ath_ioctl_diag(sc, (struct ath_diag *) ifr);
 		break;
 	case SIOCGATHPHYERR:
 		error = ath_ioctl_phyerr(sc,(struct ath_diag*) ifr);
 		break;
 #endif
 	case SIOCGATHSPECTRAL:
 		error = ath_ioctl_spectral(sc,(struct ath_diag*) ifr);
 		break;
 	case SIOCGATHNODERATESTATS:
 		error = ath_ioctl_ratestats(sc, (struct ath_rateioctl *) ifr);
 		break;
 	case SIOCGIFADDR:
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 #undef IS_RUNNING
 }
 
 /*
  * Announce various information on device/driver attach.
  *
  * Always prints the MAC/RF/PHY revision line and the radio revisions.
  * The hardware queue assignments and the multicast key search notice
  * are printed only when bootverbose is set; the rx/tx buffer counts are
  * printed only when they differ from the compiled-in defaults
  * (ATH_RXBUF / ATH_TXBUF).
  */
 static void
 ath_announce(struct ath_softc *sc)
 {
-	struct ifnet *ifp = sc->sc_ifp;
 	struct ath_hal *ah = sc->sc_ah;
 
-	if_printf(ifp, "AR%s mac %d.%d RF%s phy %d.%d\n",
+	device_printf(sc->sc_dev, "AR%s mac %d.%d RF%s phy %d.%d\n",
 		ath_hal_mac_name(ah), ah->ah_macVersion, ah->ah_macRev,
 		ath_hal_rf_name(ah), ah->ah_phyRev >> 4, ah->ah_phyRev & 0xf);
-	if_printf(ifp, "2GHz radio: 0x%.4x; 5GHz radio: 0x%.4x\n",
+	device_printf(sc->sc_dev, "2GHz radio: 0x%.4x; 5GHz radio: 0x%.4x\n",
 		ah->ah_analog2GhzRev, ah->ah_analog5GhzRev);
 	if (bootverbose) {
 		int i;
 		/* One line per WME access category, up to and including VO. */
 		for (i = 0; i <= WME_AC_VO; i++) {
 			struct ath_txq *txq = sc->sc_ac2q[i];
-			if_printf(ifp, "Use hw queue %u for %s traffic\n",
-				txq->axq_qnum, ieee80211_wme_acnames[i]);
+			device_printf(sc->sc_dev,
+			    "Use hw queue %u for %s traffic\n",
+			    txq->axq_qnum, ieee80211_wme_acnames[i]);
 		}
-		if_printf(ifp, "Use hw queue %u for CAB traffic\n",
-			sc->sc_cabq->axq_qnum);
-		if_printf(ifp, "Use hw queue %u for beacons\n", sc->sc_bhalq);
+		device_printf(sc->sc_dev, "Use hw queue %u for CAB traffic\n",
+		    sc->sc_cabq->axq_qnum);
+		device_printf(sc->sc_dev, "Use hw queue %u for beacons\n",
+		    sc->sc_bhalq);
 	}
 	if (ath_rxbuf != ATH_RXBUF)
-		if_printf(ifp, "using %u rx buffers\n", ath_rxbuf);
+		device_printf(sc->sc_dev, "using %u rx buffers\n", ath_rxbuf);
 	if (ath_txbuf != ATH_TXBUF)
-		if_printf(ifp, "using %u tx buffers\n", ath_txbuf);
+		device_printf(sc->sc_dev, "using %u tx buffers\n", ath_txbuf);
 	if (sc->sc_mcastkey && bootverbose)
-		if_printf(ifp, "using multicast key search\n");
+		device_printf(sc->sc_dev, "using multicast key search\n");
 }
 
 /*
  * Deferred DFS (radar detection) processing.
  *
  * p        - the driver softc.
  * npending - unused here.
  *
  * Runs ath_dfs_process_radar_event() on the current channel and, if it
  * reports a radar event, notifies net80211 with the com lock held.
  * NOTE(review): presumably scheduled as a taskqueue task, judging by
  * the (void *, int) signature - confirm at the registration site.
  */
 static void
 ath_dfs_tasklet(void *p, int npending)
 {
 	struct ath_softc *sc = (struct ath_softc *) p;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 
 	/*
 	 * If previous processing has found a radar event,
 	 * signal this to the net80211 layer to begin DFS
 	 * processing.
 	 */
 	if (ath_dfs_process_radar_event(sc, sc->sc_curchan)) {
 		/* DFS event found, initiate channel change */
 		/*
 		 * XXX doesn't currently tell us whether the event
 		 * XXX was found in the primary or extension
 		 * XXX channel!
 		 */
 		IEEE80211_LOCK(ic);
 		ieee80211_dfs_notify_radar(ic, sc->sc_curchan);
 		IEEE80211_UNLOCK(ic);
 	}
 }
 
 /*
  * Enable/disable power save.  This must be called with
  * no TX driver locks currently held, so it should only
  * be called from the RX path (which doesn't hold any
  * TX driver locks.)
  *
  * ni     - the node changing power save state.
  * enable - non-zero when the node enters power save, zero when it
  *          leaves it.
  *
  * With ATH_SW_PSQ the driver's software queues for the node are
  * suspended/resumed first; in both configurations the change is then
  * forwarded to the handler saved in the vap (avp->av_node_ps).
  */
 static void
 ath_node_powersave(struct ieee80211_node *ni, int enable)
 {
 #ifdef	ATH_SW_PSQ
 	struct ath_node *an = ATH_NODE(ni);
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ath_vap *avp = ATH_VAP(ni->ni_vap);
 
 	/* XXX and no TXQ locks should be held here */
 
 	DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE, "%s: %6D: enable=%d\n",
 	    __func__,
 	    ni->ni_macaddr,
 	    ":",
 	    !! enable);
 
 	/* Suspend or resume software queue handling */
 	if (enable)
 		ath_tx_node_sleep(sc, an);
 	else
 		ath_tx_node_wakeup(sc, an);
 
 	/* Update net80211 state */
 	avp->av_node_ps(ni, enable);
 #else
 	/* No software power-save queue: just pass the event through. */
 	struct ath_vap *avp = ATH_VAP(ni->ni_vap);
 
 	/* Update net80211 state */
 	avp->av_node_ps(ni, enable);
 #endif/* ATH_SW_PSQ */
 }
 
 /*
  * Notification from net80211 that the powersave queue state has
  * changed.
  *
  * Since the software queue also may have some frames:
  *
  * + if the node software queue has frames and the TID state
  *   is 0, we set the TIM;
  * + if the node and the stack are both empty, we clear the TIM bit.
  * + If the stack tries to set the bit, always set it.
  * + If the stack tries to clear the bit, only clear it if the
  *   software queue in question is also cleared.
  *
  * TODO: this is called during node teardown; so let's ensure this
  * is all correctly handled and that the TIM bit is cleared.
  * It may be that the node flush is called _AFTER_ the net80211
  * stack clears the TIM.
  *
  * Here is the racy part.  Since it's possible >1 concurrent,
  * overlapping TXes will appear complete with a TX completion in
  * another thread, it's possible that the concurrent TIM calls will
  * clash.  We can't hold the node lock here because setting the
  * TIM grabs the net80211 comlock and this may cause a LOR.
  * The solution is either to totally serialise _everything_ at
  * this point (ie, all TX, completion and any reset/flush go into
  * one taskqueue) or a new "ath TIM lock" needs to be created that
  * just wraps the driver state change and this call to avp->av_set_tim().
  *
  * The same race exists in the net80211 power save queue handling
  * as well.  Since multiple transmitting threads may queue frames
  * into the driver, as well as ps-poll and the driver transmitting
  * frames (and thus clearing the psq), it's quite possible that
  * a packet entering the PSQ and a ps-poll being handled will
  * race, causing the TIM to be cleared and not re-set.
  */
 static int
 ath_node_set_tim(struct ieee80211_node *ni, int enable)
 {
 	/*
 	 * ni     - node whose TIM bit is being changed.
 	 * enable - non-zero to set the TIM bit, zero to clear it.
 	 *
 	 * Returns the value of avp->av_set_tim() when it is invoked, and 0
 	 * when the request is ignored or no handler is installed.
 	 */
 #ifdef	ATH_SW_PSQ
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 	struct ath_vap *avp = ATH_VAP(ni->ni_vap);
 	int changed = 0;
 
 	/* Record what the stack asked for, even if we don't act on it. */
 	ATH_TX_LOCK(sc);
 	an->an_stack_psq = enable;
 
 	/*
 	 * This will get called for all operating modes,
 	 * even if avp->av_set_tim is unset.
 	 * It's currently set for hostap/ibss modes; but
 	 * the same infrastructure is used for both STA
 	 * and AP/IBSS node power save.
 	 */
 	if (avp->av_set_tim == NULL) {
 		ATH_TX_UNLOCK(sc);
 		return (0);
 	}
 
 	/*
 	 * If setting the bit, always set it here.
 	 * If clearing the bit, only clear it if the
 	 * software queue is also empty.
 	 *
 	 * If the node has left power save, just clear the TIM
 	 * bit regardless of the state of the power save queue.
 	 *
 	 * XXX TODO: although atomics are used, it's quite possible
 	 * that a race will occur between this and setting/clearing
 	 * in another thread.  TX completion will occur always in
 	 * one thread, however setting/clearing the TIM bit can come
 	 * from a variety of different process contexts!
 	 */
 	/*
 	 * In every branch below the TX lock is released before
 	 * avp->av_set_tim() is called; only the driver-side state
 	 * (an_tim_set) is updated under the lock.
 	 */
 	if (enable && an->an_tim_set == 1) {
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: enable=%d, tim_set=1, ignoring\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":",
 		    enable);
 		ATH_TX_UNLOCK(sc);
 	} else if (enable) {
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: enable=%d, enabling TIM\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":",
 		    enable);
 		an->an_tim_set = 1;
 		ATH_TX_UNLOCK(sc);
 		changed = avp->av_set_tim(ni, enable);
 	} else if (an->an_swq_depth == 0) {
 		/* disable */
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: enable=%d, an_swq_depth == 0, disabling\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":",
 		    enable);
 		an->an_tim_set = 0;
 		ATH_TX_UNLOCK(sc);
 		changed = avp->av_set_tim(ni, enable);
 	} else if (! an->an_is_powersave) {
 		/*
 		 * disable regardless; the node isn't in powersave now
 		 */
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: enable=%d, an_pwrsave=0, disabling\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":",
 		    enable);
 		an->an_tim_set = 0;
 		ATH_TX_UNLOCK(sc);
 		changed = avp->av_set_tim(ni, enable);
 	} else {
 		/*
 		 * psq disable, node is currently in powersave, node
 		 * software queue isn't empty, so don't clear the TIM bit
 		 * for now.
 		 */
 		ATH_TX_UNLOCK(sc);
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: enable=%d, an_swq_depth > 0, ignoring\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":",
 		    enable);
 		changed = 0;
 	}
 
 	return (changed);
 #else
 	struct ath_vap *avp = ATH_VAP(ni->ni_vap);
 
 	/*
 	 * Some operating modes don't set av_set_tim(), so don't
 	 * update it here.
 	 */
 	if (avp->av_set_tim == NULL)
 		return (0);
 
 	return (avp->av_set_tim(ni, enable));
 #endif /* ATH_SW_PSQ */
 }
 
 /*
  * Set or update the TIM from the software queue.
  *
  * Check the software queue depth before attempting to lock
  * anything; that avoids trying to obtain the lock.  Then,
  * re-check afterwards to ensure nothing has changed in the
  * meantime.
  *
  * set:   This is designed to be called from the TX path, after
  *        a frame has been queued; to see if the swq > 0.
  *
  * clear: This is designed to be called from the buffer completion point
  *        (right now it's ath_tx_default_comp()) where the state of
  *        a software queue has changed.
  *
  * It makes sense to place it at buffer free / completion rather
  * than after each software queue operation, as there's no real
  * point in churning the TIM bit as the last frames in the software
  * queue are transmitted.  If they fail and we retry them, we'd
  * just be setting the TIM bit again anyway.
  */
 void
 ath_tx_update_tim(struct ath_softc *sc, struct ieee80211_node *ni,
      int enable)
 {
 	/*
 	 * sc     - driver state.
 	 * ni     - destination node; NULL is accepted and ignored.
 	 * enable - non-zero to consider setting the TIM bit, zero to
 	 *          consider clearing it.
 	 *
 	 * Without ATH_SW_PSQ this function does nothing.
 	 */
 #ifdef	ATH_SW_PSQ
 	struct ath_node *an;
 	struct ath_vap *avp;
 
 	/* Don't do this for broadcast/etc frames */
 	if (ni == NULL)
 		return;
 
 	an = ATH_NODE(ni);
 	avp = ATH_VAP(ni->ni_vap);
 
 	/*
 	 * And for operating modes without the TIM handler set, let's
 	 * just skip those.
 	 */
 	if (avp->av_set_tim == NULL)
 		return;
 
 	/*
 	 * From here on the caller must already hold the TX lock; note
 	 * that the early returns above happen before this is checked.
 	 */
 	ATH_TX_LOCK_ASSERT(sc);
 
 	if (enable) {
 		/*
 		 * Set the TIM only for a sleeping node that has frames
 		 * in the software queue and whose TIM is not already set.
 		 */
 		if (an->an_is_powersave &&
 		    an->an_tim_set == 0 &&
 		    an->an_swq_depth != 0) {
 			DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 			    "%s: %6D: swq_depth>0, tim_set=0, set!\n",
 			    __func__,
 			    ni->ni_macaddr,
 			    ":");
 			an->an_tim_set = 1;
 			(void) avp->av_set_tim(ni, 1);
 		}
 	} else {
 		/*
 		 * Don't bother grabbing the lock unless the queue is empty.
 		 */
 		if (an->an_swq_depth != 0)
 			return;
 
 		/*
 		 * Clear the TIM only if the stack's own power-save queue
 		 * has not asked for it to stay set (an_stack_psq == 0).
 		 */
 		if (an->an_is_powersave &&
 		    an->an_stack_psq == 0 &&
 		    an->an_tim_set == 1 &&
 		    an->an_swq_depth == 0) {
 			DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 			    "%s: %6D: swq_depth=0, tim_set=1, psq_set=0,"
 			    " clear!\n",
 			    __func__,
 			    ni->ni_macaddr,
 			    ":");
 			an->an_tim_set = 0;
 			(void) avp->av_set_tim(ni, 0);
 		}
 	}
 #else
 	return;
 #endif	/* ATH_SW_PSQ */
 }
 
 /*
  * Received a ps-poll frame from net80211.
  *
  * Here we get a chance to serve out a software-queued frame ourselves
  * before we punt it to net80211 to transmit us one itself - either
  * because there's traffic in the net80211 psq, or a NULL frame to
  * indicate there's nothing else.
  */
 static void
 ath_node_recv_pspoll(struct ieee80211_node *ni, struct mbuf *m)
 {
 #ifdef	ATH_SW_PSQ
 	struct ath_node *an;
 	struct ath_vap *avp;
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	int tid;
 
 	/* Just paranoia */
 	if (ni == NULL)
 		return;
 
 	/*
 	 * Unassociated (temporary node) station.
 	 */
 	if (ni->ni_associd == 0)
 		return;
 
 	/*
 	 * We do have an active node, so let's begin looking into it.
 	 */
 	an = ATH_NODE(ni);
 	avp = ATH_VAP(ni->ni_vap);
 
 	/*
 	 * For now, we just call the original ps-poll method.
 	 * Once we're ready to flip this on:
 	 *
 	 * + Set leak to 1, as no matter what we're going to have
 	 *   to send a frame;
 	 * + Check the software queue and if there's something in it,
 	 *   schedule the highest TID thas has traffic from this node.
 	 *   Then make sure we schedule the software scheduler to
 	 *   run so it picks up said frame.
 	 *
 	 * That way whatever happens, we'll at least send _a_ frame
 	 * to the given node.
 	 *
 	 * Again, yes, it's crappy QoS if the node has multiple
 	 * TIDs worth of traffic - but let's get it working first
 	 * before we optimise it.
 	 *
 	 * Also yes, there's definitely latency here - we're not
 	 * direct dispatching to the hardware in this path (and
 	 * we're likely being called from the packet receive path,
 	 * so going back into TX may be a little hairy!) but again
 	 * I'd like to get this working first before optimising
 	 * turn-around time.
 	 */
 
 	ATH_TX_LOCK(sc);
 
 	/*
 	 * Legacy - we're called and the node isn't asleep.
 	 * Immediately punt.
 	 */
 	if (! an->an_is_powersave) {
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: not in powersave?\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":");
 		ATH_TX_UNLOCK(sc);
 		avp->av_recv_pspoll(ni, m);
 		return;
 	}
 
 	/*
 	 * We're in powersave.
 	 *
 	 * Leak a frame.
 	 */
 	an->an_leak_count = 1;
 
 	/*
 	 * Now, if there's no frames in the node, just punt to
 	 * recv_pspoll.
 	 *
 	 * Don't bother checking if the TIM bit is set, we really
 	 * only care if there are any frames here!
 	 */
 	if (an->an_swq_depth == 0) {
 		ATH_TX_UNLOCK(sc);
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: SWQ empty; punting to net80211\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":");
 		avp->av_recv_pspoll(ni, m);
 		return;
 	}
 
 	/*
 	 * Ok, let's schedule the highest TID that has traffic
 	 * and then schedule something.
 	 */
 	for (tid = IEEE80211_TID_SIZE - 1; tid >= 0; tid--) {
 		struct ath_tid *atid = &an->an_tid[tid];
 		/*
 		 * No frames? Skip.
 		 */
 		if (atid->axq_depth == 0)
 			continue;
 		ath_tx_tid_sched(sc, atid);
 		/*
 		 * XXX we could do a direct call to the TXQ
 		 * scheduler code here to optimise latency
 		 * at the expense of a REALLY deep callstack.
 		 */
 		ATH_TX_UNLOCK(sc);
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask);
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: leaking frame to TID %d\n",
 		    __func__,
 		    ni->ni_macaddr,
 		    ":",
 		    tid);
 		return;
 	}
 
 	ATH_TX_UNLOCK(sc);
 
 	/*
 	 * XXX nothing in the TIDs at this point? Eek.
 	 */
 	DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 	    "%s: %6D: TIDs empty, but ath_node showed traffic?!\n",
 	    __func__,
 	    ni->ni_macaddr,
 	    ":");
 	avp->av_recv_pspoll(ni, m);
 #else
 	avp->av_recv_pspoll(ni, m);
 #endif	/* ATH_SW_PSQ */
 }
 
 /* Kernel module declaration: if_ath is version 1 and depends on wlan. */
 MODULE_VERSION(if_ath, 1);
 MODULE_DEPEND(if_ath, wlan, 1, 1, 1);          /* 802.11 media layer */
 /* The alq dependency is only declared when an ALQ logging option is enabled. */
 #if	defined(IEEE80211_ALQ) || defined(AH_DEBUG_ALQ) || defined(ATH_DEBUG_ALQ)
 MODULE_DEPEND(if_ath, alq, 1, 1, 1);
 #endif
Index: head/sys/dev/ath/if_ath_rx.c
===================================================================
--- head/sys/dev/ath/if_ath_rx.c	(revision 283743)
+++ head/sys/dev/ath/if_ath_rx.c	(revision 283744)
@@ -1,1469 +1,1469 @@
 /*-
  * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for the Atheros Wireless LAN controller.
  *
  * This software is derived from work of Atsushi Onoe; his contribution
  * is greatly appreciated.
  */
 
 #include "opt_inet.h"
 #include "opt_ath.h"
 /*
  * This is needed for register operations which are performed
  * by the driver - eg, calls to ath_hal_gettsf32().
  *
  * It's also required for any AH_DEBUG checks in here, eg the
  * module dependencies.
  */
 #include "opt_ah.h"
 #include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/callout.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kthread.h>
 #include <sys/taskqueue.h>
 #include <sys/priv.h>
 #include <sys/module.h>
 #include <sys/ktr.h>
 #include <sys/smp.h>	/* for mp_ncpus */
 
 #include <machine/bus.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_llc.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_regdomain.h>
 #ifdef IEEE80211_SUPPORT_SUPERG
 #include <net80211/ieee80211_superg.h>
 #endif
 #ifdef IEEE80211_SUPPORT_TDMA
 #include <net80211/ieee80211_tdma.h>
 #endif
 
 #include <net/bpf.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #endif
 
 #include <dev/ath/if_athvar.h>
 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
 #include <dev/ath/ath_hal/ah_diagcodes.h>
 
 #include <dev/ath/if_ath_debug.h>
 #include <dev/ath/if_ath_misc.h>
 #include <dev/ath/if_ath_tsf.h>
 #include <dev/ath/if_ath_tx.h>
 #include <dev/ath/if_ath_sysctl.h>
 #include <dev/ath/if_ath_led.h>
 #include <dev/ath/if_ath_keycache.h>
 #include <dev/ath/if_ath_rx.h>
 #include <dev/ath/if_ath_beacon.h>
 #include <dev/ath/if_athdfs.h>
 
 #ifdef ATH_TX99_DIAG
 #include <dev/ath/ath_tx99/ath_tx99.h>
 #endif
 
 #ifdef	ATH_DEBUG_ALQ
 #include <dev/ath/if_ath_alq.h>
 #endif
 
 #include <dev/ath/if_ath_lna_div.h>
 
 /*
  * Calculate the receive filter according to the
  * operating mode and state:
  *
  * o always accept unicast, broadcast, and multicast traffic
  * o accept PHY error frames when hardware doesn't have MIB support
  *   to count and we need them for ANI (sta mode only until recently)
  *   and we are not scanning (ANI is disabled)
  *   NB: older hal's add rx filter bits out of sight and we need to
  *	 blindly preserve them
  * o probe request frames are accepted only when operating in
  *   hostap, adhoc, mesh, or monitor modes
  * o enable promiscuous mode
  *   - when in monitor mode
  *   - if interface marked PROMISC (assumes bridge setting is filtered)
  * o accept beacons:
  *   - when operating in station mode for collecting rssi data when
  *     the station is otherwise quiet, or
  *   - when operating in adhoc mode so the 802.11 layer creates
  *     node table entries for peers,
  *   - when scanning
  *   - when doing s/w beacon miss (e.g. for ap+sta)
  *   - when operating in ap mode in 11g to detect overlapping bss that
  *     require protection
  *   - when operating in mesh mode to detect neighbors
  * o accept control frames:
  *   - when in monitor mode
  * XXX HT protection for 11n
  */
 u_int32_t
 ath_calcrxfilter(struct ath_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	u_int32_t rfilt;
 
 	rfilt = HAL_RX_FILTER_UCAST | HAL_RX_FILTER_BCAST | HAL_RX_FILTER_MCAST;
 	if (!sc->sc_needmib && !sc->sc_scanning)
 		rfilt |= HAL_RX_FILTER_PHYERR;
 	if (ic->ic_opmode != IEEE80211_M_STA)
 		rfilt |= HAL_RX_FILTER_PROBEREQ;
 	/* XXX ic->ic_monvaps != 0? */
 	if (ic->ic_opmode == IEEE80211_M_MONITOR || (ifp->if_flags & IFF_PROMISC))
 		rfilt |= HAL_RX_FILTER_PROM;
 
 	/*
 	 * Only listen to all beacons if we're scanning.
 	 *
 	 * Otherwise we only really need to hear beacons from
 	 * our own BSSID.
 	 */
 	if (ic->ic_opmode == IEEE80211_M_STA ||
 	    ic->ic_opmode == IEEE80211_M_IBSS || sc->sc_swbmiss) {
 		if (sc->sc_do_mybeacon && ! sc->sc_scanning) {
 			rfilt |= HAL_RX_FILTER_MYBEACON;
 		} else { /* scanning, non-mybeacon chips */
 			rfilt |= HAL_RX_FILTER_BEACON;
 		}
 	}
 
 	/*
 	 * NB: We don't recalculate the rx filter when
 	 * ic_protmode changes; otherwise we could do
 	 * this only when ic_protmode != NONE.
 	 */
 	if (ic->ic_opmode == IEEE80211_M_HOSTAP &&
 	    IEEE80211_IS_CHAN_ANYG(ic->ic_curchan))
 		rfilt |= HAL_RX_FILTER_BEACON;
 
 	/*
 	 * Enable hardware PS-POLL RX only for hostap mode;
 	 * STA mode sends PS-POLL frames but never
 	 * receives them.
 	 */
 	if (ath_hal_getcapability(sc->sc_ah, HAL_CAP_PSPOLL,
 	    0, NULL) == HAL_OK &&
 	    ic->ic_opmode == IEEE80211_M_HOSTAP)
 		rfilt |= HAL_RX_FILTER_PSPOLL;
 
 	if (sc->sc_nmeshvaps) {
 		rfilt |= HAL_RX_FILTER_BEACON;
 		if (sc->sc_hasbmatch)
 			rfilt |= HAL_RX_FILTER_BSSID;
 		else
 			rfilt |= HAL_RX_FILTER_PROM;
 	}
 	if (ic->ic_opmode == IEEE80211_M_MONITOR)
 		rfilt |= HAL_RX_FILTER_CONTROL;
 
 	/*
 	 * Enable RX of compressed BAR frames only when doing
 	 * 802.11n. Required for A-MPDU.
 	 */
 	if (IEEE80211_IS_CHAN_HT(ic->ic_curchan))
 		rfilt |= HAL_RX_FILTER_COMPBAR;
 
 	/*
 	 * Enable radar PHY errors if requested by the
 	 * DFS module.
 	 */
 	if (sc->sc_dodfs)
 		rfilt |= HAL_RX_FILTER_PHYRADAR;
 
 	/*
 	 * Enable spectral PHY errors if requested by the
 	 * spectral module.
 	 */
 	if (sc->sc_dospectral)
 		rfilt |= HAL_RX_FILTER_PHYRADAR;
 
 	DPRINTF(sc, ATH_DEBUG_MODE, "%s: RX filter 0x%x, %s if_flags 0x%x\n",
 	    __func__, rfilt, ieee80211_opmode_name[ic->ic_opmode], ifp->if_flags);
 	return rfilt;
 }
 
 /*
  * Prepare a receive buffer for the hardware: attach and DMA-map an
  * mbuf cluster if the buffer has none, set up its descriptor, and
  * append the descriptor to the tail of the RX descriptor chain.
  *
  * Returns 0 on success, ENOMEM if no mbuf cluster could be allocated,
  * or the error returned by bus_dmamap_load_mbuf_sg().
  */
 static int
 ath_legacy_rxbuf_init(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_desc *desc;
 	struct mbuf *buf;
 	int rc;
 
 	/* XXX TODO: ATH_RX_LOCK_ASSERT(sc); */
 
 	buf = bf->bf_m;
 	if (buf == NULL) {
 		/*
 		 * NB: by assigning a page to the rx dma buffer we
 		 * implicitly satisfy the Atheros requirement that
 		 * this buffer be cache-line-aligned and sized to be
 		 * multiple of the cache line size.  Not doing this
 		 * causes weird stuff to happen (for the 5210 at least).
 		 */
 		buf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (buf == NULL) {
 			DPRINTF(sc, ATH_DEBUG_ANY,
 			    "%s: no mbuf/cluster\n", __func__);
 			sc->sc_stats.ast_rx_nombuf++;
 			return ENOMEM;
 		}
 		/* Expose the whole cluster to the hardware. */
 		buf->m_len = buf->m_ext.ext_size;
 		buf->m_pkthdr.len = buf->m_len;
 
 		rc = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, buf,
 		    bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT);
 		if (rc != 0) {
 			DPRINTF(sc, ATH_DEBUG_ANY,
 			    "%s: bus_dmamap_load_mbuf_sg failed; error %d\n",
 			    __func__, rc);
 			sc->sc_stats.ast_rx_busdma++;
 			m_freem(buf);
 			return rc;
 		}
 		KASSERT(bf->bf_nseg == 1,
 		    ("multi-segment packet; nseg %u", bf->bf_nseg));
 		bf->bf_m = buf;
 	}
 	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREREAD);
 
 	/*
 	 * Setup descriptors.  For receive we always terminate
 	 * the descriptor list with a self-linked entry so we'll
 	 * not get overrun under high load (as can happen with a
 	 * 5212 when ANI processing enables PHY error frames).
 	 *
 	 * To ensure the last descriptor is self-linked we create
 	 * each descriptor as self-linked and add it to the end.  As
 	 * each additional descriptor is added the previous self-linked
 	 * entry is ``fixed'' naturally.  This should be safe even
 	 * if DMA is happening.  When processing RX interrupts we
 	 * never remove/process the last, self-linked, entry on the
 	 * descriptor list.  This ensures the hardware always has
 	 * someplace to write a new frame.
 	 */
 	/*
 	 * 11N: we can no longer afford to self link the last descriptor.
 	 * MAC acknowledges BA status as long as it copies frames to host
 	 * buffer (or rx fifo). This can incorrectly acknowledge packets
 	 * to a sender if last desc is self-linked.
 	 */
 	desc = bf->bf_desc;
 	/* Link to self when sc_rxslink is set, otherwise terminate the list. */
 	desc->ds_link = sc->sc_rxslink ? bf->bf_daddr : 0;
 	desc->ds_data = bf->bf_segs[0].ds_addr;
 	ath_hal_setuprxdesc(ah, desc, buf->m_len /* buffer size */, 0);
 
 	/* Hook this descriptor after the previous tail, then become the tail. */
 	if (sc->sc_rxlink != NULL)
 		*sc->sc_rxlink = bf->bf_daddr;
 	sc->sc_rxlink = &desc->ds_link;
 	return 0;
 }
 
 /*
  * Intercept management frames to collect beacon rssi data
  * and to do ibss merges.
  *
  * The frame is always handed to the vap's saved av_recv_mgmt handler
  * first.  Afterwards:
  *  - beacons update the averaged beacon RSSI kept for the HAL, log
  *    TSF drift diagnostics, and (when sc_syncbeacon is set and the
  *    beacon is from our BSS node while in RUN or SLEEP state)
  *    reprogram the beacon timers via ath_beacon_config();
  *  - beacons and probe responses received in IBSS mode while in RUN
  *    state may trigger ieee80211_ibss_merge().
  *
  * The subtype, rxs, rssi and nf arguments are passed through to the
  * saved handler unchanged; subtype and rssi are also used here.
  */
 void
 ath_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m,
 	int subtype, const struct ieee80211_rx_stats *rxs, int rssi, int nf)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ath_softc *sc = vap->iv_ic->ic_ifp->if_softc;
 	uint64_t tsf_beacon_old, tsf_beacon;
 	uint64_t nexttbtt;
 	int64_t tsf_delta;
 	int32_t tsf_delta_bmiss;
 	int32_t tsf_remainder;
 	uint64_t tsf_beacon_target;
 	int tsf_intval;
 
 	/*
 	 * Read the timestamp currently stored in the node before calling
 	 * up; it is read again after the call as tsf_beacon.
 	 */
 	tsf_beacon_old = ((uint64_t) LE_READ_4(ni->ni_tstamp.data + 4)) << 32;
 	tsf_beacon_old |= LE_READ_4(ni->ni_tstamp.data);
 
 	/*
 	 * Beacon interval converted from TU to TSF units (<< 10).
 	 * Falls back to 1 when ni_intval is 0 so the divisions and
 	 * modulo operations below never divide by zero.
 	 */
 #define	TU_TO_TSF(_tu)	(((u_int64_t)(_tu)) << 10)
 	tsf_intval = 1;
 	if (ni->ni_intval > 0) {
 		tsf_intval = TU_TO_TSF(ni->ni_intval);
 	}
 #undef	TU_TO_TSF
 
 	/*
 	 * Call up first so subsequent work can use information
 	 * potentially stored in the node (e.g. for ibss merge).
 	 */
 	ATH_VAP(vap)->av_recv_mgmt(ni, m, subtype, rxs, rssi, nf);
 	switch (subtype) {
 	case IEEE80211_FC0_SUBTYPE_BEACON:
 		/* update rssi statistics for use by the hal */
 		/* XXX unlocked check against vap->iv_bss? */
 		ATH_RSSI_LPF(sc->sc_halstats.ns_avgbrssi, rssi);
 
 		/* Timestamp stored in the node after the call up. */
 		tsf_beacon = ((uint64_t) LE_READ_4(ni->ni_tstamp.data + 4)) << 32;
 		tsf_beacon |= LE_READ_4(ni->ni_tstamp.data);
 
 		nexttbtt = ath_hal_getnexttbtt(sc->sc_ah);
 
 		/*
 		 * Let's calculate the delta and remainder, so we can see
 		 * if the beacon timer from the AP is varying by more than
 		 * a few TU.  (Which would be a huge, huge problem.)
 		 */
 		tsf_delta = (long long) tsf_beacon - (long long) tsf_beacon_old;
 
 		/* Whole beacon intervals elapsed between the two timestamps. */
 		tsf_delta_bmiss = tsf_delta / tsf_intval;
 
 		/*
 		 * If our delta is greater than half the beacon interval,
 		 * let's round the bmiss value up to the next beacon
 		 * interval.  Ie, we're running really, really early
 		 * on the next beacon.
 		 */
 		if (tsf_delta % tsf_intval > (tsf_intval / 2))
 			tsf_delta_bmiss ++;
 
 		/* Where the beacon should have landed given the rounded count. */
 		tsf_beacon_target = tsf_beacon_old +
 		    (((unsigned long long) tsf_delta_bmiss) * (long long) tsf_intval);
 
 		/*
 		 * The remainder using '%' is between 0 .. intval-1.
 		 * If we're actually running too fast, then the remainder
 		 * will be some large number just under intval-1.
 		 * So we need to look at whether we're running
 		 * before or after the target beacon interval
 		 * and if we are, modify how we do the remainder
 		 * calculation.
 		 */
 		if (tsf_beacon < tsf_beacon_target) {
 			tsf_remainder =
 			    -(tsf_intval - ((tsf_beacon - tsf_beacon_old) % tsf_intval));
 		} else {
 			tsf_remainder = (tsf_beacon - tsf_beacon_old) % tsf_intval;
 		}
 
 		/* The values computed above are only consumed by these debug logs. */
 		DPRINTF(sc, ATH_DEBUG_BEACON, "%s: old_tsf=%llu, new_tsf=%llu, target_tsf=%llu, delta=%lld, bmiss=%d, remainder=%d\n",
 		    __func__,
 		    (unsigned long long) tsf_beacon_old,
 		    (unsigned long long) tsf_beacon,
 		    (unsigned long long) tsf_beacon_target,
 		    (long long) tsf_delta,
 		    tsf_delta_bmiss,
 		    tsf_remainder);
 
 		DPRINTF(sc, ATH_DEBUG_BEACON, "%s: tsf=%llu, nexttbtt=%llu, delta=%d\n",
 		    __func__,
 		    (unsigned long long) tsf_beacon,
 		    (unsigned long long) nexttbtt,
 		    (int32_t) tsf_beacon - (int32_t) nexttbtt + tsf_intval);
 
 		/*
 		 * One-shot resync: only for beacons from our own BSS node
 		 * while the vap is in RUN or SLEEP state.
 		 */
 		if (sc->sc_syncbeacon &&
 		    ni == vap->iv_bss &&
 		    (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) {
 			DPRINTF(sc, ATH_DEBUG_BEACON,
 			    "%s: syncbeacon=1; syncing\n",
 			    __func__);
 			/*
 			 * Resync beacon timers using the tsf of the beacon
 			 * frame we just received.
 			 */
 			ath_beacon_config(sc, vap);
 			sc->sc_syncbeacon = 0;
 		}
 
 
 		/* fall thru... */
 	case IEEE80211_FC0_SUBTYPE_PROBE_RESP:
 		if (vap->iv_opmode == IEEE80211_M_IBSS &&
 		    vap->iv_state == IEEE80211_S_RUN) {
 			/*
 			 * Extend the 32-bit receive timestamp of the last
 			 * processed frame (sc_lastrs) to a full 64-bit TSF.
 			 */
 			uint32_t rstamp = sc->sc_lastrs->rs_tstamp;
 			uint64_t tsf = ath_extend_tsf(sc, rstamp,
 				ath_hal_gettsf64(sc->sc_ah));
 			/*
 			 * Handle ibss merge as needed; check the tsf on the
 			 * frame before attempting the merge.  The 802.11 spec
 			 * says the station should change its bssid to match
 			 * the oldest station with the same ssid, where oldest
 			 * is determined by the tsf.  Note that hardware
 			 * reconfiguration happens through callback to
 			 * ath_newstate as the state machine will go from
 			 * RUN -> RUN when this happens.
 			 */
 			if (le64toh(ni->ni_tstamp.tsf) >= tsf) {
 				DPRINTF(sc, ATH_DEBUG_STATE,
 				    "ibss merge, rstamp %u tsf %ju "
 				    "tstamp %ju\n", rstamp, (uintmax_t)tsf,
 				    (uintmax_t)ni->ni_tstamp.tsf);
 				(void) ieee80211_ibss_merge(ni);
 			}
 		}
 		break;
 	}
 }
 
 #ifdef	ATH_ENABLE_RADIOTAP_VENDOR_EXT
 /*
  * Populate the vendor-extension portion of the softc's RX radiotap
  * header (sc_rx_th) from the receive status of a frame.
  *
  * Only 'ifp' (to reach the softc) and 'rs' are consulted; 'm', 'tsf'
  * and 'nf' are accepted but not used here.
  */
 static void
 ath_rx_tap_vendor(struct ifnet *ifp, struct mbuf *m,
     const struct ath_rx_status *rs, u_int64_t tsf, int16_t nf)
 {
 	struct ath_softc *sc = ifp->if_softc;
 	int chain;
 
 	/* Fill in the extension bitmap */
 	sc->sc_rx_th.wr_ext_bitmap = htole32(1 << ATH_RADIOTAP_VENDOR_HEADER);
 
 	/* Fill in the vendor header */
 	sc->sc_rx_th.wr_vh.vh_oui[0] = 0x7f;
 	sc->sc_rx_th.wr_vh.vh_oui[1] = 0x03;
 	sc->sc_rx_th.wr_vh.vh_oui[2] = 0x00;
 	/* XXX what should this be? */
 	sc->sc_rx_th.wr_vh.vh_sub_ns = 0;
 	sc->sc_rx_th.wr_vh.vh_skip_len =
 	    htole16(sizeof(struct ath_radiotap_vendor_hdr));
 
 	/* General version info */
 	sc->sc_rx_th.wr_v.vh_version = 1;
 	sc->sc_rx_th.wr_v.vh_rx_chainmask = sc->sc_rxchainmask;
 
 	/* Per-chain rssi, control and extension channel */
 	for (chain = 0; chain < 3; chain++) {
 		sc->sc_rx_th.wr_v.rssi_ctl[chain] = rs->rs_rssi_ctl[chain];
 	}
 	for (chain = 0; chain < 3; chain++) {
 		sc->sc_rx_th.wr_v.rssi_ext[chain] = rs->rs_rssi_ext[chain];
 	}
 
 	/* evm */
 	sc->sc_rx_th.wr_v.evm[0] = rs->rs_evm0;
 	sc->sc_rx_th.wr_v.evm[1] = rs->rs_evm1;
 	sc->sc_rx_th.wr_v.evm[2] = rs->rs_evm2;
 	/* These are only populated from the AR9300 or later */
 	sc->sc_rx_th.wr_v.evm[3] = rs->rs_evm3;
 	sc->sc_rx_th.wr_v.evm[4] = rs->rs_evm4;
 
 	/* RX rate */
 	sc->sc_rx_th.wr_v.vh_rx_hwrate = rs->rs_rate;
 
 	/* RX flags */
 	sc->sc_rx_th.wr_v.vh_rs_flags = rs->rs_flags;
 
 	/* Packet flags: direction first, then aggregate/phyerr markers */
 	sc->sc_rx_th.wr_v.vh_flags = ATH_VENDOR_PKT_RX;
 	if (rs->rs_isaggr)
 		sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_ISAGGR;
 	if (rs->rs_moreaggr)
 		sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_MOREAGGR;
 	if (rs->rs_status & HAL_RXERR_PHY)
 		sc->sc_rx_th.wr_v.vh_flags |= ATH_VENDOR_PKT_RXPHYERR;
 
 	/* phyerr info; 0xff is stored when the frame was not a PHY error */
 	sc->sc_rx_th.wr_v.vh_phyerr_code =
 	    (rs->rs_status & HAL_RXERR_PHY) ? rs->rs_phyerr : 0xff;
 
 	sc->sc_rx_th.wr_v.vh_rs_status = rs->rs_status;
 	sc->sc_rx_th.wr_v.vh_rssi = rs->rs_rssi;
 }
 #endif	/* ATH_ENABLE_RADIOTAP_VENDOR_EXT */
 
 /*
  * Populate the softc's RX radiotap header (sc_rx_th) for a received
  * frame: rate and flags from the hardware rate map, HT channel flags
  * (only when built with AH_SUPPORT_AR5416), the extended TSF, the
  * bad-FCS flag, noise/signal levels and the antenna.
  *
  * 'm' is accepted but not used here.
  */
 static void
 ath_rx_tap(struct ifnet *ifp, struct mbuf *m,
 	const struct ath_rx_status *rs, u_int64_t tsf, int16_t nf)
 {
 #define	CHAN_HT20	htole32(IEEE80211_CHAN_HT20)
 #define	CHAN_HT40U	htole32(IEEE80211_CHAN_HT40U)
 #define	CHAN_HT40D	htole32(IEEE80211_CHAN_HT40D)
 #define	CHAN_HT		(CHAN_HT20|CHAN_HT40U|CHAN_HT40D)
 	struct ath_softc *sc = ifp->if_softc;
 	const HAL_RATE_TABLE *rates = sc->sc_currates;
 	uint8_t idx;
 
 	KASSERT(rates != NULL, ("no rate table, mode %u", sc->sc_curmode));
 
 	/* Map the hardware rate code to the driver's rate index. */
 	idx = rates->rateCodeToIndex[rs->rs_rate];
 	sc->sc_rx_th.wr_rate = sc->sc_hwmap[idx].ieeerate;
 	sc->sc_rx_th.wr_flags = sc->sc_hwmap[idx].rxflags;
 #ifdef AH_SUPPORT_AR5416
 	/* Clear all HT channel flags before setting the one for this frame. */
 	sc->sc_rx_th.wr_chan_flags &= ~CHAN_HT;
 	if (rs->rs_status & HAL_RXERR_PHY) {
 		/*
 		 * PHY error - make sure the channel flags
 		 * reflect the actual channel configuration,
 		 * not the received frame.
 		 */
 		if (IEEE80211_IS_CHAN_HT40U(sc->sc_curchan)) {
 			sc->sc_rx_th.wr_chan_flags |= CHAN_HT40U;
 		} else if (IEEE80211_IS_CHAN_HT40D(sc->sc_curchan)) {
 			sc->sc_rx_th.wr_chan_flags |= CHAN_HT40D;
 		} else if (IEEE80211_IS_CHAN_HT20(sc->sc_curchan)) {
 			sc->sc_rx_th.wr_chan_flags |= CHAN_HT20;
 		}
 	} else if (sc->sc_rx_th.wr_rate & IEEE80211_RATE_MCS) {	/* HT rate */
 		struct ieee80211com *ic = ifp->if_l2com;
 
 		if (rs->rs_flags & HAL_RX_2040) {
 			/* 40MHz frame: direction comes from the current channel. */
 			sc->sc_rx_th.wr_chan_flags |=
 			    IEEE80211_IS_CHAN_HT40U(ic->ic_curchan) ?
 			    CHAN_HT40U : CHAN_HT40D;
 		} else {
 			sc->sc_rx_th.wr_chan_flags |= CHAN_HT20;
 		}
 		if (!(rs->rs_flags & HAL_RX_GI))
 			sc->sc_rx_th.wr_flags |= IEEE80211_RADIOTAP_F_SHORTGI;
 	}
 
 #endif
 	sc->sc_rx_th.wr_tsf = htole64(ath_extend_tsf(sc, rs->rs_tstamp, tsf));
 	if (rs->rs_status & HAL_RXERR_CRC)
 		sc->sc_rx_th.wr_flags |= IEEE80211_RADIOTAP_F_BADFCS;
 	/* XXX propagate other error flags from descriptor */
 	sc->sc_rx_th.wr_antenna = rs->rs_antenna;
 	sc->sc_rx_th.wr_antnoise = nf;
 	sc->sc_rx_th.wr_antsignal = nf + rs->rs_rssi;
 #undef CHAN_HT
 #undef CHAN_HT20
 #undef CHAN_HT40U
 #undef CHAN_HT40D
 }
 
 /*
  * Report a Michael MIC failure to net80211 for the frame whose
  * 802.11 header is 'wh'.  The sending node is looked up from the
  * header; if no node is found nothing is reported.
  */
 static void
 ath_handle_micerror(struct ieee80211com *ic,
 	struct ieee80211_frame *wh, int keyix)
 {
 	struct ieee80211_node *sender;
 
 	/* XXX recheck MIC to deal w/ chips that lie */
 	/* XXX discard MIC errors on !data frames */
 	sender = ieee80211_find_rxnode(ic,
 	    (const struct ieee80211_frame_min *) wh);
 	if (sender == NULL)
 		return;
 
 	ieee80211_notify_michael_failure(sender->ni_vap, wh, keyix);
 	/* Drop the node reference now that the failure is reported. */
 	ieee80211_free_node(sender);
 }
 
 /*
  * Process a single packet.
  *
  * The mbuf must already be synced, unmapped and removed from bf->bf_m
  * by this stage.
  *
  * The mbuf must be consumed by this routine - either passed up the
  * net80211 stack, put on the holding queue, or freed.
  *
  * Arguments:
  *   rs     - receive status of the frame; rs_antenna may be rewritten here
  *   status - HAL completion status; only used for the debug dump
  *   tsf    - 64-bit TSF used to extend rs->rs_tstamp
  *   nf     - noise floor passed to radiotap and net80211
  *   qtype  - selects the sc_rxedma[] entry holding any pending partial frame
  *   bf     - the buffer the frame came from; only used for the debug dump
  *   m      - the received frame
  *
  * Returns 1 only when the frame was dispatched to a known node while
  * operating in station mode with a valid rx key index; 0 otherwise.
  */
 int
 ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs, HAL_STATUS status,
     uint64_t tsf, int nf, HAL_RX_QUEUE qtype, struct ath_buf *bf,
     struct mbuf *m)
 {
 	uint64_t rstamp;
 	int len, type;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ieee80211_node *ni;
 	int is_good = 0;
 	struct ath_rx_edma *re = &sc->sc_rxedma[qtype];
 
 	/*
 	 * Calculate the correct 64 bit TSF given
 	 * the TSF64 register value and rs_tstamp.
 	 */
 	rstamp = ath_extend_tsf(sc, rs->rs_tstamp, tsf);
 
 	/* These aren't specifically errors */
 #ifdef	AH_SUPPORT_AR5416
 	if (rs->rs_flags & HAL_RX_GI)
 		sc->sc_stats.ast_rx_halfgi++;
 	if (rs->rs_flags & HAL_RX_2040)
 		sc->sc_stats.ast_rx_2040++;
 	if (rs->rs_flags & HAL_RX_DELIM_CRC_PRE)
 		sc->sc_stats.ast_rx_pre_crc_err++;
 	if (rs->rs_flags & HAL_RX_DELIM_CRC_POST)
 		sc->sc_stats.ast_rx_post_crc_err++;
 	if (rs->rs_flags & HAL_RX_DECRYPT_BUSY)
 		sc->sc_stats.ast_rx_decrypt_busy_err++;
 	if (rs->rs_flags & HAL_RX_HI_RX_CHAIN)
 		sc->sc_stats.ast_rx_hi_rx_chain++;
 	if (rs->rs_flags & HAL_RX_STBC)
 		sc->sc_stats.ast_rx_stbc++;
 #endif /* AH_SUPPORT_AR5416 */
 
 	/*
 	 * Error handling.  Most errors drop the frame via rx_error;
 	 * missing-key cases jump to rx_accept so the upper layers see
 	 * the frame.
 	 */
 	if (rs->rs_status != 0) {
 		if (rs->rs_status & HAL_RXERR_CRC)
 			sc->sc_stats.ast_rx_crcerr++;
 		if (rs->rs_status & HAL_RXERR_FIFO)
 			sc->sc_stats.ast_rx_fifoerr++;
 		if (rs->rs_status & HAL_RXERR_PHY) {
 			sc->sc_stats.ast_rx_phyerr++;
 			/* Process DFS radar events */
 			if ((rs->rs_phyerr == HAL_PHYERR_RADAR) ||
 			    (rs->rs_phyerr == HAL_PHYERR_FALSE_RADAR_EXT)) {
 				/* Now pass it to the radar processing code */
 				ath_dfs_process_phy_err(sc, m, rstamp, rs);
 			}
 
 			/* Be suitably paranoid about receiving phy errors out of the stats array bounds */
 			if (rs->rs_phyerr < 64)
 				sc->sc_stats.ast_rx_phy[rs->rs_phyerr]++;
 			goto rx_error;	/* NB: don't count in ierrors */
 		}
 		if (rs->rs_status & HAL_RXERR_DECRYPT) {
 			/*
 			 * Decrypt error.  If the error occurred
 			 * because there was no hardware key, then
 			 * let the frame through so the upper layers
 			 * can process it.  This is necessary for 5210
 			 * parts which have no way to setup a ``clear''
 			 * key cache entry.
 			 *
 			 * XXX do key cache faulting
 			 */
 			if (rs->rs_keyix == HAL_RXKEYIX_INVALID)
 				goto rx_accept;
 			sc->sc_stats.ast_rx_badcrypt++;
 		}
 		/*
 		 * Similar as above - if the failure was a keymiss
 		 * just punt it up to the upper layers for now.
 		 */
 		if (rs->rs_status & HAL_RXERR_KEYMISS) {
 			sc->sc_stats.ast_rx_keymiss++;
 			goto rx_accept;
 		}
 		if (rs->rs_status & HAL_RXERR_MIC) {
 			sc->sc_stats.ast_rx_badmic++;
 			/*
 			 * Do minimal work required to hand off
 			 * the 802.11 header for notification.
 			 */
 			/* XXX frag's and qos frames */
 			len = rs->rs_datalen;
 			if (len >= sizeof (struct ieee80211_frame)) {
 				ath_handle_micerror(ic,
 				    mtod(m, struct ieee80211_frame *),
 				    sc->sc_splitmic ?
 					rs->rs_keyix-32 : rs->rs_keyix);
 			}
 		}
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 rx_error:
 		/*
 		 * Cleanup any pending partial frame.
 		 */
 		if (re->m_rxpending != NULL) {
 			m_freem(re->m_rxpending);
 			re->m_rxpending = NULL;
 		}
 		/*
 		 * When a tap is present pass error frames
 		 * that have been requested.  By default we
 		 * pass decrypt+mic errors but others may be
 		 * interesting (e.g. crc).
 		 */
 		if (ieee80211_radiotap_active(ic) &&
 		    (rs->rs_status & sc->sc_monpass)) {
 			/* NB: bpf needs the mbuf length setup */
 			len = rs->rs_datalen;
 			m->m_pkthdr.len = m->m_len = len;
 			ath_rx_tap(ifp, m, rs, rstamp, nf);
 #ifdef	ATH_ENABLE_RADIOTAP_VENDOR_EXT
 			ath_rx_tap_vendor(ifp, m, rs, rstamp, nf);
 #endif	/* ATH_ENABLE_RADIOTAP_VENDOR_EXT */
 			ieee80211_radiotap_rx_all(ic, m);
 		}
 		/* XXX pass MIC errors up for s/w recalculation */
 		m_freem(m); m = NULL;
 		goto rx_next;
 	}
 rx_accept:
 	len = rs->rs_datalen;
 	m->m_len = len;
 
 	if (rs->rs_more) {
 		/*
 		 * Frame spans multiple descriptors; save
 		 * it for the next completed descriptor, it
 		 * will be used to construct a jumbogram.
 		 */
 		if (re->m_rxpending != NULL) {
 			/* NB: max frame size is currently 2 clusters */
 			sc->sc_stats.ast_rx_toobig++;
 			m_freem(re->m_rxpending);
 		}
 		m->m_pkthdr.rcvif = ifp;
 		m->m_pkthdr.len = len;
 		re->m_rxpending = m;
 		/* Ownership moved to the pending slot; nothing more to do now. */
 		m = NULL;
 		goto rx_next;
 	} else if (re->m_rxpending != NULL) {
 		/*
 		 * This is the second part of a jumbogram,
 		 * chain it to the first mbuf, adjust the
 		 * frame length, and clear the rxpending state.
 		 */
 		re->m_rxpending->m_next = m;
 		re->m_rxpending->m_pkthdr.len += len;
 		m = re->m_rxpending;
 		re->m_rxpending = NULL;
 	} else {
 		/*
 		 * Normal single-descriptor receive; setup
 		 * the rcvif and packet length.
 		 */
 		m->m_pkthdr.rcvif = ifp;
 		m->m_pkthdr.len = len;
 	}
 
 	/*
 	 * Validate rs->rs_antenna.
 	 *
 	 * Some users w/ AR9285 NICs have reported crashes
 	 * here because rs_antenna field is bogusly large.
 	 * Let's enforce the maximum antenna limit of 8
 	 * (and it shouldn't be hard coded, but that's a
 	 * separate problem) and if there's an issue, print
 	 * out an error and adjust rs_antenna to something
 	 * sensible.
 	 *
 	 * This code should be removed once the actual
 	 * root cause of the issue has been identified.
 	 * For example, it may be that the rs_antenna
 	 * field is only valid for the last frame of
 	 * an aggregate and it just happens that it is
 	 * "mostly" right. (This is a general statement -
 	 * the majority of the statistics are only valid
 	 * for the last frame in an aggregate.)
 	 */
 	if (rs->rs_antenna > 7) {
 		device_printf(sc->sc_dev, "%s: rs_antenna > 7 (%d)\n",
 		    __func__, rs->rs_antenna);
 #ifdef	ATH_DEBUG
 		ath_printrxbuf(sc, bf, 0, status == HAL_OK);
 #endif /* ATH_DEBUG */
 		rs->rs_antenna = 0;	/* XXX better than nothing */
 	}
 
 	/*
 	 * If this is an AR9285/AR9485, then the receive and LNA
 	 * configuration is stored in RSSI[2] / EXTRSSI[2].
 	 * We can extract this out to build a much better
 	 * receive antenna profile.
 	 *
 	 * Yes, this just blurts over the above RX antenna field
 	 * for now.  It's fine, the AR9285 doesn't really use
 	 * that.
 	 *
 	 * Later on we should store away the fine grained LNA
 	 * information and keep separate counters just for
 	 * that.  It'll help when debugging the AR9285/AR9485
 	 * combined diversity code.
 	 */
 	if (sc->sc_rx_lnamixer) {
 		rs->rs_antenna = 0;
 
 		/* Bits 0:1 - the LNA configuration used */
 		rs->rs_antenna |=
 		    ((rs->rs_rssi_ctl[2] & HAL_RX_LNA_CFG_USED)
 		      >> HAL_RX_LNA_CFG_USED_S);
 
 		/* Bit 2 - the external RX antenna switch */
 		if (rs->rs_rssi_ctl[2] & HAL_RX_LNA_EXTCFG)
 			rs->rs_antenna |= 0x4;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	/* rs_antenna is at most 7 here thanks to the checks above. */
 	sc->sc_stats.ast_ant_rx[rs->rs_antenna]++;
 
 	/*
 	 * Populate the rx status block.  When there are bpf
 	 * listeners we do the additional work to provide
 	 * complete status.  Otherwise we fill in only the
 	 * material required by ieee80211_input.  Note that
 	 * noise setting is filled in above.
 	 */
 	if (ieee80211_radiotap_active(ic)) {
 		ath_rx_tap(ifp, m, rs, rstamp, nf);
 #ifdef	ATH_ENABLE_RADIOTAP_VENDOR_EXT
 		ath_rx_tap_vendor(ifp, m, rs, rstamp, nf);
 #endif	/* ATH_ENABLE_RADIOTAP_VENDOR_EXT */
 	}
 
 	/*
 	 * From this point on we assume the frame is at least
 	 * as large as ieee80211_frame_min; verify that.
 	 */
 	if (len < IEEE80211_MIN_LEN) {
 		if (!ieee80211_radiotap_active(ic)) {
 			DPRINTF(sc, ATH_DEBUG_RECV,
 			    "%s: short packet %d\n", __func__, len);
 			sc->sc_stats.ast_rx_tooshort++;
 		} else {
 			/* NB: in particular this captures ack's */
 			ieee80211_radiotap_rx_all(ic, m);
 		}
 		m_freem(m); m = NULL;
 		goto rx_next;
 	}
 
 	if (IFF_DUMPPKTS(sc, ATH_DEBUG_RECV)) {
 		const HAL_RATE_TABLE *rt = sc->sc_currates;
 		uint8_t rix = rt->rateCodeToIndex[rs->rs_rate];
 
 		ieee80211_dump_pkt(ic, mtod(m, caddr_t), len,
 		    sc->sc_hwmap[rix].ieeerate, rs->rs_rssi);
 	}
 
 	/* Trim the trailing CRC before handing the frame up. */
 	m_adj(m, -IEEE80211_CRC_LEN);
 
 	/*
 	 * Locate the node for sender, track state, and then
 	 * pass the (referenced) node up to the 802.11 layer
 	 * for its use.
 	 */
 	ni = ieee80211_find_rxnode_withkey(ic,
 		mtod(m, const struct ieee80211_frame_min *),
 		rs->rs_keyix == HAL_RXKEYIX_INVALID ?
 			IEEE80211_KEYIX_NONE : rs->rs_keyix);
 	/* Remembered for ath_recv_mgmt(), which reads sc_lastrs->rs_tstamp. */
 	sc->sc_lastrs = rs;
 
 #ifdef	AH_SUPPORT_AR5416
 	if (rs->rs_isaggr)
 		sc->sc_stats.ast_rx_agg++;
 #endif /* AH_SUPPORT_AR5416 */
 
 	if (ni != NULL) {
 		/*
 		 * Only punt packets for ampdu reorder processing for
 		 * 11n nodes; net80211 enforces that M_AMPDU is only
 		 * set for 11n nodes.
 		 */
 		if (ni->ni_flags & IEEE80211_NODE_HT)
 			m->m_flags |= M_AMPDU;
 
 		/*
 		 * Sending station is known, dispatch directly.
 		 */
 		type = ieee80211_input(ni, m, rs->rs_rssi, nf);
 		ieee80211_free_node(ni);
 		m = NULL;
 		/*
 		 * Arrange to update the last rx timestamp only for
 		 * frames from our ap when operating in station mode.
 		 * This assumes the rx key is always setup when
 		 * associated.
 		 */
 		if (ic->ic_opmode == IEEE80211_M_STA &&
 		    rs->rs_keyix != HAL_RXKEYIX_INVALID)
 			is_good = 1;
 	} else {
 		/* Sender unknown: let net80211 offer the frame to every vap. */
 		type = ieee80211_input_all(ic, m, rs->rs_rssi, nf);
 		m = NULL;
 	}
 
 	/*
 	 * At this point we have passed the frame up the stack; thus
 	 * the mbuf is no longer ours.
 	 */
 
 	/*
 	 * Track rx rssi and do any rx antenna management.
 	 */
 	ATH_RSSI_LPF(sc->sc_halstats.ns_avgrssi, rs->rs_rssi);
 	if (sc->sc_diversity) {
 		/*
 		 * When using fast diversity, change the default rx
 		 * antenna if diversity chooses the other antenna 3
 		 * times in a row.
 		 */
 		if (sc->sc_defant != rs->rs_antenna) {
 			if (++sc->sc_rxotherant >= 3)
 				ath_setdefantenna(sc, rs->rs_antenna);
 		} else
 			sc->sc_rxotherant = 0;
 	}
 
 	/* Handle slow diversity if enabled */
 	if (sc->sc_dolnadiv) {
 		ath_lna_rx_comb_scan(sc, rs, ticks, hz);
 	}
 
 	if (sc->sc_softled) {
 		/*
 		 * Blink for any data frame.  Otherwise do a
 		 * heartbeat-style blink when idle.  The latter
 		 * is mainly for station mode where we depend on
 		 * periodic beacon frames to trigger the poll event.
 		 */
 		if (type == IEEE80211_FC0_TYPE_DATA) {
 			const HAL_RATE_TABLE *rt = sc->sc_currates;
 			ath_led_event(sc,
 			    rt->rateCodeToIndex[rs->rs_rate]);
 		} else if (ticks - sc->sc_ledevent >= sc->sc_ledidle)
 			ath_led_event(sc, 0);
 		}
 rx_next:
 	/*
 	 * Debugging - complain if we didn't NULL the mbuf pointer
 	 * here.
 	 */
 	if (m != NULL) {
 		device_printf(sc->sc_dev,
 		    "%s: mbuf %p should've been freed!\n",
 		    __func__,
 		    m);
 	}
 	return (is_good);
 }
 
 /*
  * Maximum number of frames ath_rx_proc() handles in one pass,
  * unless sc_kickpcu was set when the pass started.
  */
 #define	ATH_RX_MAX		128
 
 /*
  * Run one pass of legacy RX processing over sc_rxbuf.
  *
  * Each completed buffer at the head of the list is handed to
  * ath_rx_pkt() and then parked as the HP queue holding buffer, while
  * the previous holding buffer (if any) is re-initialised and appended
  * to the list.  At most ATH_RX_MAX frames are handled per call unless
  * sc_kickpcu was set on entry; if that many frames were handled,
  * another pass is scheduled through sc_rx.recv_sched().
  *
  * When resched is non-zero the function additionally queues the DFS
  * task if needed, restarts receive after a pending kickpcu, and kicks
  * the TX path when the interface is not marked OACTIVE.
  *
  * Must be called without ATH_LOCK or the PCU lock held (asserted
  * below).
  *
  * XXX TODO: break out the "get buffers" from "call ath_rx_pkt()" like
  * the EDMA code does.
  *
  * XXX TODO: then, do all of the RX list management stuff inside
  * ATH_RX_LOCK() so we don't end up potentially racing.  The EDMA
  * code is doing it right.
  */
 static void
 ath_rx_proc(struct ath_softc *sc, int resched)
 {
 #define	PA2DESC(_sc, _pa) \
 	((struct ath_desc *)((caddr_t)(_sc)->sc_rxdma.dd_desc + \
 		((_pa) - (_sc)->sc_rxdma.dd_desc_paddr)))
 	struct ath_buf *bf;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ath_hal *ah = sc->sc_ah;
 #ifdef IEEE80211_SUPPORT_SUPERG
 	struct ieee80211com *ic = ifp->if_l2com;
 #endif
 	struct ath_desc *ds;
 	struct ath_rx_status *rs;
 	struct mbuf *m;
 	int ngood;
 	HAL_STATUS status;
 	int16_t nf;
 	u_int64_t tsf;
 	int npkts = 0;
 	int kickpcu = 0;
 	int ret;
 
 	/* XXX we must not hold the ATH_LOCK here */
 	ATH_UNLOCK_ASSERT(sc);
 	ATH_PCU_UNLOCK_ASSERT(sc);
 
 	/*
 	 * Note that an RX pass is in progress (undone at the end of this
 	 * function) and snapshot the kickpcu flag under the PCU lock.
 	 */
 	ATH_PCU_LOCK(sc);
 	sc->sc_rxproc_cnt++;
 	kickpcu = sc->sc_kickpcu;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Request the awake power state; restored before returning. */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: called\n", __func__);
 	ngood = 0;
 	nf = ath_hal_getchannoise(ah, sc->sc_curchan);
 	sc->sc_stats.ast_rx_noise = nf;
 	tsf = ath_hal_gettsf64(ah);
 	do {
 		/*
 		 * Don't process too many packets at a time; give the
 		 * TX thread time to also run - otherwise the TX
 		 * latency can jump by quite a bit, causing throughput
 		 * degradation.
 		 */
 		if (!kickpcu && npkts >= ATH_RX_MAX)
 			break;
 
 		bf = TAILQ_FIRST(&sc->sc_rxbuf);
 		if (sc->sc_rxslink && bf == NULL) {	/* NB: shouldn't happen */
-			if_printf(ifp, "%s: no buffer!\n", __func__);
+			device_printf(sc->sc_dev, "%s: no buffer!\n", __func__);
 			break;
 		} else if (bf == NULL) {
 			/*
 			 * End of List:
 			 * this can happen for non-self-linked RX chains
 			 */
 			sc->sc_stats.ast_rx_hitqueueend++;
 			break;
 		}
 		m = bf->bf_m;
 		if (m == NULL) {		/* NB: shouldn't happen */
 			/*
 			 * If mbuf allocation failed previously there
 			 * will be no mbuf; try again to re-populate it.
 			 */
 			/* XXX make debug msg */
-			if_printf(ifp, "%s: no mbuf!\n", __func__);
+			device_printf(sc->sc_dev, "%s: no mbuf!\n", __func__);
 			TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list);
 			goto rx_proc_next;
 		}
 		ds = bf->bf_desc;
 		if (ds->ds_link == bf->bf_daddr) {
 			/* NB: never process the self-linked entry at the end */
 			sc->sc_stats.ast_rx_hitqueueend++;
 			break;
 		}
 		/* XXX sync descriptor memory */
 		/*
 		 * Must provide the virtual address of the current
 		 * descriptor, the physical address, and the virtual
 		 * address of the next descriptor in the h/w chain.
 		 * This allows the HAL to look ahead to see if the
 		 * hardware is done with a descriptor by checking the
 		 * done bit in the following descriptor and the address
 		 * of the current descriptor the DMA engine is working
 		 * on.  All this is necessary because of our use of
 		 * a self-linked list to avoid rx overruns.
 		 */
 		rs = &bf->bf_status.ds_rxstat;
 		status = ath_hal_rxprocdesc(ah, ds,
 				bf->bf_daddr, PA2DESC(sc, ds->ds_link), rs);
 #ifdef ATH_DEBUG
 		if (sc->sc_debug & ATH_DEBUG_RECV_DESC)
 			ath_printrxbuf(sc, bf, 0, status == HAL_OK);
 #endif
 
 #ifdef	ATH_DEBUG_ALQ
 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_RXSTATUS))
 		    if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_RXSTATUS,
 		    sc->sc_rx_statuslen, (char *) ds);
 #endif	/* ATH_DEBUG_ALQ */
 
 		/* Descriptor not completed yet: stop this pass here. */
 		if (status == HAL_EINPROGRESS)
 			break;
 
 		TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list);
 		npkts++;
 
 		/*
 		 * Process a single frame.
 		 */
 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
 		bf->bf_m = NULL;
 		if (ath_rx_pkt(sc, rs, status, tsf, nf, HAL_RX_QUEUE_HP, bf, m))
 			ngood++;
 rx_proc_next:
 		/*
 		 * If there's a holding buffer, insert that onto
 		 * the RX list; the hardware is now definitely not pointing
 		 * to it now.
 		 */
 		ret = 0;
 		if (sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf != NULL) {
 			TAILQ_INSERT_TAIL(&sc->sc_rxbuf,
 			    sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf,
 			    bf_list);
 			ret = ath_rxbuf_init(sc,
 			    sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf);
 		}
 		/*
 		 * Next, throw our buffer into the holding entry.  The hardware
 		 * may use the descriptor to read the link pointer before
 		 * DMAing the next descriptor in to write out a packet.
 		 */
 		sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf = bf;
 		/* Loop ends if re-initialising the old holding buffer failed. */
 	} while (ret == 0);
 
 	/* rx signal state monitoring */
 	ath_hal_rxmonitor(ah, &sc->sc_halstats, sc->sc_curchan);
 	if (ngood)
 		sc->sc_lastrx = tsf;
 
 	ATH_KTR(sc, ATH_KTR_RXPROC, 2, "ath_rx_proc: npkts=%d, ngood=%d", npkts, ngood);
 	/* Queue DFS tasklet if needed */
 	if (resched && ath_dfs_tasklet_needed(sc, sc->sc_curchan))
 		taskqueue_enqueue(sc->sc_tq, &sc->sc_dfstask);
 
 	/*
 	 * Now that all the RX frames were handled that
 	 * need to be handled, kick the PCU if there's
 	 * been an RXEOL condition.
 	 */
 	if (resched && kickpcu) {
 		ATH_PCU_LOCK(sc);
 		ATH_KTR(sc, ATH_KTR_ERROR, 0, "ath_rx_proc: kickpcu");
 		device_printf(sc->sc_dev, "%s: kickpcu; handled %d packets\n",
 		    __func__, npkts);
 
 		/*
 		 * Go through the process of fully tearing down
 		 * the RX buffers and reinitialising them.
 		 *
 		 * There's a hardware bug that causes the RX FIFO
 		 * to get confused under certain conditions and
 		 * constantly write over the same frame, leading
 		 * the RX driver code here to get heavily confused.
 		 */
 		/*
 		 * XXX Has RX DMA stopped enough here to just call
 		 *     ath_startrecv()?
 		 * XXX Do we need to use the holding buffer to restart
 		 *     RX DMA by appending entries to the final
 		 *     descriptor?  Quite likely.
 		 */
 #if 1
 		ath_startrecv(sc);
 #else
 		/*
 		 * Disabled for now - it'd be nice to be able to do
 		 * this in order to limit the amount of CPU time spent
 		 * reinitialising the RX side (and thus minimise RX
 		 * drops) however there's a hardware issue that
 		 * causes things to get too far out of whack.
 		 */
 		/*
 		 * XXX can we hold the PCU lock here?
 		 * Are there any net80211 buffer calls involved?
 		 */
 		bf = TAILQ_FIRST(&sc->sc_rxbuf);
 		ath_hal_putrxbuf(ah, bf->bf_daddr, HAL_RX_QUEUE_HP);
 		ath_hal_rxena(ah);		/* enable recv descriptors */
 		ath_mode_init(sc);		/* set filters, etc. */
 		ath_hal_startpcurecv(ah);	/* re-enable PCU/DMA engine */
 #endif
 
 		ath_hal_intrset(ah, sc->sc_imask);
 		sc->sc_kickpcu = 0;
 		ATH_PCU_UNLOCK(sc);
 	}
 
 	/* XXX check this inside of IF_LOCK? */
 	if (resched && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
 #ifdef IEEE80211_SUPPORT_SUPERG
 		ieee80211_ff_age_all(ic, 100);
 #endif
 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
 			ath_tx_kick(sc);
 	}
 #undef PA2DESC
 
 	/*
 	 * Put the hardware to sleep again if we're done with it.
 	 */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	/*
 	 * If we hit the maximum number of frames in this round,
 	 * reschedule for another immediate pass.  This gives
 	 * the TX and TX completion routines time to run, which
 	 * will reduce latency.
 	 */
 	if (npkts >= ATH_RX_MAX)
 		sc->sc_rx.recv_sched(sc, resched);
 
 	/* This RX pass is finished. */
 	ATH_PCU_LOCK(sc);
 	sc->sc_rxproc_cnt--;
 	ATH_PCU_UNLOCK(sc);
 }
 
 #undef	ATH_RX_MAX
 
 /*
  * Taskqueue handler for legacy RX processing.
  *
  * While a reset is in progress (sc_inreset_cnt > 0) the pass is
  * skipped with a console message; otherwise a full RX pass is run
  * with rescheduling enabled.  Since this may get run as part of the
  * reset/flush path, the task can't clash with an existing, running
  * tasklet.
  */
 static void
 ath_legacy_rx_tasklet(void *arg, int npending)
 {
 	struct ath_softc *sc = arg;
 
 	ATH_KTR(sc, ATH_KTR_RXPROC, 1, "ath_rx_proc: pending=%d", npending);
 	DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: pending %u\n", __func__, npending);
 
 	ATH_PCU_LOCK(sc);
 	if (sc->sc_inreset_cnt <= 0) {
 		/* No reset in flight: drop the lock and process frames. */
 		ATH_PCU_UNLOCK(sc);
 		ath_rx_proc(sc, 1);
 		return;
 	}
 
 	/* Reset in flight: complain (still under the PCU lock) and bail. */
 	device_printf(sc->sc_dev,
 	    "%s: sc_inreset_cnt > 0; skipping\n", __func__);
 	ATH_PCU_UNLOCK(sc);
 }
 
 /*
  * Flush entry point for the legacy RX path: run one RX pass with
  * rescheduling disabled.
  */
 static void
 ath_legacy_flushrecv(struct ath_softc *sc)
 {
 	const int resched = 0;	/* flush only; no DFS/kickpcu/TX follow-up */
 
 	ath_rx_proc(sc, resched);
 }
 
 static void
 ath_legacy_flush_rxpending(struct ath_softc *sc)
 {
 
 	/* XXX ATH_RX_LOCK_ASSERT(sc); */
 
 	if (sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending != NULL) {
 		m_freem(sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending);
 		sc->sc_rxedma[HAL_RX_QUEUE_LP].m_rxpending = NULL;
 	}
 	if (sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending != NULL) {
 		m_freem(sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending);
 		sc->sc_rxedma[HAL_RX_QUEUE_HP].m_rxpending = NULL;
 	}
 }
 
 static int
 ath_legacy_flush_rxholdbf(struct ath_softc *sc)
 {
 	struct ath_buf *bf;
 
 	/* XXX ATH_RX_LOCK_ASSERT(sc); */
 	/*
 	 * If there are RX holding buffers, free them here and return
 	 * them to the list.
 	 *
 	 * XXX should just verify that bf->bf_m is NULL, as it must
 	 * be at this point!
 	 */
 	bf = sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf;
 	if (bf != NULL) {
 		if (bf->bf_m != NULL)
 			m_freem(bf->bf_m);
 		bf->bf_m = NULL;
 		TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list);
 		(void) ath_rxbuf_init(sc, bf);
 	}
 	sc->sc_rxedma[HAL_RX_QUEUE_HP].m_holdbf = NULL;
 
 	bf = sc->sc_rxedma[HAL_RX_QUEUE_LP].m_holdbf;
 	if (bf != NULL) {
 		if (bf->bf_m != NULL)
 			m_freem(bf->bf_m);
 		bf->bf_m = NULL;
 		TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list);
 		(void) ath_rxbuf_init(sc, bf);
 	}
 	sc->sc_rxedma[HAL_RX_QUEUE_LP].m_holdbf = NULL;
 
 	return (0);
 }
 
 /*
  * Disable the receive h/w in preparation for a reset.
  *
  * Under ATH_RX_LOCK: stops the PCU receive side, clears the RX filter,
  * stops RX DMA, waits 3ms, optionally dumps the RX list when reset or
  * fatal debugging is enabled, then flushes the pending mbufs and
  * holding buffers and clears sc_rxlink.
  *
  * NB: dodelay is currently not honoured - the "if (dodelay)" test is
  * compiled out with #if 0, so the DELAY() below always runs.
  */
 static void
 ath_legacy_stoprecv(struct ath_softc *sc, int dodelay)
 {
 #define	PA2DESC(_sc, _pa) \
 	((struct ath_desc *)((caddr_t)(_sc)->sc_rxdma.dd_desc + \
 		((_pa) - (_sc)->sc_rxdma.dd_desc_paddr)))
 	struct ath_hal *ah = sc->sc_ah;
 
 	ATH_RX_LOCK(sc);
 
 	ath_hal_stoppcurecv(ah);	/* disable PCU */
 	ath_hal_setrxfilter(ah, 0);	/* clear recv filter */
 	ath_hal_stopdmarecv(ah);	/* disable DMA engine */
 	/*
 	 * TODO: see if this particular DELAY() is required; it may be
 	 * masking some missing FIFO flush or DMA sync.
 	 */
 #if 0
 	if (dodelay)
 #endif
 		DELAY(3000);		/* 3ms is long enough for 1 frame */
 #ifdef ATH_DEBUG
 	/* Debug only: walk the RX list and print each buffer's status. */
 	if (sc->sc_debug & (ATH_DEBUG_RESET | ATH_DEBUG_FATAL)) {
 		struct ath_buf *bf;
 		u_int ix;
 
 		device_printf(sc->sc_dev,
 		    "%s: rx queue %p, link %p\n",
 		    __func__,
 		    (caddr_t)(uintptr_t) ath_hal_getrxbuf(ah, HAL_RX_QUEUE_HP),
 		    sc->sc_rxlink);
 		ix = 0;
 		TAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) {
 			struct ath_desc *ds = bf->bf_desc;
 			struct ath_rx_status *rs = &bf->bf_status.ds_rxstat;
 			HAL_STATUS status = ath_hal_rxprocdesc(ah, ds,
 				bf->bf_daddr, PA2DESC(sc, ds->ds_link), rs);
 			/* Print completed buffers, or all of them when FATAL is set. */
 			if (status == HAL_OK || (sc->sc_debug & ATH_DEBUG_FATAL))
 				ath_printrxbuf(sc, bf, ix, status == HAL_OK);
 			ix++;
 		}
 	}
 #endif
 
 	(void) ath_legacy_flush_rxpending(sc);
 	(void) ath_legacy_flush_rxholdbf(sc);
 
 	sc->sc_rxlink = NULL;		/* just in case */
 
 	ATH_RX_UNLOCK(sc);
 #undef PA2DESC
 }
 
 /*
  * XXX TODO: something was calling startrecv without calling
  * stoprecv.  Let's figure out what/why.  It was showing up
  * as an mbuf leak (rxpending) and an ath_buf leak (holdbf).
  */
 
 /*
  * Enable the receive h/w following a reset.
  *
  * Under ATH_RX_LOCK: clears sc_rxlink, flushes any stale pending mbufs
  * and holding buffers, re-initialises every buffer on sc_rxbuf, hands
  * the first buffer to the hardware and re-enables the receive engine.
  *
  * Returns 0 on success or the error reported by ath_rxbuf_init().
  * The RX lock is released on every return path (previously the
  * ath_rxbuf_init() failure path returned with it still held).
  */
 static int
 ath_legacy_startrecv(struct ath_softc *sc)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_buf *bf;
 	int error = 0;
 
 	ATH_RX_LOCK(sc);
 
 	/*
 	 * XXX should verify these are already all NULL!
 	 */
 	sc->sc_rxlink = NULL;
 	(void) ath_legacy_flush_rxpending(sc);
 	(void) ath_legacy_flush_rxholdbf(sc);
 
 	/*
 	 * Re-chain all of the buffers in the RX buffer list.
 	 */
 	TAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) {
 		error = ath_rxbuf_init(sc, bf);
 		if (error != 0) {
 			DPRINTF(sc, ATH_DEBUG_RECV,
 				"%s: ath_rxbuf_init failed %d\n",
 				__func__, error);
 			/* Must not leak the RX lock on failure. */
 			goto out;
 		}
 	}
 
 	/* NOTE(review): assumes sc_rxbuf is non-empty; bf is not NULL-checked. */
 	bf = TAILQ_FIRST(&sc->sc_rxbuf);
 	ath_hal_putrxbuf(ah, bf->bf_daddr, HAL_RX_QUEUE_HP);
 	ath_hal_rxena(ah);		/* enable recv descriptors */
 	ath_mode_init(sc);		/* set filters, etc. */
 	ath_hal_startpcurecv(ah);	/* re-enable PCU/DMA engine */
 
 out:
 	ATH_RX_UNLOCK(sc);
 	return (error);
 }
 
 /*
  * Allocate the legacy RX descriptor DMA area and buffer list.
  *
  * Returns whatever ath_descdma_setup() returns (0 on success).
  */
 static int
 ath_legacy_dma_rxsetup(struct ath_softc *sc)
 {
 
 	return (ath_descdma_setup(sc, &sc->sc_rxdma, &sc->sc_rxbuf,
 	    "rx", sizeof(struct ath_desc), ath_rxbuf, 1));
 }
 
 /*
  * Release the legacy RX descriptor DMA area, if one was set up
  * (dd_desc_len non-zero).  Always returns 0.
  */
 static int
 ath_legacy_dma_rxteardown(struct ath_softc *sc)
 {
 
 	/* Nothing allocated: nothing to clean up. */
 	if (sc->sc_rxdma.dd_desc_len == 0)
 		return (0);
 
 	ath_descdma_cleanup(sc, &sc->sc_rxdma, &sc->sc_rxbuf);
 	return (0);
 }
 
 /*
  * Schedule an RX pass by queueing the RX task on the driver taskqueue.
  * The dosched argument is not consulted.
  */
 static void
 ath_legacy_recv_sched(struct ath_softc *sc, int dosched)
 {
 	struct taskqueue *tq = sc->sc_tq;
 
 	taskqueue_enqueue(tq, &sc->sc_rxtask);
 }
 
 /*
  * Per-queue variant of the RX scheduling hook.  Neither q nor dosched
  * is consulted here; the single RX task is queued regardless.
  */
 static void
 ath_legacy_recv_sched_queue(struct ath_softc *sc, HAL_RX_QUEUE q,
     int dosched)
 {
 	struct taskqueue *tq = sc->sc_tq;
 
 	taskqueue_enqueue(tq, &sc->sc_rxtask);
 }
 
 /*
  * Populate the softc's RX method table (sc_rx) with the legacy
  * receive implementations and set the default RX status length.
  */
 void
 ath_recv_setup_legacy(struct ath_softc *sc)
 {
 
 	/* DMA resource setup and teardown */
 	sc->sc_rx.recv_setup = ath_legacy_dma_rxsetup;
 	sc->sc_rx.recv_teardown = ath_legacy_dma_rxteardown;
 
 	/* Receive start/stop/flush and buffer initialisation */
 	sc->sc_rx.recv_start = ath_legacy_startrecv;
 	sc->sc_rx.recv_stop = ath_legacy_stoprecv;
 	sc->sc_rx.recv_flush = ath_legacy_flushrecv;
 	sc->sc_rx.recv_rxbuf_init = ath_legacy_rxbuf_init;
 
 	/* Task handler and scheduling hooks */
 	sc->sc_rx.recv_tasklet = ath_legacy_rx_tasklet;
 	sc->sc_rx.recv_sched = ath_legacy_recv_sched;
 	sc->sc_rx.recv_sched_queue = ath_legacy_recv_sched_queue;
 
 	/*
 	 * Sensible legacy default for the RX status length.
 	 *
 	 * XXX this should be changed to properly support the
 	 * exact RX descriptor size for each HAL.
 	 */
 	sc->sc_rx_statuslen = sizeof(struct ath_desc);
 }
Index: head/sys/dev/ath/if_ath_tdma.c
===================================================================
--- head/sys/dev/ath/if_ath_tdma.c	(revision 283743)
+++ head/sys/dev/ath/if_ath_tdma.c	(revision 283744)
@@ -1,687 +1,687 @@
 /*-
  * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for the Atheros Wireless LAN controller.
  *
  * This software is derived from work of Atsushi Onoe; his contribution
  * is greatly appreciated.
  */
 
 #include "opt_inet.h"
 #include "opt_ath.h"
 /*
  * This is needed for register operations which are performed
  * by the driver - eg, calls to ath_hal_gettsf32().
  *
  * It's also required for any AH_DEBUG checks in here, eg the
  * module dependencies.
  */
 #include "opt_ah.h"
 #include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/callout.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kthread.h>
 #include <sys/taskqueue.h>
 #include <sys/priv.h>
 #include <sys/module.h>
 #include <sys/ktr.h>
 #include <sys/smp.h>	/* for mp_ncpus */
 
 #include <machine/bus.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_llc.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_regdomain.h>
 #ifdef IEEE80211_SUPPORT_SUPERG
 #include <net80211/ieee80211_superg.h>
 #endif
 #ifdef IEEE80211_SUPPORT_TDMA
 #include <net80211/ieee80211_tdma.h>
 #endif
 
 #include <net/bpf.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #endif
 
 #include <dev/ath/if_athvar.h>
 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
 #include <dev/ath/ath_hal/ah_diagcodes.h>
 
 #include <dev/ath/if_ath_debug.h>
 #include <dev/ath/if_ath_misc.h>
 #include <dev/ath/if_ath_tsf.h>
 #include <dev/ath/if_ath_tx.h>
 #include <dev/ath/if_ath_sysctl.h>
 #include <dev/ath/if_ath_led.h>
 #include <dev/ath/if_ath_keycache.h>
 #include <dev/ath/if_ath_rx.h>
 #include <dev/ath/if_ath_beacon.h>
 #include <dev/ath/if_athdfs.h>
 
 #ifdef ATH_TX99_DIAG
 #include <dev/ath/ath_tx99/ath_tx99.h>
 #endif
 
 #ifdef	ATH_DEBUG_ALQ
 #include <dev/ath/if_ath_alq.h>
 #endif
 
 #ifdef IEEE80211_SUPPORT_TDMA
 #include <dev/ath/if_ath_tdma.h>
 
 static void	ath_tdma_settimers(struct ath_softc *sc, u_int32_t nexttbtt,
 		    u_int32_t bintval);
 static void	ath_tdma_bintvalsetup(struct ath_softc *sc,
 		    const struct ieee80211_tdma_state *tdma);
 #endif /* IEEE80211_SUPPORT_TDMA */
 
 #ifdef IEEE80211_SUPPORT_TDMA
 /*
  * Fill in a HAL_BEACON_TIMERS block from nexttbtt/bintval and the
  * softc's DBA/SWBA prep values, then program it into the hardware
  * with ath_hal_beaconsettimers().
  */
 static void
 ath_tdma_settimers(struct ath_softc *sc, u_int32_t nexttbtt, u_int32_t bintval)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	/* nexttbtt << 3: base from which the DBA/SWBA prep times are taken */
 	const u_int32_t tbtt_x8 = nexttbtt << 3;
 	HAL_BEACON_TIMERS bt;
 
 	bt.bt_nexttbtt = nexttbtt;
 	bt.bt_intval = bintval | HAL_BEACON_ENA;
 	bt.bt_nextatim = nexttbtt + 1;
 	bt.bt_nextswba = tbtt_x8 - sc->sc_tdmaswbaprep;
 	bt.bt_nextdba = tbtt_x8 - sc->sc_tdmadbaprep;
 	/* Enables TBTT, DBA, SWBA timers by default */
 	bt.bt_flags = 0;
 #if 0
 	DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 	    "%s: intval=%d (0x%08x) nexttbtt=%u (0x%08x), nextdba=%u (0x%08x), nextswba=%u (0x%08x),nextatim=%u (0x%08x)\n",
 	    __func__,
 	    bt.bt_intval,
 	    bt.bt_intval,
 	    bt.bt_nexttbtt,
 	    bt.bt_nexttbtt,
 	    bt.bt_nextdba,
 	    bt.bt_nextdba,
 	    bt.bt_nextswba,
 	    bt.bt_nextswba,
 	    bt.bt_nextatim,
 	    bt.bt_nextatim);
 #endif
 
 #ifdef	ATH_DEBUG_ALQ
 	/* Log the timer values (big-endian) when ALQ tracing asks for them. */
 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_TDMA_TIMER_SET)) {
 		struct if_ath_alq_tdma_timer_set rec;
 
 		rec.bt_intval = htobe32(bt.bt_intval);
 		rec.bt_nexttbtt = htobe32(bt.bt_nexttbtt);
 		rec.bt_nextdba = htobe32(bt.bt_nextdba);
 		rec.bt_nextswba = htobe32(bt.bt_nextswba);
 		rec.bt_nextatim = htobe32(bt.bt_nextatim);
 		rec.bt_flags = htobe32(bt.bt_flags);
 		rec.sc_tdmadbaprep = htobe32(sc->sc_tdmadbaprep);
 		rec.sc_tdmaswbaprep = htobe32(sc->sc_tdmaswbaprep);
 		if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TDMA_TIMER_SET,
 		    sizeof(rec), (char *) &rec);
 	}
 #endif
 
 	DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 	    "%s: nexttbtt=%u (0x%08x), nexttbtt tsf=%lld (0x%08llx)\n",
 	    __func__,
 	    bt.bt_nexttbtt,
 	    bt.bt_nexttbtt,
 	    (long long) ( ((u_int64_t) (bt.bt_nexttbtt)) << 10),
 	    (long long) ( ((u_int64_t) (bt.bt_nexttbtt)) << 10));
 
 	ath_hal_beaconsettimers(ah, &bt);
 }
 
 /*
  * Calculate the beacon interval.  This is periodic in the
  * superframe for the bss.  We assume each station is configured
  * identically wrt transmit rate so the guard time we calculate
  * above will be the same on all stations.  Note we need to
  * factor in the xmit time because the hardware will schedule
  * a frame for transmit if the start of the frame is within
  * the burst time.  When we get hardware that properly kills
  * frames in the PCU we can reduce/eliminate the guard time.
  *
  * Roundup to 1024 is so we have 1 TU buffer in the guard time
  * to deal with the granularity of the nexttbtt timer.  11n MAC's
  * with 1us timer granularity should allow us to reduce/eliminate
  * this.
  */
 static void
 ath_tdma_bintvalsetup(struct ath_softc *sc,
 	const struct ieee80211_tdma_state *tdma)
 {
 	/* copy from vap state (XXX check all vaps have same value?) */
 	sc->sc_tdmaslotlen = tdma->tdma_slotlen;
 
 	/*
 	 * Superframe length: (slot + guard) * slot count, rounded up to
 	 * a multiple of 1024, converted TSF -> TU, then bumped to the
 	 * next even value if odd.
 	 */
 	sc->sc_tdmabintval = roundup((sc->sc_tdmaslotlen+sc->sc_tdmaguard) *
 		tdma->tdma_slotcnt, 1024);
 	sc->sc_tdmabintval >>= 10;
 	sc->sc_tdmabintval += (sc->sc_tdmabintval & 1);
 
 	if (tdma->tdma_slot != 0) {
 		/* XXX all vaps must be slot 0 or slot !0 */
 		sc->sc_imask &= ~HAL_INT_SWBA;
 	} else {
 		/*
 		 * Only slot 0 beacons; other slots respond.
 		 */
 		sc->sc_imask |= HAL_INT_SWBA;
 		sc->sc_tdmaswba = 0;		/* beacon immediately */
 	}
 }
 
 /*
  * Max 802.11 overhead.  This assumes no 4-address frames and
  * the encapsulation done by ieee80211_encap (llc).  We also
  * include potential crypto overhead.
  */
 #define	IEEE80211_MAXOVERHEAD \
 	(sizeof(struct ieee80211_qosframe) \
 	 + sizeof(struct llc) \
 	 + IEEE80211_ADDR_LEN \
 	 + IEEE80211_WEP_IVLEN \
 	 + IEEE80211_WEP_KIDLEN \
 	 + IEEE80211_WEP_CRCLEN \
 	 + IEEE80211_WEP_MICLEN \
 	 + IEEE80211_CRC_LEN)
 
 /*
  * Setup initially for tdma operation.  Start the beacon
  * timers and enable SWBA if we are slot 0.  Otherwise
  * we wait for slot 0 to arrive so we can sync up before
  * starting to transmit.
  *
  * If vap is NULL the first vap on the ic's vap list is used; if there
  * is none, a message is printed and nothing is configured.
  *
  * Interrupts are masked (ath_hal_intrset(ah, 0)) while the beacon
  * queue, CCA setting, beacon interval and timers are programmed, and
  * re-enabled with sc_imask afterwards.
  */
 void
 ath_tdma_config(struct ath_softc *sc, struct ieee80211vap *vap)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	const struct ieee80211_txparam *tp;
 	const struct ieee80211_tdma_state *tdma = NULL;
 	int rix;
 
 	if (vap == NULL) {
 		vap = TAILQ_FIRST(&ic->ic_vaps);   /* XXX */
 		if (vap == NULL) {
-			if_printf(ifp, "%s: no vaps?\n", __func__);
+			device_printf(sc->sc_dev, "%s: no vaps?\n", __func__);
 			return;
 		}
 	}
 	/* XXX should take a locked ref to iv_bss */
 	tp = vap->iv_bss->ni_txparms;
 	/*
 	 * Calculate the guard time for each slot.  This is the
 	 * time to send a maximal-size frame according to the
 	 * fixed/lowest transmit rate.  Note that the interface
 	 * mtu does not include the 802.11 overhead so we must
 	 * tack that on (ath_hal_computetxtime includes the
 	 * preamble and plcp in its calculation).
 	 */
 	tdma = vap->iv_tdma;
 	/* Use the fixed unicast rate if one is set, else the multicast rate. */
 	if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE)
 		rix = ath_tx_findrix(sc, tp->ucastrate);
 	else
 		rix = ath_tx_findrix(sc, tp->mcastrate);
 
 	/*
 	 * If the chip supports enforcing TxOP on transmission,
 	 * we can just delete the guard window.  It isn't at all required.
 	 */
 	if (sc->sc_hasenforcetxop) {
 		sc->sc_tdmaguard = 0;
 	} else {
 		/* XXX short preamble assumed */
 		/* XXX non-11n rate assumed */
 		sc->sc_tdmaguard = ath_hal_computetxtime(ah, sc->sc_currates,
 			ifp->if_mtu + IEEE80211_MAXOVERHEAD, rix, AH_TRUE);
 	}
 
 	ath_hal_intrset(ah, 0);
 
 	ath_beaconq_config(sc);			/* setup h/w beacon q */
 	if (sc->sc_setcca)
 		ath_hal_setcca(ah, AH_FALSE);	/* disable CCA */
 	ath_tdma_bintvalsetup(sc, tdma);	/* calculate beacon interval */
 	ath_tdma_settimers(sc, sc->sc_tdmabintval,
 		sc->sc_tdmabintval | HAL_BEACON_RESET_TSF);
 	sc->sc_syncbeacon = 0;
 
 	/* Reset the running TSF-delta averages. */
 	sc->sc_avgtsfdeltap = TDMA_DUMMY_MARKER;
 	sc->sc_avgtsfdeltam = TDMA_DUMMY_MARKER;
 
 	ath_hal_intrset(ah, sc->sc_imask);
 
 	DPRINTF(sc, ATH_DEBUG_TDMA, "%s: slot %u len %uus cnt %u "
 	    "bsched %u guard %uus bintval %u TU dba prep %u\n", __func__,
 	    tdma->tdma_slot, tdma->tdma_slotlen, tdma->tdma_slotcnt,
 	    tdma->tdma_bintval, sc->sc_tdmaguard, sc->sc_tdmabintval,
 	    sc->sc_tdmadbaprep);
 
 #ifdef	ATH_DEBUG_ALQ
 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_TDMA_TIMER_CONFIG)) {
 		struct if_ath_alq_tdma_timer_config t;
 
 		t.tdma_slot = htobe32(tdma->tdma_slot);
 		t.tdma_slotlen = htobe32(tdma->tdma_slotlen);
 		t.tdma_slotcnt = htobe32(tdma->tdma_slotcnt);
 		t.tdma_bintval = htobe32(tdma->tdma_bintval);
 		t.tdma_guard = htobe32(sc->sc_tdmaguard);
 		t.tdma_scbintval = htobe32(sc->sc_tdmabintval);
 		t.tdma_dbaprep = htobe32(sc->sc_tdmadbaprep);
 
 		if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TDMA_TIMER_CONFIG,
 		    sizeof(t), (char *) &t);
 	}
 #endif	/* ATH_DEBUG_ALQ */
 }
 
 /*
  * Update tdma operation.  Called from the 802.11 layer
  * when a beacon is received from the TDMA station operating
  * in the slot immediately preceding us in the bss.  Use
  * the rx timestamp for the beacon frame to update our
  * beacon timers so we follow their schedule.  Note that
  * by using the rx timestamp we implicitly include the
  * propagation delay in our schedule.
  *
  * Parameters:
  *   ni      - node the beacon came from; supplies the vap, the
  *             ic/softc and the beacon timestamp (ni_tstamp).
  *   tdma    - TDMA parameter block; not referenced in this
  *             function body.
  *   changed - non-zero when the TDMA configuration changed, in which
  *             case the beacon interval is recalculated; the
  *             TDMA_UPDATE_SLOTLEN bit additionally triggers
  *             ath_wme_update().
  *
  * XXX TODO: since the changes for the AR5416 and later chips
  * involved changing the TSF/TU calculations, we need to make
  * sure that various calculations wrap consistently.
  *
  * A lot of the problems stemmed from the calculations wrapping
  * at 65,535 TU.  Since a lot of the math is still being done in
  * TU, please audit it to ensure that when the TU values programmed
  * into the timers wrap at (2^31)-1 TSF, all the various terms
  * wrap consistently.
  */
 void
 ath_tdma_update(struct ieee80211_node *ni,
 	const struct ieee80211_tdma_param *tdma, int changed)
 {
 #define	TSF_TO_TU(_h,_l) \
 	((((u_int32_t)(_h)) << 22) | (((u_int32_t)(_l)) >> 10))
 #define	TU_TO_TSF(_tu)	(((u_int64_t)(_tu)) << 10)
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ath_softc *sc = ic->ic_ifp->if_softc;
 	struct ath_hal *ah = sc->sc_ah;
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	u_int64_t tsf, rstamp, nextslot, nexttbtt, nexttbtt_full;
 	u_int32_t txtime, nextslottu;
 	int32_t tudelta, tsfdelta;
 	const struct ath_rx_status *rs;
 	int rix;
 
 	sc->sc_stats.ast_tdma_update++;
 
 	/*
 	 * Check for and adopt configuration changes.
 	 */
 	if (changed != 0) {
 		const struct ieee80211_tdma_state *ts = vap->iv_tdma;
 
 		ath_tdma_bintvalsetup(sc, ts);
 		if (changed & TDMA_UPDATE_SLOTLEN)
 			ath_wme_update(ic);
 
 		DPRINTF(sc, ATH_DEBUG_TDMA,
 		    "%s: adopt slot %u slotcnt %u slotlen %u us "
 		    "bintval %u TU\n", __func__,
 		    ts->tdma_slot, ts->tdma_slotcnt, ts->tdma_slotlen,
 		    sc->sc_tdmabintval);
 
 		/* XXX right? */
 		ath_hal_intrset(ah, sc->sc_imask);
 		/* NB: beacon timers programmed below */
 	}
 
 	/* extend rx timestamp to 64 bits */
 	rs = sc->sc_lastrs;
 	tsf = ath_hal_gettsf64(ah);
 	rstamp = ath_extend_tsf(sc, rs->rs_tstamp, tsf);
 	/*
 	 * The rx timestamp is set by the hardware on completing
 	 * reception (at the point where the rx descriptor is DMA'd
 	 * to the host).  To find the start of our next slot we
 	 * must adjust this time by the time required to send
 	 * the packet just received.
 	 */
 	rix = rt->rateCodeToIndex[rs->rs_rate];
 
 	/*
 	 * To calculate the packet duration for legacy rates, we
 	 * only need the rix and preamble.
 	 *
 	 * For 11n non-aggregate frames, we also need the channel
 	 * width and short/long guard interval.
 	 *
 	 * For 11n aggregate frames, the required hacks are a little
 	 * more subtle.  You need to figure out the frame duration
 	 * for each frame, including the delimiters.  However, when
 	 * a frame isn't received successfully, we won't hear it
 	 * (unless you enable reception of CRC errored frames), so
 	 * your duration calculation is going to be off.
 	 *
 	 * However, we can assume that the beacon frames won't be
 	 * transmitted as aggregate frames, so we should be okay.
 	 * Just add a check to ensure that we aren't handed something
 	 * bad.
 	 *
 	 * For ath_hal_pkt_txtime() - for 11n rates, shortPreamble is
 	 * actually short guard interval. For legacy rates,
 	 * it's short preamble.
 	 */
 	txtime = ath_hal_pkt_txtime(ah, rt, rs->rs_datalen,
 	    rix,
 	    !! (rs->rs_flags & HAL_RX_2040),
 	    (rix & 0x80) ?
 	      (! (rs->rs_flags & HAL_RX_GI)) : rt->info[rix].shortPreamble);
 	/* NB: << 9 is to cvt to TU and /2 */
 	nextslot = (rstamp - txtime) + (sc->sc_tdmabintval << 9);
 
 	/*
 	 * For 802.11n chips: nextslottu needs to be the full TSF space,
 	 * not just 0..65535 TU.
 	 */
 	nextslottu = TSF_TO_TU(nextslot>>32, nextslot);
 	/*
 	 * Retrieve the hardware NextTBTT in usecs
 	 * and calculate the difference between what the
 	 * other station thinks and what we have programmed.  This
 	 * lets us figure how to adjust our timers to match.  The
 	 * adjustments are done by pulling the TSF forward and possibly
 	 * rewriting the beacon timers.
 	 */
 	/*
 	 * The logic here assumes the nexttbtt counter is in TSF
 	 * but the pre-11n NICs are in TU.  The HAL shifts them
 	 * to TSF but there's two important differences:
 	 *
 	 * + The TU->TSF values have 0's for the low 9 bits, and
 	 * + The counter wraps at TU_TO_TSF(HAL_BEACON_PERIOD + 1) for
 	 *   the pre-11n NICs, but not for the 11n NICs.
 	 *
 	 * So for now, just make sure the nexttbtt value we get
 	 * matches the second issue or once nexttbtt exceeds this
 	 * value, tsfdelta ends up becoming very negative and all
 	 * of the adjustments get very messed up.
 	 */
 
 	/*
 	 * We need to track the full nexttbtt rather than having it
 	 * truncated at HAL_BEACON_PERIOD, as programming the
 	 * nexttbtt (and related) registers for the 11n chips is
 	 * actually going to take the full 32 bit space, rather than
 	 * just 0..65535 TU.
 	 */
 	nexttbtt_full = ath_hal_getnexttbtt(ah);
 	nexttbtt = nexttbtt_full % (TU_TO_TSF(HAL_BEACON_PERIOD + 1));
 	tsfdelta = (int32_t)((nextslot % TU_TO_TSF(HAL_BEACON_PERIOD + 1)) - nexttbtt);
 
 	DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 	    "rs->rstamp %llu rstamp %llu tsf %llu txtime %d, nextslot %llu, "
 	    "nextslottu %d, nextslottume %d\n",
 	    (unsigned long long) rs->rs_tstamp,
 	    (unsigned long long) rstamp,
 	    (unsigned long long) tsf, txtime,
 	    (unsigned long long) nextslot,
 	    nextslottu, TSF_TO_TU(nextslot >> 32, nextslot));
 	DPRINTF(sc, ATH_DEBUG_TDMA,
 	    "  beacon tstamp: %llu (0x%016llx)\n",
 	    (unsigned long long) le64toh(ni->ni_tstamp.tsf),
 	    (unsigned long long) le64toh(ni->ni_tstamp.tsf));
 
 	DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 	    "nexttbtt %llu (0x%08llx) tsfdelta %d avg +%d/-%d\n",
 	    (unsigned long long) nexttbtt,
 	    (long long) nexttbtt,
 	    tsfdelta,
 	    TDMA_AVG(sc->sc_avgtsfdeltap), TDMA_AVG(sc->sc_avgtsfdeltam));
 
 	/*
 	 * Record the sign-split delta in the running averages, then turn
 	 * tsfdelta into a non-negative sub-TU (mod 1024) amount by which
 	 * the TSF is advanced below; a non-zero delta also moves the
 	 * target slot to the next TU.
 	 */
 	if (tsfdelta < 0) {
 		TDMA_SAMPLE(sc->sc_avgtsfdeltap, 0);
 		TDMA_SAMPLE(sc->sc_avgtsfdeltam, -tsfdelta);
 		tsfdelta = -tsfdelta % 1024;
 		nextslottu++;
 	} else if (tsfdelta > 0) {
 		TDMA_SAMPLE(sc->sc_avgtsfdeltap, tsfdelta);
 		TDMA_SAMPLE(sc->sc_avgtsfdeltam, 0);
 		tsfdelta = 1024 - (tsfdelta % 1024);
 		nextslottu++;
 	} else {
 		TDMA_SAMPLE(sc->sc_avgtsfdeltap, 0);
 		TDMA_SAMPLE(sc->sc_avgtsfdeltam, 0);
 	}
 	tudelta = nextslottu - TSF_TO_TU(nexttbtt_full >> 32, nexttbtt_full);
 
 #ifdef	ATH_DEBUG_ALQ
 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_TDMA_BEACON_STATE)) {
 		struct if_ath_alq_tdma_beacon_state t;
 		t.rx_tsf = htobe64(rstamp);
 		t.beacon_tsf = htobe64(le64toh(ni->ni_tstamp.tsf));
 		t.tsf64 = htobe64(tsf);
 		t.nextslot_tsf = htobe64(nextslot);
 		t.nextslot_tu = htobe32(nextslottu);
 		t.txtime = htobe32(txtime);
 		if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TDMA_BEACON_STATE,
 		    sizeof(t), (char *) &t);
 	}
 
 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_TDMA_SLOT_CALC)) {
 		struct if_ath_alq_tdma_slot_calc t;
 
 		t.nexttbtt = htobe64(nexttbtt_full);
 		t.next_slot = htobe64(nextslot);
 		t.tsfdelta = htobe32(tsfdelta);
 		t.avg_plus = htobe32(TDMA_AVG(sc->sc_avgtsfdeltap));
 		t.avg_minus = htobe32(TDMA_AVG(sc->sc_avgtsfdeltam));
 
 		if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TDMA_SLOT_CALC,
 		    sizeof(t), (char *) &t);
 	}
 #endif
 
 	/*
 	 * Copy sender's timestamp into tdma ie so they can
 	 * calculate roundtrip time.  We submit a beacon frame
 	 * below after any timer adjustment.  The frame goes out
 	 * at the next TBTT so the sender can calculate the
 	 * roundtrip by inspecting the tdma ie in our beacon frame.
 	 *
 	 * NB: This tstamp is subtly preserved when
 	 *     IEEE80211_BEACON_TDMA is marked (e.g. when the
 	 *     slot position changes) because ieee80211_add_tdma
 	 *     skips over the data.
 	 */
 	memcpy(ATH_VAP(vap)->av_boff.bo_tdma +
 		__offsetof(struct ieee80211_tdma_param, tdma_tstamp),
 		&ni->ni_tstamp.data, 8);
 #if 0
 	DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 	    "tsf %llu nextslot %llu (%d, %d) nextslottu %u nexttbtt %llu (%d)\n",
 	    (unsigned long long) tsf, (unsigned long long) nextslot,
 	    (int)(nextslot - tsf), tsfdelta, nextslottu, nexttbtt, tudelta);
 #endif
 	/*
 	 * Adjust the beacon timers only when pulling them forward
 	 * or when going back by less than the beacon interval.
 	 * Negative jumps larger than the beacon interval seem to
 	 * cause the timers to stop and generally cause instability.
 	 * This basically filters out jumps due to missed beacons.
 	 */
 	if (tudelta != 0 && (tudelta > 0 || -tudelta < sc->sc_tdmabintval)) {
 		DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 		    "%s: calling ath_tdma_settimers; nextslottu=%d, bintval=%d\n",
 		    __func__,
 		    nextslottu,
 		    sc->sc_tdmabintval);
 		ath_tdma_settimers(sc, nextslottu, sc->sc_tdmabintval);
 		sc->sc_stats.ast_tdma_timers++;
 	}
 	if (tsfdelta > 0) {
 		/* NB: this local deliberately re-reads the TSF and shadows the outer tsf. */
 		uint64_t tsf;
 
 		/* XXX should just teach ath_hal_adjusttsf() to do this */
 		tsf = ath_hal_gettsf64(ah);
 		ath_hal_settsf64(ah, tsf + tsfdelta);
 		DPRINTF(sc, ATH_DEBUG_TDMA_TIMER,
 		    "%s: calling ath_hal_adjusttsf: TSF=%llu, tsfdelta=%d\n",
 		    __func__,
 		    (unsigned long long) tsf,
 		    tsfdelta);
 
 #ifdef	ATH_DEBUG_ALQ
 		if (if_ath_alq_checkdebug(&sc->sc_alq,
 		    ATH_ALQ_TDMA_TSF_ADJUST)) {
 			struct if_ath_alq_tdma_tsf_adjust t;
 
 			t.tsfdelta = htobe32(tsfdelta);
 			t.tsf64_old = htobe64(tsf);
 			t.tsf64_new = htobe64(tsf + tsfdelta);
 			if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TDMA_TSF_ADJUST,
 			    sizeof(t), (char *) &t);
 		}
 #endif	/* ATH_DEBUG_ALQ */
 		sc->sc_stats.ast_tdma_tsf++;
 	}
 	ath_tdma_beacon_send(sc, vap);		/* prepare response */
 #undef TU_TO_TSF
 #undef TSF_TO_TU
 }
 
 /*
  * Transmit a beacon frame at SWBA.  Dynamic updates
  * to the frame contents are done as needed.
  */
 void
 ath_tdma_beacon_send(struct ath_softc *sc, struct ieee80211vap *vap)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_buf *bf;
 	int otherant;
 
 	/*
 	 * Check if the previous beacon has gone out.  If
 	 * not don't try to post another, skip this period
 	 * and wait for the next.  Missed beacons indicate
 	 * a problem and should not occur.  If we miss too
 	 * many consecutive beacons reset the device.
 	 */
 	if (ath_hal_numtxpending(ah, sc->sc_bhalq) != 0) {
 		sc->sc_bmisscount++;
 		DPRINTF(sc, ATH_DEBUG_BEACON,
 			"%s: missed %u consecutive beacons\n",
 			__func__, sc->sc_bmisscount);
 		if (sc->sc_bmisscount >= ath_bstuck_threshold)
 			taskqueue_enqueue(sc->sc_tq, &sc->sc_bstucktask);
 		return;
 	}
 	if (sc->sc_bmisscount != 0) {
 		DPRINTF(sc, ATH_DEBUG_BEACON,
 			"%s: resume beacon xmit after %u misses\n",
 			__func__, sc->sc_bmisscount);
 		sc->sc_bmisscount = 0;
 	}
 
 	/*
 	 * Check recent per-antenna transmit statistics and flip
 	 * the default antenna if noticeably more frames went out
 	 * on the non-default antenna.
 	 * XXX assumes 2 anntenae
 	 */
 	if (!sc->sc_diversity) {
 		otherant = sc->sc_defant & 1 ? 2 : 1;
 		if (sc->sc_ant_tx[otherant] > sc->sc_ant_tx[sc->sc_defant] + 2)
 			ath_setdefantenna(sc, otherant);
 		sc->sc_ant_tx[1] = sc->sc_ant_tx[2] = 0;
 	}
 
 	bf = ath_beacon_generate(sc, vap);
 	/* XXX We don't do cabq traffic, but just for completeness .. */
 	ATH_TXQ_LOCK(sc->sc_cabq);
 	ath_beacon_cabq_start(sc);
 	ATH_TXQ_UNLOCK(sc->sc_cabq);
 
 	if (bf != NULL) {
 		/*
 		 * Stop any current dma and put the new frame on the queue.
 		 * This should never fail since we check above that no frames
 		 * are still pending on the queue.
 		 */
 		if ((! sc->sc_isedma) &&
 		    (! ath_hal_stoptxdma(ah, sc->sc_bhalq))) {
 			DPRINTF(sc, ATH_DEBUG_ANY,
 				"%s: beacon queue %u did not stop?\n",
 				__func__, sc->sc_bhalq);
 			/* NB: the HAL still stops DMA, so proceed */
 		}
 		ath_hal_puttxbuf(ah, sc->sc_bhalq, bf->bf_daddr);
 		ath_hal_txstart(ah, sc->sc_bhalq);
 
 		sc->sc_stats.ast_be_xmit++;		/* XXX per-vap? */
 
 		/*
 		 * Record local TSF for our last send for use
 		 * in arbitrating slot collisions.
 		 */
 		/* XXX should take a locked ref to iv_bss */
 		vap->iv_bss->ni_tstamp.tsf = ath_hal_gettsf64(ah);
 	}
 }
 #endif /* IEEE80211_SUPPORT_TDMA */
Index: head/sys/dev/ath/if_ath_tx.c
===================================================================
--- head/sys/dev/ath/if_ath_tx.c	(revision 283743)
+++ head/sys/dev/ath/if_ath_tx.c	(revision 283744)
@@ -1,6230 +1,6230 @@
 /*-
  * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
  * Copyright (c) 2010-2012 Adrian Chadd, Xenion Pty Ltd
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for the Atheros Wireless LAN controller.
  *
  * This software is derived from work of Atsushi Onoe; his contribution
  * is greatly appreciated.
  */
 
 #include "opt_inet.h"
 #include "opt_ath.h"
 #include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/callout.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kthread.h>
 #include <sys/taskqueue.h>
 #include <sys/priv.h>
 #include <sys/ktr.h>
 
 #include <machine/bus.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_llc.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_regdomain.h>
 #ifdef IEEE80211_SUPPORT_SUPERG
 #include <net80211/ieee80211_superg.h>
 #endif
 #ifdef IEEE80211_SUPPORT_TDMA
 #include <net80211/ieee80211_tdma.h>
 #endif
 #include <net80211/ieee80211_ht.h>
 
 #include <net/bpf.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #endif
 
 #include <dev/ath/if_athvar.h>
 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
 #include <dev/ath/ath_hal/ah_diagcodes.h>
 
 #include <dev/ath/if_ath_debug.h>
 
 #ifdef ATH_TX99_DIAG
 #include <dev/ath/ath_tx99/ath_tx99.h>
 #endif
 
 #include <dev/ath/if_ath_misc.h>
 #include <dev/ath/if_ath_tx.h>
 #include <dev/ath/if_ath_tx_ht.h>
 
 #ifdef	ATH_DEBUG_ALQ
 #include <dev/ath/if_ath_alq.h>
 #endif
 
 /*
  * How many retries to perform in software
  */
 #define	SWMAX_RETRIES		10
 
 /*
  * What queue to throw the non-QoS TID traffic into
  */
 #define	ATH_NONQOS_TID_AC	WME_AC_VO
 
 /*
  * Prototypes for file-local (static) helpers.  The node-asleep
  * check is currently compiled out via #if 0.
  */
 #if 0
 static int ath_tx_node_is_asleep(struct ath_softc *sc, struct ath_node *an);
 #endif
 static int ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an,
     int tid);
 static int ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an,
     int tid);
 static ieee80211_seq ath_tx_tid_seqno_assign(struct ath_softc *sc,
     struct ieee80211_node *ni, struct ath_buf *bf, struct mbuf *m0);
 static int ath_tx_action_frame_override_queue(struct ath_softc *sc,
     struct ieee80211_node *ni, struct mbuf *m0, int *tid);
 static struct ath_buf *
 ath_tx_retry_clone(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, struct ath_buf *bf);
 
 #ifdef	ATH_DEBUG_ALQ
 /*
  * Post every TX descriptor of a buffer chain to the ALQ debug log.
  *
  * Walks the chain starting at bf_first via bf_next and emits one
  * ATH_ALQ_EDMA_TXDESC record per descriptor.  The walk stops at the
  * first buffer that has no segments.
  */
 void
 ath_tx_alq_post(struct ath_softc *sc, struct ath_buf *bf_first)
 {
 	struct ath_buf *cur;
 	const char *desc;
 	int idx, ndesc;
 
 	/* XXX we should skip out early if debugging isn't enabled! */
 	for (cur = bf_first; cur != NULL; cur = cur->bf_next) {
 		/* A buffer without segments terminates the walk. */
 		if (cur->bf_nseg == 0)
 			break;
 
 		/*
 		 * Each descriptor carries up to sc_tx_nmaps segments, so
 		 * round the segment count up to whole descriptors.
 		 */
 		ndesc = ((cur->bf_nseg - 1) / sc->sc_tx_nmaps) + 1;
 		desc = (const char *) cur->bf_desc;
 		for (idx = 0; idx < ndesc; idx++) {
 			if_ath_alq_post(&sc->sc_alq,
 			    ATH_ALQ_EDMA_TXDESC,
 			    sc->sc_tx_desclen,
 			    desc);
 			desc += sc->sc_tx_desclen;
 		}
 	}
 }
 #endif /* ATH_DEBUG_ALQ */
 
 /*
  * Whether to use the 11n rate scenario functions or not
  */
 static inline int
 ath_tx_is_11n(struct ath_softc *sc)
 {
 	return ((sc->sc_ah->ah_magic == 0x20065416) ||
 		    (sc->sc_ah->ah_magic == 0x19741014));
 }
 
 /*
  * Obtain the current TID from the given frame.
  *
  * Non-QoS frames need to go into TID 16 (IEEE80211_NONQOS_TID.)
  * This has implications for which AC/priority the packet is placed
  * in.
  */
 static int
 ath_tx_gettid(struct ath_softc *sc, const struct mbuf *m0)
 {
 	const struct ieee80211_frame *hdr;
 	int ac = M_WME_GETAC(m0);
 
 	hdr = mtod(m0, const struct ieee80211_frame *);
 
 	/*
 	 * Frames for which IEEE80211_QOS_HAS_SEQ() is true get the TID
 	 * derived from their AC; everything else shares the non-QoS TID.
 	 */
 	if (IEEE80211_QOS_HAS_SEQ(hdr))
 		return WME_AC_TO_TID(ac);
 
 	return IEEE80211_NONQOS_TID;
 }
 
 /*
  * Mark a buffer as retried: set the 802.11 retry bit in the frame
  * header the first time around, and bump the software retry count.
  */
 static void
 ath_tx_set_retry(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ieee80211_frame *hdr = mtod(bf->bf_m, struct ieee80211_frame *);
 
 	/*
 	 * The header only has to be modified (and pushed back out
 	 * to the device) on the first retry of this frame.
 	 */
 	if (! bf->bf_state.bfs_isretried) {
 		hdr->i_fc[1] |= IEEE80211_FC1_RETRY;
 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
 		    BUS_DMASYNC_PREWRITE);
 	}
 
 	bf->bf_state.bfs_isretried = 1;
 	bf->bf_state.bfs_retries++;
 }
 
 /*
  * Determine what the correct AC queue for the given frame
  * should be.
  *
  * This code assumes that the TIDs map consistently to
  * the underlying hardware (or software) ath_txq.
  * Since the sender may try to set an AC which is
  * arbitrary, non-QoS TIDs may end up being put on
  * completely different ACs. There's no way to put a
  * TID into multiple ath_txq's for scheduling, so
  * for now we override the AC/TXQ selection and put
  * non-QoS TID frames into the ATH_NONQOS_TID_AC queue
  * (currently defined as WME_AC_VO, not BE).
  *
  * This may be completely incorrect - specifically,
  * some management frames may end up out of order
  * compared to the QoS traffic they're controlling.
  * I'll look into this later.
  */
 static int
 ath_tx_getac(struct ath_softc *sc, const struct mbuf *m0)
 {
 	const struct ieee80211_frame *hdr =
 	    mtod(m0, const struct ieee80211_frame *);
 	int ac = M_WME_GETAC(m0);
 
 	/*
 	 * Frames with a QoS sequence space keep the AC tagged on the
 	 * mbuf; all others are forced onto ATH_NONQOS_TID_AC.
 	 */
 	return (IEEE80211_QOS_HAS_SEQ(hdr) ? ac : ATH_NONQOS_TID_AC);
 }
 
 /*
  * Release every buffer on a fragment list.
  *
  * Each buffer is unlinked from frags, handed back via
  * ath_returnbuf_head(), and one reference on ni is dropped per
  * buffer.  The caller must hold the TX buffer lock.
  */
 void
 ath_txfrag_cleanup(struct ath_softc *sc,
 	ath_bufhead *frags, struct ieee80211_node *ni)
 {
 	struct ath_buf *bf;
 
 	ATH_TXBUF_LOCK_ASSERT(sc);
 
 	/* Drain the list from the head until nothing is left. */
 	while ((bf = TAILQ_FIRST(frags)) != NULL) {
 		/* NB: bf assumed clean */
 		TAILQ_REMOVE(frags, bf, bf_list);
 		ath_returnbuf_head(sc, bf);
 		ieee80211_node_decref(ni);
 	}
 }
 
 /*
  * Setup xmit of a fragmented frame.  Allocate a buffer
  * for each frag and bump the node reference count to
  * reflect the held reference to be setup by ath_tx_start.
  */
 int
 ath_txfrag_setup(struct ath_softc *sc, ath_bufhead *frags,
 	struct mbuf *m0, struct ieee80211_node *ni)
 {
 	struct ath_buf *bf;
 	struct mbuf *frag;
 
 	ATH_TXBUF_LOCK(sc);
 
 	/*
 	 * One buffer (and one node reference) per fragment that
 	 * follows m0 on the m_nextpkt chain; m0 itself is not counted.
 	 */
 	frag = m0->m_nextpkt;
 	while (frag != NULL) {
 		/* XXX non-management? */
 		bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL);
 		if (bf == NULL) {
 			/* Out of buffers: undo everything gathered so far. */
 			DPRINTF(sc, ATH_DEBUG_XMIT, "%s: no buffer?\n",
 			    __func__);
 			ath_txfrag_cleanup(sc, frags, ni);
 			break;
 		}
 
 		ieee80211_node_incref(ni);
 		TAILQ_INSERT_TAIL(frags, bf, bf_list);
 
 		frag = frag->m_nextpkt;
 	}
 
 	ATH_TXBUF_UNLOCK(sc);
 
 	/* Non-zero only if at least one buffer ended up on the list. */
 	return !TAILQ_EMPTY(frags);
 }
 
 /*
  * Reclaim mbuf resources.  For fragmented frames we
  * need to claim each frag chained with m_nextpkt.
  */
 void
 ath_freetx(struct mbuf *m)
 {
 	struct mbuf *cur = m;
 	struct mbuf *rest;
 
 	/*
 	 * Detach each packet from the m_nextpkt chain before freeing
 	 * it.  The first packet is dereferenced unconditionally, so m
 	 * must not be NULL.
 	 */
 	do {
 		rest = cur->m_nextpkt;
 		cur->m_nextpkt = NULL;
 		m_freem(cur);
 		cur = rest;
 	} while (cur != NULL);
 }
 
 /*
  * Load the mbuf chain m0 into the DMA map of bf.
  *
  * On success the segment list (bf_segs/bf_nseg) is filled in, the map
  * is synced for PREWRITE, bf->bf_m is set to the mbuf that was finally
  * loaded (m0, or its collapsed replacement) and 0 is returned.
  *
  * On failure the mbuf chain has already been freed with ath_freetx()
  * and a non-zero errno value is returned: the bus_dma error, ENOMEM
  * if the chain could not be collapsed, or EIO for a zero-segment load.
  */
 static int
 ath_tx_dmasetup(struct ath_softc *sc, struct ath_buf *bf, struct mbuf *m0)
 {
 	struct mbuf *m;
 	int error;
 
 	/*
 	 * Load the DMA map so any coalescing is done.  This
 	 * also calculates the number of descriptors we need.
 	 */
 	error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0,
 				     bf->bf_segs, &bf->bf_nseg,
 				     BUS_DMA_NOWAIT);
 	if (error == EFBIG) {
 		/* XXX packet requires too many descriptors */
 		/* Force the "too many segments" path below. */
 		bf->bf_nseg = ATH_MAX_SCATTER + 1;
 	} else if (error != 0) {
 		sc->sc_stats.ast_tx_busdma++;
 		ath_freetx(m0);
 		return error;
 	}
 	/*
 	 * Discard null packets and check for packets that
 	 * require too many TX descriptors.  We try to convert
 	 * the latter to a cluster.
 	 */
 	if (bf->bf_nseg > ATH_MAX_SCATTER) {		/* too many desc's, linearize */
 		sc->sc_stats.ast_tx_linear++;
 		m = m_collapse(m0, M_NOWAIT, ATH_MAX_SCATTER);
 		if (m == NULL) {
 			ath_freetx(m0);
 			sc->sc_stats.ast_tx_nombuf++;
 			return ENOMEM;
 		}
 		/* From here on m0 refers to the collapsed chain. */
 		m0 = m;
 		error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0,
 					     bf->bf_segs, &bf->bf_nseg,
 					     BUS_DMA_NOWAIT);
 		if (error != 0) {
 			sc->sc_stats.ast_tx_busdma++;
 			ath_freetx(m0);
 			return error;
 		}
 		KASSERT(bf->bf_nseg <= ATH_MAX_SCATTER,
 		    ("too many segments after defrag; nseg %u", bf->bf_nseg));
 	} else if (bf->bf_nseg == 0) {		/* null packet, discard */
 		sc->sc_stats.ast_tx_nodata++;
 		ath_freetx(m0);
 		return EIO;
 	}
 	DPRINTF(sc, ATH_DEBUG_XMIT, "%s: m %p len %u\n",
 		__func__, m0, m0->m_pkthdr.len);
 	/* Make the frame contents visible to the device before TX. */
 	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE);
 	bf->bf_m = m0;
 
 	return 0;
 }
 
 /*
  * Chain together segments+descriptors for a frame - 11n or otherwise.
  *
  * For aggregates, this is called on each frame in the aggregate.
  */
 /*
  * Parameters:
  *   ds0               - first descriptor of the frame/aggregate; passed
  *                       through to ath_hal_filltxdesc().
  *   bf                - buffer whose DMA segments (bf_segs/bf_nseg) are
  *                       written into its descriptors; bf_lastds is updated
  *                       to the last descriptor written.
  *   is_aggr           - non-zero to mark descriptors as aggregate "middle".
  *   is_first_subframe - NOTE(review): not referenced in the body.
  *   is_last_subframe  - non-zero to mark descriptors as aggregate "last".
  */
 static void
 ath_tx_chaindesclist(struct ath_softc *sc, struct ath_desc *ds0,
     struct ath_buf *bf, int is_aggr, int is_first_subframe,
     int is_last_subframe)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	char *ds;
 	int i, bp, dsp;
 	HAL_DMA_ADDR bufAddrList[4];
 	uint32_t segLenList[4];
 	int numTxMaps = 1;
 	int isFirstDesc = 1;
 
 	/*
 	 * XXX There's txdma and txdma_mgmt; the descriptor
 	 * sizes must match.
 	 */
 	struct ath_descdma *dd = &sc->sc_txdma;
 
 	/*
 	 * Fillin the remainder of the descriptor info.
 	 */
 
 	/*
 	 * We need the number of TX data pointers in each descriptor.
 	 * EDMA and later chips support 4 TX buffers per descriptor;
 	 * previous chips just support one.
 	 */
 	numTxMaps = sc->sc_tx_nmaps;
 
 	/*
 	 * For EDMA and later chips ensure the TX map is fully populated
 	 * before advancing to the next descriptor.
 	 */
 	ds = (char *) bf->bf_desc;
 	/* bp: slot within the current descriptor; dsp: descriptor index. */
 	bp = dsp = 0;
 	bzero(bufAddrList, sizeof(bufAddrList));
 	bzero(segLenList, sizeof(segLenList));
 	for (i = 0; i < bf->bf_nseg; i++) {
 		bufAddrList[bp] = bf->bf_segs[i].ds_addr;
 		segLenList[bp] = bf->bf_segs[i].ds_len;
 		bp++;
 
 		/*
 		 * Go to the next segment if this isn't the last segment
 		 * and there's space in the current TX map.
 		 */
 		if ((i != bf->bf_nseg - 1) && (bp < numTxMaps))
 			continue;
 
 		/*
 		 * Last segment or we're out of buffer pointers.
 		 */
 		bp = 0;
 
 		/*
 		 * Terminate the chain on the last segment; otherwise link
 		 * to the next descriptor of this buffer.
 		 * NOTE(review): the link address is computed with
 		 * dd->dd_descsize while ds itself is advanced by
 		 * sc->sc_tx_desclen below - confirm the two always agree.
 		 */
 		if (i == bf->bf_nseg - 1)
 			ath_hal_settxdesclink(ah, (struct ath_desc *) ds, 0);
 		else
 			ath_hal_settxdesclink(ah, (struct ath_desc *) ds,
 			    bf->bf_daddr + dd->dd_descsize * (dsp + 1));
 
 		/*
 		 * XXX This assumes that bfs_txq is the actual destination
 		 * hardware queue at this point.  It may not have been
 		 * assigned, it may actually be pointing to the multicast
 		 * software TXQ id.  These must be fixed!
 		 */
 		ath_hal_filltxdesc(ah, (struct ath_desc *) ds
 			, bufAddrList
 			, segLenList
 			, bf->bf_descid		/* XXX desc id */
 			, bf->bf_state.bfs_tx_queue
 			, isFirstDesc		/* first segment */
 			, i == bf->bf_nseg - 1	/* last segment */
 			, (struct ath_desc *) ds0	/* first descriptor */
 		);
 
 		/*
 		 * Make sure the 11n aggregate fields are cleared.
 		 *
 		 * XXX TODO: this doesn't need to be called for
 		 * aggregate frames; as it'll be called on all
 		 * sub-frames.  Since the descriptors are in
 		 * non-cacheable memory, this leads to some
 		 * rather slow writes on MIPS/ARM platforms.
 		 */
 		if (ath_tx_is_11n(sc))
 			ath_hal_clr11n_aggr(sc->sc_ah, (struct ath_desc *) ds);
 
 		/*
 		 * If 11n is enabled, set it up as if it's an aggregate
 		 * frame.
 		 */
 		/* "last" takes precedence over "middle" when both are set. */
 		if (is_last_subframe) {
 			ath_hal_set11n_aggr_last(sc->sc_ah,
 			    (struct ath_desc *) ds);
 		} else if (is_aggr) {
 			/*
 			 * This clears the aggrlen field; so
 			 * the caller needs to call set_aggr_first()!
 			 *
 			 * XXX TODO: don't call this for the first
 			 * descriptor in the first frame in an
 			 * aggregate!
 			 */
 			ath_hal_set11n_aggr_middle(sc->sc_ah,
 			    (struct ath_desc *) ds,
 			    bf->bf_state.bfs_ndelim);
 		}
 		isFirstDesc = 0;
 		/* Track the most recently written descriptor. */
 		bf->bf_lastds = (struct ath_desc *) ds;
 
 		/*
 		 * Don't forget to skip to the next descriptor.
 		 */
 		ds += sc->sc_tx_desclen;
 		dsp++;
 
 		/*
 		 * .. and don't forget to blank these out!
 		 */
 		bzero(bufAddrList, sizeof(bufAddrList));
 		bzero(segLenList, sizeof(segLenList));
 	}
 	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Set the rate control fields in the given descriptor based on
  * the bf_state fields and node state.
  *
  * The bfs fields should already be set with the relevant rate
  * control information, including whether MRR is to be enabled.
  *
  * Since the FreeBSD HAL currently sets up the first TX rate
  * in ath_hal_setuptxdesc(), this will setup the MRR
  * conditionally for the pre-11n chips, and call ath_buf_set_rate
  * unconditionally for 11n chips. These require the 11n rate
  * scenario to be set if MCS rates are enabled, so it's easier
  * to just always call it. The caller can then only set rates 2, 3
  * and 4 if multi-rate retry is needed.
  */
 static void
 ath_tx_set_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni,
     struct ath_buf *bf)
 {
 	/* rc[0..3] are the four rate series stored in the buffer state. */
 	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
 
 	/* If mrr is disabled, blank tries 1, 2, 3 */
 	if (! bf->bf_state.bfs_ismrr)
 		rc[1].tries = rc[2].tries = rc[3].tries = 0;
 
 	/*
 	 * NB: the disabled block below starts with "else" and would
 	 * attach to the unbraced "if" above if it were ever enabled.
 	 */
 #if 0
 	/*
 	 * If NOACK is set, just set ntries=1.
 	 */
 	else if (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) {
 		rc[1].tries = rc[2].tries = rc[3].tries = 0;
 		rc[0].tries = 1;
 	}
 #endif
 
 	/*
 	 * Always call - that way a retried descriptor will
 	 * have the MRR fields overwritten.
 	 *
 	 * XXX TODO: see if this is really needed - setting up
 	 * the first descriptor should set the MRR fields to 0
 	 * for us anyway.
 	 */
 	if (ath_tx_is_11n(sc)) {
 		/* 11n path: the whole rate scenario is programmed at once. */
 		ath_buf_set_rate(sc, ni, bf);
 	} else {
 		/* Non-11n path: only series 1-3 are written here. */
 		ath_hal_setupxtxdesc(sc->sc_ah, bf->bf_desc
 			, rc[1].ratecode, rc[1].tries
 			, rc[2].ratecode, rc[2].tries
 			, rc[3].ratecode, rc[3].tries
 		);
 	}
 }
 
 /*
  * Setup segments+descriptors for an 11n aggregate.
  * bf_first is the first buffer in the aggregate.
  * The descriptor list must already been linked together using
  * bf->bf_next.
  */
 /*
  * NB: bf_first must not be NULL; it is dereferenced before the loop,
  * and bf_prev (set by the loop) is dereferenced after it.
  *
  * On return bf_first->bf_lastds points at the last descriptor of the
  * last subframe and bf_first->bf_last at the last buffer in the chain.
  */
 static void
 ath_tx_setds_11n(struct ath_softc *sc, struct ath_buf *bf_first)
 {
 	struct ath_buf *bf, *bf_prev = NULL;
 	struct ath_desc *ds0 = bf_first->bf_desc;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: nframes=%d, al=%d\n",
 	    __func__, bf_first->bf_state.bfs_nframes,
 	    bf_first->bf_state.bfs_al);
 
 	bf = bf_first;
 
 	/* Debug-only: flag a first frame whose rate fields are still zero. */
 	if (bf->bf_state.bfs_txrate0 == 0)
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: bf=%p, txrate0=%d\n",
 		    __func__, bf, 0);
 	if (bf->bf_state.bfs_rc[0].ratecode == 0)
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: bf=%p, rix0=%d\n",
 		    __func__, bf, 0);
 
 	/*
 	 * Setup all descriptors of all subframes - this will
 	 * call ath_hal_set11naggrmiddle() on every frame.
 	 */
 	while (bf != NULL) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 		    "%s: bf=%p, nseg=%d, pktlen=%d, seqno=%d\n",
 		    __func__, bf, bf->bf_nseg, bf->bf_state.bfs_pktlen,
 		    SEQNO(bf->bf_state.bfs_seqno));
 
 		/*
 		 * Setup the initial fields for the first descriptor - all
 		 * the non-11n specific stuff.
 		 */
 		ath_hal_setuptxdesc(sc->sc_ah, bf->bf_desc
 			, bf->bf_state.bfs_pktlen	/* packet length */
 			, bf->bf_state.bfs_hdrlen	/* header length */
 			, bf->bf_state.bfs_atype	/* Atheros packet type */
 			, bf->bf_state.bfs_txpower	/* txpower */
 			, bf->bf_state.bfs_txrate0
 			, bf->bf_state.bfs_try0		/* series 0 rate/tries */
 			, bf->bf_state.bfs_keyix	/* key cache index */
 			, bf->bf_state.bfs_txantenna	/* antenna mode */
 			, bf->bf_state.bfs_txflags | HAL_TXDESC_INTREQ	/* flags */
 			, bf->bf_state.bfs_ctsrate	/* rts/cts rate */
 			, bf->bf_state.bfs_ctsduration	/* rts/cts duration */
 		);
 
 		/*
 		 * First descriptor? Setup the rate control and initial
 		 * aggregate header information.
 		 */
 		if (bf == bf_first) {
 			/*
 			 * setup first desc with rate and aggr info
 			 */
 			ath_tx_set_ratectrl(sc, bf->bf_node, bf);
 		}
 
 		/*
 		 * Setup the descriptors for a multi-descriptor frame.
 		 * This is both aggregate and non-aggregate aware.
 		 */
 		ath_tx_chaindesclist(sc, ds0, bf,
 		    1, /* is_aggr */
 		    !! (bf == bf_first), /* is_first_subframe */
 		    !! (bf->bf_next == NULL) /* is_last_subframe */
 		    );
 
 		/*
 		 * Must follow ath_tx_chaindesclist(): the "middle" setup
 		 * done there clears the aggregate length field.
 		 */
 		if (bf == bf_first) {
 			/*
 			 * Initialise the first 11n aggregate with the
 			 * aggregate length and aggregate enable bits.
 			 */
 			ath_hal_set11n_aggr_first(sc->sc_ah,
 			    ds0,
 			    bf->bf_state.bfs_al,
 			    bf->bf_state.bfs_ndelim);
 		}
 
 		/*
 		 * Link the last descriptor of the previous frame
 		 * to the beginning descriptor of this frame.
 		 */
 		if (bf_prev != NULL)
 			ath_hal_settxdesclink(sc->sc_ah, bf_prev->bf_lastds,
 			    bf->bf_daddr);
 
 		/* Save a copy so we can link the next descriptor in */
 		bf_prev = bf;
 		bf = bf->bf_next;
 	}
 
 	/*
 	 * Set the first descriptor bf_lastds field to point to
 	 * the last descriptor in the last subframe, that's where
 	 * the status update will occur.
 	 */
 	bf_first->bf_lastds = bf_prev->bf_lastds;
 
 	/*
 	 * And bf_last in the first descriptor points to the end of
 	 * the aggregate list.
 	 */
 	bf_first->bf_last = bf_prev;
 
 	/*
 	 * For non-AR9300 NICs, which require the rate control
 	 * in the final descriptor - let's set that up now.
 	 *
 	 * This is because the filltxdesc() HAL call doesn't
 	 * populate the last segment with rate control information
 	 * if firstSeg is also true.  For non-aggregate frames
 	 * that is fine, as the first frame already has rate control
 	 * info.  But if the last frame in an aggregate has one
 	 * descriptor, both firstseg and lastseg will be true and
 	 * the rate info isn't copied.
 	 *
 	 * This is inefficient on MIPS/ARM platforms that have
 	 * non-cachable memory for TX descriptors, but we'll just
 	 * make do for now.
 	 *
 	 * As to why the rate table is stashed in the last descriptor
 	 * rather than the first descriptor?  Because proctxdesc()
 	 * is called on the final descriptor in an MPDU or A-MPDU -
 	 * ie, the one that gets updated by the hardware upon
 	 * completion.  That way proctxdesc() doesn't need to know
 	 * about the first _and_ last TX descriptor.
 	 */
 	ath_hal_setuplasttxdesc(sc->sc_ah, bf_prev->bf_lastds, ds0);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: end\n", __func__);
 }
 
 /*
  * Hand-off a frame to the multicast TX queue.
  *
  * This is a software TXQ which will be appended to the CAB queue
  * during the beacon setup code.
  *
  * XXX TODO: since the AR9300 EDMA TX queue support wants the QCU ID
  * as part of the TX descriptor, bf_state.bfs_tx_queue must be updated
  * with the actual hardware txq, or all of this will fall apart.
  *
  * XXX It may not be a bad idea to just stuff the QCU ID into bf_state
  * and retire bfs_tx_queue; then make sure the CABQ QCU ID is populated
  * correctly.
  */
 static void
 ath_tx_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
     struct ath_buf *bf)
 {
 	/* Caller must hold the TX lock and bf must not be marked busy. */
 	ATH_TX_LOCK_ASSERT(sc);
 
 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
 	     ("%s: busy status 0x%x", __func__, bf->bf_flags));
 
 	/*
 	 * Ensure that the tx queue is the cabq, so things get
 	 * mapped correctly.
 	 */
 	/*
 	 * NB: a mismatch is only logged, not corrected.
 	 * NOTE(review): the comparison uses sc_cabq->axq_qnum but the
 	 * message prints txq->axq_qnum - confirm which is intended.
 	 */
 	if (bf->bf_state.bfs_tx_queue != sc->sc_cabq->axq_qnum) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: bf=%p, bfs_tx_queue=%d, axq_qnum=%d\n",
 		    __func__, bf, bf->bf_state.bfs_tx_queue,
 		    txq->axq_qnum);
 	}
 
 	ATH_TXQ_LOCK(txq);
 	/*
 	 * If the queue already holds a frame, flag it as having more
 	 * data following and chain its last descriptor to the new frame.
 	 */
 	if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
 		struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
 		struct ieee80211_frame *wh;
 
 		/* mark previous frame */
 		wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
 		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
 		/* The header changed, so sync it out again. */
 		bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
 		    BUS_DMASYNC_PREWRITE);
 
 		/* link descriptor */
 		ath_hal_settxdesclink(sc->sc_ah,
 		    bf_last->bf_lastds,
 		    bf->bf_daddr);
 	}
 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
 	ATH_TXQ_UNLOCK(txq);
 }
 
 /*
  * Hand-off packet to a hardware queue.
  */
 /*
  * Append bf to the hardware TX queue txq and kick TX DMA.
  *
  * The caller must hold the TX lock; the TXQ lock is taken and released
  * here.  bf must not be marked busy and txq must not be the software
  * multicast queue (ATH_TXQ_SWQ).
  *
  * Sequence: insert bf at the tail of the queue, patch the previous
  * link pointer (if any) to point at bf, program TxDP with the queue
  * head if nothing has been pushed yet, record the new link pointer
  * from bf's last descriptor, then start the queue.
  */
 static void
 ath_tx_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
     struct ath_buf *bf)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_buf *bf_first;
 
 	/*
 	 * Insert the frame on the outbound list and pass it on
 	 * to the hardware.  Multicast frames buffered for power
 	 * save stations and transmit from the CAB queue are stored
 	 * on a s/w only queue and loaded on to the CAB queue in
 	 * the SWBA handler since frames only go out on DTIM and
 	 * to avoid possible races.
 	 */
 	ATH_TX_LOCK_ASSERT(sc);
 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
 	     ("%s: busy status 0x%x", __func__, bf->bf_flags));
 	KASSERT(txq->axq_qnum != ATH_TXQ_SWQ,
 	     ("ath_tx_handoff_hw called for mcast queue"));
 
 	/*
 	 * XXX We should instead just verify that sc_txstart_cnt
 	 * or ath_txproc_cnt > 0.  That would mean that
 	 * the reset is going to be waiting for us to complete.
 	 */
 	if (sc->sc_txproc_cnt == 0 && sc->sc_txstart_cnt == 0) {
 		device_printf(sc->sc_dev,
 		    "%s: TX dispatch without holding txcount/txstart refcnt!\n",
 		    __func__);
 	}
 
 	/*
 	 * XXX .. this is going to cause the hardware to get upset;
 	 * so we really should find some way to drop or queue
 	 * things.
 	 */
 
 	ATH_TXQ_LOCK(txq);
 
 	/*
 	 * XXX TODO: if there's a holdingbf, then
 	 * ATH_TXQ_PUTRUNNING should be clear.
 	 *
 	 * If there is a holdingbf and the list is empty,
 	 * then axq_link should be pointing to the holdingbf.
 	 *
 	 * Otherwise it should point to the last descriptor
 	 * in the last ath_buf.
 	 *
 	 * In any case, we should really ensure that we
 	 * update the previous descriptor link pointer to
 	 * this descriptor, regardless of all of the above state.
 	 *
 	 * For now this is captured by having axq_link point
 	 * to either the holdingbf (if the TXQ list is empty)
 	 * or the end of the list (if the TXQ list isn't empty.)
 	 * I'd rather just kill axq_link here and do it as above.
 	 */
 
 	/*
 	 * Append the frame to the TX queue.
 	 */
 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
 	ATH_KTR(sc, ATH_KTR_TX, 3,
 	    "ath_tx_handoff: non-tdma: txq=%u, add bf=%p "
 	    "depth=%d",
 	    txq->axq_qnum,
 	    bf,
 	    txq->axq_depth);
 
 	/*
 	 * If there's a link pointer, update it.
 	 *
 	 * XXX we should replace this with the above logic, just
 	 * to kill axq_link with fire.
 	 */
 	if (txq->axq_link != NULL) {
 		*txq->axq_link = bf->bf_daddr;
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: link[%u](%p)=%p (%p) depth %d\n", __func__,
 		    txq->axq_qnum, txq->axq_link,
 		    (caddr_t)bf->bf_daddr, bf->bf_desc,
 		    txq->axq_depth);
 		/*
 		 * bf_lastds is a pointer, so trace it with %p (as the
 		 * TXDP trace below does) rather than %d.
 		 */
 		ATH_KTR(sc, ATH_KTR_TX, 5,
 		    "ath_tx_handoff: non-tdma: link[%u](%p)=%p (%p) "
 		    "lastds=%p",
 		    txq->axq_qnum, txq->axq_link,
 		    (caddr_t)bf->bf_daddr, bf->bf_desc,
 		    bf->bf_lastds);
 	}
 
 	/*
 	 * If we've not pushed anything into the hardware yet,
 	 * push the head of the queue into the TxDP.
 	 *
 	 * Once we've started DMA, there's no guarantee that
 	 * updating the TxDP with a new value will actually work.
 	 * So we just don't do that - if we hit the end of the list,
 	 * we keep that buffer around (the "holding buffer") and
 	 * re-start DMA by updating the link pointer of _that_
 	 * descriptor and then restart DMA.
 	 */
 	if (! (txq->axq_flags & ATH_TXQ_PUTRUNNING)) {
 		bf_first = TAILQ_FIRST(&txq->axq_q);
 		txq->axq_flags |= ATH_TXQ_PUTRUNNING;
 		ath_hal_puttxbuf(ah, txq->axq_qnum, bf_first->bf_daddr);
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: TXDP[%u] = %p (%p) depth %d\n",
 		    __func__, txq->axq_qnum,
 		    (caddr_t)bf_first->bf_daddr, bf_first->bf_desc,
 		    txq->axq_depth);
 		ATH_KTR(sc, ATH_KTR_TX, 5,
 		    "ath_tx_handoff: TXDP[%u] = %p (%p) "
 		    "lastds=%p depth %d",
 		    txq->axq_qnum,
 		    (caddr_t)bf_first->bf_daddr, bf_first->bf_desc,
 		    bf_first->bf_lastds,
 		    txq->axq_depth);
 	}
 
 	/*
 	 * Ensure that the bf TXQ matches this TXQ, so later
 	 * checking and holding buffer manipulation is sane.
 	 */
 	/* NB: a mismatch is only logged here, not corrected. */
 	if (bf->bf_state.bfs_tx_queue != txq->axq_qnum) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: bf=%p, bfs_tx_queue=%d, axq_qnum=%d\n",
 		    __func__, bf, bf->bf_state.bfs_tx_queue,
 		    txq->axq_qnum);
 	}
 
 	/*
 	 * Track aggregate queue depth.
 	 */
 	if (bf->bf_state.bfs_aggr)
 		txq->axq_aggr_depth++;
 
 	/*
 	 * Update the link pointer.
 	 */
 	ath_hal_gettxdesclinkptr(ah, bf->bf_lastds, &txq->axq_link);
 
 	/*
 	 * Start DMA.
 	 *
 	 * If we wrote a TxDP above, DMA will start from here.
 	 *
 	 * If DMA is running, it'll do nothing.
 	 *
 	 * If the DMA engine hit the end of the QCU list (ie LINK=NULL,
 	 * or VEOL) then it stops at the last transmitted write.
 	 * We then append a new frame by updating the link pointer
 	 * in that descriptor and then kick TxE here; it will re-read
 	 * that last descriptor and find the new descriptor to transmit.
 	 *
 	 * This is why we keep the holding descriptor around.
 	 */
 	ath_hal_txstart(ah, txq->axq_qnum);
 	ATH_TXQ_UNLOCK(txq);
 	ATH_KTR(sc, ATH_KTR_TX, 1,
 	    "ath_tx_handoff: txq=%u, txstart", txq->axq_qnum);
 }
 
 /*
  * Restart TX DMA for the given TXQ.
  *
  * This must be called whether the queue is empty or not.
  */
 static void
 ath_legacy_tx_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_buf *bf, *bf_last;
 
 	ATH_TXQ_LOCK_ASSERT(txq);
 
 	/* XXX make this ATH_TXQ_FIRST */
 	bf = TAILQ_FIRST(&txq->axq_q);
 	bf_last = ATH_TXQ_LAST(txq, axq_q_s);
 
 	if (bf == NULL)
 		return;
 
 	DPRINTF(sc, ATH_DEBUG_RESET,
 	    "%s: Q%d: bf=%p, bf_last=%p, daddr=0x%08x\n",
 	    __func__,
 	    txq->axq_qnum,
 	    bf,
 	    bf_last,
 	    (uint32_t) bf->bf_daddr);
 
 #ifdef	ATH_DEBUG
 	if (sc->sc_debug & ATH_DEBUG_RESET)
 		ath_tx_dump(sc, txq);
 #endif
 
 	/*
 	 * This is called from a restart, so DMA is known to be
 	 * completely stopped.
 	 */
 	KASSERT((!(txq->axq_flags & ATH_TXQ_PUTRUNNING)),
 	    ("%s: Q%d: called with PUTRUNNING=1\n",
 	    __func__,
 	    txq->axq_qnum));
 
 	ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
 	txq->axq_flags |= ATH_TXQ_PUTRUNNING;
 
 	ath_hal_gettxdesclinkptr(sc->sc_ah, bf_last->bf_lastds,
 	    &txq->axq_link);
 	ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
 }
 
 /*
  * Hand off a packet to the hardware (or mcast queue.)
  *
  * The relevant hardware txq should be locked.
  */
 static void
 ath_legacy_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
     struct ath_buf *bf)
 {
 	ATH_TX_LOCK_ASSERT(sc);
 
 #ifdef	ATH_DEBUG_ALQ
 	/* Log the descriptors first if ALQ TX descriptor tracing is on. */
 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
 		ath_tx_alq_post(sc, bf);
 #endif
 
 	/* Anything but the software multicast queue goes to hardware. */
 	if (txq->axq_qnum != ATH_TXQ_SWQ) {
 		ath_tx_handoff_hw(sc, txq, bf);
 		return;
 	}
 
 	ath_tx_handoff_mcast(sc, txq, bf);
 }
 
 static int
 ath_tx_tag_crypto(struct ath_softc *sc, struct ieee80211_node *ni,
     struct mbuf *m0, int iswep, int isfrag, int *hdrlen, int *pktlen,
     int *keyix)
 {
 	DPRINTF(sc, ATH_DEBUG_XMIT,
 	    "%s: hdrlen=%d, pktlen=%d, isfrag=%d, iswep=%d, m0=%p\n",
 	    __func__,
 	    *hdrlen,
 	    *pktlen,
 	    isfrag,
 	    iswep,
 	    m0);
 
 	if (iswep) {
 		const struct ieee80211_cipher *cip;
 		struct ieee80211_key *k;
 
 		/*
 		 * Construct the 802.11 header+trailer for an encrypted
 		 * frame. The only reason this can fail is because of an
 		 * unknown or unsupported cipher/key type.
 		 */
 		k = ieee80211_crypto_encap(ni, m0);
 		if (k == NULL) {
 			/*
 			 * This can happen when the key is yanked after the
 			 * frame was queued.  Just discard the frame; the
 			 * 802.11 layer counts failures and provides
 			 * debugging/diagnostics.
 			 */
 			return (0);
 		}
 		/*
 		 * Adjust the packet + header lengths for the crypto
 		 * additions and calculate the h/w key index.  When
 		 * a s/w mic is done the frame will have had any mic
 		 * added to it prior to entry so m0->m_pkthdr.len will
 		 * account for it. Otherwise we need to add it to the
 		 * packet length.
 		 */
 		cip = k->wk_cipher;
 		(*hdrlen) += cip->ic_header;
 		(*pktlen) += cip->ic_header + cip->ic_trailer;
 		/* NB: frags always have any TKIP MIC done in s/w */
 		if ((k->wk_flags & IEEE80211_KEY_SWMIC) == 0 && !isfrag)
 			(*pktlen) += cip->ic_miclen;
 		(*keyix) = k->wk_keyix;
 	} else if (ni->ni_ucastkey.wk_cipher == &ieee80211_cipher_none) {
 		/*
 		 * Use station key cache slot, if assigned.
 		 */
 		(*keyix) = ni->ni_ucastkey.wk_keyix;
 		if ((*keyix) == IEEE80211_KEYIX_NONE)
 			(*keyix) = HAL_TXKEYIX_INVALID;
 	} else
 		(*keyix) = HAL_TXKEYIX_INVALID;
 
 	return (1);
 }
 
 /*
  * Calculate whether interoperability protection is required for
  * this frame.
  *
  * This requires the rate control information be filled in,
  * as the protection requirement depends upon the current
  * operating mode / PHY.
  */
 /*
  * Decide whether RTS/CTS protection flags are needed for bf and fold
  * them into bf->bf_state.bfs_txflags.
  *
  * Reads the series-0 rate index (bfs_rc[0].rix) and the current TX
  * flags from the buffer state; may set bfs_doprot and bump the
  * ast_tx_protect / ast_tx_htprotect statistics.
  */
 static void
 ath_tx_calc_protection(struct ath_softc *sc, struct ath_buf *bf)
 {
 	uint8_t rix;
 	uint16_t flags;
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 
 	flags = bf->bf_state.bfs_txflags;
 	rix = bf->bf_state.bfs_rc[0].rix;
 
 	/*
 	 * If 802.11g protection is enabled, determine whether
 	 * to use RTS/CTS or just CTS.  Note that this is only
 	 * done for OFDM unicast frames.
 	 */
 	if ((ic->ic_flags & IEEE80211_F_USEPROT) &&
 	    rt->info[rix].phy == IEEE80211_T_OFDM &&
 	    (flags & HAL_TXDESC_NOACK) == 0) {
 		bf->bf_state.bfs_doprot = 1;
 		/* XXX fragments must use CCK rates w/ protection */
 		if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) {
 			flags |= HAL_TXDESC_RTSENA;
 		} else if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) {
 			flags |= HAL_TXDESC_CTSENA;
 		}
 		/*
 		 * For frags it would be desirable to use the
 		 * highest CCK rate for RTS/CTS.  But stations
 		 * farther away may detect it at a lower CCK rate
 		 * so use the configured protection rate instead
 		 * (for now).
 		 */
 		sc->sc_stats.ast_tx_protect++;
 	}
 
 	/*
 	 * If 11n protection is enabled and it's a HT frame,
 	 * enable RTS.
 	 *
 	 * XXX ic_htprotmode or ic_curhtprotmode?
 	 * XXX should it_htprotmode only matter if ic_curhtprotmode 
 	 * XXX indicates it's not a HT pure environment?
 	 */
 	if ((ic->ic_htprotmode == IEEE80211_PROT_RTSCTS) &&
 	    rt->info[rix].phy == IEEE80211_T_HT &&
 	    (flags & HAL_TXDESC_NOACK) == 0) {
 		flags |= HAL_TXDESC_RTSENA;
 		sc->sc_stats.ast_tx_htprotect++;
 	}
 
 	/* Publish the (possibly updated) flags back to the buffer state. */
 	bf->bf_state.bfs_txflags = flags;
 }
 
 /*
  * Fill in the 802.11 duration field of the frame for the rate
  * currently held in bfs_rc[0].
  *
  * The frame header in the mbuf is modified in place, so it will
  * require a DMA flush.  Nothing is touched for frames flagged
  * HAL_TXDESC_NOACK or for control frames.
  */
 static void
 ath_tx_calc_duration(struct ath_softc *sc, struct ath_buf *bf)
 {
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	struct ath_hal *ah = sc->sc_ah;
 	struct ieee80211_frame *wh = mtod(bf->bf_m, struct ieee80211_frame *);
 	uint16_t flags = bf->bf_state.bfs_txflags;
 	uint8_t rix = bf->bf_state.bfs_rc[0].rix;
 	int shortPreamble = bf->bf_state.bfs_shpream;
 	int isfrag = bf->bf_m->m_flags & M_FRAG;
 	u_int16_t dur;
 
 	/*
 	 * The duration calculation logically belongs in the 802.11
 	 * layer but it lacks sufficient information to do it.
 	 * Frames that are not ACKed and control frames are left alone.
 	 */
 	if ((flags & HAL_TXDESC_NOACK) != 0)
 		return;
 	if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL)
 		return;
 
 	/* Start with the ACK duration matching the preamble in use */
 	dur = shortPreamble ?
 	    rt->info[rix].spAckDuration : rt->info[rix].lpAckDuration;
 
 	if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) {
 		dur *= 2;		/* additional SIFS+ACK */
 		/*
 		 * Cover the next fragment as well so NAV is updated
 		 * properly.  The last fragment uses only the ACK
 		 * duration.
 		 *
 		 * XXX TODO: ensure that the rate lookup for each
 		 * fragment is the same as the rate used by the
 		 * first fragment!
 		 */
 		dur += ath_hal_computetxtime(ah, rt, bf->bf_nextfraglen,
 		    rix, shortPreamble);
 	}
 
 	if (isfrag) {
 		/*
 		 * Multi-rate retry makes the hardware update the
 		 * duration from its multi-rate duration table; turn
 		 * it off so the value computed here is the one used
 		 * for the next fragment.
 		 */
 		bf->bf_state.bfs_ismrr = 0;
 		bf->bf_state.bfs_try0 = ATH_TXMGTTRY;
 		/* XXX update bfs_rc[0].try? */
 	}
 
 	/* Store the result in the frame header (little-endian) */
 	*(u_int16_t *)wh->i_dur = htole16(dur);
 }
 
 /*
  * Return the hardware rate code to use for RTS/CTS, given the
  * control rate index 'cix' into rate table 'rt'.  The table's short
  * preamble bits are OR'ed in when 'shortPreamble' is non-zero.
  * 'ah' is not used.
  */
 static uint8_t
 ath_tx_get_rtscts_rate(struct ath_hal *ah, const HAL_RATE_TABLE *rt,
     int cix, int shortPreamble)
 {
 	uint8_t code;
 
 	/* NB: the caller must have resolved cix to a real table index */
 	KASSERT(cix != 0xff, ("cix not setup"));
 
 	/*
 	 * The CTS transmit rate comes straight out of the h/w
 	 * rate table entry for the control rate.
 	 */
 	code = rt->info[cix].rateCode;
 
 	/* XXX this should only matter for legacy rates */
 	if (shortPreamble != 0)
 		code |= rt->info[cix].shortPreamble;
 
 	return (code);
 }
 
 /*
  * Calculate the RTS/CTS duration for legacy frames.
  *
  * 'rix' is the data rate index and 'cix' the control rate index,
  * both into 'rt'.  Returns the summed duration, or -1 (after
  * printing a diagnostic) if 'cix' refers to an HT rate.
  */
 static int
 ath_tx_calc_ctsduration(struct ath_hal *ah, int rix, int cix,
     int shortPreamble, int pktlen, const HAL_RATE_TABLE *rt,
     int flags)
 {
 	int total = 0;
 
 	/* This mustn't be called for HT modes */
 	if (rt->info[cix].phy == IEEE80211_T_HT) {
 		printf("%s: HT rate where it shouldn't be (0x%x)\n",
 		    __func__, rt->info[cix].rateCode);
 		return (-1);
 	}
 
 	/*
 	 * The total is built from up to three pieces: the CTS
 	 * response (only with RTS enabled), the frame itself, and
 	 * the ACK (unless no ACK is expected).  The frame time is
 	 * computed by the HAL since it depends on the
 	 * characteristics of the actual PHY being used.
 	 *
 	 * NB: CTS is assumed the same size as an ACK so we can
 	 *     use the precalculated ACK durations.
 	 */
 
 	/* SIFS + CTS */
 	if (flags & HAL_TXDESC_RTSENA)
 		total += shortPreamble ?
 		    rt->info[cix].spAckDuration :
 		    rt->info[cix].lpAckDuration;
 
 	/* The frame itself */
 	total += ath_hal_computetxtime(ah, rt, pktlen, rix,
 	    (shortPreamble ? AH_TRUE : AH_FALSE));
 
 	/* SIFS + ACK */
 	if ((flags & HAL_TXDESC_NOACK) == 0)
 		total += shortPreamble ?
 		    rt->info[rix].spAckDuration :
 		    rt->info[rix].lpAckDuration;
 
 	return (total);
 }
 
 /*
  * Refresh the RTS/CTS rate and duration stored in the given ath_buf.
  *
  * Each software retry may do a fresh rate lookup, so the rts/cts
  * rate and cts duration have to be recalculated to match.
  *
  * Expects the RTS/CTS flags in bfs_txflags to have been decided and
  * the rate control lookup to have been done already.
  *
  * XXX TODO: MRR need only be disabled for the pre-11n NICs.
  * XXX The 11n NICs support per-rate RTS/CTS configuration.
  */
 static void
 ath_tx_set_rtscts(struct ath_softc *sc, struct ath_buf *bf)
 {
 	const HAL_RATE_TABLE *rt = sc->sc_currates;
 	uint16_t ctsduration = 0;
 	uint8_t ctsrate, rix, cix;
 
 	/* Neither RTS nor CTS requested: zero the fields and leave. */
 	if ((bf->bf_state.bfs_txflags &
 	    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) == 0) {
 		/* XXX is this really needed? */
 		bf->bf_state.bfs_ctsrate = 0;
 		bf->bf_state.bfs_ctsduration = 0;
 		return;
 	}
 
 	/*
 	 * With protection in effect the control rate is derived from
 	 * the protection rate index; otherwise from the rate0 index.
 	 */
 	rix = bf->bf_state.bfs_doprot ?
 	    sc->sc_protrix : bf->bf_state.bfs_rc[0].rix;
 
 	/*
 	 * A ctsrate0 hard-coded by the raw path wins; failing that,
 	 * take the control rate of the index chosen above.
 	 */
 	if (bf->bf_state.bfs_ctsrate0 == 0)
 		cix = rt->info[rix].controlRate;
 	else
 		cix = ath_tx_findrix(sc, bf->bf_state.bfs_ctsrate0);
 
 	/* Rate code for the chosen control rate index */
 	ctsrate = ath_tx_get_rtscts_rate(sc->sc_ah, rt, cix,
 	    bf->bf_state.bfs_shpream);
 
 	/* The 11n chipsets do ctsduration calculations for you */
 	if (! ath_tx_is_11n(sc)) {
 		ctsduration = ath_tx_calc_ctsduration(sc->sc_ah, rix, cix,
 		    bf->bf_state.bfs_shpream, bf->bf_state.bfs_pktlen,
 		    rt, bf->bf_state.bfs_txflags);
 	}
 
 	/* Record both results in the ath_buf */
 	bf->bf_state.bfs_ctsduration = ctsduration;
 	bf->bf_state.bfs_ctsrate = ctsrate;
 
 	/*
 	 * Multi-rate retry is switched off for RTS/CTS frames unless
 	 * sc_mrrprot is set.
 	 */
 	if (!sc->sc_mrrprot) {
 		bf->bf_state.bfs_ismrr = 0;
 		bf->bf_state.bfs_rc[0].tries = ATH_TXMGTTRY;
 		bf->bf_state.bfs_try0 = bf->bf_state.bfs_rc[0].tries;
 	}
 }
 
 /*
  * Program the first TX descriptor of a normal or fast-frame frame
  * from the values saved in bf_state, then apply rate control and
  * build the descriptor chain.
  *
  * XXX TODO: extend to include the destination hardware QCU ID.
  * Make sure that is correct.  Make sure that when being added
  * to the mcastq, the CABQ QCUID is set or things will get a bit
  * odd.
  */
 static void
 ath_tx_setds(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ath_hal *ah = sc->sc_ah;
 	struct ath_desc *ds = bf->bf_desc;
 
 	/* A zero rate code is logged for debugging but still used */
 	if (bf->bf_state.bfs_txrate0 == 0) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: bf=%p, txrate0=%d\n", __func__, bf, 0);
 	}
 
 	ath_hal_setuptxdesc(ah, ds,
 	    bf->bf_state.bfs_pktlen,		/* packet length */
 	    bf->bf_state.bfs_hdrlen,		/* header length */
 	    bf->bf_state.bfs_atype,		/* Atheros packet type */
 	    bf->bf_state.bfs_txpower,		/* txpower */
 	    bf->bf_state.bfs_txrate0,		/* series 0 rate */
 	    bf->bf_state.bfs_try0,		/* series 0 tries */
 	    bf->bf_state.bfs_keyix,		/* key cache index */
 	    bf->bf_state.bfs_txantenna,		/* antenna mode */
 	    bf->bf_state.bfs_txflags,		/* flags */
 	    bf->bf_state.bfs_ctsrate,		/* rts/cts rate */
 	    bf->bf_state.bfs_ctsduration	/* rts/cts duration */
 	);
 
 	/*
 	 * Provisional values; these will be overridden when the
 	 * descriptor chain is written.
 	 */
 	bf->bf_last = bf;
 	bf->bf_lastds = ds;
 
 	/* Set rate control and descriptor chain for this frame */
 	ath_tx_set_ratectrl(sc, bf->bf_node, bf);
 	ath_tx_chaindesclist(sc, ds, bf, 0, 0, 0);
 }
 
 /*
  * Do a rate lookup.
  *
  * The lookup only happens when bfs_doratelookup is set on the
  * buffer; non-data frames and raw frames don't require it.
  *
  * This populates the primary and MRR entries; MRR values are
  * then disabled later on if something requires it (eg RTS/CTS on
  * pre-11n chipsets).
  *
  * This needs to be done before the RTS/CTS fields are calculated
  * as they may depend upon the rate chosen.
  */
 static void
 ath_tx_do_ratelookup(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ath_node *an;
 	uint8_t rix, ratecode;
 	int tries;
 
 	if (bf->bf_state.bfs_doratelookup == 0)
 		return;
 
 	an = ATH_NODE(bf->bf_node);
 
 	/* Start from a clean rate series */
 	bzero(bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
 
 	/* Both rate control calls are made with the node lock held */
 	ATH_NODE_LOCK(an);
 	ath_rate_findrate(sc, an, bf->bf_state.bfs_shpream,
 	    bf->bf_state.bfs_pktlen, &rix, &tries, &ratecode);
 
 	/* In case MRR is disabled, make sure rc[0] is setup correctly */
 	bf->bf_state.bfs_rc[0].rix = rix;
 	bf->bf_state.bfs_rc[0].ratecode = ratecode;
 	bf->bf_state.bfs_rc[0].tries = tries;
 
 	/* Fetch the remaining MRR entries when MRR applies */
 	if (bf->bf_state.bfs_ismrr && tries != ATH_TXMAXTRY) {
 		ath_rate_getxtxrates(sc, an, rix, bf->bf_state.bfs_rc);
 	}
 	ATH_NODE_UNLOCK(an);
 
 	bf->bf_state.bfs_txrate0 = ratecode;
 	bf->bf_state.bfs_try0 = tries;
 	sc->sc_lastdatarix = rix;	/* for fast frames */
 	sc->sc_txrix = rix;		/* for LED blinking */
 }
 
 /*
  * Set HAL_TXDESC_CLRDMASK on the ath_buf if the owning node has
  * its clrdmask marker set, and consume that marker.
  *
  * Must be called with the TX lock held.  'tid' is not used.
  */
 static void
 ath_tx_update_clrdmask(struct ath_softc *sc, struct ath_tid *tid,
     struct ath_buf *bf)
 {
 	struct ath_node *node = ATH_NODE(bf->bf_node);
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Nothing pending for this node */
 	if (node->clrdmask != 1)
 		return;
 
 	node->clrdmask = 0;
 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 }
 
 /*
  * Decide whether a frame goes via the software queue (returns 1)
  * or is dispatched directly (returns 0).
  *
  * Node not in powersave: BAR frames are dispatched directly so they
  * go out even though the queue is paused.
  *
  * Node in powersave: BAR frames are software queued with
  * *queue_to_head set to 1; other management and control frames are
  * dispatched directly.
  *
  * Everything else is software queued.  *queue_to_head is 0 in all
  * cases except the powersave BAR one.
  */
 static int
 ath_tx_should_swq_frame(struct ath_softc *sc, struct ath_node *an,
     struct mbuf *m0, int *queue_to_head)
 {
 	struct ieee80211_node *ni = &an->an_node;
 	struct ieee80211_frame *wh = mtod(m0, struct ieee80211_frame *);
 	uint8_t type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
 	uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
 	int is_bar = (type == IEEE80211_FC0_TYPE_CTL &&
 	    subtype == IEEE80211_FC0_SUBTYPE_BAR);
 
 	*queue_to_head = 0;
 
 	/* If it's not in powersave - direct-dispatch BAR */
 	if (is_bar && ATH_NODE(ni)->an_is_powersave == 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX,
 		    "%s: BAR: TX'ing direct\n", __func__);
 		return (0);
 	}
 
 	/* BAR TX whilst asleep; queue */
 	if (is_bar && ATH_NODE(ni)->an_is_powersave == 1) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX,
 		    "%s: swq: TX'ing\n", __func__);
 		*queue_to_head = 1;
 		return (1);
 	}
 
 	/*
 	 * Other control/mgmt frame to a sleeping node; bypass
 	 * software queuing for now!
 	 */
 	if (ATH_NODE(ni)->an_is_powersave == 1 &&
 	    (type == IEEE80211_FC0_TYPE_MGT ||
 	     type == IEEE80211_FC0_TYPE_CTL)) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: %6D: Node is asleep; sending mgmt "
 		    "(type=%d, subtype=%d)\n",
 		    __func__, ni->ni_macaddr, ":", type, subtype);
 		return (0);
 	}
 
 	return (1);
 }
 
 
 /*
  * Finish descriptor setup for a frame and hand it to the hardware
  * queue 'txq'.
  *
  * The frame's basic setup must already have been done.  The rate
  * lookup (when the buffer asks for one), duration, protection and
  * RTS/CTS fields are all computed here before the handoff.
  *
  * XXX since the TXQ lock is being held here (and I dislike holding
  * it for this long when not doing software aggregation), later on
  * break this function into "setup_normal" and "xmit_normal". The
  * lock only needs to be held for the ath_tx_handoff call.
  *
  * XXX we don't update the leak count here - if we're doing
  * direct frame dispatch, we need to be able to do it without
  * decrementing the leak count (eg multicast queue frames.)
  */
 static void
 ath_tx_xmit_normal(struct ath_softc *sc, struct ath_txq *txq,
     struct ath_buf *bf)
 {
 	struct ath_node *node = ATH_NODE(bf->bf_node);
 	struct ath_tid *atid = &node->an_tid[bf->bf_state.bfs_tid];
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * CLRDMASK is forced on for every frame taking this path.
 	 * A completion handler is installed below, but it doesn't
 	 * (yet) properly handle the strict ordering requirements
 	 * needed for normal, non-aggregate session frames.
 	 *
 	 * Once this is implemented, only set CLRDMASK like this for
 	 * frames that must go out - eg management/raw frames.
 	 */
 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 
 	/* Descriptor setup; the order of these calls is significant */
 	ath_tx_do_ratelookup(sc, bf);
 	ath_tx_calc_duration(sc, bf);
 	ath_tx_calc_protection(sc, bf);
 	ath_tx_set_rtscts(sc, bf);
 	ath_tx_rate_fill_rcflags(sc, bf);
 	ath_tx_setds(sc, bf);
 
 	/* Completion handler for frames sent via this path */
 	bf->bf_comp = ath_tx_normal_comp;
 
 	/* Account for the frame in the per-TID hardware queue depth */
 	atid->hwq_depth++;
 
 	/* Hand off to hardware */
 	ath_tx_handoff(sc, txq, bf);
 }
 
 /*
  * Do the basic frame setup stuff that's required before the frame
  * is added to a software queue.
  *
  * All frames get mostly the same treatment and it's done once.
  * Retransmits fiddle with things like the rate control setup,
  * setting the retransmit bit in the packet; doing relevant DMA/bus
  * syncing and relinking it (back) into the hardware TX queue.
  *
  * Note that this may cause the mbuf to be reallocated, so
  * m0 may not be valid; callers should re-read bf->bf_m afterwards.
  *
  * The decisions made here (packet type, rate index, tries, flags,
  * key index, lengths, preamble) are stored in bf->bf_state; no
  * descriptor is programmed by this function.
  *
  * Returns 0 on success.  Returns EIO after calling ath_freetx(m0)
  * when crypto tagging fails, the frame type is unrecognised, or
  * (TDMA only) the frame would need an ACK.  An error from
  * ath_tx_dmasetup() is returned unchanged.
  *
  * Must be called with the TX lock held.
  */
 static int
 ath_tx_normal_setup(struct ath_softc *sc, struct ieee80211_node *ni,
     struct ath_buf *bf, struct mbuf *m0, struct ath_txq *txq)
 {
 	/* NOTE(review): ni is dereferenced here, before the KASSERT below */
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ath_hal *ah = sc->sc_ah;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	const struct chanAccParams *cap = &ic->ic_wme.wme_chanParams;
 	int error, iswep, ismcast, isfrag, ismrr;
 	int keyix, hdrlen, pktlen, try0 = 0;
 	u_int8_t rix = 0, txrate = 0;
 	/* NOTE(review): ds is assigned below but not otherwise used here */
 	struct ath_desc *ds;
 	struct ieee80211_frame *wh;
 	u_int subtype, flags;
 	HAL_PKT_TYPE atype;
 	const HAL_RATE_TABLE *rt;
 	HAL_BOOL shortPreamble;
 	struct ath_node *an;
 	u_int pri;
 
 	/*
 	 * To ensure that both sequence numbers and the CCMP PN handling
 	 * is "correct", make sure that the relevant TID queue is locked.
 	 * Otherwise the CCMP PN and seqno may appear out of order, causing
 	 * re-ordered frames to have out of order CCMP PN's, resulting
 	 * in many, many frame drops.
 	 */
 	ATH_TX_LOCK_ASSERT(sc);
 
 	wh = mtod(m0, struct ieee80211_frame *);
 	iswep = wh->i_fc[1] & IEEE80211_FC1_PROTECTED;
 	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
 	isfrag = m0->m_flags & M_FRAG;
 	hdrlen = ieee80211_anyhdrsize(wh);
 	/*
 	 * Packet length must not include any
 	 * pad bytes; deduct them here.
 	 */
 	pktlen = m0->m_pkthdr.len - (hdrlen & 3);
 
 	/* Handle encryption twiddling if needed */
 	if (! ath_tx_tag_crypto(sc, ni, m0, iswep, isfrag, &hdrlen,
 	    &pktlen, &keyix)) {
 		ath_freetx(m0);
 		return EIO;
 	}
 
 	/* packet header may have moved, reset our local pointer */
 	wh = mtod(m0, struct ieee80211_frame *);
 
 	/* The length handed to the hardware includes the trailing CRC */
 	pktlen += IEEE80211_CRC_LEN;
 
 	/*
 	 * Load the DMA map so any coalescing is done.  This
 	 * also calculates the number of descriptors we need.
 	 */
 	error = ath_tx_dmasetup(sc, bf, m0);
 	if (error != 0)
 		return error;
 	KASSERT((ni != NULL), ("%s: ni=NULL!", __func__));
 	bf->bf_node = ni;			/* NB: held reference */
 	m0 = bf->bf_m;				/* NB: may have changed */
 	wh = mtod(m0, struct ieee80211_frame *);
 
 	/* setup descriptors */
 	ds = bf->bf_desc;
 	rt = sc->sc_currates;
 	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
 
 	/*
 	 * NB: the 802.11 layer marks whether or not we should
 	 * use short preamble based on the current mode and
 	 * negotiated parameters.
 	 */
 	if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) &&
 	    (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE)) {
 		shortPreamble = AH_TRUE;
 		sc->sc_stats.ast_tx_shortpre++;
 	} else {
 		shortPreamble = AH_FALSE;
 	}
 
 	an = ATH_NODE(ni);
 	//flags = HAL_TXDESC_CLRDMASK;		/* XXX needed for crypto errs */
 	flags = 0;
 	ismrr = 0;				/* default no multi-rate retry*/
 	pri = M_WME_GETAC(m0);			/* honor classification */
 	/* XXX use txparams instead of fixed values */
 	/*
 	 * Calculate Atheros packet type from IEEE80211 packet header,
 	 * setup for rate calculations, and select h/w transmit queue.
 	 */
 	switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) {
 	case IEEE80211_FC0_TYPE_MGT:
 		subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
 		if (subtype == IEEE80211_FC0_SUBTYPE_BEACON)
 			atype = HAL_PKT_TYPE_BEACON;
 		else if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP)
 			atype = HAL_PKT_TYPE_PROBE_RESP;
 		else if (subtype == IEEE80211_FC0_SUBTYPE_ATIM)
 			atype = HAL_PKT_TYPE_ATIM;
 		else
 			atype = HAL_PKT_TYPE_NORMAL;	/* XXX */
 		/* Management frames use the node's management rate */
 		rix = an->an_mgmtrix;
 		txrate = rt->info[rix].rateCode;
 		if (shortPreamble)
 			txrate |= rt->info[rix].shortPreamble;
 		try0 = ATH_TXMGTTRY;
 		flags |= HAL_TXDESC_INTREQ;	/* force interrupt */
 		break;
 	case IEEE80211_FC0_TYPE_CTL:
 		atype = HAL_PKT_TYPE_PSPOLL;	/* stop setting of duration */
 		/* Control frames also use the node's management rate */
 		rix = an->an_mgmtrix;
 		txrate = rt->info[rix].rateCode;
 		if (shortPreamble)
 			txrate |= rt->info[rix].shortPreamble;
 		try0 = ATH_TXMGTTRY;
 		flags |= HAL_TXDESC_INTREQ;	/* force interrupt */
 		break;
 	case IEEE80211_FC0_TYPE_DATA:
 		atype = HAL_PKT_TYPE_NORMAL;		/* default */
 		/*
 		 * Data frames: multicast frames go out at a fixed rate,
 		 * EAPOL frames use the mgmt frame rate; otherwise consult
 		 * the rate control module for the rate to use.
 		 */
 		if (ismcast) {
 			rix = an->an_mcastrix;
 			txrate = rt->info[rix].rateCode;
 			if (shortPreamble)
 				txrate |= rt->info[rix].shortPreamble;
 			try0 = 1;
 		} else if (m0->m_flags & M_EAPOL) {
 			/* XXX? maybe always use long preamble? */
 			rix = an->an_mgmtrix;
 			txrate = rt->info[rix].rateCode;
 			if (shortPreamble)
 				txrate |= rt->info[rix].shortPreamble;
 			try0 = ATH_TXMAXTRY;	/* XXX?too many? */
 		} else {
 			/*
 			 * Do rate lookup on each TX, rather than using
 			 * the hard-coded TX information decided here.
 			 *
 			 * NB: rix, txrate and try0 keep their initial
 			 * value of 0 on this path.
 			 */
 			ismrr = 1;
 			bf->bf_state.bfs_doratelookup = 1;
 		}
 		if (cap->cap_wmeParams[pri].wmep_noackPolicy)
 			flags |= HAL_TXDESC_NOACK;
 		break;
 	default:
-		if_printf(ifp, "bogus frame type 0x%x (%s)\n",
-			wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__);
+		device_printf(sc->sc_dev, "bogus frame type 0x%x (%s)\n",
+		    wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__);
 		/* XXX statistic */
 		/* XXX free tx dmamap */
 		ath_freetx(m0);
 		return EIO;
 	}
 
 	/*
 	 * There are two known scenarios where the frame AC doesn't match
 	 * what the destination TXQ is.
 	 *
 	 * + non-QoS frames (eg management?) that the net80211 stack has
 	 *   assigned a higher AC to, but since it's a non-QoS TID, it's
 	 *   being thrown into TID 16.  TID 16 gets the AC_BE queue.
 	 *   It's quite possible that management frames should just be
 	 *   direct dispatched to hardware rather than go via the software
 	 *   queue; that should be investigated in the future.  There are
 	 *   some specific scenarios where this doesn't make sense, mostly
 	 *   surrounding ADDBA request/response - hence why that is special
 	 *   cased.
 	 *
 	 * + Multicast frames going into the VAP mcast queue.  That shows up
 	 *   as "TXQ 11".
 	 *
 	 * This driver should eventually support separate TID and TXQ locking,
 	 * allowing for arbitrary AC frames to appear on arbitrary software
 	 * queues, being queued to the "correct" hardware queue when needed.
 	 */
 #if 0
 	if (txq != sc->sc_ac2q[pri]) {
 		DPRINTF(sc, ATH_DEBUG_XMIT, 
 		    "%s: txq=%p (%d), pri=%d, pri txq=%p (%d)\n",
 		    __func__,
 		    txq,
 		    txq->axq_qnum,
 		    pri,
 		    sc->sc_ac2q[pri],
 		    sc->sc_ac2q[pri]->axq_qnum);
 	}
 #endif
 
 	/*
 	 * Calculate miscellaneous flags.
 	 */
 	if (ismcast) {
 		flags |= HAL_TXDESC_NOACK;	/* no ack on broad/multicast */
 	} else if (pktlen > vap->iv_rtsthreshold &&
 	    (ni->ni_ath_flags & IEEE80211_NODE_FF) == 0) {
 		flags |= HAL_TXDESC_RTSENA;	/* RTS based on frame length */
 		sc->sc_stats.ast_tx_rts++;
 	}
 	if (flags & HAL_TXDESC_NOACK)		/* NB: avoid double counting */
 		sc->sc_stats.ast_tx_noack++;
 #ifdef IEEE80211_SUPPORT_TDMA
 	if (sc->sc_tdma && (flags & HAL_TXDESC_NOACK) == 0) {
 		DPRINTF(sc, ATH_DEBUG_TDMA,
 		    "%s: discard frame, ACK required w/ TDMA\n", __func__);
 		sc->sc_stats.ast_tdma_ack++;
 		/* XXX free tx dmamap */
 		ath_freetx(m0);
 		return EIO;
 	}
 #endif
 
 	/*
 	 * Determine if a tx interrupt should be generated for
 	 * this descriptor.  We take a tx interrupt to reap
 	 * descriptors when the h/w hits an EOL condition or
 	 * when the descriptor is specifically marked to generate
 	 * an interrupt.  We periodically mark descriptors in this
 	 * way to ensure timely replenishing of the supply needed
 	 * for sending frames.  Deferring interrupts reduces system
 	 * load and potentially allows more concurrent work to be
 	 * done but if done too aggressively can cause senders to
 	 * back up.
 	 *
 	 * NB: use >= to deal with sc_txintrperiod changing
 	 *     dynamically through sysctl.
 	 */
 	if (flags & HAL_TXDESC_INTREQ) {
 		txq->axq_intrcnt = 0;
 	} else if (++txq->axq_intrcnt >= sc->sc_txintrperiod) {
 		flags |= HAL_TXDESC_INTREQ;
 		txq->axq_intrcnt = 0;
 	}
 
 	/* This point forward is actual TX bits */
 
 	/*
 	 * At this point we are committed to sending the frame
 	 * and we don't need to look at m_nextpkt; clear it in
 	 * case this frame is part of frag chain.
 	 */
 	m0->m_nextpkt = NULL;
 
 	/*
 	 * NOTE(review): on the deferred rate lookup path rix is still 0
 	 * here, so the debug dump and radiotap header below report the
 	 * rate of table index 0 rather than the rate eventually used.
 	 */
 	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT))
 		ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *), m0->m_len,
 		    sc->sc_hwmap[rix].ieeerate, -1);
 
 	if (ieee80211_radiotap_active_vap(vap)) {
 		u_int64_t tsf = ath_hal_gettsf64(ah);
 
 		sc->sc_tx_th.wt_tsf = htole64(tsf);
 		sc->sc_tx_th.wt_flags = sc->sc_hwmap[rix].txflags;
 		if (iswep)
 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
 		if (isfrag)
 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_FRAG;
 		sc->sc_tx_th.wt_rate = sc->sc_hwmap[rix].ieeerate;
 		sc->sc_tx_th.wt_txpower = ieee80211_get_node_txpower(ni);
 		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;
 
 		ieee80211_radiotap_tx(vap, m0);
 	}
 
 	/* Blank the legacy rate array */
 	bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
 
 	/*
 	 * ath_buf_set_rate needs at least one rate/try to setup
 	 * the rate scenario.
 	 */
 	bf->bf_state.bfs_rc[0].rix = rix;
 	bf->bf_state.bfs_rc[0].tries = try0;
 	bf->bf_state.bfs_rc[0].ratecode = txrate;
 
 	/* Store the decided rate index values away */
 	bf->bf_state.bfs_pktlen = pktlen;
 	bf->bf_state.bfs_hdrlen = hdrlen;
 	bf->bf_state.bfs_atype = atype;
 	bf->bf_state.bfs_txpower = ieee80211_get_node_txpower(ni);
 	bf->bf_state.bfs_txrate0 = txrate;
 	bf->bf_state.bfs_try0 = try0;
 	bf->bf_state.bfs_keyix = keyix;
 	bf->bf_state.bfs_txantenna = sc->sc_txantenna;
 	bf->bf_state.bfs_txflags = flags;
 	bf->bf_state.bfs_shpream = shortPreamble;
 
 	/* XXX this should be done in ath_tx_setrate() */
 	bf->bf_state.bfs_ctsrate0 = 0;	/* ie, no hard-coded ctsrate */
 	bf->bf_state.bfs_ctsrate = 0;	/* calculated later */
 	bf->bf_state.bfs_ctsduration = 0;
 	bf->bf_state.bfs_ismrr = ismrr;
 
 	return 0;
 }
 
 /*
  * Queue a frame to the hardware or software queue.
  *
  * This can be called by the net80211 code.
  *
  * XXX what about locking? Or, push the seqno assign into the
  * XXX aggregate scheduler so its serialised?
  *
  * XXX When sending management frames via ath_raw_xmit(),
  *     should CLRDMASK be set unconditionally?
  */
 int
 ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni,
     struct ath_buf *bf, struct mbuf *m0)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ath_vap *avp = ATH_VAP(vap);
 	int r = 0;
 	u_int pri;
 	int tid;
 	struct ath_txq *txq;
 	int ismcast;
 	const struct ieee80211_frame *wh;
 	int is_ampdu, is_ampdu_tx, is_ampdu_pending;
 	ieee80211_seq seqno;
 	uint8_t type, subtype;
 	int queue_to_head;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * Determine the target hardware queue.
 	 *
 	 * For multicast frames, the txq gets overridden appropriately
 	 * depending upon the state of PS.
 	 *
 	 * For any other frame, we do a TID/QoS lookup inside the frame
 	 * to see what the TID should be. If it's a non-QoS frame, the
 	 * AC and TID are overridden. The TID/TXQ code assumes the
 	 * TID is on a predictable hardware TXQ, so we don't support
 	 * having a node TID queued to multiple hardware TXQs.
 	 * This may change in the future but would require some locking
 	 * fudgery.
 	 */
 	pri = ath_tx_getac(sc, m0);
 	tid = ath_tx_gettid(sc, m0);
 
 	txq = sc->sc_ac2q[pri];
 	wh = mtod(m0, struct ieee80211_frame *);
 	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
 	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
 	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
 
 	/*
 	 * Enforce how deep the multicast queue can grow.
 	 *
 	 * XXX duplicated in ath_raw_xmit().
 	 */
 	if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
 		if (sc->sc_cabq->axq_depth + sc->sc_cabq->fifo.axq_depth
 		    > sc->sc_txq_mcastq_maxdepth) {
 			sc->sc_stats.ast_tx_mcastq_overflow++;
 			m_freem(m0);
 			return (ENOBUFS);
 		}
 	}
 
 	/*
 	 * Enforce how deep the unicast queue can grow.
 	 *
 	 * If the node is in power save then we don't want
 	 * the software queue to grow too deep, or a node may
 	 * end up consuming all of the ath_buf entries.
 	 *
 	 * For now, only do this for DATA frames.
 	 *
 	 * We will want to cap how many management/control
 	 * frames get punted to the software queue so it doesn't
 	 * fill up.  But the correct solution isn't yet obvious.
 	 * In any case, this check should at least let frames pass
 	 * that we are direct-dispatching.
 	 *
 	 * XXX TODO: duplicate this to the raw xmit path!
 	 */
 	if (type == IEEE80211_FC0_TYPE_DATA &&
 	    ATH_NODE(ni)->an_is_powersave &&
 	    ATH_NODE(ni)->an_swq_depth >
 	     sc->sc_txq_node_psq_maxdepth) {
 		sc->sc_stats.ast_tx_node_psq_overflow++;
 		m_freem(m0);
 		return (ENOBUFS);
 	}
 
 	/* A-MPDU TX */
 	is_ampdu_tx = ath_tx_ampdu_running(sc, ATH_NODE(ni), tid);
 	is_ampdu_pending = ath_tx_ampdu_pending(sc, ATH_NODE(ni), tid);
 	is_ampdu = is_ampdu_tx | is_ampdu_pending;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, ac=%d, is_ampdu=%d\n",
 	    __func__, tid, pri, is_ampdu);
 
 	/* Set local packet state, used to queue packets to hardware */
 	bf->bf_state.bfs_tid = tid;
 	bf->bf_state.bfs_tx_queue = txq->axq_qnum;
 	bf->bf_state.bfs_pri = pri;
 
 #if 1
 	/*
 	 * When servicing one or more stations in power-save mode
 	 * (or) if there is some mcast data waiting on the mcast
 	 * queue (to prevent out of order delivery) multicast frames
 	 * must be bufferd until after the beacon.
 	 *
 	 * TODO: we should lock the mcastq before we check the length.
 	 */
 	if (sc->sc_cabq_enable && ismcast && (vap->iv_ps_sta || avp->av_mcastq.axq_depth)) {
 		txq = &avp->av_mcastq;
 		/*
 		 * Mark the frame as eventually belonging on the CAB
 		 * queue, so the descriptor setup functions will
 		 * correctly initialise the descriptor 'qcuId' field.
 		 */
 		bf->bf_state.bfs_tx_queue = sc->sc_cabq->axq_qnum;
 	}
 #endif
 
 	/* Do the generic frame setup */
 	/* XXX should just bzero the bf_state? */
 	bf->bf_state.bfs_dobaw = 0;
 
 	/* A-MPDU TX? Manually set sequence number */
 	/*
 	 * Don't do it whilst pending; the net80211 layer still
 	 * assigns them.
 	 */
 	if (is_ampdu_tx) {
 		/*
 		 * Always call; this function will
 		 * handle making sure that null data frames
 		 * don't get a sequence number from the current
 		 * TID and thus mess with the BAW.
 		 */
 		seqno = ath_tx_tid_seqno_assign(sc, ni, bf, m0);
 
 		/*
 		 * Don't add QoS NULL frames to the BAW.
 		 */
 		if (IEEE80211_QOS_HAS_SEQ(wh) &&
 		    subtype != IEEE80211_FC0_SUBTYPE_QOS_NULL) {
 			bf->bf_state.bfs_dobaw = 1;
 		}
 	}
 
 	/*
 	 * If needed, the sequence number has been assigned.
 	 * Squirrel it away somewhere easy to get to.
 	 */
 	bf->bf_state.bfs_seqno = M_SEQNO_GET(m0) << IEEE80211_SEQ_SEQ_SHIFT;
 
 	/* Is ampdu pending? fetch the seqno and print it out */
 	if (is_ampdu_pending)
 		DPRINTF(sc, ATH_DEBUG_SW_TX,
 		    "%s: tid %d: ampdu pending, seqno %d\n",
 		    __func__, tid, M_SEQNO_GET(m0));
 
 	/* This also sets up the DMA map */
 	r = ath_tx_normal_setup(sc, ni, bf, m0, txq);
 
 	if (r != 0)
 		goto done;
 
 	/* At this point m0 could have changed! */
 	m0 = bf->bf_m;
 
 #if 1
 	/*
 	 * If it's a multicast frame, do a direct-dispatch to the
 	 * destination hardware queue. Don't bother software
 	 * queuing it.
 	 */
 	/*
 	 * If it's a BAR frame, do a direct dispatch to the
 	 * destination hardware queue. Don't bother software
 	 * queuing it, as the TID will now be paused.
 	 * Sending a BAR frame can occur from the net80211 txa timer
 	 * (ie, retries) or from the ath txtask (completion call.)
 	 * It queues directly to hardware because the TID is paused
 	 * at this point (and won't be unpaused until the BAR has
 	 * either been TXed successfully or max retries has been
 	 * reached.)
 	 */
 	/*
 	 * Until things are better debugged - if this node is asleep
 	 * and we're sending it a non-BAR frame, direct dispatch it.
 	 * Why? Because we need to figure out what's actually being
 	 * sent - eg, during reassociation/reauthentication after
 	 * the node (last) disappeared whilst asleep, the driver should
 	 * have unpaused/unsleep'ed the node.  So until that is
 	 * sorted out, use this workaround.
 	 */
 	if (txq == &avp->av_mcastq) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX,
 		    "%s: bf=%p: mcastq: TX'ing\n", __func__, bf);
 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 		ath_tx_xmit_normal(sc, txq, bf);
 	} else if (ath_tx_should_swq_frame(sc, ATH_NODE(ni), m0,
 	    &queue_to_head)) {
 		ath_tx_swq(sc, ni, txq, queue_to_head, bf);
 	} else {
 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 		ath_tx_xmit_normal(sc, txq, bf);
 	}
 #else
 	/*
 	 * For now, since there's no software queue,
 	 * direct-dispatch to the hardware.
 	 */
 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 	/*
 	 * Update the current leak count if
 	 * we're leaking frames; and set the
 	 * MORE flag as appropriate.
 	 */
 	ath_tx_leak_count_update(sc, tid, bf);
 	ath_tx_xmit_normal(sc, txq, bf);
 #endif
 done:
 	return 0;
 }
 
 /*
  * Set up and dispatch a frame using the transmit parameters supplied
  * by the caller in 'params' (rates, try counts, power, flags).
  *
  * The TX lock must be held; this is asserted on entry.
  *
  * Returns 0 once the frame has been handed to either the software
  * queue or the hardware queue.  Returns EIO if ath_tx_tag_crypto()
  * fails, in which case m0 is released here via ath_freetx().  If
  * ath_tx_dmasetup() fails, its non-zero error is returned as-is.
  */
 static int
 ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni,
 	struct ath_buf *bf, struct mbuf *m0,
 	const struct ieee80211_bpf_params *params)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	struct ieee80211vap *vap = ni->ni_vap;
 	int error, ismcast, ismrr;
 	int keyix, hdrlen, pktlen, try0, txantenna;
 	u_int8_t rix, txrate;
 	struct ieee80211_frame *wh;
 	u_int flags;
 	HAL_PKT_TYPE atype;
 	const HAL_RATE_TABLE *rt;
 	struct ath_desc *ds;
 	u_int pri;
 	int o_tid = -1;
 	int do_override;
 	uint8_t type, subtype;
 	int queue_to_head;
 	struct ath_node *an = ATH_NODE(ni);
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	wh = mtod(m0, struct ieee80211_frame *);
 	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
 	hdrlen = ieee80211_anyhdrsize(wh);
 	/*
 	 * Packet length must not include any
 	 * pad bytes; deduct them here.
 	 */
 	/* XXX honor IEEE80211_BPF_DATAPAD */
 	pktlen = m0->m_pkthdr.len - (hdrlen & 3) + IEEE80211_CRC_LEN;
 
 	/*
 	 * NB: type and subtype are computed here but are not referenced
 	 * again anywhere in this function.
 	 */
 	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
 	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
 
 	ATH_KTR(sc, ATH_KTR_TX, 2,
 	     "ath_tx_raw_start: ni=%p, bf=%p, raw", ni, bf);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: ismcast=%d\n",
 	    __func__, ismcast);
 
 	/* The low two bits of ibp_pri carry the requested priority */
 	pri = params->ibp_pri & 3;
 	/* Override pri if the frame isn't a QoS one */
 	if (! IEEE80211_QOS_HAS_SEQ(wh))
 		pri = ath_tx_getac(sc, m0);
 
 	/* XXX If it's an ADDBA, override the correct queue */
 	do_override = ath_tx_action_frame_override_queue(sc, ni, m0, &o_tid);
 
 	/* Map ADDBA to the correct priority */
 	if (do_override) {
 #if 0
 		DPRINTF(sc, ATH_DEBUG_XMIT, 
 		    "%s: overriding tid %d pri %d -> %d\n",
 		    __func__, o_tid, pri, TID_TO_WME_AC(o_tid));
 #endif
 		pri = TID_TO_WME_AC(o_tid);
 	}
 
 	/* Handle encryption twiddling if needed */
 	if (! ath_tx_tag_crypto(sc, ni,
 	    m0, params->ibp_flags & IEEE80211_BPF_CRYPTO, 0,
 	    &hdrlen, &pktlen, &keyix)) {
 		/* The mbuf is released here; the caller must not free it */
 		ath_freetx(m0);
 		return EIO;
 	}
 	/* packet header may have moved, reset our local pointer */
 	wh = mtod(m0, struct ieee80211_frame *);
 
 	/* Do the generic frame setup */
 	/* XXX should just bzero the bf_state? */
 	bf->bf_state.bfs_dobaw = 0;
 
 	error = ath_tx_dmasetup(sc, bf, m0);
 	if (error != 0)
 		return error;
 	m0 = bf->bf_m;				/* NB: may have changed */
 	wh = mtod(m0, struct ieee80211_frame *);
 	KASSERT((ni != NULL), ("%s: ni=NULL!", __func__));
 	bf->bf_node = ni;			/* NB: held reference */
 
 	/* Always enable CLRDMASK for raw frames for now.. */
 	flags = HAL_TXDESC_CLRDMASK;		/* XXX needed for crypto errs */
 	flags |= HAL_TXDESC_INTREQ;		/* force interrupt */
 	/* RTS takes precedence over CTS if the caller asked for both */
 	if (params->ibp_flags & IEEE80211_BPF_RTS)
 		flags |= HAL_TXDESC_RTSENA;
 	else if (params->ibp_flags & IEEE80211_BPF_CTS) {
 		/* XXX assume 11g/11n protection? */
 		bf->bf_state.bfs_doprot = 1;
 		flags |= HAL_TXDESC_CTSENA;
 	}
 	/* XXX leave ismcast to injector? */
 	if ((params->ibp_flags & IEEE80211_BPF_NOACK) || ismcast)
 		flags |= HAL_TXDESC_NOACK;
 
 	rt = sc->sc_currates;
 	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
 
 	/* Fetch first rate information */
 	rix = ath_tx_findrix(sc, params->ibp_rate0);
 	try0 = params->ibp_try0;
 
 	/*
 	 * Override EAPOL rate as appropriate.
 	 */
 	if (m0->m_flags & M_EAPOL) {
 		/* XXX? maybe always use long preamble? */
 		rix = an->an_mgmtrix;
 		try0 = ATH_TXMAXTRY;	/* XXX?too many? */
 	}
 
 	txrate = rt->info[rix].rateCode;
 	if (params->ibp_flags & IEEE80211_BPF_SHORTPRE)
 		txrate |= rt->info[rix].shortPreamble;
 	sc->sc_txrix = rix;
 	/* A non-zero try count for the second rate enables multi-rate retry */
 	ismrr = (params->ibp_try1 != 0);
 	/*
 	 * The bits of ibp_pri above the low two select the antenna;
 	 * zero falls back to the softc's configured antenna.
 	 */
 	txantenna = params->ibp_pri >> 2;
 	if (txantenna == 0)			/* XXX? */
 		txantenna = sc->sc_txantenna;
 
 	/*
 	 * Since ctsrate is fixed, store it away for later
 	 * use when the descriptor fields are being set.
 	 */
 	if (flags & (HAL_TXDESC_RTSENA|HAL_TXDESC_CTSENA))
 		bf->bf_state.bfs_ctsrate0 = params->ibp_ctsrate;
 
 	/*
 	 * NB: we mark all packets as type PSPOLL so the h/w won't
 	 * set the sequence number, duration, etc.
 	 */
 	atype = HAL_PKT_TYPE_PSPOLL;
 
 	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT))
 		ieee80211_dump_pkt(ic, mtod(m0, caddr_t), m0->m_len,
 		    sc->sc_hwmap[rix].ieeerate, -1);
 
 	/* Fill in the radiotap TX header if a listener is attached */
 	if (ieee80211_radiotap_active_vap(vap)) {
 		u_int64_t tsf = ath_hal_gettsf64(ah);
 
 		sc->sc_tx_th.wt_tsf = htole64(tsf);
 		sc->sc_tx_th.wt_flags = sc->sc_hwmap[rix].txflags;
 		if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED)
 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
 		if (m0->m_flags & M_FRAG)
 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_FRAG;
 		sc->sc_tx_th.wt_rate = sc->sc_hwmap[rix].ieeerate;
 		sc->sc_tx_th.wt_txpower = MIN(params->ibp_power,
 		    ieee80211_get_node_txpower(ni));
 		/*
 		 * NB: this reports sc->sc_txantenna, not the txantenna
 		 * value computed above.
 		 */
 		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;
 
 		ieee80211_radiotap_tx(vap, m0);
 	}
 
 	/*
 	 * Formulate first tx descriptor with tx controls.
 	 */
 	/* NB: ds is assigned here but not referenced again in this function */
 	ds = bf->bf_desc;
 	/* XXX check return value? */
 
 	/* Store the decided rate index values away */
 	bf->bf_state.bfs_pktlen = pktlen;
 	bf->bf_state.bfs_hdrlen = hdrlen;
 	bf->bf_state.bfs_atype = atype;
 	bf->bf_state.bfs_txpower = MIN(params->ibp_power,
 	    ieee80211_get_node_txpower(ni));
 	bf->bf_state.bfs_txrate0 = txrate;
 	bf->bf_state.bfs_try0 = try0;
 	bf->bf_state.bfs_keyix = keyix;
 	bf->bf_state.bfs_txantenna = txantenna;
 	bf->bf_state.bfs_txflags = flags;
 	bf->bf_state.bfs_shpream =
 	    !! (params->ibp_flags & IEEE80211_BPF_SHORTPRE);
 
 	/* Set local packet state, used to queue packets to hardware */
 	bf->bf_state.bfs_tid = WME_AC_TO_TID(pri);
 	bf->bf_state.bfs_tx_queue = sc->sc_ac2q[pri]->axq_qnum;
 	bf->bf_state.bfs_pri = pri;
 
 	/* XXX this should be done in ath_tx_setrate() */
 	bf->bf_state.bfs_ctsrate = 0;
 	bf->bf_state.bfs_ctsduration = 0;
 	bf->bf_state.bfs_ismrr = ismrr;
 
 	/* Blank the legacy rate array */
 	bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
 
 	bf->bf_state.bfs_rc[0].rix = rix;
 	bf->bf_state.bfs_rc[0].tries = try0;
 	bf->bf_state.bfs_rc[0].ratecode = txrate;
 
 	/* Fill in the remaining three rate series from the caller's params */
 	if (ismrr) {
 		/* NB: this declaration shadows the function-scope rix */
 		int rix;
 
 		rix = ath_tx_findrix(sc, params->ibp_rate1);
 		bf->bf_state.bfs_rc[1].rix = rix;
 		bf->bf_state.bfs_rc[1].tries = params->ibp_try1;
 
 		rix = ath_tx_findrix(sc, params->ibp_rate2);
 		bf->bf_state.bfs_rc[2].rix = rix;
 		bf->bf_state.bfs_rc[2].tries = params->ibp_try2;
 
 		rix = ath_tx_findrix(sc, params->ibp_rate3);
 		bf->bf_state.bfs_rc[3].rix = rix;
 		bf->bf_state.bfs_rc[3].tries = params->ibp_try3;
 	}
 	/*
 	 * All the required rate control decisions have been made;
 	 * fill in the rc flags.
 	 */
 	ath_tx_rate_fill_rcflags(sc, bf);
 
 	/* NB: no buffered multicast in power save support */
 
 	/*
 	 * If we're overiding the ADDBA destination, dump directly
 	 * into the hardware queue, right after any pending
 	 * frames to that node are.
 	 */
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: dooverride=%d\n",
 	    __func__, do_override);
 
 #if 1
 	/*
 	 * Put addba frames in the right place in the right TID/HWQ.
 	 */
 	if (do_override) {
 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 		/*
 		 * XXX if it's addba frames, should we be leaking
 		 * them out via the frame leak method?
 		 * XXX for now let's not risk it; but we may wish
 		 * to investigate this later.
 		 */
 		ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
 	} else if (ath_tx_should_swq_frame(sc, ATH_NODE(ni), m0,
 	    &queue_to_head)) {
 		/* Queue to software queue */
 		ath_tx_swq(sc, ni, sc->sc_ac2q[pri], queue_to_head, bf);
 	} else {
 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 		ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
 	}
 #else
 	/*
 	 * NB: this branch is compiled out by the "#if 1" above.  It
 	 * references 'tid', which is not declared in this function.
 	 */
 	/* Direct-dispatch to the hardware */
 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 	/*
 	 * Update the current leak count if
 	 * we're leaking frames; and set the
 	 * MORE flag as appropriate.
 	 */
 	ath_tx_leak_count_update(sc, tid, bf);
 	ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
 #endif
 	return 0;
 }
 
 /*
  * Send a raw frame.
  *
  * This can be called by net80211.
  *
  * When 'params' is NULL the frame is handed to ath_tx_start(), which
  * interprets the frame contents; otherwise ath_tx_raw_start() is used
  * with the caller-supplied transmit parameters.
  *
  * Returns 0 on success.  On failure a non-zero errno value is returned
  * and the node reference is dropped via ieee80211_free_node().  Every
  * failure path detected directly in this function also frees the mbuf.
  * On the bad2 path (ath_tx_start()/ath_tx_raw_start() failed) the mbuf
  * is not freed here; those routines are presumably responsible for it
  * when they fail -- verify before changing that path.
  */
 int
 ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
 	const struct ieee80211_bpf_params *params)
 {
 	struct ieee80211com *ic = ni->ni_ic;
 	struct ifnet *ifp = ic->ic_ifp;
 	struct ath_softc *sc = ifp->if_softc;
 	struct ath_buf *bf;
 	struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *);
 	int error = 0;
 
 	ATH_PCU_LOCK(sc);
 	if (sc->sc_inreset_cnt > 0) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: sc_inreset_cnt > 0; bailing\n", __func__);
 		error = EIO;
 		ATH_PCU_UNLOCK(sc);
 		/*
 		 * Release the frame here, as every other failure path
 		 * in this function does; jumping straight to badbad
 		 * without doing so would leak the mbuf.
 		 */
 		m_freem(m);
 		goto badbad;
 	}
 	/* Account for this in-flight transmit while the PCU lock is held */
 	sc->sc_txstart_cnt++;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Wake the hardware up already */
 	ATH_LOCK(sc);
 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
 	ATH_UNLOCK(sc);
 
 	ATH_TX_LOCK(sc);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) {
 		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__,
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ?
 			"!running" : "invalid");
 		m_freem(m);
 		error = ENETDOWN;
 		goto bad;
 	}
 
 	/*
 	 * Enforce how deep the multicast queue can grow.
 	 *
 	 * XXX duplicated in ath_tx_start().
 	 */
 	if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
 		if (sc->sc_cabq->axq_depth + sc->sc_cabq->fifo.axq_depth
 		    > sc->sc_txq_mcastq_maxdepth) {
 			sc->sc_stats.ast_tx_mcastq_overflow++;
 			m_freem(m);
 			error = ENOBUFS;
 			goto bad;
 		}
 	}
 
 	/*
 	 * Grab a TX buffer and associated resources.
 	 */
 	bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT);
 	if (bf == NULL) {
 		sc->sc_stats.ast_tx_nobuf++;
 		m_freem(m);
 		error = ENOBUFS;
 		goto bad;
 	}
 	ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: m=%p, params=%p, bf=%p\n",
 	    m, params,  bf);
 
 	if (params == NULL) {
 		/*
 		 * Legacy path; interpret frame contents to decide
 		 * precisely how to send the frame.
 		 */
 		if (ath_tx_start(sc, ni, bf, m)) {
 			error = EIO;		/* XXX */
 			goto bad2;
 		}
 	} else {
 		/*
 		 * Caller supplied explicit parameters to use in
 		 * sending the frame.
 		 */
 		if (ath_tx_raw_start(sc, ni, bf, m, params)) {
 			error = EIO;		/* XXX */
 			goto bad2;
 		}
 	}
 	sc->sc_wd_timer = 5;
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	sc->sc_stats.ast_tx_raw++;
 
 	/*
 	 * Update the TIM - if there's anything queued to the
 	 * software queue and power save is enabled, we should
 	 * set the TIM.
 	 */
 	ath_tx_update_tim(sc, ni, 1);
 
 	ATH_TX_UNLOCK(sc);
 
 	ATH_PCU_LOCK(sc);
 	sc->sc_txstart_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Put the hardware back to sleep if required */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 	return 0;
 
 bad2:
 	/* The TX setup failed after a buffer was obtained; give it back */
 	ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: bad2: m=%p, params=%p, "
 	    "bf=%p",
 	    m,
 	    params,
 	    bf);
 	ATH_TXBUF_LOCK(sc);
 	ath_returnbuf_head(sc, bf);
 	ATH_TXBUF_UNLOCK(sc);
 
 bad:
 	/* Undo the TX lock, in-flight count and power state taken above */
 	ATH_TX_UNLOCK(sc);
 
 	ATH_PCU_LOCK(sc);
 	sc->sc_txstart_cnt--;
 	ATH_PCU_UNLOCK(sc);
 
 	/* Put the hardware back to sleep if required */
 	ATH_LOCK(sc);
 	ath_power_restore_power_state(sc);
 	ATH_UNLOCK(sc);
 
 badbad:
 	/* NB: m is only logged as a pointer value here; it is not dereferenced */
 	ATH_KTR(sc, ATH_KTR_TX, 2, "ath_raw_xmit: bad0: m=%p, params=%p",
 	    m, params);
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	sc->sc_stats.ast_tx_raw_fail++;
 	ieee80211_free_node(ni);
 
 	return error;
 }
 
 /* Some helper functions */
 
 /*
  * ADDBA (and potentially others) need to be placed in the same
  * hardware queue as the TID/node it's relating to. This is so
  * it goes out after any pending non-aggregate frames to the
  * same node/TID.
  *
  * If this isn't done, the ADDBA can go out before the frames
  * queued in hardware. Even though these frames have a sequence
  * number -earlier- than the ADDBA can be transmitted (but
  * no frames whose sequence numbers are after the ADDBA should
  * be!) they'll arrive after the ADDBA - and the receiving end
  * will simply drop them as being out of the BAW.
  *
  * The frames can't be appended to the TID software queue - it'll
  * never be sent out. So these frames have to be directly
  * dispatched to the hardware, rather than queued in software.
  * So if this function returns true, the TXQ has to be
  * overridden and it has to be directly dispatched.
  *
  * It's a dirty hack, but someone's gotta do it.
  */
 
 /*
  * XXX doesn't belong here!
  */
 static int
 ieee80211_is_action(struct ieee80211_frame *wh)
 {
 	/* Type: Management frame? */
 	if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) !=
 	    IEEE80211_FC0_TYPE_MGT)
 		return 0;
 
 	/* Subtype: Action frame? */
 	if ((wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) !=
 	    IEEE80211_FC0_SUBTYPE_ACTION)
 		return 0;
 
 	return 1;
 }
 
 #define	MS(_v, _f)	(((_v) & _f) >> _f##_S)
 /*
  * Decide whether a frame is an ADDBA request and, if so, report the
  * TID named in its block-ack parameter set.
  *
  * Returns 1 and stores the TID in *tid for an ADDBA request action
  * frame; returns 0 and leaves *tid untouched for anything else.
  *
  * Yes, this likely should be done in the net80211 layer.
  */
 static int
 ath_tx_action_frame_override_queue(struct ath_softc *sc,
     struct ieee80211_node *ni,
     struct mbuf *m0, int *tid)
 {
 	struct ieee80211_frame *wh;
 	struct ieee80211_action_ba_addbarequest *req;
 	uint8_t *payload;
 	uint16_t paramset;
 
 	wh = mtod(m0, struct ieee80211_frame *);
 
 	/* Only action frames are of interest */
 	if (! ieee80211_is_action(wh))
 		return 0;
 
 	/*
 	 * NB: the frame length is not validated here
 	 * (XXX not needed for frames we send?).
 	 */
 
 	/* The action payload follows the 802.11 header directly */
 	payload = (u_int8_t *)&wh[1];
 	req = (struct ieee80211_action_ba_addbarequest *) payload;
 
 	/* Anything other than a block-ack ADDBA request is left alone */
 	if (req->rq_header.ia_category != IEEE80211_ACTION_CAT_BA ||
 	    req->rq_header.ia_action != IEEE80211_ACTION_BA_ADDBA_REQUEST)
 		return 0;
 
 	/* Pull the TID out of the (little-endian) parameter set */
 	paramset = le16toh(req->rq_baparamset);
 	*tid = (int) MS(paramset, IEEE80211_BAPS_TID);
 
 	return 1;
 }
 #undef	MS
 
 /* Per-node software queue operations */
 
 /*
  * Add the current packet to the given BAW.
  * It is assumed that the current packet
  *
  * + fits inside the BAW;
  * + already has had a sequence number allocated.
  *
  * Since the BAW status may be modified by both the ath task and
  * the net80211/ifnet contexts, the TID must be locked.
  */
 void
 ath_tx_addto_baw(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, struct ath_buf *bf)
 {
 	int index, cindex;
 	struct ieee80211_tx_ampdu *tap;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	if (bf->bf_state.bfs_isretried)
 		return;
 
 	tap = ath_tx_get_tx_tid(an, tid->tid);
 
 	if (! bf->bf_state.bfs_dobaw) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: dobaw=0, seqno=%d, window %d:%d\n",
 		    __func__, SEQNO(bf->bf_state.bfs_seqno),
 		    tap->txa_start, tap->txa_wnd);
 	}
 
 	if (bf->bf_state.bfs_addedbaw)
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: re-added? tid=%d, seqno %d; window %d:%d; "
 		    "baw head=%d tail=%d\n",
 		    __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
 		    tap->txa_start, tap->txa_wnd, tid->baw_head,
 		    tid->baw_tail);
 
 	/*
 	 * Verify that the given sequence number is not outside of the
 	 * BAW.  Complain loudly if that's the case.
 	 */
 	if (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
 	    SEQNO(bf->bf_state.bfs_seqno))) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: bf=%p: outside of BAW?? tid=%d, seqno %d; window %d:%d; "
 		    "baw head=%d tail=%d\n",
 		    __func__, bf, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
 		    tap->txa_start, tap->txa_wnd, tid->baw_head,
 		    tid->baw_tail);
 	}
 
 	/*
 	 * ni->ni_txseqs[] is the currently allocated seqno.
 	 * the txa state contains the current baw start.
 	 */
 	index  = ATH_BA_INDEX(tap->txa_start, SEQNO(bf->bf_state.bfs_seqno));
 	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 	    "%s: tid=%d, seqno %d; window %d:%d; index=%d cindex=%d "
 	    "baw head=%d tail=%d\n",
 	    __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
 	    tap->txa_start, tap->txa_wnd, index, cindex, tid->baw_head,
 	    tid->baw_tail);
 
 
 #if 0
 	assert(tid->tx_buf[cindex] == NULL);
 #endif
 	if (tid->tx_buf[cindex] != NULL) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: ba packet dup (index=%d, cindex=%d, "
 		    "head=%d, tail=%d)\n",
 		    __func__, index, cindex, tid->baw_head, tid->baw_tail);
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: BA bf: %p; seqno=%d ; new bf: %p; seqno=%d\n",
 		    __func__,
 		    tid->tx_buf[cindex],
 		    SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno),
 		    bf,
 		    SEQNO(bf->bf_state.bfs_seqno)
 		);
 	}
 	tid->tx_buf[cindex] = bf;
 
 	if (index >= ((tid->baw_tail - tid->baw_head) &
 	    (ATH_TID_MAX_BUFS - 1))) {
 		tid->baw_tail = cindex;
 		INCR(tid->baw_tail, ATH_TID_MAX_BUFS);
 	}
 }
 
 /*
  * Flip the BAW buffer entry over from the existing one to the new one.
  *
  * When software retransmitting a (sub-)frame, it is entirely possible that
  * the frame ath_buf is marked as BUSY and can't be immediately reused.
  * In that instance the buffer is cloned and the new buffer is used for
  * retransmit. We thus need to update the ath_buf slot in the BAW buf
  * tracking array to maintain consistency.
  *
  * The slot is located from old_bf's sequence number.  Mismatches
  * between the two buffers, or between old_bf and the slot contents,
  * are only logged; the slot is overwritten with new_bf regardless.
  *
  * The TX lock must be held.
  */
 static void
 ath_tx_switch_baw_buf(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, struct ath_buf *old_bf, struct ath_buf *new_bf)
 {
 	int index, cindex;
 	struct ieee80211_tx_ampdu *tap;
 	int seqno = SEQNO(old_bf->bf_state.bfs_seqno);
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	tap = ath_tx_get_tx_tid(an, tid->tid);
 	/* Offset from the window start, then the circular array slot */
 	index  = ATH_BA_INDEX(tap->txa_start, seqno);
 	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
 
 	/*
 	 * Just warn for now; if it happens then we should find out
 	 * about it. It's highly likely the aggregation session will
 	 * soon hang.
 	 */
 	if (old_bf->bf_state.bfs_seqno != new_bf->bf_state.bfs_seqno) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: retransmitted buffer"
 		    " has mismatching seqno's, BA session may hang.\n",
 		    __func__);
 		/* Report via SEQNO(), as the other BAW diagnostics do */
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: old seqno=%d, new_seqno=%d\n", __func__,
 		    SEQNO(old_bf->bf_state.bfs_seqno),
 		    SEQNO(new_bf->bf_state.bfs_seqno));
 	}
 
 	/* The slot should currently be tracking the buffer being replaced */
 	if (tid->tx_buf[cindex] != old_bf) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: ath_buf pointer incorrect;"
 		    " BA session may hang.\n", __func__);
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: old bf=%p, new bf=%p\n", __func__, old_bf, new_bf);
 	}
 
 	tid->tx_buf[cindex] = new_bf;
 }
 
 /*
  * seq_start - left edge of BAW
  * seq_next - current/next sequence number to allocate
  *
  * Since the BAW status may be modified by both the ath task and
  * the net80211/ifnet contexts, the TID must be locked.
  */
 static void
 ath_tx_update_baw(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, const struct ath_buf *bf)
 {
 	int index, cindex;
 	struct ieee80211_tx_ampdu *tap;
 	int seqno = SEQNO(bf->bf_state.bfs_seqno);
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	tap = ath_tx_get_tx_tid(an, tid->tid);
 	index  = ATH_BA_INDEX(tap->txa_start, seqno);
 	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 	    "%s: tid=%d, baw=%d:%d, seqno=%d, index=%d, cindex=%d, "
 	    "baw head=%d, tail=%d\n",
 	    __func__, tid->tid, tap->txa_start, tap->txa_wnd, seqno, index,
 	    cindex, tid->baw_head, tid->baw_tail);
 
 	/*
 	 * If this occurs then we have a big problem - something else
 	 * has slid tap->txa_start along without updating the BAW
 	 * tracking start/end pointers. Thus the TX BAW state is now
 	 * completely busted.
 	 *
 	 * But for now, since I haven't yet fixed TDMA and buffer cloning,
 	 * it's quite possible that a cloned buffer is making its way
 	 * here and causing it to fire off. Disable TDMA for now.
 	 */
 	if (tid->tx_buf[cindex] != bf) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: comp bf=%p, seq=%d; slot bf=%p, seqno=%d\n",
 		    __func__, bf, SEQNO(bf->bf_state.bfs_seqno),
 		    tid->tx_buf[cindex],
 		    (tid->tx_buf[cindex] != NULL) ?
 		      SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno) : -1);
 	}
 
 	tid->tx_buf[cindex] = NULL;
 
 	while (tid->baw_head != tid->baw_tail &&
 	    !tid->tx_buf[tid->baw_head]) {
 		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
 		INCR(tid->baw_head, ATH_TID_MAX_BUFS);
 	}
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 	    "%s: tid=%d: baw is now %d:%d, baw head=%d\n",
 	    __func__, tid->tid, tap->txa_start, tap->txa_wnd, tid->baw_head);
 }
 
 static void
 ath_tx_leak_count_update(struct ath_softc *sc, struct ath_tid *tid,
     struct ath_buf *bf)
 {
 	struct ieee80211_frame *wh;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	if (tid->an->an_leak_count > 0) {
 		wh = mtod(bf->bf_m, struct ieee80211_frame *);
 
 		/*
 		 * Update MORE based on the software/net80211 queue states.
 		 */
 		if ((tid->an->an_stack_psq > 0)
 		    || (tid->an->an_swq_depth > 0))
 			wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
 		else
 			wh->i_fc[1] &= ~IEEE80211_FC1_MORE_DATA;
 
 		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
 		    "%s: %6D: leak count = %d, psq=%d, swq=%d, MORE=%d\n",
 		    __func__,
 		    tid->an->an_node.ni_macaddr,
 		    ":",
 		    tid->an->an_leak_count,
 		    tid->an->an_stack_psq,
 		    tid->an->an_swq_depth,
 		    !! (wh->i_fc[1] & IEEE80211_FC1_MORE_DATA));
 
 		/*
 		 * Re-sync the underlying buffer.
 		 */
 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
 		    BUS_DMASYNC_PREWRITE);
 
 		tid->an->an_leak_count --;
 	}
 }
 
 /*
  * Return 1 if the TID may transmit or be scheduled, 0 if not.
  *
  * A positive node leak count always allows it; otherwise the TID
  * must not be paused.
  *
  * The TX lock must be held.
  */
 static int
 ath_tx_tid_can_tx_or_sched(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	return (tid->an->an_leak_count > 0 || !tid->paused);
 }
 
 /*
  * Flag the TID as scheduled and append it to its access category's
  * TXQ list of TIDs, so the software scheduler can find it.
  *
  * Nothing happens if the TID may not currently transmit (see
  * ath_tx_tid_can_tx_or_sched()) or if it is already scheduled.
  *
  * The TX lock must be held.
  */
 void
 ath_tx_tid_sched(struct ath_softc *sc, struct ath_tid *tid)
 {
 	struct ath_txq *txq;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	txq = sc->sc_ac2q[tid->ac];
 
 	/*
 	 * Paused TIDs can't be scheduled yet, unless a frame is being
 	 * leaked to this destination for PS-POLL.  TIDs that are
 	 * already scheduled need nothing further.
 	 */
 	if (! ath_tx_tid_can_tx_or_sched(sc, tid) || tid->sched)
 		return;
 
 	tid->sched = 1;
 
 #if 0
 	/*
 	 * If this is a sleeping node we're leaking to, given
 	 * it a higher priority.  This is so bad for QoS it hurts.
 	 */
 	if (tid->an->an_leak_count) {
 		TAILQ_INSERT_HEAD(&txq->axq_tidq, tid, axq_qelem);
 	} else {
 		TAILQ_INSERT_TAIL(&txq->axq_tidq, tid, axq_qelem);
 	}
 #endif
 
 	/*
 	 * The disabled code above can't be used: the TXQ software
 	 * scheduler keeps checking the _head_ TID in the list for
 	 * traffic, so a TID queued at the head that doesn't transmit
 	 * would simply be checked again.
 	 *
 	 * Get the leaking frames support working and reliable first,
 	 * and _then_ optimise it so leaked frames go out ahead of any
 	 * other pending software queued nodes.
 	 */
 	TAILQ_INSERT_TAIL(&txq->axq_tidq, tid, axq_qelem);
 }
 
 /*
  * Clear the TID's scheduled flag and take it off its access
  * category's TXQ list of TIDs.  A TID that is not scheduled is
  * left untouched.
  *
  * The TX lock must be held.
  */
 static void
 ath_tx_tid_unsched(struct ath_softc *sc, struct ath_tid *tid)
 {
 	struct ath_txq *txq;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	txq = sc->sc_ac2q[tid->ac];
 
 	if (tid->sched != 0) {
 		tid->sched = 0;
 		TAILQ_REMOVE(&txq->axq_tidq, tid, axq_qelem);
 	}
 }
 
 /*
  * Assign a sequence number manually to the given frame.
  *
  * This should only be called for A-MPDU TX frames.
  *
  * The number is taken from the node's per-TID counter (which is then
  * advanced), written into the 802.11 header and recorded on the mbuf.
  * It is also returned.  Frames for which IEEE80211_QOS_HAS_SEQ() is
  * false get no number and the function returns -1 (converted to
  * ieee80211_seq).
  *
  * The TX lock must be held when a number is assigned.
  */
 static ieee80211_seq
 ath_tx_tid_seqno_assign(struct ath_softc *sc, struct ieee80211_node *ni,
     struct ath_buf *bf, struct mbuf *m0)
 {
 	struct ieee80211_frame *wh = mtod(m0, struct ieee80211_frame *);
 	int pri = M_WME_GETAC(m0);		/* honor classification */
 	int tid = WME_AC_TO_TID(pri);
 	int seq_tid;
 	ieee80211_seq seqno;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pri=%d, tid=%d, qos has seq=%d\n",
 	    __func__, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
 
 	/* XXX Is it a control frame? Ignore */
 
 	/* Frames that don't need a sequence number are left alone */
 	if (! IEEE80211_QOS_HAS_SEQ(wh))
 		return -1;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * QoS NULL data frames draw from the default TID
 	 * (IEEE80211_NONQOS_TID) instead of the frame's own TID.
 	 *
 	 * The RX path of everything looked at so far doesn't include
 	 * the NULL data frame sequence number in its aggregation state
 	 * updates, so numbering it from the QoS TID would leave a BAW
 	 * hole on the RX side.
 	 *
 	 * XXX no locking for the non-QoS TID? This is a bit of a problem.
 	 */
 	if ((wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) ==
 	    IEEE80211_FC0_SUBTYPE_QOS_NULL)
 		seq_tid = IEEE80211_NONQOS_TID;
 	else
 		seq_tid = tid;
 
 	/* Take the current number, then advance the counter */
 	seqno = ni->ni_txseqs[seq_tid];
 	INCR(ni->ni_txseqs[seq_tid], IEEE80211_SEQ_RANGE);
 
 	*(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
 	M_SEQNO_SET(m0, seqno);
 
 	/* Hand the number back so the caller can use it if needed */
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s:  -> seqno=%d\n", __func__, seqno);
 	return seqno;
 }
 
 /*
  * Try to direct-dispatch one frame of an aggregation session to the
  * hardware as a single (non-aggregate) frame.
  *
  * The frame is instead put back at the head of the TID software
  * queue when the TID can't currently transmit, or when the frame is
  * BAW-tracked and lies outside the window; in the latter case the
  * TID is also scheduled.
  *
  * The TX lock must be held.
  */
 static void
 ath_tx_xmit_aggr(struct ath_softc *sc, struct ath_node *an,
     struct ath_txq *txq, struct ath_buf *bf)
 {
 	struct ath_tid *atid = &an->an_tid[bf->bf_state.bfs_tid];
 	struct ieee80211_tx_ampdu *ampdu;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	ampdu = ath_tx_get_tx_tid(an, atid->tid);
 
 	/* Can't transmit right now: requeue, but don't schedule */
 	if (! ath_tx_tid_can_tx_or_sched(sc, atid)) {
 		ATH_TID_INSERT_HEAD(atid, bf, bf_list);
 		/* XXX don't sched - we're paused! */
 		return;
 	}
 
 	/* BAW-tracked frame outside the window: requeue and schedule */
 	if (bf->bf_state.bfs_dobaw) {
 		if (! BAW_WITHIN(ampdu->txa_start, ampdu->txa_wnd,
 		    SEQNO(bf->bf_state.bfs_seqno))) {
 			ATH_TID_INSERT_HEAD(atid, bf, bf_list);
 			ath_tx_tid_sched(sc, atid);
 			return;
 		}
 	}
 
 	/*
 	 * Temporary check; remove once all the relevant code paths
 	 * have been fixed.
 	 *
 	 * When the head frame of an aggregate fails and is retried on
 	 * its own, it may still carry the aggregate's bfs_aggr and
 	 * bfs_nframes values.  Reset them here or the completion code
 	 * will get upset.
 	 */
 	if (bf->bf_state.bfs_aggr != 0 || bf->bf_state.bfs_nframes > 1) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 		    "%s: bfs_aggr=%d, bfs_nframes=%d\n", __func__,
 		    bf->bf_state.bfs_aggr, bf->bf_state.bfs_nframes);
 		bf->bf_state.bfs_aggr = 0;
 		bf->bf_state.bfs_nframes = 1;
 	}
 
 	/* CLRDMASK is decided right before the frame is queued */
 	ath_tx_update_clrdmask(sc, atid, bf);
 
 	/* Rate, duration, protection and descriptor setup, in order */
 	ath_tx_do_ratelookup(sc, bf);
 	ath_tx_calc_duration(sc, bf);
 	ath_tx_calc_protection(sc, bf);
 	ath_tx_set_rtscts(sc, bf);
 	ath_tx_rate_fill_rcflags(sc, bf);
 	ath_tx_setds(sc, bf);
 
 	/* Statistics */
 	sc->sc_aggr_stats.aggr_low_hwq_single_pkt++;
 
 	/* Keep the per-TID hardware queue depth accurate */
 	atid->hwq_depth++;
 
 	/* Track the frame in the BAW if it takes part in it */
 	if (bf->bf_state.bfs_dobaw) {
 		ath_tx_addto_baw(sc, an, atid, bf);
 		bf->bf_state.bfs_addedbaw = 1;
 	}
 
 	/* Same completion handler whether multi-frame aggregate or not */
 	bf->bf_comp = ath_tx_aggr_comp;
 
 	/*
 	 * If frames are being leaked, update the leak count
 	 * and set the MORE flag as appropriate.
 	 */
 	ath_tx_leak_count_update(sc, atid, bf);
 
 	/* Hand off to hardware */
 	ath_tx_handoff(sc, txq, bf);
 }
 
 /*
  * Attempt to send the packet.
  * If the queue isn't busy, direct-dispatch.
  * If the queue is busy enough, queue the given packet on the
  *  relevant software queue.
  */
 void
 ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
     struct ath_txq *txq, int queue_to_head, struct ath_buf *bf)
 {
 	struct ath_node *an = ATH_NODE(ni);
 	struct ieee80211_frame *wh;
 	struct ath_tid *atid;
 	int pri, tid;
 	struct mbuf *m0 = bf->bf_m;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Fetch the TID - non-QoS frames get assigned to TID 16 */
 	wh = mtod(m0, struct ieee80211_frame *);
 	pri = ath_tx_getac(sc, m0);
 	tid = ath_tx_gettid(sc, m0);
 	atid = &an->an_tid[tid];
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p, pri=%d, tid=%d, qos=%d\n",
 	    __func__, bf, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
 
 	/* Set local packet state, used to queue packets to hardware */
 	/* XXX potentially duplicate info, re-check */
 	bf->bf_state.bfs_tid = tid;
 	bf->bf_state.bfs_tx_queue = txq->axq_qnum;
 	bf->bf_state.bfs_pri = pri;
 
 	/*
 	 * If the hardware queue isn't busy, queue it directly.
 	 * If the hardware queue is busy, queue it.
 	 * If the TID is paused or the traffic it outside BAW, software
 	 * queue it.
 	 *
 	 * If the node is in power-save and we're leaking a frame,
 	 * leak a single frame.
 	 */
 	if (! ath_tx_tid_can_tx_or_sched(sc, atid)) {
 		/* TID is paused, queue */
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: paused\n", __func__);
 		/*
 		 * If the caller requested that it be sent at a high
 		 * priority, queue it at the head of the list.
 		 */
 		if (queue_to_head)
 			ATH_TID_INSERT_HEAD(atid, bf, bf_list);
 		else
 			ATH_TID_INSERT_TAIL(atid, bf, bf_list);
 	} else if (ath_tx_ampdu_pending(sc, an, tid)) {
 		/* AMPDU pending; queue */
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pending\n", __func__);
 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
 		/* XXX sched? */
 	} else if (ath_tx_ampdu_running(sc, an, tid)) {
 		/* AMPDU running, attempt direct dispatch if possible */
 
 		/*
 		 * Always queue the frame to the tail of the list.
 		 */
 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
 
 		/*
 		 * If the hardware queue isn't busy, direct dispatch
 		 * the head frame in the list.  Don't schedule the
 		 * TID - let it build some more frames first?
 		 *
 		 * When running A-MPDU, always just check the hardware
 		 * queue depth against the aggregate frame limit.
 		 * We don't want to burst a large number of single frames
 		 * out to the hardware; we want to aggressively hold back.
 		 *
 		 * Otherwise, schedule the TID.
 		 */
 		/* XXX TXQ locking */
 		if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_aggr) {
 			bf = ATH_TID_FIRST(atid);
 			ATH_TID_REMOVE(atid, bf, bf_list);
 
 			/*
 			 * Ensure it's definitely treated as a non-AMPDU
 			 * frame - this information may have been left
 			 * over from a previous attempt.
 			 */
 			bf->bf_state.bfs_aggr = 0;
 			bf->bf_state.bfs_nframes = 1;
 
 			/* Queue to the hardware */
 			ath_tx_xmit_aggr(sc, an, txq, bf);
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: xmit_aggr\n",
 			    __func__);
 		} else {
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: ampdu; swq'ing\n",
 			    __func__);
 
 			ath_tx_tid_sched(sc, atid);
 		}
 	/*
 	 * If we're not doing A-MPDU, be prepared to direct dispatch
 	 * up to both limits if possible.  This particular corner
 	 * case may end up with packet starvation between aggregate
 	 * traffic and non-aggregate traffic: we wnat to ensure
 	 * that non-aggregate stations get a few frames queued to the
 	 * hardware before the aggregate station(s) get their chance.
 	 *
 	 * So if you only ever see a couple of frames direct dispatched
 	 * to the hardware from a non-AMPDU client, check both here
 	 * and in the software queue dispatcher to ensure that those
 	 * non-AMPDU stations get a fair chance to transmit.
 	 */
 	/* XXX TXQ locking */
 	} else if ((txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_nonaggr) &&
 		    (txq->axq_aggr_depth < sc->sc_hwq_limit_aggr)) {
 		/* AMPDU not running, attempt direct dispatch */
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__);
 		/* See if clrdmask needs to be set */
 		ath_tx_update_clrdmask(sc, atid, bf);
 
 		/*
 		 * Update the current leak count if
 		 * we're leaking frames; and set the
 		 * MORE flag as appropriate.
 		 */
 		ath_tx_leak_count_update(sc, atid, bf);
 
 		/*
 		 * Dispatch the frame.
 		 */
 		ath_tx_xmit_normal(sc, txq, bf);
 	} else {
 		/* Busy; queue */
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: swq'ing\n", __func__);
 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
 		ath_tx_tid_sched(sc, atid);
 	}
 }
 
 /*
  * Set the node's clrdmask flag, but only if none of its TIDs is
  * currently marked as filtered.
  *
  * XXX TODO: go through all the callers and check to see
  * which are being called in the context of looping over all
  * TIDs (eg, if all tids are being paused, resumed, etc.)
  * That'll avoid O(n^2) complexity here.
  *
  * The TX lock must be held.
  */
 static void
 ath_tx_set_clrdmask(struct ath_softc *sc, struct ath_node *an)
 {
 	int tidno = 0;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* A single filtered TID is enough to leave the flag alone */
 	while (tidno < IEEE80211_TID_SIZE) {
 		if (an->an_tid[tidno].isfiltered == 1)
 			return;
 		tidno++;
 	}
 
 	an->clrdmask = 1;
 }
 
 /*
  * Configure the per-TID node state.
  *
  * This likely belongs in if_ath_node.c but I can't think of anywhere
  * else to put it just yet.
  *
  * Each TID is zeroed, its two tail queues are initialised, and it is
  * given its TID number, owning node and access category.  The node's
  * clrdmask flag is set at the end.
  */
 void
 ath_tx_tid_init(struct ath_softc *sc, struct ath_node *an)
 {
 	struct ath_tid *atid;
 	int tidno, slot;
 
 	for (tidno = 0; tidno < IEEE80211_TID_SIZE; tidno++) {
 		atid = &an->an_tid[tidno];
 
 		/* XXX with this bzero(), is the explicit zeroing below needed? */
 		bzero(atid, sizeof(*atid));
 
 		/* Software queue and filtered-frame queue */
 		TAILQ_INIT(&atid->tid_q);
 		TAILQ_INIT(&atid->filtq.tid_q);
 
 		/* Identity */
 		atid->an = an;
 		atid->tid = tidno;
 		if (tidno != IEEE80211_NONQOS_TID) {
 			atid->ac = TID_TO_WME_AC(tidno);
 		} else {
 			atid->ac = ATH_NONQOS_TID_AC;
 		}
 
 		/* Empty BAW tracking state */
 		for (slot = 0; slot < ATH_TID_MAX_BUFS; slot++) {
 			atid->tx_buf[slot] = NULL;
 		}
 		atid->baw_head = atid->baw_tail = 0;
 
 		/* Scheduling and bookkeeping state */
 		atid->paused = 0;
 		atid->sched = 0;
 		atid->hwq_depth = 0;
 		atid->cleanup_inprogress = 0;
 	}
 
 	/* Always start by setting this bit */
 	an->clrdmask = 1;
 }
 
 /*
  * Pause a TID by bumping its pause count.  This stops packets
  * from being transmitted on it.
  *
  * Pauses nest: each call is undone by one call to
  * ath_tx_tid_resume().
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_pause(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	tid->paused += 1;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: [%6D]: tid=%d, paused = %d\n",
 	    __func__, tid->an->an_node.ni_macaddr, ":",
 	    tid->tid, tid->paused);
 }
 
 /*
  * Drop one pause reference from the TID and, once the TID is no
  * longer paused, schedule it if it has anything queued.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_resume(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * Some callers resume a TID that isn't paused.  Until those
 	 * are tracked down, complain and leave the counter alone
 	 * instead of decrementing it.
 	 */
 	if (tid->paused != 0) {
 		tid->paused--;
 	} else {
 		device_printf(sc->sc_dev, "%s: [%6D]: tid=%d, paused=0?\n",
 		    __func__, tid->an->an_node.ni_macaddr, ":", tid->tid);
 	}
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: [%6D]: tid=%d, unpaused = %d\n",
 	    __func__, tid->an->an_node.ni_macaddr, ":",
 	    tid->tid, tid->paused);
 
 	/* Another pause is still outstanding; stay paused. */
 	if (tid->paused != 0)
 		return;
 
 	/*
 	 * Override the clrdmask configuration for the next frame
 	 * from this TID, just to get the ball rolling.
 	 */
 	ath_tx_set_clrdmask(sc, tid->an);
 
 	/* Nothing queued (axq_depth is zero), so nothing to schedule. */
 	if (tid->axq_depth == 0)
 		return;
 
 	/* XXX the TID should never still be filtered at this point */
 	if (tid->isfiltered == 1) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: filtered?!\n",
 		    __func__);
 		return;
 	}
 
 	/* Schedule the TID, then queue the software TX scheduler. */
 	ath_tx_tid_sched(sc, tid);
 	ath_tx_swq_kick(sc);
 }
 
 /*
  * Append a buffer to the TID's filtered frame list, marking it
  * as a retry on the way.
  *
  * The TID is expected to be in the filtered state already; a
  * debug message is logged if it is not.  The caller must hold
  * the TX lock.
  */
 static void
 ath_tx_tid_filt_addbuf(struct ath_softc *sc, struct ath_tid *tid,
     struct ath_buf *bf)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	if (tid->isfiltered == 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: not filtered?!\n",
 		    __func__);
 	}
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: bf=%p\n", __func__, bf);
 
 	/* Flag the frame as a retry, bump its retry count, account for it */
 	ath_tx_set_retry(sc, bf);
 	sc->sc_stats.ast_tx_swfiltered++;
 
 	/* Filtered frames are kept in completion order */
 	ATH_TID_FILT_INSERT_TAIL(tid, bf, bf_list);
 }
 
 /*
  * Handle one completed frame that came back filtered.
  *
  * On the first such frame the TID is switched into the filtered
  * state and paused; the frame is then appended to the TID's
  * filtered frame queue.  The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_filt_comp_buf(struct ath_softc *sc, struct ath_tid *tid,
     struct ath_buf *bf)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Enter the filtered state; pause only once per transition */
 	if (tid->isfiltered == 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 		    "%s: tid=%d; filter transition\n",
 		    __func__, tid->tid);
 		tid->isfiltered = 1;
 		ath_tx_tid_pause(sc, tid);
 	}
 
 	ath_tx_tid_filt_addbuf(sc, tid, bf);
 }
 
 /*
  * Finish filtered frame completion handling for a TID.
  *
  * Nothing happens while the TID still has frames in the hardware
  * queue.  Once hwq_depth reaches zero the filtered state is
  * cleared, the filtered frames are moved back to the head of the
  * software queue, and the TID is resumed if it was filtered.
  * Otherwise we will wait for a node PS transition to unfilter.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_filt_comp_complete(struct ath_softc *sc, struct ath_tid *tid)
 {
 	struct ath_buf *fbf;
 	int was_filtered;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Frames are still outstanding in hardware; try again later. */
 	if (tid->hwq_depth != 0)
 		return;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 	    "%s: tid=%d, hwq=0, transition back\n",
 	    __func__, tid->tid);
 
 	/* Remember whether we paused the TID when it became filtered */
 	was_filtered = (tid->isfiltered == 1);
 	if (was_filtered)
 		tid->isfiltered = 0;
 
 	/* XXX ath_tx_tid_resume() also calls ath_tx_set_clrdmask()! */
 	ath_tx_set_clrdmask(sc, tid->an);
 
 	/*
 	 * Pull frames off the tail of the filtered list and push
 	 * them onto the head of the software queue, which keeps
 	 * their relative order.
 	 *
 	 * XXX this is really quite inefficient
 	 */
 	for (;;) {
 		fbf = ATH_TID_FILT_LAST(tid, ath_bufhead_s);
 		if (fbf == NULL)
 			break;
 		ATH_TID_FILT_REMOVE(tid, fbf, bf_list);
 		ATH_TID_INSERT_HEAD(tid, fbf, bf_list);
 	}
 
 	/* And only resume if we had paused before */
 	if (was_filtered)
 		ath_tx_tid_resume(sc, tid);
 }
 
 /*
  * Called when a single (aggregate or otherwise) frame has
  * completed as filtered.
  *
  * Returns 0 when the buffer (or a clone of it) was added to the
  * filtered list.  Returns 1 when that was not possible (retry
  * limit exceeded, or a busy buffer that could not be cloned); the
  * caller should then free the buffer and treat it like a failure
  * (eg by sending a BAR.)
  *
  * Because the buffer may have been replaced by a clone, bf must
  * not be touched by the caller when 0 is returned.
  *
  * The caller must hold the TX lock.
  */
 static int
 ath_tx_tid_filt_comp_single(struct ath_softc *sc, struct ath_tid *tid,
     struct ath_buf *bf)
 {
 	struct ath_buf *nbf;
 	int retval;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	if (bf->bf_state.bfs_retries > SWMAX_RETRIES) {
 		/* Don't allow a filtered frame to live forever. */
 		sc->sc_stats.ast_tx_swretrymax++;
 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 		    "%s: bf=%p, seqno=%d, exceeded retries\n",
 		    __func__, bf, SEQNO(bf->bf_state.bfs_seqno));
 		retval = 1;	/* error */
 	} else {
 		/*
 		 * A busy buffer can't go onto the retry list as
 		 * is; it has to be cloned first.
 		 */
 		nbf = bf;
 		if (bf->bf_flags & ATH_BUF_BUSY) {
 			nbf = ath_tx_retry_clone(sc, tid->an, tid, bf);
 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 			    "%s: busy buffer clone: %p -> %p\n",
 			    __func__, bf, nbf);
 		}
 
 		if (nbf != NULL) {
 			ath_tx_tid_filt_comp_buf(sc, tid, nbf);
 			retval = 0;	/* ok */
 		} else {
 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 			    "%s: busy buffer couldn't be cloned (%p)!\n",
 			    __func__, bf);
 			retval = 1;	/* error */
 		}
 	}
 
 	/* Success or failure, see whether the TID can be unfiltered */
 	ath_tx_tid_filt_comp_complete(sc, tid);
 
 	return (retval);
 }
 
 /*
  * Handle a completed aggregate whose frames came back filtered.
  *
  * Walks the bf_next chain starting at bf_first and unlinks every
  * subframe.  Subframes that can be retried (cloned first when the
  * buffer is busy) are moved to the TID's filtered frame queue;
  * subframes that exceeded SWMAX_RETRIES, or that could not be
  * cloned, are appended to bf_q for the caller to free.
  *
  * Since subframes may be replaced by clones, the caller must only
  * touch the buffers handed back through bf_q afterwards.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_filt_comp_aggr(struct ath_softc *sc, struct ath_tid *tid,
     struct ath_buf *bf_first, ath_bufhead *bf_q)
 {
 	struct ath_buf *bf, *bf_next, *nbf;
 	int seqno;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	bf = bf_first;
 	while (bf) {
 		bf_next = bf->bf_next;
 		bf->bf_next = NULL;	/* Remove it from the aggr list */
 
 		/*
 		 * Don't allow a filtered frame to live forever.
 		 */
 		if (bf->bf_state.bfs_retries > SWMAX_RETRIES) {
 			sc->sc_stats.ast_tx_swretrymax++;
 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 			    "%s: tid=%d, bf=%p, seqno=%d, exceeded retries\n",
 			    __func__,
 			    tid->tid,
 			    bf,
 			    SEQNO(bf->bf_state.bfs_seqno));
 			TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
 			goto next;
 		}
 
 		if (bf->bf_flags & ATH_BUF_BUSY) {
 			/*
 			 * A successful clone releases the original
 			 * buffer (ath_tx_retry_clone() calls
 			 * ath_freebuf() on it), so fetch the sequence
 			 * number for the debug message before cloning
 			 * rather than reading it from 'bf' afterwards.
 			 */
 			seqno = SEQNO(bf->bf_state.bfs_seqno);
 			nbf = ath_tx_retry_clone(sc, tid->an, tid, bf);
 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 			    "%s: tid=%d, busy buffer cloned: %p -> %p, seqno=%d\n",
 			    __func__, tid->tid, bf, nbf, seqno);
 		} else {
 			nbf = bf;
 		}
 
 		/*
 		 * If the buffer couldn't be cloned, add it to bf_q;
 		 * the caller will free the buffer(s) as required.
 		 * A failed clone leaves 'bf' itself intact, so it is
 		 * still safe to reference here.
 		 */
 		if (nbf == NULL) {
 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
 			    "%s: tid=%d, buffer couldn't be cloned! (%p) seqno=%d\n",
 			    __func__, tid->tid, bf, SEQNO(bf->bf_state.bfs_seqno));
 			TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
 		} else {
 			ath_tx_tid_filt_comp_buf(sc, tid, nbf);
 		}
 next:
 		bf = bf_next;
 	}
 
 	/* All subframes handled; see whether the TID can be unfiltered */
 	ath_tx_tid_filt_comp_complete(sc, tid);
 }
 
 /*
  * Suspend the TID because a BAR needs to be transmitted.
  *
  * The first call marks the TID as waiting for a BAR and pauses
  * it; further calls while bar_wait is already set do nothing, so
  * the TID is paused only once no matter how many frames fail.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_bar_suspend(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 	    "%s: tid=%d, bar_wait=%d, bar_tx=%d, called\n",
 	    __func__, tid->tid, tid->bar_wait, tid->bar_tx);
 
 	/* We shouldn't be called when bar_tx is 1; just log it */
 	if (tid->bar_tx != 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 		    "%s: bar_tx is 1?!\n", __func__);
 	}
 
 	/* Only the first request flags the wait and takes the pause */
 	if (tid->bar_wait == 0) {
 		tid->bar_wait = 1;
 		ath_tx_tid_pause(sc, tid);
 	}
 }
 
 /*
  * BAR handling is over - whether it succeeded or failed - so
  * clear the BAR state and resume the TID.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_bar_unsuspend(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 	    "%s: %6D: TID=%d, called\n",
 	    __func__, tid->an->an_node.ni_macaddr, ":", tid->tid);
 
 	/* Both flags are expected to be set here; log it otherwise */
 	if (!(tid->bar_tx != 0 && tid->bar_wait != 0)) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 		    "%s: %6D: TID=%d, bar_tx=%d, bar_wait=%d: ?\n",
 		    __func__, tid->an->an_node.ni_macaddr, ":",
 		    tid->tid, tid->bar_tx, tid->bar_wait);
 	}
 
 	tid->bar_wait = 0;
 	tid->bar_tx = 0;
 
 	/* Undo the pause taken in ath_tx_tid_bar_suspend() */
 	ath_tx_tid_resume(sc, tid);
 }
 
 /*
  * Return 1 if the TID is ready to have a BAR frame transmitted,
  * ie it is waiting for a BAR and has nothing left in the hardware
  * queue; return 0 otherwise.
  *
  * The caller must hold the TX lock.
  */
 static int
 ath_tx_tid_bar_tx_ready(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	if (tid->bar_wait != 0 && tid->hwq_depth <= 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 		    "%s: %6D: TID=%d, bar ready\n",
 		    __func__, tid->an->an_node.ni_macaddr, ":", tid->tid);
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Check whether the current TID is ready to have a BAR
  * TXed and if so, do the TX.
  *
  * Since the TID/TXQ lock can't be held during a call to
  * ieee80211_send_bar(), we have to do the dirty thing of unlocking it,
  * sending the BAR and locking it again.
  *
  * Eventually, the code to send the BAR should be broken out
  * from this routine so the lock doesn't have to be reacquired
  * just to be immediately dropped by the caller.
  */
 static void
 ath_tx_tid_bar_tx(struct ath_softc *sc, struct ath_tid *tid)
 {
 	struct ieee80211_tx_ampdu *tap;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 	    "%s: %6D: TID=%d, called\n",
 	    __func__,
 	    tid->an->an_node.ni_macaddr,
 	    ":",
 	    tid->tid);
 
 	tap = ath_tx_get_tx_tid(tid->an, tid->tid);
 
 	/*
 	 * This is an error condition!
 	 */
 	if (tid->bar_wait == 0 || tid->bar_tx == 1) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 		    "%s: %6D: TID=%d, bar_tx=%d, bar_wait=%d: ?\n",
 		    __func__, tid->an->an_node.ni_macaddr, ":",
 		    tid->tid, tid->bar_tx, tid->bar_wait);
 		return;
 	}
 
 	/* Don't do anything if we still have pending frames */
 	if (tid->hwq_depth > 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 		    "%s: %6D: TID=%d, hwq_depth=%d, waiting\n",
 		    __func__,
 		    tid->an->an_node.ni_macaddr,
 		    ":",
 		    tid->tid,
 		    tid->hwq_depth);
 		return;
 	}
 
 	/* We're now about to TX */
 	tid->bar_tx = 1;
 
 	/*
 	 * Override the clrdmask configuration for the next frame,
 	 * just to get the ball rolling.
 	 */
 	ath_tx_set_clrdmask(sc, tid->an);
 
 	/*
 	 * Calculate new BAW left edge, now that all frames have either
 	 * succeeded or failed.
 	 *
 	 * XXX verify this is _actually_ the valid value to begin at!
 	 */
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 	    "%s: %6D: TID=%d, new BAW left edge=%d\n",
 	    __func__,
 	    tid->an->an_node.ni_macaddr,
 	    ":",
 	    tid->tid,
 	    tap->txa_start);
 
 	/* Try sending the BAR frame */
 	/* We can't hold the lock here! */
 
 	ATH_TX_UNLOCK(sc);
 	if (ieee80211_send_bar(&tid->an->an_node, tap, tap->txa_start) == 0) {
 		/* Success? Now we wait for notification that it's done */
 		ATH_TX_LOCK(sc);
 		return;
 	}
 
 	/* Failure? For now, warn loudly and continue */
 	ATH_TX_LOCK(sc);
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 	    "%s: %6D: TID=%d, failed to TX BAR, continue!\n",
 	    __func__, tid->an->an_node.ni_macaddr, ":",
 	    tid->tid);
 	ath_tx_tid_bar_unsuspend(sc, tid);
 }
 
 /*
  * Dispose of one frame taken off a TID queue while draining.
  *
  * The BAW is updated for the frame where needed, the frame is
  * unlinked from any aggregate chain, and it is appended to bf_cq
  * so that the caller can free it.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_drain_pkt(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, ath_bufhead *bf_cq, struct ath_buf *bf)
 {
 	int slide_baw;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * The BAW only needs updating when the TID is running
 	 * AMPDU, the frame is BAW tracked, and the frame has been
 	 * transmitted at least once - only then was it in the BAW
 	 * to begin with.  A tracked frame that never made it into
 	 * the BAW is not treated as an error.
 	 */
 	slide_baw = ath_tx_ampdu_running(sc, an, tid->tid) &&
 	    bf->bf_state.bfs_dobaw &&
 	    bf->bf_state.bfs_retries > 0;
 	if (slide_baw) {
 		ath_tx_update_baw(sc, an, tid, bf);
 		bf->bf_state.bfs_dobaw = 0;
 	}
 
 	/* Strip it out of an aggregate list if it was in one */
 	bf->bf_next = NULL;
 
 	/* Hand it to the caller, who frees everything on bf_cq */
 	TAILQ_INSERT_TAIL(bf_cq, bf, bf_list);
 }
 
 /*
  * Debugging aid for the drain path: log the state of a buffer
  * together with the state of its TID and of the hardware queue
  * the TID maps to, and optionally dump the frame itself.
  *
  * pfx is a short tag included in every message to identify which
  * queue the buffer came from.
  */
 static void
 ath_tx_tid_drain_print(struct ath_softc *sc, struct ath_node *an,
     const char *pfx, struct ath_tid *tid, struct ath_buf *bf)
 {
 	struct ieee80211_node *ni = &an->an_node;
 	struct ieee80211_tx_ampdu *tap = ath_tx_get_tx_tid(an, tid->tid);
 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
 
 	/* Buffer state */
 	DPRINTF(sc, ATH_DEBUG_SW_TX | ATH_DEBUG_RESET,
 	    "%s: %s: %6D: bf=%p: addbaw=%d, dobaw=%d, "
 	    "seqno=%d, retry=%d\n",
 	    __func__, pfx, ni->ni_macaddr, ":", bf,
 	    bf->bf_state.bfs_addedbaw, bf->bf_state.bfs_dobaw,
 	    SEQNO(bf->bf_state.bfs_seqno), bf->bf_state.bfs_retries);
 
 	/* Hardware queue state */
 	DPRINTF(sc, ATH_DEBUG_SW_TX | ATH_DEBUG_RESET,
 	    "%s: %s: %6D: bf=%p: txq[%d] axq_depth=%d, axq_aggr_depth=%d\n",
 	    __func__, pfx, ni->ni_macaddr, ":", bf,
 	    txq->axq_qnum, txq->axq_depth, txq->axq_aggr_depth);
 
 	/* TID queue depths, BAR and filter state */
 	DPRINTF(sc, ATH_DEBUG_SW_TX | ATH_DEBUG_RESET,
 	    "%s: %s: %6D: bf=%p: tid txq_depth=%d hwq_depth=%d, bar_wait=%d, "
 	      "isfiltered=%d\n",
 	    __func__, pfx, ni->ni_macaddr, ":", bf,
 	    tid->axq_depth, tid->hwq_depth,
 	    tid->bar_wait, tid->isfiltered);
 
 	/* TID scheduling and BAW state; txa_start is -1 without a tap */
 	DPRINTF(sc, ATH_DEBUG_SW_TX | ATH_DEBUG_RESET,
 	    "%s: %s: %6D: tid %d: "
 	    "sched=%d, paused=%d, "
 	    "incomp=%d, baw_head=%d, "
 	    "baw_tail=%d txa_start=%d, ni_txseqs=%d\n",
 	    __func__, pfx, ni->ni_macaddr, ":", tid->tid,
 	    tid->sched, tid->paused,
 	    tid->incomp, tid->baw_head,
 	    tid->baw_tail, tap == NULL ? -1 : tap->txa_start,
 	    ni->ni_txseqs[tid->tid]);
 
 	/* XXX Dump the frame, see what it is? */
 	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT)) {
 		ieee80211_dump_pkt(ni->ni_ic,
 		    mtod(bf->bf_m, const uint8_t *),
 		    bf->bf_m->m_len, 0, -1);
 	}
 }
 
 /*
  * Free any packets currently pending in the software TX queue
  * and in the filtered frame queue of a TID.
  *
  * The frames are not freed here; they are collected on bf_cq for
  * the caller to complete.  Each frame is logged via
  * ath_tx_tid_drain_print() before it is removed.
  *
  * This will be called when a node is being deleted.  It can also
  * be called on an active node during an interface reset or state
  * transition.
  *
  * (From Linux/reference):
  *
  * TODO: For frame(s) that are in the retry state, we will reuse
  * the sequence number(s) without setting the retry bit. The
  * alternative is to give up on these and BAR the receiver's
  * window forward.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_drain(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, ath_bufhead *bf_cq)
 {
 	struct ieee80211_node *ni = &an->an_node;
 	struct ieee80211_tx_ampdu *tap;
 	struct ath_buf *bf;
 
 	tap = ath_tx_get_tx_tid(an, tid->tid);
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Empty the software queue first ... */
 	while ((bf = ATH_TID_FIRST(tid)) != NULL) {
 		ath_tx_tid_drain_print(sc, an, "norm", tid, bf);
 		ATH_TID_REMOVE(tid, bf, bf_list);
 		ath_tx_tid_drain_pkt(sc, an, tid, bf_cq, bf);
 	}
 
 	/* ... and then the filtered frame queue */
 	while ((bf = ATH_TID_FILT_FIRST(tid)) != NULL) {
 		ath_tx_tid_drain_print(sc, an, "filt", tid, bf);
 		ATH_TID_FILT_REMOVE(tid, bf, bf_list);
 		ath_tx_tid_drain_pkt(sc, an, tid, bf_cq, bf);
 	}
 
 	/*
 	 * Override the clrdmask configuration for the next frame
 	 * in case there is some future transmission, just to get
 	 * the ball rolling.
 	 *
 	 * This won't hurt things if the TID is about to be freed.
 	 */
 	ath_tx_set_clrdmask(sc, tid->an);
 
 	/*
 	 * With the queues empty, bring the sequence number and the
 	 * BAW back in line.  Sequence numbers are assigned before
 	 * frames are first transmitted, so some of the frames just
 	 * dropped may never have been sent at all.
 	 *
 	 * The cleaner solution is to allocate the sequence number
 	 * when the packet is first transmitted; the "retries" check
 	 * in ath_tx_tid_drain_pkt() would then be enough to keep
 	 * the BAW/seqno up to date.
 	 *
 	 * This isn't done for non-QoS TIDs, which have no tap.
 	 */
 	if (tap != NULL) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 		    "%s: %6D: node %p: TID %d: sliding BAW left edge to %d\n",
 		    __func__, ni->ni_macaddr, ":", an, tid->tid,
 		    tap->txa_start);
 		ni->ni_txseqs[tid->tid] = tap->txa_start;
 		tid->baw_tail = tid->baw_head;
 	}
 }
 
 /*
  * Reset the BAR, filter, schedule and ADDBA state of a TID.
  *
  * This must only be called once the node has had its frames
  * flushed from this TID, to ensure that no other pause / unpause
  * logic can kick in.
  *
  * The paused counter is only adjusted, never zeroed, and incomp /
  * cleanup_inprogress are not touched: the TID may be going
  * through cleanup from the last association where things in the
  * BAW are still in the hardware queue.
  */
 static void
 ath_tx_tid_reset(struct ath_softc *sc, struct ath_tid *tid)
 {
 
 	/*
 	 * A pending BAR wait means the TID was paused for it; drop
 	 * that pause here, otherwise the paused counter is wrong
 	 * once cleanup has finished.
 	 *
 	 * XXX this doesn't go through ath_tx_tid_resume() because
 	 * the node shouldn't be rescheduled just yet.  This however
 	 * should be methodized!
 	 */
 	if (tid->bar_wait && tid->paused > 0)
 		tid->paused--;
 
 	/*
 	 * XXX same with a currently filtered TID.
 	 *
 	 * Since this is called during a flush, the filtered frame
 	 * list is assumed to be empty.
 	 *
 	 * XXX TODO: add in a check to ensure that the filtered
 	 * queue depth is actually 0!
 	 */
 	if (tid->isfiltered && tid->paused > 0)
 		tid->paused--;
 
 	/* Clear BAR, filtered, scheduled and ADDBA pending state */
 	tid->addba_tx_pending = 0;
 	tid->sched = 0;
 	tid->isfiltered = 0;
 	tid->bar_tx = 0;
 	tid->bar_wait = 0;
 
 	/*
 	 * XXX TODO: it may just be enough to walk the HWQs and mark
 	 * frames for that node as non-aggregate; or mark the
 	 * ath_node with something that indicates that aggregation
 	 * is no longer occurring.  Then we can just toss the BAW
 	 * complaints and do a complete hard reset of state here -
 	 * no pause, no complete counter, etc.
 	 */
 }
 
 /*
  * Flush all software queued packets for the given node.
  *
  * This occurs when a completion handler frees the last buffer
  * for a node, and the node is thus freed.  This causes the node
  * to be cleaned up, which ends up calling ath_tx_node_flush.
  *
  * Every TID of the node is drained, unscheduled and reset with
  * the TX lock held; the collected frames are completed after the
  * lock has been released.
  */
 void
 ath_tx_node_flush(struct ath_softc *sc, struct ath_node *an)
 {
 	struct ath_tid *atid;
 	struct ath_buf *bf;
 	ath_bufhead bf_cq;
 	int tidno;
 
 	TAILQ_INIT(&bf_cq);
 
 	ATH_KTR(sc, ATH_KTR_NODE, 1, "ath_tx_node_flush: flush node; ni=%p",
 	    &an->an_node);
 
 	ATH_TX_LOCK(sc);
 	DPRINTF(sc, ATH_DEBUG_NODE,
 	    "%s: %6D: flush; is_powersave=%d, stack_psq=%d, tim=%d, "
 	    "swq_depth=%d, clrdmask=%d, leak_count=%d\n",
 	    __func__, an->an_node.ni_macaddr, ":",
 	    an->an_is_powersave, an->an_stack_psq, an->an_tim_set,
 	    an->an_swq_depth, an->clrdmask, an->an_leak_count);
 
 	for (tidno = 0; tidno < IEEE80211_TID_SIZE; tidno++) {
 		atid = &an->an_tid[tidno];
 
 		/* Collect the queued packets on bf_cq */
 		ath_tx_tid_drain(sc, an, atid, &bf_cq);
 
 		/* Take the TID off the list of active TIDs */
 		ath_tx_tid_unsched(sc, atid);
 
 		/* Reset the per-TID pause, BAR, etc state */
 		ath_tx_tid_reset(sc, atid);
 	}
 
 	/* Clear global leak count */
 	an->an_leak_count = 0;
 	ATH_TX_UNLOCK(sc);
 
 	/* Complete the collected frames now that the lock is dropped */
 	while (!TAILQ_EMPTY(&bf_cq)) {
 		bf = TAILQ_FIRST(&bf_cq);
 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
 		ath_tx_default_comp(sc, bf, 0);
 	}
 }
 
 /*
  * Drain the software queues of every TID currently scheduled on
  * the given TXQ.
  *
  * The TIDs are drained and unscheduled with the TX lock held; the
  * collected frames are completed after the lock has been
  * released.
  */
 void
 ath_tx_txq_drain(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_tid *tid;
 	struct ath_buf *bf;
 	ath_bufhead bf_cq;
 
 	TAILQ_INIT(&bf_cq);
 
 	ATH_TX_LOCK(sc);
 
 	/*
 	 * Keep taking the first active TID until none is left;
 	 * each one is flushed and then unscheduled.
 	 */
 	for (;;) {
 		tid = TAILQ_FIRST(&txq->axq_tidq);
 		if (tid == NULL)
 			break;
 		ath_tx_tid_drain(sc, tid->an, tid, &bf_cq);
 		ath_tx_tid_unsched(sc, tid);
 	}
 
 	ATH_TX_UNLOCK(sc);
 
 	/* Complete the collected frames outside of the lock */
 	while (!TAILQ_EMPTY(&bf_cq)) {
 		bf = TAILQ_FIRST(&bf_cq);
 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
 		ath_tx_default_comp(sc, bf, 0);
 	}
 }
 
 /*
  * Handle completion of non-aggregate session frames.
  *
  * The TID bookkeeping (hardware queue depth, cleanup tracking and
  * filtered state) is updated with the TX lock held.  After the
  * lock has been dropped the result is reported to rate control
  * (only when 'fail' is 0 and the frame wanted an ACK) and the
  * buffer is passed to ath_tx_default_comp().
  *
  * This (currently) doesn't implement software retransmission of
  * non-aggregate frames!
  *
  * Software retransmission of non-aggregate frames needs to obey
  * the strict sequence number ordering, and drop any frames that
  * will fail this.
  *
  * For now, filtered frames and frame transmission will cause
  * all kinds of issues.  So we don't support them.
  *
  * So anyone queuing frames via ath_tx_xmit_normal() or
  * ath_tx_hw_queue_norm() must override and set CLRDMASK.
  */
 void
 ath_tx_normal_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
 {
 	struct ieee80211_node *ni = bf->bf_node;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = bf->bf_state.bfs_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ath_tx_status *ts = &bf->bf_status.ds_txstat;
 
 	/* The TID state is protected behind the TXQ lock */
 	ATH_TX_LOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: fail=%d, hwq_depth now %d\n",
 	    __func__, bf, fail, atid->hwq_depth - 1);
 
 	/* This frame has left the hardware queue */
 	atid->hwq_depth--;
 
 #if 0
 	/*
 	 * If the frame was filtered, stick it on the filter frame
 	 * queue and complain about it.  It shouldn't happen!
 	 */
 	if ((ts->ts_status & HAL_TXERR_FILT) ||
 	    (ts->ts_status != 0 && atid->isfiltered)) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX,
 		    "%s: isfiltered=%d, ts_status=%d: huh?\n",
 		    __func__,
 		    atid->isfiltered,
 		    ts->ts_status);
 		ath_tx_tid_filt_comp_buf(sc, atid, bf);
 	}
 #endif
 	/* Neither of these is expected; they are only logged */
 	if (atid->isfiltered)
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: filtered?!\n", __func__);
 	if (atid->hwq_depth < 0)
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: hwq_depth < 0: %d\n",
 		    __func__, atid->hwq_depth);
 
 	/*
 	 * If the TID is being cleaned up, count this frame off and
 	 * resume the TID once the last outstanding frame is done.
 	 */
 	/* XXX refactor! */
 	if (atid->cleanup_inprogress) {
 		atid->incomp--;
 		if (atid->incomp == 0) {
 			DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 			    "%s: TID %d: cleaned up! resume!\n",
 			    __func__, tid);
 			atid->cleanup_inprogress = 0;
 			ath_tx_tid_resume(sc, atid);
 		}
 	}
 
 	/*
 	 * If the queue is filtered, potentially mark it as complete
 	 * and reschedule it as needed.
 	 *
 	 * This is required as there may be a subsequent TX descriptor
 	 * for this end-node that has CLRDMASK set, so it's quite possible
 	 * that a filtered frame will be followed by a non-filtered
 	 * (complete or otherwise) frame.
 	 *
 	 * XXX should we do this before we complete the frame?
 	 */
 	if (atid->isfiltered)
 		ath_tx_tid_filt_comp_complete(sc, atid);
 	ATH_TX_UNLOCK(sc);
 
 	/*
 	 * punt to rate control if we're not being cleaned up
 	 * during a hw queue drain and the frame wanted an ACK.
 	 *
 	 * The final argument is 1 when the hardware reported a
 	 * non-zero TX status for the frame and 0 otherwise.
 	 */
 	if (fail == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0))
 		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
 		    ts, bf->bf_state.bfs_pktlen,
 		    1, (ts->ts_status == 0) ? 0 : 1);
 
 	/* Generic completion; done without the TX lock held */
 	ath_tx_default_comp(sc, bf, fail);
 }
 
 /*
  * Handle cleanup of aggregate session packets that aren't
  * an A-MPDU.
  *
  * This is the completion path for a non-aggregate frame of a TID
  * that is being cleaned up.  The frame is counted off the TID's
  * outstanding (incomp) frames, it is removed from the BAW if it
  * is still BAW tracked, and the TID is resumed once the last
  * outstanding frame has completed.  The buffer is then completed
  * via ath_tx_default_comp() with the TX lock released.
  */
 static void
 ath_tx_comp_cleanup_unaggr(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ieee80211_node *ni = bf->bf_node;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = bf->bf_state.bfs_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 
 	ATH_TX_LOCK(sc);
 
 	/*
 	 * The TID state is protected by the TX lock, so incomp is
 	 * only read for the debug message once the lock is held.
 	 */
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: TID %d: incomp=%d\n",
 	    __func__, tid, atid->incomp);
 
 	atid->incomp--;
 
 	/* XXX refactor! */
 	if (bf->bf_state.bfs_dobaw) {
 		ath_tx_update_baw(sc, an, atid, bf);
 		/* A tracked frame that never made it into the BAW */
 		if (!bf->bf_state.bfs_addedbaw)
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: wasn't added: seqno %d\n",
 			    __func__, SEQNO(bf->bf_state.bfs_seqno));
 	}
 
 	/* Last outstanding frame: the cleanup is over, resume the TID */
 	if (atid->incomp == 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 		    "%s: TID %d: cleaned up! resume!\n",
 		    __func__, tid);
 		atid->cleanup_inprogress = 0;
 		ath_tx_tid_resume(sc, atid);
 	}
 	ATH_TX_UNLOCK(sc);
 
 	ath_tx_default_comp(sc, bf, 0);
 }
 
 
 /*
  * Remove one frame - and, for an aggregate, all of its subframes
  * - from the TID software queue during cleanup.
  *
  * bf_head is taken off the TID queue.  Every buffer on its
  * bf_next chain is then removed from the BAW if it had been added
  * to it, switched to the non-aggregate completion handler,
  * unlinked from the chain and appended to bf_cq for the caller.
  *
  * This as it currently stands is a bit dumb.  Ideally we'd just
  * fail the frame the normal way and have it permanently fail
  * via the normal aggregate completion path.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_cleanup_frame(struct ath_softc *sc, struct ath_node *an,
     int tid, struct ath_buf *bf_head, ath_bufhead *bf_cq)
 {
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ath_buf *cur, *nxt;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Only the head buffer is linked on the TID queue */
 	ATH_TID_REMOVE(atid, bf_head, bf_list);
 
 	/* Walk the aggregate; a lone frame is a chain of one */
 	for (cur = bf_head; cur != NULL; cur = nxt) {
 		/* Grab the successor first, the link is cleared below */
 		nxt = cur->bf_next;
 
 		/*
 		 * A frame that was added to the BAW has to be
 		 * kicked out of it before we continue.
 		 *
 		 * XXX if it's an aggregate, assert that it's in the
 		 * BAW - we shouldn't have it be in an aggregate
 		 * otherwise!
 		 */
 		if (cur->bf_state.bfs_addedbaw) {
 			ath_tx_update_baw(sc, an, atid, cur);
 			cur->bf_state.bfs_dobaw = 0;
 		}
 
 		/* Complete it as a plain frame, detached from the chain */
 		cur->bf_comp = ath_tx_normal_comp;
 		cur->bf_next = NULL;
 
 		/* Queue it up for the caller to free */
 		TAILQ_INSERT_TAIL(bf_cq, cur, bf_list);
 	}
 }
 
 /*
  * Performs transmit side cleanup when TID changes from aggregated
  * to unaggregated and during reassociation.
  *
  * For now, this just tosses everything from the TID software
  * queue (filtered frames included) whether or not it has been
  * retried, and marks the TID as pending completion if there's
  * anything for this TID queued to the hardware.
  *
  * The removed frames are collected on bf_cq; the caller must
  * complete and free them.
  *
  * The caller is responsible for pausing the TID and unpausing the
  * TID if no cleanup was required.  Otherwise the cleanup path
  * will unpause the TID once the last hardware queued frame is
  * completed.
  *
  * The caller must hold the TX lock.
  */
 static void
 ath_tx_tid_cleanup(struct ath_softc *sc, struct ath_node *an, int tid,
     ath_bufhead *bf_cq)
 {
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ath_buf *cur, *nxt;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 	    "%s: TID %d: called; inprogress=%d\n",
 	    __func__, tid, atid->cleanup_inprogress);
 
 	/*
 	 * Move the filtered frames to the front of the TX queue
 	 * first, so that they are discarded along with the rest.
 	 *
 	 * XXX this is really quite inefficient
 	 */
 	for (;;) {
 		cur = ATH_TID_FILT_LAST(atid, ath_bufhead_s);
 		if (cur == NULL)
 			break;
 		ATH_TID_FILT_REMOVE(atid, cur, bf_list);
 		ATH_TID_INSERT_HEAD(atid, cur, bf_list);
 	}
 
 	/*
 	 * Update the frames in the software TX queue:
 	 *
 	 * + Discard retry frames in the queue
 	 * + Fix the completion function to be non-aggregate
 	 *
 	 * The successor is fetched before each frame is handled,
 	 * because handling a frame unlinks it from the queue.
 	 */
 	for (cur = ATH_TID_FIRST(atid); cur != NULL; cur = nxt) {
 		nxt = TAILQ_NEXT(cur, bf_list);
 
 		/* Free the frame and all subframes */
 		ath_tx_tid_cleanup_frame(sc, an, tid, cur, bf_cq);
 	}
 
 	/*
 	 * Anything left in the hardware queue has to complete
 	 * before the cleanup is over; record how many frames that
 	 * is.
 	 *
 	 * XXX how about we kill atid->incomp, and instead replace
 	 * it with a macro that checks that atid->hwq_depth is 0?
 	 */
 	if (atid->hwq_depth > 0) {
 		atid->incomp = atid->hwq_depth;
 		atid->cleanup_inprogress = 1;
 	}
 
 	if (atid->cleanup_inprogress) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 		    "%s: TID %d: cleanup needed: %d packets\n",
 		    __func__, tid, atid->incomp);
 	}
 
 	/* Owner now must free completed frames */
 }
 
 /*
  * Replace a buffer that can't be reused directly with a clone, so
  * that the frame can be queued for retransmission.
  *
  * On success the BAW entry (if the frame is BAW tracked) is
  * switched over to the clone, the original buffer is freed and
  * the clone is returned; 'bf' must not be used afterwards.
  *
  * NULL is returned if the buffer could not be cloned or if DMA
  * setup for the clone failed.  The original buffer is not freed
  * in either case.
  */
 static struct ath_buf *
 ath_tx_retry_clone(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid, struct ath_buf *bf)
 {
 	struct ath_buf *nbf;
 
 	/*
 	 * Clone the buffer.  This will handle the dma unmap and
 	 * copy the node reference to the new buffer.  If this
 	 * works out, 'bf' will have no DMA mapping, no mbuf
 	 * pointer and no node reference.
 	 */
 	nbf = ath_buf_clone(sc, bf);
 	if (nbf == NULL) {
 		/* Failed to clone */
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: failed to clone a busy buffer\n",
 		    __func__);
 		return NULL;
 	}
 
 	/* Setup the dma for the new buffer */
 	if (ath_tx_dmasetup(sc, nbf, nbf->bf_m) != 0) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: failed to setup dma for clone\n",
 		    __func__);
 		/*
 		 * Give the clone back at the head of the list, not
 		 * the tail; that way it doesn't interfere with the
 		 * busy buffer logic (which uses the tail of the
 		 * list.)
 		 *
 		 * NOTE(review): by now the clone presumably owns
 		 * the mbuf and node reference taken over from 'bf';
 		 * confirm what happens to them on this path.
 		 */
 		ATH_TXBUF_LOCK(sc);
 		ath_returnbuf_head(sc, nbf);
 		ATH_TXBUF_UNLOCK(sc);
 		return NULL;
 	}
 
 	/* Update BAW if required, before we free the original buf */
 	if (bf->bf_state.bfs_dobaw)
 		ath_tx_switch_baw_buf(sc, an, tid, bf, nbf);
 
 	/* The original is done with; the clone takes its place */
 	ath_freebuf(sc, bf);
 
 	return nbf;
 }
 
 /*
  * Handle retrying an unaggregate frame in an aggregate
  * session.
  *
  * While the frame is below the software retry limit it is marked
  * as a retry and put back at the head of the TID software queue.
  *
  * If too many retries occur, pause the TID, wait for
  * any further retransmits (as there's no reason why
  * non-aggregate frames in an aggregate session are
  * transmitted in-order; they just have to be in-BAW)
  * and then queue a BAR.  The frame itself is then completed
  * and freed.
  *
  * The TX lock is taken and released by this function.
  */
 static void
 ath_tx_aggr_retry_unaggr(struct ath_softc *sc, struct ath_buf *bf)
 {
 	struct ieee80211_node *ni = bf->bf_node;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = bf->bf_state.bfs_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ieee80211_tx_ampdu *tap;
 
 	ATH_TX_LOCK(sc);
 
 	/* NOTE(review): tap is not used again in this function */
 	tap = ath_tx_get_tx_tid(an, tid);
 
 	/*
 	 * If the buffer is marked as busy, we can't directly
 	 * reuse it. Instead, try to clone the buffer.
 	 * If the clone is successful, recycle the old buffer.
 	 * If the clone is unsuccessful, set bfs_retries to max
 	 * to force the next bit of code to free the buffer
 	 * for us.
 	 */
 	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
 	    (bf->bf_flags & ATH_BUF_BUSY)) {
 		struct ath_buf *nbf;
 		nbf = ath_tx_retry_clone(sc, an, atid, bf);
 		if (nbf)
 			/* bf has been freed at this point */
 			bf = nbf;
 		else
 			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
 	}
 
 	/* Out of retries (or the clone failed): give up on the frame */
 	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
 		    "%s: exceeded retries; seqno %d\n",
 		    __func__, SEQNO(bf->bf_state.bfs_seqno));
 		sc->sc_stats.ast_tx_swretrymax++;
 
 		/* Update BAW anyway */
 		if (bf->bf_state.bfs_dobaw) {
 			ath_tx_update_baw(sc, an, atid, bf);
 			if (! bf->bf_state.bfs_addedbaw)
 				DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 				    "%s: wasn't added: seqno %d\n",
 				    __func__, SEQNO(bf->bf_state.bfs_seqno));
 		}
 		bf->bf_state.bfs_dobaw = 0;
 
 		/* Suspend the TX queue and get ready to send the BAR */
 		ath_tx_tid_bar_suspend(sc, atid);
 
 		/* Send the BAR if there are no other frames waiting */
 		if (ath_tx_tid_bar_tx_ready(sc, atid))
 			ath_tx_tid_bar_tx(sc, atid);
 
 		ATH_TX_UNLOCK(sc);
 
 		/* Free buffer, bf is free after this call */
 		ath_tx_default_comp(sc, bf, 0);
 		return;
 	}
 
 	/*
 	 * This increments the retry counter as well as
 	 * sets the retry flag in the ath_buf and packet
 	 * body.
 	 */
 	ath_tx_set_retry(sc, bf);
 	sc->sc_stats.ast_tx_swretries++;
 
 	/*
 	 * Insert this at the head of the queue, so it's
 	 * retried before any current/subsequent frames.
 	 */
 	ATH_TID_INSERT_HEAD(atid, bf, bf_list);
 	ath_tx_tid_sched(sc, atid);
 	/* Send the BAR if there are no other frames waiting */
 	if (ath_tx_tid_bar_tx_ready(sc, atid))
 		ath_tx_tid_bar_tx(sc, atid);
 
 	ATH_TX_UNLOCK(sc);
 }
 
 /*
  * Shared retry decision for one subframe of a failed aggregate.
  *
  * Must be called with the TX lock held.
  *
  * Returns 0 if the subframe (or its clone, when the buffer was busy)
  * has been marked as a retry and appended to bf_q.  Returns 1 if the
  * retry limit was hit (or cloning failed); in that case the BAW has
  * been updated and the caller is responsible for completing bf.
  *
  * XXX should unify this with ath_tx_aggr_retry_unaggr()
  */
 static int
 ath_tx_retry_subframe(struct ath_softc *sc, struct ath_buf *bf,
     ath_bufhead *bf_q)
 {
 	struct ath_node *an = ATH_NODE(bf->bf_node);
 	struct ath_tid *atid = &an->an_tid[bf->bf_state.bfs_tid];
 	struct ath_buf *clone;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* XXX clr11naggr should be done for all subframes */
 	ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
 	ath_hal_set11nburstduration(sc->sc_ah, bf->bf_desc, 0);
 
 	/* ath_hal_set11n_virtualmorefrag(sc->sc_ah, bf->bf_desc, 0); */
 
 	/*
 	 * Busy buffers cannot be requeued as-is.  Swap in a clone when
 	 * one can be had; otherwise bump the retry count beyond the
 	 * limit so the drop path below takes care of the buffer.
 	 */
 	if ((bf->bf_flags & ATH_BUF_BUSY) &&
 	    (bf->bf_state.bfs_retries < SWMAX_RETRIES)) {
 		clone = ath_tx_retry_clone(sc, an, atid, bf);
 		if (clone == NULL)
 			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
 		else
 			/* the original bf has been freed by now */
 			bf = clone;
 	}
 
 	if (bf->bf_state.bfs_retries < SWMAX_RETRIES) {
 		/* Still retries left: flag it and hand it back */
 		ath_tx_set_retry(sc, bf);
 		sc->sc_stats.ast_tx_swretries++;
 		bf->bf_next = NULL;		/* Just to make sure */
 
 		/* It is a lone frame again, not part of an aggregate */
 		bf->bf_state.bfs_aggr = 0;
 		bf->bf_state.bfs_ndelim = 0;	/* ??? needed? */
 		bf->bf_state.bfs_nframes = 1;
 
 		TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
 		return 0;
 	}
 
 	/* Retry budget exhausted: account for it and fix up the BAW */
 	sc->sc_stats.ast_tx_swretrymax++;
 	DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
 	    "%s: max retries: seqno %d\n",
 	    __func__, SEQNO(bf->bf_state.bfs_seqno));
 	ath_tx_update_baw(sc, an, atid, bf);
 	if (!bf->bf_state.bfs_addedbaw) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
 		    "%s: wasn't added: seqno %d\n",
 		    __func__, SEQNO(bf->bf_state.bfs_seqno));
 	}
 	bf->bf_state.bfs_dobaw = 0;
 	return 1;
 }
 
 /*
  * Error completion for a whole aggregate: every subframe failed.
  *
  * sc:       driver softc; the TX lock is taken and released in here.
  * bf_first: head of the aggregate, linked through bf_next.
  * tid:      the TID state the aggregate belongs to.
  *
  * Rate control is told that all frames failed.  Each subframe is
  * then run through ath_tx_retry_subframe(); the ones that can be
  * retried are put back at the head of the TID queue in their
  * original order, the rest are completed via ath_tx_default_comp()
  * after the TX lock has been dropped.  If anything was dropped the
  * TID is suspended so a BAR can be sent.
  */
 static void
 ath_tx_comp_aggr_error(struct ath_softc *sc, struct ath_buf *bf_first,
     struct ath_tid *tid)
 {
 	struct ieee80211_node *ni = bf_first->bf_node;
 	struct ath_buf *bf_next, *bf;
 	ath_bufhead bf_q;	/* subframes to be retried */
 	ath_bufhead bf_cq;	/* subframes to be completed */
 	int drops = 0;
 
 	TAILQ_INIT(&bf_q);
 	TAILQ_INIT(&bf_cq);
 
 	/*
 	 * Update rate control - all frames have failed.
 	 *
 	 * XXX use the length in the first frame in the series;
 	 * XXX just so things are consistent for now.
 	 */
 	ath_tx_update_ratectrl(sc, ni, bf_first->bf_state.bfs_rc,
 	    &bf_first->bf_status.ds_txstat,
 	    bf_first->bf_state.bfs_pktlen,
 	    bf_first->bf_state.bfs_nframes, bf_first->bf_state.bfs_nframes);
 
 	ATH_TX_LOCK(sc);
 	sc->sc_stats.ast_tx_aggr_failall++;
 
 	/* Retry all subframes */
 	bf = bf_first;
 	while (bf) {
 		bf_next = bf->bf_next;
 		bf->bf_next = NULL;	/* Remove it from the aggr list */
 		sc->sc_stats.ast_tx_aggr_fail++;
 		if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
 			/* Couldn't be retried; complete it later */
 			drops++;
 			bf->bf_next = NULL;
 			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
 		}
 		bf = bf_next;
 	}
 
 	/*
 	 * Prepend all frames to the beginning of the queue.  Walking
 	 * bf_q from the tail while inserting at the head keeps the
 	 * frames in their original order.
 	 */
 	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
 		TAILQ_REMOVE(&bf_q, bf, bf_list);
 		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
 	}
 
 	/*
 	 * Schedule the TID to be re-tried.
 	 */
 	ath_tx_tid_sched(sc, tid);
 
 	/*
 	 * send bar if we dropped any frames
 	 *
 	 * Keep the txq lock held for now, as we need to ensure
 	 * that ni_txseqs[] is consistent (as it's being updated
 	 * in the ifnet TX context or raw TX context.)
 	 */
 	if (drops) {
 		/* Suspend the TX queue and get ready to send the BAR */
 		ath_tx_tid_bar_suspend(sc, tid);
 	}
 
 	/*
 	 * Send BAR if required
 	 */
 	if (ath_tx_tid_bar_tx_ready(sc, tid))
 		ath_tx_tid_bar_tx(sc, tid);
 
 	ATH_TX_UNLOCK(sc);
 
 	/* Complete frames which errored out, outside of the TX lock */
 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
 		ath_tx_default_comp(sc, bf, 0);
 	}
 }
 
 /*
  * Completion path for an aggregate that finishes while the TID is
  * being cleaned up.
  *
  * Under the TX lock: the TID's count of incomplete frames is dropped
  * by one, every subframe flagged bfs_dobaw is passed to
  * ath_tx_update_baw(), and once nothing is left incomplete the
  * cleanup flag is cleared and the TID resumed.  A BAR is sent if one
  * is ready.  After the lock is released each subframe is unlinked
  * and completed on its own with the fail flag set.
  */
 static void
 ath_tx_comp_cleanup_aggr(struct ath_softc *sc, struct ath_buf *bf_first)
 {
 	struct ath_node *an = ATH_NODE(bf_first->bf_node);
 	int tid = bf_first->bf_state.bfs_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ath_buf *cur, *following;
 
 	ATH_TX_LOCK(sc);
 
 	/* one fewer incomplete aggregate on this TID */
 	atid->incomp--;
 
 	/* Walk the aggregate and update the BAW where needed */
 	for (cur = bf_first; cur != NULL; cur = cur->bf_next) {
 		/* XXX refactor! */
 		if (!cur->bf_state.bfs_dobaw)
 			continue;
 		ath_tx_update_baw(sc, an, atid, cur);
 		if (!cur->bf_state.bfs_addedbaw) {
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: wasn't added: seqno %d\n",
 			    __func__, SEQNO(cur->bf_state.bfs_seqno));
 		}
 	}
 
 	/* Last outstanding frame?  Then the cleanup is done. */
 	if (atid->incomp == 0) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 		    "%s: TID %d: cleaned up! resume!\n",
 		    __func__, tid);
 		atid->cleanup_inprogress = 0;
 		ath_tx_tid_resume(sc, atid);
 	}
 
 	/*
 	 * Send BAR if required.
 	 *
 	 * XXX why would we send a BAR when transitioning to
 	 * non-aggregation?
 	 * XXX TODO: we should likely just tear down the BAR state here,
 	 * rather than sending a BAR.
 	 */
 	if (ath_tx_tid_bar_tx_ready(sc, atid))
 		ath_tx_tid_bar_tx(sc, atid);
 
 	ATH_TX_UNLOCK(sc);
 
 	/* Complete each subframe individually, lock no longer held */
 	for (cur = bf_first; cur != NULL; cur = following) {
 		following = cur->bf_next;
 		cur->bf_next = NULL;
 		ath_tx_default_comp(sc, cur, 1);
 	}
 }
 
 /*
  * Handle completion of a set of aggregate frames.
  *
  * sc:       driver softc; the TX lock is taken and released in here.
  * bf_first: first buffer of the aggregate; subframes are chained
  *           through bf_next.
  * fail:     when non-zero, rate control is not updated for this
  *           completion.
  *
  * Depending on TID state and TX status this either punts to the
  * cleanup or all-failed handlers, hands the frames to the filtered
  * frame code, or walks the block-ack bitmap to decide per subframe
  * whether it is complete or has to be retried.
  *
  * Note: the completion handler is the last descriptor in the aggregate,
  * not the last descriptor in the first frame.
  */
 static void
 ath_tx_aggr_comp_aggr(struct ath_softc *sc, struct ath_buf *bf_first,
     int fail)
 {
 	//struct ath_desc *ds = bf->bf_lastds;
 	struct ieee80211_node *ni = bf_first->bf_node;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = bf_first->bf_state.bfs_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ath_tx_status ts;
 	struct ieee80211_tx_ampdu *tap;
 	ath_bufhead bf_q;	/* subframes queued for retry */
 	ath_bufhead bf_cq;	/* subframes queued for deferred completion */
 	int seq_st, tx_ok;
 	int hasba, isaggr;
 	uint32_t ba[2];
 	struct ath_buf *bf, *bf_next;
 	int ba_index;
 	int drops = 0;
 	int nframes = 0, nbad = 0, nf;
 	int pktlen;
 	/* XXX there's too much on the stack? */
 	struct ath_rc_series rc[ATH_RC_NUM];
 	int txseq;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: called; hwq_depth=%d\n",
 	    __func__, atid->hwq_depth);
 
 	/*
 	 * Take a copy; this may be needed -after- bf_first
 	 * has been completed and freed.
 	 */
 	ts = bf_first->bf_status.ds_txstat;
 
 	TAILQ_INIT(&bf_q);
 	TAILQ_INIT(&bf_cq);
 
 	/* The TID state is kept behind the TXQ lock */
 	ATH_TX_LOCK(sc);
 
 	/* The whole aggregate counted as one entry on the hardware queue */
 	atid->hwq_depth--;
 	if (atid->hwq_depth < 0)
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: hwq_depth < 0: %d\n",
 		    __func__, atid->hwq_depth);
 
 	/*
 	 * If the TID is filtered, handle completing the filter
 	 * transition before potentially kicking it to the cleanup
 	 * function.
 	 *
 	 * XXX this is duplicate work, ew.
 	 */
 	if (atid->isfiltered)
 		ath_tx_tid_filt_comp_complete(sc, atid);
 
 	/*
 	 * Punt cleanup to the relevant function, not our problem now.
 	 * The lock is dropped first; the cleanup handler takes it itself.
 	 */
 	if (atid->cleanup_inprogress) {
 		if (atid->isfiltered)
 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 			    "%s: isfiltered=1, normal_comp?\n",
 			    __func__);
 		ATH_TX_UNLOCK(sc);
 		ath_tx_comp_cleanup_aggr(sc, bf_first);
 		return;
 	}
 
 	/*
 	 * If the frame is filtered, transition to filtered frame
 	 * mode and add this to the filtered frame list.
 	 *
 	 * XXX TODO: figure out how this interoperates with
 	 * BAR, pause and cleanup states.
 	 */
 	if ((ts.ts_status & HAL_TXERR_FILT) ||
 	    (ts.ts_status != 0 && atid->isfiltered)) {
 		if (fail != 0)
 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 			    "%s: isfiltered=1, fail=%d\n", __func__, fail);
 		ath_tx_tid_filt_comp_aggr(sc, atid, bf_first, &bf_cq);
 
 		/*
 		 * Remove from BAW.  Whatever ended up on bf_cq is
 		 * completed below rather than being kept for retry.
 		 */
 		TAILQ_FOREACH_SAFE(bf, &bf_cq, bf_list, bf_next) {
 			if (bf->bf_state.bfs_addedbaw)
 				drops++;
 			if (bf->bf_state.bfs_dobaw) {
 				ath_tx_update_baw(sc, an, atid, bf);
 				if (!bf->bf_state.bfs_addedbaw)
 					DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 					    "%s: wasn't added: seqno %d\n",
 					    __func__,
 					    SEQNO(bf->bf_state.bfs_seqno));
 			}
 			bf->bf_state.bfs_dobaw = 0;
 		}
 		/*
 		 * If any intermediate frames in the BAW were dropped when
 		 * handling filtering things, send a BAR.
 		 */
 		if (drops)
 			ath_tx_tid_bar_suspend(sc, atid);
 
 		/*
 		 * Finish up by sending a BAR if required and freeing
 		 * the frames outside of the TX lock.  The TX lock is
 		 * still held when jumping to the label.
 		 */
 		goto finish_send_bar;
 	}
 
 	/*
 	 * XXX for now, use the first frame in the aggregate for
 	 * XXX rate control completion; it's at least consistent.
 	 */
 	pktlen = bf_first->bf_state.bfs_pktlen;
 
 	/*
 	 * Handle errors first!
 	 *
 	 * Here, handle _any_ error as a "exceeded retries" error.
 	 * Later on (when filtered frames are to be specially handled)
 	 * it'll have to be expanded.
 	 */
 #if 0
 	if (ts.ts_status & HAL_TXERR_XRETRY) {
 #endif
 	if (ts.ts_status != 0) {
 		/* The error handler takes the TX lock itself */
 		ATH_TX_UNLOCK(sc);
 		ath_tx_comp_aggr_error(sc, bf_first, atid);
 		return;
 	}
 
 	tap = ath_tx_get_tx_tid(an, tid);
 
 	/*
 	 * extract starting sequence and block-ack bitmap
 	 */
 	/* XXX endian-ness of seq_st, ba? */
 	seq_st = ts.ts_seqnum;
 	hasba = !! (ts.ts_flags & HAL_TX_BA);
 	tx_ok = (ts.ts_status == 0);
 	isaggr = bf_first->bf_state.bfs_aggr;
 	ba[0] = ts.ts_ba_low;
 	ba[1] = ts.ts_ba_high;
 
 	/*
 	 * Copy the TX completion status and the rate control
 	 * series from the first descriptor, as it may be freed
 	 * before the rate control code can get its grubby fingers
 	 * into things.
 	 */
 	memcpy(rc, bf_first->bf_state.bfs_rc, sizeof(rc));
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 	    "%s: txa_start=%d, tx_ok=%d, status=%.8x, flags=%.8x, "
 	    "isaggr=%d, seq_st=%d, hasba=%d, ba=%.8x, %.8x\n",
 	    __func__, tap->txa_start, tx_ok, ts.ts_status, ts.ts_flags,
 	    isaggr, seq_st, hasba, ba[0], ba[1]);
 
 	/*
 	 * The reference driver doesn't do this; it simply ignores
 	 * this check in its entirety.
 	 *
 	 * I've seen this occur when using iperf to send traffic
 	 * out tid 1 - the aggregate frames are all marked as TID 1,
 	 * but the TXSTATUS has TID=0.  So, let's just ignore this
 	 * check.
 	 */
 #if 0
 	/* Occasionally, the MAC sends a tx status for the wrong TID. */
 	if (tid != ts.ts_tid) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: tid %d != hw tid %d\n",
 		    __func__, tid, ts.ts_tid);
 		tx_ok = 0;
 	}
 #endif
 
 	/*
 	 * AR5416 BA bug; this requires an interface reset.
 	 * For now it is only logged.
 	 */
 	if (isaggr && tx_ok && (! hasba)) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 		    "%s: AR5416 bug: hasba=%d; txok=%d, isaggr=%d, "
 		    "seq_st=%d\n",
 		    __func__, hasba, tx_ok, isaggr, seq_st);
 		/* XXX TODO: schedule an interface reset */
 #ifdef ATH_DEBUG
 		ath_printtxbuf(sc, bf_first,
 		    sc->sc_ac2q[atid->ac]->axq_qnum, 0, 0);
 #endif
 	}
 
 	/*
 	 * Walk the list of frames, figure out which ones were correctly
 	 * sent and which weren't.
 	 */
 	bf = bf_first;
 	nf = bf_first->bf_state.bfs_nframes;
 
 	/* bf_first is going to be invalid once this list is walked */
 	bf_first = NULL;
 
 	/*
 	 * Walk the list of completed frames and determine
 	 * which need to be completed and which need to be
 	 * retransmitted.
 	 *
 	 * For completed frames, the completion functions need
 	 * to be called at the end of this function as the last
 	 * node reference may free the node.
 	 *
 	 * Finally, since the TXQ lock can't be held during the
 	 * completion callback (to avoid lock recursion),
 	 * the completion calls have to be done outside of the
 	 * lock.
 	 */
 	while (bf) {
 		nframes++;
 		ba_index = ATH_BA_INDEX(seq_st,
 		    SEQNO(bf->bf_state.bfs_seqno));
 		bf_next = bf->bf_next;
 		bf->bf_next = NULL;	/* Remove it from the aggr list */
 
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 		    "%s: checking bf=%p seqno=%d; ack=%d\n",
 		    __func__, bf, SEQNO(bf->bf_state.bfs_seqno),
 		    ATH_BA_ISSET(ba, ba_index));
 
 		if (tx_ok && ATH_BA_ISSET(ba, ba_index)) {
 			/* Acked: leaves the BAW, completed later */
 			sc->sc_stats.ast_tx_aggr_ok++;
 			ath_tx_update_baw(sc, an, atid, bf);
 			bf->bf_state.bfs_dobaw = 0;
 			if (!bf->bf_state.bfs_addedbaw)
 				DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 				    "%s: wasn't added: seqno %d\n",
 				    __func__, SEQNO(bf->bf_state.bfs_seqno));
 			bf->bf_next = NULL;
 			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
 		} else {
 			/*
 			 * Not acked: either it goes onto bf_q for a
 			 * retry, or it is dropped and completed later.
 			 */
 			sc->sc_stats.ast_tx_aggr_fail++;
 			if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
 				drops++;
 				bf->bf_next = NULL;
 				TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
 			}
 			nbad++;
 		}
 		bf = bf_next;
 	}
 
 	/*
 	 * Now that the BAW updates have been done, unlock
 	 *
 	 * txseq is grabbed before the lock is released so we
 	 * have a consistent view of what -was- in the BAW.
 	 * Anything after this point will not yet have been
 	 * TXed.
 	 *
 	 * NOTE(review): txseq is assigned here but is not read again
 	 * anywhere in this function.
 	 */
 	txseq = tap->txa_start;
 	ATH_TX_UNLOCK(sc);
 
 	if (nframes != nf)
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 		    "%s: num frames seen=%d; bf nframes=%d\n",
 		    __func__, nframes, nf);
 
 	/*
 	 * Now we know how many frames were bad, call the rate
 	 * control code.  This uses the local copies of the rate
 	 * series and TX status, and runs without the TX lock held.
 	 */
 	if (fail == 0)
 		ath_tx_update_ratectrl(sc, ni, rc, &ts, pktlen, nframes,
 		    nbad);
 
 	/*
 	 * send bar if we dropped any frames
 	 */
 	if (drops) {
 		/* Suspend the TX queue and get ready to send the BAR */
 		ATH_TX_LOCK(sc);
 		ath_tx_tid_bar_suspend(sc, atid);
 		ATH_TX_UNLOCK(sc);
 	}
 
 	/* NOTE(review): txa_start is read here without the TX lock held */
 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 	    "%s: txa_start now %d\n", __func__, tap->txa_start);
 
 	ATH_TX_LOCK(sc);
 
 	/*
 	 * Prepend all frames to the beginning of the queue; taking
 	 * them from the tail of bf_q keeps their relative order.
 	 */
 	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
 		TAILQ_REMOVE(&bf_q, bf, bf_list);
 		ATH_TID_INSERT_HEAD(atid, bf, bf_list);
 	}
 
 	/*
 	 * Reschedule to grab some further frames.
 	 */
 	ath_tx_tid_sched(sc, atid);
 
 	/*
 	 * If the queue is filtered, re-schedule as required.
 	 *
 	 * This is required as there may be a subsequent TX descriptor
 	 * for this end-node that has CLRDMASK set, so it's quite possible
 	 * that a filtered frame will be followed by a non-filtered
 	 * (complete or otherwise) frame.
 	 *
 	 * XXX should we do this before we complete the frame?
 	 */
 	if (atid->isfiltered)
 		ath_tx_tid_filt_comp_complete(sc, atid);
 
 	/* Both the filtered and the normal path arrive here locked */
 finish_send_bar:
 
 	/*
 	 * Send BAR if required
 	 */
 	if (ath_tx_tid_bar_tx_ready(sc, atid))
 		ath_tx_tid_bar_tx(sc, atid);
 
 	ATH_TX_UNLOCK(sc);
 
 	/* Do deferred completion, now that the TX lock is released */
 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
 		ath_tx_default_comp(sc, bf, 0);
 	}
 }
 
 /*
  * Handle completion of unaggregated frames in an ADDBA
  * session.
  *
  * sc:   driver softc; the TX lock is taken and released in here.
  * bf:   the completed buffer.  Depending on the path taken it is
  *       completed, handed to the filtered frame code, or passed
  *       on to the retry / cleanup handlers.
  * fail: Fail is set to 1 if the entry is being freed via a call to
  *       ath_tx_draintxq().  When set, rate control is not updated
  *       and no software retry is attempted.
  */
 static void
 ath_tx_aggr_comp_unaggr(struct ath_softc *sc, struct ath_buf *bf, int fail)
 {
 	struct ieee80211_node *ni = bf->bf_node;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = bf->bf_state.bfs_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	struct ath_tx_status ts;
 	int drops = 0;
 
 	/*
 	 * Take a copy of this; filtering/cloning the frame may free the
 	 * bf pointer.
 	 */
 	ts = bf->bf_status.ds_txstat;
 
 	/*
 	 * Update rate control status here, before we possibly
 	 * punt to retry or cleanup.
 	 *
 	 * Do it outside of the TXQ lock.  Frames sent with the
 	 * NOACK flag are skipped.
 	 */
 	if (fail == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0))
 		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
 		    &bf->bf_status.ds_txstat,
 		    bf->bf_state.bfs_pktlen,
 		    1, (ts.ts_status == 0) ? 0 : 1);
 
 	/*
 	 * This is called early so atid->hwq_depth can be tracked.
 	 * This unfortunately means that it's released and regrabbed
 	 * during retry and cleanup. That's rather inefficient.
 	 */
 	ATH_TX_LOCK(sc);
 
 	if (tid == IEEE80211_NONQOS_TID)
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: TID=16!\n", __func__);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX,
 	    "%s: bf=%p: tid=%d, hwq_depth=%d, seqno=%d\n",
 	    __func__, bf, bf->bf_state.bfs_tid, atid->hwq_depth,
 	    SEQNO(bf->bf_state.bfs_seqno));
 
 	/* This frame is no longer outstanding on the hardware queue */
 	atid->hwq_depth--;
 	if (atid->hwq_depth < 0)
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: hwq_depth < 0: %d\n",
 		    __func__, atid->hwq_depth);
 
 	/*
 	 * If the TID is filtered, handle completing the filter
 	 * transition before potentially kicking it to the cleanup
 	 * function.
 	 */
 	if (atid->isfiltered)
 		ath_tx_tid_filt_comp_complete(sc, atid);
 
 	/*
 	 * If a cleanup is in progress, punt to comp_cleanup;
 	 * rather than handling it here. It's thus their
 	 * responsibility to clean up, call the completion
 	 * function in net80211, etc.
 	 */
 	if (atid->cleanup_inprogress) {
 		if (atid->isfiltered)
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: isfiltered=1, normal_comp?\n",
 			    __func__);
 		ATH_TX_UNLOCK(sc);
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: cleanup_unaggr\n",
 		    __func__);
 		ath_tx_comp_cleanup_unaggr(sc, bf);
 		return;
 	}
 
 	/*
 	 * XXX TODO: how does cleanup, BAR and filtered frame handling
 	 * overlap?
 	 *
 	 * If the frame is filtered OR if it's any failure but
 	 * the TID is filtered, the frame must be added to the
 	 * filtered frame list.
 	 *
 	 * However - a busy buffer can't be added to the filtered
 	 * list as it will end up being recycled without having
 	 * been made available for the hardware.
 	 */
 	if ((ts.ts_status & HAL_TXERR_FILT) ||
 	    (ts.ts_status != 0 && atid->isfiltered)) {
 		int freeframe;
 
 		if (fail != 0)
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: isfiltered=1, fail=%d\n",
 			    __func__, fail);
 		freeframe = ath_tx_tid_filt_comp_single(sc, atid, bf);
 		/*
 		 * If freeframe=0 then bf is no longer ours; don't
 		 * touch it.
 		 */
 		if (freeframe) {
 			/* Remove from BAW */
 			if (bf->bf_state.bfs_addedbaw)
 				drops++;
 			if (bf->bf_state.bfs_dobaw) {
 				ath_tx_update_baw(sc, an, atid, bf);
 				if (!bf->bf_state.bfs_addedbaw)
 					DPRINTF(sc, ATH_DEBUG_SW_TX,
 					    "%s: wasn't added: seqno %d\n",
 					    __func__, SEQNO(bf->bf_state.bfs_seqno));
 			}
 			bf->bf_state.bfs_dobaw = 0;
 		}
 
 		/*
 		 * If the frame couldn't be filtered, treat it as a drop and
 		 * prepare to send a BAR.
 		 */
 		if (freeframe && drops)
 			ath_tx_tid_bar_suspend(sc, atid);
 
 		/*
 		 * Send BAR if required
 		 */
 		if (ath_tx_tid_bar_tx_ready(sc, atid))
 			ath_tx_tid_bar_tx(sc, atid);
 
 		ATH_TX_UNLOCK(sc);
 		/*
 		 * If freeframe is set, then the frame couldn't be
 		 * cloned and bf is still valid.  Just complete/free it.
 		 */
 		if (freeframe)
 			ath_tx_default_comp(sc, bf, fail);
 
 		return;
 	}
 	/*
 	 * Don't bother with the retry check if all frames
 	 * are being failed (eg during queue deletion.)
 	 */
 #if 0
 	if (fail == 0 && ts->ts_status & HAL_TXERR_XRETRY) {
 #endif
 	if (fail == 0 && ts.ts_status != 0) {
 		/* The retry handler takes the TX lock itself */
 		ATH_TX_UNLOCK(sc);
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: retry_unaggr\n",
 		    __func__);
 		ath_tx_aggr_retry_unaggr(sc, bf);
 		return;
 	}
 
 	/*
 	 * Success? Complete.  This point is also reached for a failed
 	 * frame when fail is set, as the retry check above is skipped.
 	 */
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: TID=%d, seqno %d\n",
 	    __func__, tid, SEQNO(bf->bf_state.bfs_seqno));
 	if (bf->bf_state.bfs_dobaw) {
 		ath_tx_update_baw(sc, an, atid, bf);
 		bf->bf_state.bfs_dobaw = 0;
 		if (!bf->bf_state.bfs_addedbaw)
 			DPRINTF(sc, ATH_DEBUG_SW_TX,
 			    "%s: wasn't added: seqno %d\n",
 			    __func__, SEQNO(bf->bf_state.bfs_seqno));
 	}
 
 	/*
 	 * If the queue is filtered, re-schedule as required.
 	 *
 	 * This is required as there may be a subsequent TX descriptor
 	 * for this end-node that has CLRDMASK set, so it's quite possible
 	 * that a filtered frame will be followed by a non-filtered
 	 * (complete or otherwise) frame.
 	 *
 	 * XXX should we do this before we complete the frame?
 	 */
 	if (atid->isfiltered)
 		ath_tx_tid_filt_comp_complete(sc, atid);
 
 	/*
 	 * Send BAR if required
 	 */
 	if (ath_tx_tid_bar_tx_ready(sc, atid))
 		ath_tx_tid_bar_tx(sc, atid);
 
 	ATH_TX_UNLOCK(sc);
 
 	/* Complete the frame outside of the TX lock */
 	ath_tx_default_comp(sc, bf, fail);
 	/* bf is freed at this point */
 }
 
 /*
  * TX completion entry point for frames sent in an aggregation
  * session: picks the aggregate or the single-frame handler based on
  * the buffer's bfs_aggr flag.
  */
 void
 ath_tx_aggr_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
 {
 	if (!bf->bf_state.bfs_aggr) {
 		ath_tx_aggr_comp_unaggr(sc, bf, fail);
 		return;
 	}
 
 	ath_tx_aggr_comp_aggr(sc, bf, fail);
 }
 
 /*
  * Schedule some packets from the given node/TID to the hardware.
  *
  * This is the aggregate version.
  *
  * sc:  driver softc; the TX lock must already be held.
  * an:  the node the TID belongs to.
  * tid: the TID whose software queue is to be drained.
  *
  * Each pass through the loop hands one buffer to the hardware
  * queue: a frame outside the BAW, a single frame, or a multi-frame
  * aggregate built by ath_tx_form_aggr().  The loop ends when the TID
  * may not transmit, its queue is empty, no frame could be picked up,
  * the hardware queue limit is reached, or ath_tx_form_aggr() reports
  * that the BAW or the leak window is closed.
  */
 void
 ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid)
 {
 	struct ath_buf *bf;
 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
 	ATH_AGGR_STATUS status;
 	ath_bufhead bf_q;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d\n", __func__, tid->tid);
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * XXX TODO: If we're called for a queue that we're leaking frames to,
 	 * ensure we only leak one.
 	 */
 
 	if (tid->tid == IEEE80211_NONQOS_TID)
 		DPRINTF(sc, ATH_DEBUG_SW_TX,
 		    "%s: called for TID=NONQOS_TID?\n", __func__);
 
 	for (;;) {
 		/* Default for the non-BAW path, which skips form_aggr */
 		status = ATH_AGGR_DONE;
 
 		/*
 		 * If the upper layer has paused the TID, don't
 		 * queue any further packets.
 		 *
 		 * This can also occur from the completion task because
 		 * of packet loss; but as its serialised with this code,
 		 * it won't "appear" half way through queuing packets.
 		 */
 		if (! ath_tx_tid_can_tx_or_sched(sc, tid))
 			break;
 
 		bf = ATH_TID_FIRST(tid);
 		if (bf == NULL) {
 			break;
 		}
 
 		/*
 		 * If the packet doesn't fall within the BAW (eg a NULL
 		 * data frame), schedule it directly; continue.
 		 */
 		if (! bf->bf_state.bfs_dobaw) {
 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 			    "%s: non-baw packet\n",
 			    __func__);
 			ATH_TID_REMOVE(tid, bf, bf_list);
 
 			if (bf->bf_state.bfs_nframes > 1)
 				DPRINTF(sc, ATH_DEBUG_SW_TX,
 				    "%s: aggr=%d, nframes=%d\n",
 				    __func__,
 				    bf->bf_state.bfs_aggr,
 				    bf->bf_state.bfs_nframes);
 
 			/*
 			 * This shouldn't happen - such frames shouldn't
 			 * ever have been queued as an aggregate in the
 			 * first place.  However, make sure the fields
 			 * are correctly setup just to be totally sure.
 			 */
 			bf->bf_state.bfs_aggr = 0;
 			bf->bf_state.bfs_nframes = 1;
 
 			/* Update CLRDMASK just before this frame is queued */
 			ath_tx_update_clrdmask(sc, tid, bf);
 
 			ath_tx_do_ratelookup(sc, bf);
 			ath_tx_calc_duration(sc, bf);
 			ath_tx_calc_protection(sc, bf);
 			ath_tx_set_rtscts(sc, bf);
 			ath_tx_rate_fill_rcflags(sc, bf);
 			ath_tx_setds(sc, bf);
 			ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
 
 			sc->sc_aggr_stats.aggr_nonbaw_pkt++;
 
 			/* Queue the packet; continue */
 			goto queuepkt;
 		}
 
 		TAILQ_INIT(&bf_q);
 
 		/*
 		 * Do a rate control lookup on the first frame in the
 		 * list. The rate control code needs that to occur
 		 * before it can determine whether to TX.
 		 * It's inaccurate because the rate control code doesn't
 		 * really "do" aggregate lookups, so it only considers
 		 * the size of the first frame.
 		 */
 		ath_tx_do_ratelookup(sc, bf);
 		bf->bf_state.bfs_rc[3].rix = 0;
 		bf->bf_state.bfs_rc[3].tries = 0;
 
 		ath_tx_calc_duration(sc, bf);
 		ath_tx_calc_protection(sc, bf);
 
 		ath_tx_set_rtscts(sc, bf);
 		ath_tx_rate_fill_rcflags(sc, bf);
 
 		status = ath_tx_form_aggr(sc, an, tid, &bf_q);
 
 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 		    "%s: ath_tx_form_aggr() status=%d\n", __func__, status);
 
 		/*
 		 * No frames to be picked up - out of BAW
 		 */
 		if (TAILQ_EMPTY(&bf_q))
 			break;
 
 		/*
 		 * This assumes that the descriptor list in the ath_bufhead
 		 * are already linked together via bf_next pointers.
 		 */
 		bf = TAILQ_FIRST(&bf_q);
 
 		if (status == ATH_AGGR_8K_LIMITED)
 			sc->sc_aggr_stats.aggr_rts_aggr_limited++;
 
 		/*
 		 * If it's the only frame send as non-aggregate
 		 * assume that ath_tx_form_aggr() has checked
 		 * whether it's in the BAW and added it appropriately.
 		 */
 		if (bf->bf_state.bfs_nframes == 1) {
 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 			    "%s: single-frame aggregate\n", __func__);
 
 			/* Update CLRDMASK just before this frame is queued */
 			ath_tx_update_clrdmask(sc, tid, bf);
 
 			bf->bf_state.bfs_aggr = 0;
 			bf->bf_state.bfs_ndelim = 0;
 			ath_tx_setds(sc, bf);
 			ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
 			if (status == ATH_AGGR_BAW_CLOSED)
 				sc->sc_aggr_stats.aggr_baw_closed_single_pkt++;
 			else
 				sc->sc_aggr_stats.aggr_single_pkt++;
 		} else {
 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
 			    "%s: multi-frame aggregate: %d frames, "
 			    "length %d\n",
 			     __func__, bf->bf_state.bfs_nframes,
 			    bf->bf_state.bfs_al);
 			bf->bf_state.bfs_aggr = 1;
 			sc->sc_aggr_stats.aggr_pkts[bf->bf_state.bfs_nframes]++;
 			sc->sc_aggr_stats.aggr_aggr_pkt++;
 
 			/* Update CLRDMASK just before this frame is queued */
 			ath_tx_update_clrdmask(sc, tid, bf);
 
 			/*
 			 * Calculate the duration/protection as required.
 			 */
 			ath_tx_calc_duration(sc, bf);
 			ath_tx_calc_protection(sc, bf);
 
 			/*
 			 * Update the rate and rtscts information based on the
 			 * rate decision made by the rate control code;
 			 * the first frame in the aggregate needs it.
 			 */
 			ath_tx_set_rtscts(sc, bf);
 
 			/*
 			 * Setup the relevant descriptor fields
 			 * for aggregation. The first descriptor
 			 * already points to the rest in the chain.
 			 */
 			ath_tx_setds_11n(sc, bf);
 
 		}
 	queuepkt:
 		/* Set completion handler, multi-frame aggregate or not */
 		bf->bf_comp = ath_tx_aggr_comp;
 
 		if (bf->bf_state.bfs_tid == IEEE80211_NONQOS_TID)
 			DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: TID=16?\n", __func__);
 
 		/*
 		 * Update leak count and frame config if were leaking frames.
 		 *
 		 * XXX TODO: it should update all frames in an aggregate
 		 * correctly!
 		 */
 		ath_tx_leak_count_update(sc, tid, bf);
 
 		/* Punt to txq */
 		ath_tx_handoff(sc, txq, bf);
 
 		/* Track outstanding buffer count to hardware */
 		/* aggregates are "one" buffer */
 		tid->hwq_depth++;
 
 		/*
 		 * Break out if ath_tx_form_aggr() indicated
 		 * there can't be any further progress (eg BAW is full.)
 		 * Checking for an empty txq is done above.
 		 *
 		 * XXX locking on txq here?
 		 */
 		/* XXX TXQ locking */
 		if (txq->axq_aggr_depth >= sc->sc_hwq_limit_aggr ||
 		    (status == ATH_AGGR_BAW_CLOSED ||
 		     status == ATH_AGGR_LEAK_CLOSED))
 			break;
 	}
 }
 
 /*
  * Push queued frames from a node/TID to the hardware without any
  * aggregation.
  *
  * The TX lock must be held by the caller.  Frames are dequeued one
  * at a time for as long as the TID is allowed to transmit and has
  * frames queued; each one gets the normal completion handler, has
  * its rate and descriptors programmed, and is handed to the TXQ.
  *
  * XXX TODO: this routine doesn't enforce the maximum TXQ depth.
  * It just dumps frames into the TXQ.  We should limit how deep
  * the transmit queue can grow for frames dispatched to the given
  * TXQ.
  *
  * To avoid locking issues, either we need to own the TXQ lock
  * at this point, or we need to pass in the maximum frame count
  * from the caller.
  */
 void
 ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an,
     struct ath_tid *tid)
 {
 	struct ath_txq *hwq = sc->sc_ac2q[tid->ac];
 	struct ath_buf *bf;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: node %p: TID %d: called\n",
 	    __func__, an, tid->tid);
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/* Neither of these is expected here; log it if it happens */
 	if (ath_tx_ampdu_pending(sc, an, tid->tid)) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, ampdu pending?\n",
 		    __func__, tid->tid);
 	}
 	if (ath_tx_ampdu_running(sc, an, tid->tid)) {
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, ampdu running?\n",
 		    __func__, tid->tid);
 	}
 
 	/*
 	 * Stop as soon as the upper layers have paused the TID or
 	 * the software queue has run dry.
 	 *
 	 * XXX if we are leaking frames, make sure we decrement
 	 * that counter _and_ we continue here.
 	 */
 	while (ath_tx_tid_can_tx_or_sched(sc, tid) &&
 	    (bf = ATH_TID_FIRST(tid)) != NULL) {
 		ATH_TID_REMOVE(tid, bf, bf_list);
 
 		/* Sanity check! */
 		if (bf->bf_state.bfs_tid != tid->tid) {
 			DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bfs_tid %d !="
 			    " tid %d\n", __func__, bf->bf_state.bfs_tid,
 			    tid->tid);
 		}
 
 		/* Non-aggregate frames use the normal completion path */
 		bf->bf_comp = ath_tx_normal_comp;
 
 		/*
 		 * Force CLRDMASK for now, until the non-aggregate
 		 * completion handler correctly handles software
 		 * retransmits.
 		 */
 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
 
 		/* Update CLRDMASK just before this frame is queued */
 		ath_tx_update_clrdmask(sc, tid, bf);
 
 		/* Rate control first, then the descriptors */
 		ath_tx_do_ratelookup(sc, bf);
 		ath_tx_calc_duration(sc, bf);
 		ath_tx_calc_protection(sc, bf);
 		ath_tx_set_rtscts(sc, bf);
 		ath_tx_rate_fill_rcflags(sc, bf);
 		ath_tx_setds(sc, bf);
 
 		/*
 		 * If frames are being leaked, account for this one
 		 * and set the MORE flag as appropriate.
 		 */
 		ath_tx_leak_count_update(sc, tid, bf);
 
 		/* One more buffer outstanding to the hardware */
 		tid->hwq_depth++;
 
 		/* Punt to hardware or software txq */
 		ath_tx_handoff(sc, hwq, bf);
 	}
 }
 
 /*
  * Schedule some packets to the given hardware queue.
  *
  * Walks the TIDs currently scheduled on this TXQ and tries to push
  * traffic from each of them to the hardware, using the aggregate
  * or the normal path depending on whether A-MPDU TX is running.
  *
  * The TID list works as a FIFO: a TID is taken off the list when it
  * is visited and is rescheduled afterwards if it still has frames
  * queued.  The TX lock must be held by the caller.
  */
 void
 ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
 {
 	struct ath_tid *tid, *tid_next, *tid_last;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
 	 * Leave a busy hardware queue alone.  This (hopefully) gives
 	 * the aggregation queue some more time to collect packets.
 	 *
 	 * XXX It doesn't stop a parallel sender from sneaking
 	 * in transmitting a frame!
 	 */
 	/* XXX TXQ locking */
 	if (txq->axq_aggr_depth + txq->fifo.axq_depth >=
 	    sc->sc_hwq_limit_aggr ||
 	    txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
 		sc->sc_aggr_stats.aggr_sched_nopkt++;
 		return;
 	}
 
 	/* Remember where the list ended before anything is requeued */
 	tid_last = TAILQ_LAST(&txq->axq_tidq, axq_t_s);
 
 	TAILQ_FOREACH_SAFE(tid, &txq->axq_tidq, axq_qelem, tid_next) {
 		/*
 		 * Suspend paused queues here; they'll be resumed
 		 * once the addba completes or times out.
 		 */
 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, paused=%d\n",
 		    __func__, tid->tid, tid->paused);
 		ath_tx_tid_unsched(sc, tid);
 
 		/*
 		 * The node may be in power-save with a frame being
 		 * leaked to it, so check before transmitting anything.
 		 */
 		if (ath_tx_tid_can_tx_or_sched(sc, tid)) {
 			if (ath_tx_ampdu_running(sc, tid->an, tid->tid))
 				ath_tx_tid_hw_queue_aggr(sc, tid->an, tid);
 			else
 				ath_tx_tid_hw_queue_norm(sc, tid->an, tid);
 
 			/* Not empty? Re-schedule */
 			if (tid->axq_depth != 0)
 				ath_tx_tid_sched(sc, tid);
 
 			/*
 			 * Give the software queue time to aggregate
 			 * more packets.  Even without aggregation the
 			 * hardware queue depth should stay limited.
 			 */
 			/* XXX TXQ locking */
 			if (txq->axq_aggr_depth + txq->fifo.axq_depth >=
 			    sc->sc_hwq_limit_aggr ||
 			    txq->axq_depth >= sc->sc_hwq_limit_nonaggr)
 				break;
 		}
 
 		/*
 		 * Stop once the last entry of the original list has
 		 * been handled.  Otherwise TIDs that were rescheduled
 		 * onto the end of the FIFO would just keep coming
 		 * around again.
 		 *
 		 * XXX What should we do about nodes that were paused
 		 * but are pending a leaking frame in response to a ps-poll?
 		 * They'll be put at the front of the list; so they'll
 		 * prematurely trigger this condition! Ew.
 		 */
 		if (tid == tid_last)
 			break;
 	}
 }
 
 /*
  * TX addba handling
  */
 
 /*
  * Look up the net80211 TX A-MPDU state for a TID on the given node.
  *
  * Returns NULL when tid is IEEE80211_NONQOS_TID; otherwise a pointer
  * to the matching entry of the node's ni_tx_ampdu[] array.
  */
 struct ieee80211_tx_ampdu *
 ath_tx_get_tx_tid(struct ath_node *an, int tid)
 {
 
 	/* There is no A-MPDU state to hand back for the non-QoS TID */
 	if (tid == IEEE80211_NONQOS_TID)
 		return (NULL);
 
 	return (&an->an_node.ni_tx_ampdu[tid]);
 }
 
 /*
  * Report whether an A-MPDU TX session is running on the given TID.
  *
  * Returns 1 if IEEE80211_AGGR_RUNNING is set in the TID's txa_flags,
  * and 0 otherwise (including for the non-QoS TID or when no A-MPDU
  * state is available).
  */
 static int
 ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an, int tid)
 {
 	struct ieee80211_tx_ampdu *ampdu;
 
 	if (tid == IEEE80211_NONQOS_TID)
 		return (0);
 
 	ampdu = ath_tx_get_tx_tid(an, tid);
 
 	/* No state available: default to "not running" */
 	return (ampdu != NULL &&
 	    (ampdu->txa_flags & IEEE80211_AGGR_RUNNING) != 0);
 }
 
 /*
  * Report whether A-MPDU TX negotiation is outstanding on the given TID.
  *
  * Returns 1 if IEEE80211_AGGR_XCHGPEND is set in the TID's txa_flags,
  * and 0 otherwise (including for the non-QoS TID or when no A-MPDU
  * state is available).
  */
 static int
 ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an, int tid)
 {
 	struct ieee80211_tx_ampdu *ampdu;
 
 	if (tid == IEEE80211_NONQOS_TID)
 		return (0);
 
 	ampdu = ath_tx_get_tx_tid(an, tid);
 
 	/* No state available: default to "not pending" */
 	return (ampdu != NULL &&
 	    (ampdu->txa_flags & IEEE80211_AGGR_XCHGPEND) != 0);
 }
 
 /*
  * Is AMPDU-TX pending for the given TID?
  */
 
 
 /*
  * Driver hook wrapped around the net80211 ADDBA request method.
  *
  * The TID is paused (once) and flagged as having an ADDBA exchange
  * pending before the request is passed on to the saved
  * sc_addba_request method, whose return value is handed back.
  *
  * XXX there's no timeout handler we can override?
  */
 int
 ath_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
     int dialogtoken, int baparamset, int batimeout)
 {
 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = tap->txa_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 
 	/*
 	 * XXX danger Will Robinson!
 	 *
 	 * The taskqueue may still be scheduling packets, but those
 	 * should all sit _before_ the addba sequence number.  net80211
 	 * however keeps self-assigning sequence numbers until addba
 	 * has been negotiated.
 	 *
 	 * Such packets used to simply be "paused" (which still works,
 	 * as they are handed to the driver through the same serialised
 	 * path that calls this routine) and then dequeued as aggregate
 	 * packets and added to the BAW once the session started.
 	 * These days "bf->bf_state.bfs_dobaw" is not set for them, so
 	 * they never enter BAW tracking; the first burst after addba
 	 * negotiation can then "hang" as it quickly falls outside
 	 * the BAW.
 	 *
 	 * The "eventual" fix is to tag these packets with dobaw.  The
 	 * sequence number net80211 gave us will be "after" the left
 	 * edge of the BAW and so will fall within it.
 	 */
 	ATH_TX_LOCK(sc);
 	/*
 	 * Until the net80211 HT code grows some (any) locking this
 	 * may be entered in parallel, BUT only a single
 	 * response/timeout will follow.  So only pause once.  Grr.
 	 */
 	if (!atid->addba_tx_pending) {
 		ath_tx_tid_pause(sc, atid);
 		atid->addba_tx_pending = 1;
 	}
 	ATH_TX_UNLOCK(sc);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: %6D: called; dialogtoken=%d, baparamset=%d, batimeout=%d\n",
 	    __func__, ni->ni_macaddr, ":",
 	    dialogtoken, baparamset, batimeout);
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: txa_start=%d, ni_txseqs=%d\n",
 	    __func__, tap->txa_start, ni->ni_txseqs[tid]);
 
 	/* Hand over to the method that was saved in the softc */
 	return (sc->sc_addba_request(ni, tap, dialogtoken, baparamset,
 	    batimeout));
 }
 
 /*
  * Driver hook wrapped around the net80211 ADDBA response method.
  *
  * The saved sc_addba_response method is run first; afterwards the
  * pending flag is cleared and the TID is resumed so TX can continue.
  * The value returned by sc_addba_response is handed back unchanged.
  *
  * Any packets TX'ed from this point should be "aggregate" (whether
  * aggregate or not) so the BAW is updated.
  *
  * Note! net80211 keeps self-assigning sequence numbers until
  * ampdu is negotiated, so the initially-negotiated BAW left edge
  * won't match ni->ni_txseq.  Being very dirty, the BAW left edge
  * is therefore "slid" here to match it.
  *
  * What likely SHOULD happen instead is that every packet following
  * the addba request is tagged as aggregate and queued as a
  * non-aggregate frame, thus updating the BAW.  For now the window
  * is simply slid.
  */
 int
 ath_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
     int status, int code, int batimeout)
 {
 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = tap->txa_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	int ret;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: %6D: called; status=%d, code=%d, batimeout=%d\n", __func__,
 	    ni->ni_macaddr, ":",
 	    status, code, batimeout);
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: txa_start=%d, ni_txseqs=%d\n",
 	    __func__, tap->txa_start, ni->ni_txseqs[tid]);
 
 	/*
 	 * This has to run before the TID is unpaused so the interface
 	 * flags are already up to date; otherwise there is a race
 	 * where the unpaused TID doesn't yet have
 	 * IEEE80211_AGGR_RUNNING set.
 	 */
 	ret = sc->sc_addba_response(ni, tap, status, code, batimeout);
 
 	ATH_TX_LOCK(sc);
 	atid->addba_tx_pending = 0;
 	/*
 	 * XXX dirty!
 	 * Move the BAW left edge to wherever net80211 left the TX
 	 * sequence number; see the function comment for the reason.
 	 */
 	tap->txa_start = ni->ni_txseqs[tid];
 	ath_tx_tid_resume(sc, atid);
 	ATH_TX_UNLOCK(sc);
 
 	return (ret);
 }
 
 
 /*
  * Stop ADDBA on a queue.
  *
  * This can be called whilst BAR TX is currently active on the queue,
  * so make sure this is unblocked before continuing.
  *
  * The sequence is: pause the TID (and release any BAR wait) under
  * the TX lock, call the saved sc_addba_stop method with the lock
  * dropped, then re-take the lock to run the TID cleanup.  Buffers
  * that the cleanup places on the local bf_cq list are completed
  * once the lock has been released again.
  */
 void
 ath_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
 {
 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
 	int tid = tap->txa_tid;
 	struct ath_node *an = ATH_NODE(ni);
 	struct ath_tid *atid = &an->an_tid[tid];
 	ath_bufhead bf_cq;	/* buffers to complete after unlocking */
 	struct ath_buf *bf;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: %6D: called\n",
 	    __func__,
 	    ni->ni_macaddr,
 	    ":");
 
 	/*
 	 * Pause TID traffic early, so there aren't any races
 	 * Unblock the pending BAR held traffic, if it's currently paused.
 	 */
 	ATH_TX_LOCK(sc);
 	ath_tx_tid_pause(sc, atid);
 	if (atid->bar_wait) {
 		/*
 		 * bar_unsuspend() expects bar_tx == 1, as it should be
 		 * called from the TX completion path.  This quietens
 		 * the warning.  It's cleared for us anyway.
 		 */
 		atid->bar_tx = 1;
 		ath_tx_tid_bar_unsuspend(sc, atid);
 	}
 	ATH_TX_UNLOCK(sc);
 
 	/* There's no need to hold the TXQ lock here */
 	sc->sc_addba_stop(ni, tap);
 
 	/*
 	 * ath_tx_tid_cleanup will resume the TID if possible, otherwise
 	 * it'll set the cleanup flag, and it'll be unpaused once
 	 * things have been cleaned up.
 	 */
 	TAILQ_INIT(&bf_cq);
 	ATH_TX_LOCK(sc);
 
 	/*
 	 * In case there's a followup call to this, only call it
 	 * if we don't have a cleanup in progress.
 	 *
 	 * Since we've paused the queue above, we need to make
 	 * sure we unpause if there's already a cleanup in
 	 * progress - it means something else is also doing
 	 * this stuff, so we don't need to also keep it paused.
 	 */
 	if (atid->cleanup_inprogress) {
 		/* Undo only the pause taken at the top of this function */
 		ath_tx_tid_resume(sc, atid);
 	} else {
 		ath_tx_tid_cleanup(sc, an, tid, &bf_cq);
 		/*
 		 * Unpause the TID if no cleanup is required.
 		 */
 		if (! atid->cleanup_inprogress)
 			ath_tx_tid_resume(sc, atid);
 	}
 	ATH_TX_UNLOCK(sc);
 
 	/* Handle completing frames and fail them */
 	/* This runs with the TX lock released (see the unlock above) */
 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
 		ath_tx_default_comp(sc, bf, 1);
 	}
 
 }
 
 /*
  * Handle a node reassociation.
  *
  * Frames may still be queued to the hardware for this node; every
  * TID with a non-zero hwq_depth is run through the TID cleanup path.
  * Buffers handed back by the cleanup are completed (as failed) after
  * the TX lock has been dropped.
  *
  * Must be called without the TX lock held.
  */
 void
 ath_tx_node_reassoc(struct ath_softc *sc, struct ath_node *an)
 {
 	ath_bufhead bf_cq;
 	struct ath_buf *bf;
 	struct ath_tid *tid;
 	int i;
 
 	ATH_TX_UNLOCK_ASSERT(sc);
 
 	TAILQ_INIT(&bf_cq);
 
 	ATH_TX_LOCK(sc);
 	for (i = 0; i < IEEE80211_TID_SIZE; i++) {
 		tid = &an->an_tid[i];
 
 		/* Nothing outstanding to the hardware on this TID */
 		if (tid->hwq_depth == 0)
 			continue;
 
 		DPRINTF(sc, ATH_DEBUG_NODE,
 		    "%s: %6D: TID %d: cleaning up TID\n",
 		    __func__, an->an_node.ni_macaddr, ":", i);
 
 		/*
 		 * A followup call may find a cleanup already underway;
 		 * don't start a second one in that case.
 		 */
 		if (tid->cleanup_inprogress)
 			continue;
 
 		ath_tx_tid_pause(sc, tid);
 		ath_tx_tid_cleanup(sc, an, i, &bf_cq);
 
 		/* No cleanup turned out to be needed: unpause again */
 		if (!tid->cleanup_inprogress)
 			ath_tx_tid_resume(sc, tid);
 	}
 	ATH_TX_UNLOCK(sc);
 
 	/* Complete (and fail) whatever the cleanup handed back */
 	for (bf = TAILQ_FIRST(&bf_cq); bf != NULL;
 	    bf = TAILQ_FIRST(&bf_cq)) {
 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
 		ath_tx_default_comp(sc, bf, 1);
 	}
 }
 
 /*
  * Driver hook wrapped around the net80211 BAR response method.
  *
  * The saved sc_bar_response method is called with the TX lock held
  * and any change it makes to tap->txa_start is reverted.  The TID's
  * BAR suspension is then lifted if the BAR succeeded (status == 0)
  * or the attempt count has reached 50.
  *
  * Note: net80211 bar_timeout() doesn't call this function on BAR
  * failure; it simply tears down the aggregation session. Ew.
  * It will however call ieee80211_ampdu_stop(), which in turn calls
  * ic->ic_addba_stop().
  *
  * XXX This uses a hard-coded max BAR count value; the whole
  * XXX BAR TX success or failure should be better handled!
  */
 void
 ath_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
     int status)
 {
 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = tap->txa_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 	/* Sampled up front, before the net80211 method gets to run */
 	int attempts = tap->txa_attempts;
 	int saved_start;
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 	    "%s: %6D: called; txa_tid=%d, atid->tid=%d, status=%d, attempts=%d, txa_start=%d, txa_seqpending=%d\n",
 	    __func__, ni->ni_macaddr, ":",
 	    tap->txa_tid, atid->tid, status, attempts,
 	    tap->txa_start, tap->txa_seqpending);
 
 	/*
 	 * Note: the net80211 method may update the BAW details.
 	 *
 	 * XXX What if it does slide the BAW along? Either that needs
 	 * XXX fixing up when it happens, or seqpending must be kept
 	 * XXX from being anything other than exactly what we want.
 	 *
 	 * XXX For now this is done inside the TX lock and simply
 	 * XXX corrected afterwards.  The mismatch below should never
 	 * XXX happen; if it does, all kinds of things need fixing.
 	 */
 	ATH_TX_LOCK(sc);
 	saved_start = tap->txa_start;
 	sc->sc_bar_response(ni, tap, status);
 	if (saved_start != tap->txa_start) {
 		device_printf(sc->sc_dev, "%s: tid=%d; txa_start=%d, old=%d, adjusting\n",
 		    __func__, tid, tap->txa_start, saved_start);
 	}
 	tap->txa_start = saved_start;
 	ATH_TX_UNLOCK(sc);
 
 	/*
 	 * Only unpause on success or once attempt 50 is reached.
 	 *
 	 * XXX at attempt=50 the TID is downgraded to a non-aggregate
 	 * XXX session, so it has to be unpaused here or it never
 	 * XXX will be.
 	 */
 	if (status != 0 && attempts != 50)
 		return;
 
 	ATH_TX_LOCK(sc);
 	/*
 	 * Don't unsuspend if bar_tx/bar_wait are 0; something has
 	 * beaten us to the punch? (XXX figure out what?)
 	 */
 	if (atid->bar_tx != 0 && atid->bar_wait != 0) {
 		ath_tx_tid_bar_unsuspend(sc, atid);
 	} else {
 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
 		    "%s: huh? bar_tx=%d, bar_wait=%d\n",
 		    __func__, atid->bar_tx, atid->bar_wait);
 	}
 	ATH_TX_UNLOCK(sc);
 }
 
 /*
  * Driver hook run when the pending ADDBA request times out.
  *
  * The pending flag is cleared, the saved sc_addba_response_timeout
  * method is called with the TX lock dropped, and finally the TID
  * is resumed.
  */
 void
 ath_addba_response_timeout(struct ieee80211_node *ni,
     struct ieee80211_tx_ampdu *tap)
 {
 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
 	struct ath_node *an = ATH_NODE(ni);
 	int tid = tap->txa_tid;
 	struct ath_tid *atid = &an->an_tid[tid];
 
 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
 	    "%s: %6D: TID=%d, called; resuming\n",
 	    __func__, ni->ni_macaddr, ":", tid);
 
 	/* The ADDBA exchange is no longer outstanding */
 	ATH_TX_LOCK(sc);
 	atid->addba_tx_pending = 0;
 	ATH_TX_UNLOCK(sc);
 
 	/* Note: this puts the aggregate state back to pending (again) */
 	sc->sc_addba_response_timeout(ni, tap);
 
 	/* Resuming the TID also gets it rescheduled */
 	ATH_TX_LOCK(sc);
 	ath_tx_tid_resume(sc, atid);
 	ATH_TX_UNLOCK(sc);
 }
 
 /*
  * Return the node's power-save flag (an_is_powersave).
  *
  * The caller must hold the TX lock.
  */
 int
 ath_tx_node_is_asleep(struct ath_softc *sc, struct ath_node *an)
 {
 	int asleep;
 
 	ATH_TX_LOCK_ASSERT(sc);
 
 	asleep = an->an_is_powersave;
 	return (asleep);
 }
 
 /*
  * Mark a node as currently "in powersaving."
  * This suspends all traffic on the node: every TID of the node is
  * paused and an_is_powersave is set.  If the node is already marked
  * as being in power-save, nothing is changed.
  *
  * This must be called with the node/tx locks free.
  *
  * XXX TODO: the locking silliness below is due to how the node
  * locking currently works.  Right now, the node lock is grabbed
  * to do rate control lookups and these are done with the TX
  * queue lock held.  This means the node lock can't be grabbed
  * first here or a LOR will occur.
  *
  * Eventually (hopefully!) the TX path code will only grab
  * the TXQ lock when transmitting and the ath_node lock when
  * doing node/TID operations.  There are other complications -
  * the sched/unsched operations involve walking the per-txq
  * 'active tid' list and this requires both locks to be held.
  */
 void
 ath_tx_node_sleep(struct ath_softc *sc, struct ath_node *an)
 {
 	struct ath_tid *atid;
 	int tid;
 
 	ATH_TX_UNLOCK_ASSERT(sc);
 
 	/* Suspend all traffic on the node */
 	ATH_TX_LOCK(sc);
 
 	/* Already asleep: don't pause the TIDs a second time */
 	if (an->an_is_powersave) {
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: %6D: node was already asleep!\n",
 		    __func__, an->an_node.ni_macaddr, ":");
 		ATH_TX_UNLOCK(sc);
 		return;
 	}
 
 	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
 		atid = &an->an_tid[tid];
 
 		ath_tx_tid_pause(sc, atid);
 	}
 
 	/* Mark node as in powersaving */
 	an->an_is_powersave = 1;
 
 	ATH_TX_UNLOCK(sc);
 }
 
 /*
  * Mark a node as currently "awake."
  * This resumes all traffic to the node: an_is_powersave and
  * an_leak_count are cleared and every TID of the node is resumed.
  * If the node is not marked as being in power-save, nothing is
  * changed.
  *
  * This must be called without the TX lock held.
  */
 void
 ath_tx_node_wakeup(struct ath_softc *sc, struct ath_node *an)
 {
 	struct ath_tid *atid;
 	int tid;
 
 	ATH_TX_UNLOCK_ASSERT(sc);
 
 	ATH_TX_LOCK(sc);
 
 	/* !? */
 	/* Already awake: don't resume the TIDs a second time */
 	if (an->an_is_powersave == 0) {
 		ATH_TX_UNLOCK(sc);
 		DPRINTF(sc, ATH_DEBUG_XMIT,
 		    "%s: an=%p: node was already awake\n",
 		    __func__, an);
 		return;
 	}
 
 	/* Mark node as awake */
 	an->an_is_powersave = 0;
 	/*
 	 * Clear any pending leaked frame requests
 	 */
 	an->an_leak_count = 0;
 
 	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
 		atid = &an->an_tid[tid];
 
 		ath_tx_tid_resume(sc, atid);
 	}
 	ATH_TX_UNLOCK(sc);
 }
 
 /*
  * TX DMA setup hook for the legacy path; there is no extra
  * state to set up, so this always returns 0.
  */
 static int
 ath_legacy_dma_txsetup(struct ath_softc *sc)
 {
 
 	(void)sc;	/* nothing new needed */
 	return (0);
 }
 
 /*
  * TX DMA teardown hook for the legacy path; there is no extra
  * state to tear down, so this always returns 0.
  */
 static int
 ath_legacy_dma_txteardown(struct ath_softc *sc)
 {
 
 	(void)sc;	/* nothing new needed */
 	return (0);
 }
 
 /*
  * Fill in the softc's TX descriptor sizing fields and the sc_tx
  * method table with the legacy implementations.
  */
 void
 ath_xmit_setup_legacy(struct ath_softc *sc)
 {
 
 	/* Setup / teardown / completion attach methods */
 	sc->sc_tx.xmit_setup = ath_legacy_dma_txsetup;
 	sc->sc_tx.xmit_teardown = ath_legacy_dma_txteardown;
 	sc->sc_tx.xmit_attach_comp_func = ath_legacy_attach_comp_func;
 
 	/* DMA restart, frame hand-off and drain methods */
 	sc->sc_tx.xmit_dma_restart = ath_legacy_tx_dma_restart;
 	sc->sc_tx.xmit_handoff = ath_legacy_xmit_handoff;
 	sc->sc_tx.xmit_drain = ath_legacy_tx_drain;
 
 	/*
 	 * The descriptor and status lengths are just set to
 	 * sizeof(ath_desc) for now; extracting the real length
 	 * out of the HAL can be worried about later.
 	 */
 	sc->sc_tx_desclen = sizeof(struct ath_desc);
 	sc->sc_tx_statuslen = sizeof(struct ath_desc);
 	sc->sc_tx_nmaps = 1;	/* only one buffer per TX desc */
 }